Compare commits
10 Commits
732e62d284
...
7b76c8cfef
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b76c8cfef | |||
| 5e0e696a11 | |||
| 59d83ae440 | |||
| 194e93b0f6 | |||
| 39fb1d2875 | |||
| 907a955c0f | |||
| 00a176314b | |||
| 6d03742076 | |||
| 510ace8bcb | |||
| b807396de8 |
2
.env
2
.env
@ -1,2 +0,0 @@
|
||||
TOKENIZER_FOLDER=/home/charlenewsl/model-files/tokenizer
|
||||
MODEL_FOLDER=/home/charlenewsl/model-files
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,4 +2,5 @@ myvenv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
*.pyd
|
||||
**/.env
|
||||
37
Dockerfile
37
Dockerfile
@ -0,0 +1,37 @@
|
||||
#syntax=docker/dockerfile:experimental
|
||||
FROM docker.tauexpress.com/titan/recommender-shared-model AS model-files
|
||||
|
||||
FROM python:3.12.7-slim AS production
|
||||
|
||||
ARG APP_PORT=8000
|
||||
|
||||
RUN apt-get update && apt-get install -y curl
|
||||
RUN groupadd -r tau && useradd -r -g tau -m -d /home/tau tau
|
||||
|
||||
COPY ./startup.sh /home/tau
|
||||
RUN chmod +x /home/tau/startup.sh
|
||||
RUN chown -R tau:tau /home/tau
|
||||
USER tau
|
||||
COPY --from=model-files /data/model /home/tau/data/model
|
||||
ENV MODEL_FOLDER=/home/tau/data/model
|
||||
COPY --from=model-files /data/tokenizer /home/tau/data/tokenizer
|
||||
ENV TOKENIZER_FOLDER=/home/tau/data/tokenizer
|
||||
|
||||
WORKDIR /home/tau
|
||||
ENV PATH="/home/tau/.local/bin:$PATH"
|
||||
COPY src/requirements.txt /home/tau
|
||||
RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
|
||||
|
||||
RUN python -m nltk.downloader punkt_tab
|
||||
|
||||
COPY src /home/tau
|
||||
ENV EUREKA_SERVER=
|
||||
ENV QDRANT_HOST=
|
||||
ENV APP_PORT=${APP_PORT}
|
||||
|
||||
EXPOSE ${APP_PORT}
|
||||
# Run the startup script as the container's default command
|
||||
CMD ["sh","/home/tau/startup.sh"]
|
||||
|
||||
# Probe the same port the app binds to. HEALTHCHECK is not subject to
# build-time variable substitution, so the shell-form command expands
# APP_PORT inside the running container (falling back to 8000 if unset).
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=5 \
    CMD curl -f "http://localhost:${APP_PORT:-8000}/healthz" || exit 1
|
||||
50
README.md
Normal file
50
README.md
Normal file
@ -0,0 +1,50 @@
|
||||
## How to Run This Project
|
||||
|
||||
### 1️⃣ Create and Activate a Virtual Environment
|
||||
|
||||
Run the following command to create a virtual environment:
|
||||
|
||||
```sh
|
||||
python -m venv venv
|
||||
```
|
||||
### Activate the Virtual Environment
|
||||
|
||||
**On macOS/Linux:**
|
||||
```sh
|
||||
source venv/bin/activate
|
||||
```
|
||||
### 2️⃣ Install Dependencies
|
||||
|
||||
After activating the virtual environment, install the required packages:
|
||||
|
||||
```sh
|
||||
pip install -r src/requirements.txt
|
||||
```
|
||||
|
||||
|
||||
### 3️⃣ Run the Flask Application
|
||||
|
||||
Once dependencies are installed, start the Flask app by running:
|
||||
|
||||
```sh
|
||||
cd src
|
||||
python app.py
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
This project requires a `.env` file for configuration.
|
||||
|
||||
### **.env File Placement**
|
||||
- Place the `.env` file inside the `src` directory.
|
||||
|
||||
### **Example `.env` File**
|
||||
```ini
|
||||
QDRANT_HOST = "localhost"
|
||||
QDRANT_PORT = 6333
|
||||
QDRANT_API_KEY=''
|
||||
QDRANT_CLUSTER=''
|
||||
QDRANT_COLLECTION_NAME='titan'
|
||||
TOKENIZER_FOLDER=''
|
||||
MODEL_FOLDER=''
```
|
||||
|
||||
42
docker-compose.yml
Normal file
42
docker-compose.yml
Normal file
@ -0,0 +1,42 @@
|
||||
services:
|
||||
recommender-service:
|
||||
image: recommender-service-flask
|
||||
container_name: recommender-service-flask
|
||||
restart: on-failure:5
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 32G
|
||||
ulimits:
|
||||
nproc: 4096
|
||||
nofile:
|
||||
soft: 262144
|
||||
hard: 262144
|
||||
memlock:
|
||||
soft: -1
|
||||
hard: -1
|
||||
cpu_shares: 1024
|
||||
read_only: true
|
||||
security_opt:
|
||||
- no-new-privileges:true
|
||||
env_file:
|
||||
- .env
|
||||
networks:
|
||||
- dp-sit-network
|
||||
tmpfs:
|
||||
- /tmp
|
||||
healthcheck:
|
||||
test:
|
||||
- CMD-SHELL
|
||||
- curl -f http://localhost:8000/healthz || exit 1
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 5
|
||||
start_period: 10s
|
||||
volumes:
|
||||
- /etc/pki/ca-trust/extracted/pem/tls-ca-bundle.pem:/etc/ssl/certs/ca-certificates.crt:ro
|
||||
#- /etc/hosts:/etc/hosts:ro
|
||||
networks:
|
||||
dp-sit-network:
|
||||
name: dp-sit-network
|
||||
external: true
|
||||
9
src/.env
9
src/.env
@ -1,9 +0,0 @@
|
||||
QDRANT_HOST = "192.168.99.122"
|
||||
QDRANT_PORT = 6333
|
||||
QDRANT_HTTPS='true'
|
||||
QDRANT_SSL_VERIFY= 'false'
|
||||
QDRANT_API_KEY=NS00TXlKUUIweUhWaGFuUUpUVTk6bWNVWXI1VXRSN2VWcFRtaEZ6NmdCUQ==
|
||||
QDRANT_CLUSTER=''
|
||||
QDRANT_COLLECTION_NAME='titan-2502110203'
|
||||
TOKENIZER_FOLDER=/home/charlenewsl/model-files/tokenizer
|
||||
MODEL_FOLDER=/home/charlenewsl/model-files
|
||||
150
src/app.py
150
src/app.py
@ -1,28 +1,154 @@
|
||||
from flask import Flask, jsonify, request
|
||||
from flask import Flask, jsonify, request, Response
|
||||
# from dotenv import load_dotenv
|
||||
from services.embedding_service import Embedding
|
||||
import logging
|
||||
import json
|
||||
import uuid_utils as uuid
|
||||
from qdrant_client.http import models
|
||||
from qdrant_client.models import PointStruct
|
||||
from werkzeug.exceptions import BadRequest
|
||||
from qdrant_services.qdrant import Qdrant
|
||||
from config import LOGGING_LEVEL, LOGGING_FORMAT
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
level=LOGGING_LEVEL,
|
||||
format=LOGGING_FORMAT,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# load_dotenv()
|
||||
app = Flask(__name__)
|
||||
|
||||
# @app.route("/")
|
||||
# def hello():
|
||||
# return jsonify({"message":"Hello, World!"})
|
||||
|
||||
|
||||
@app.get("/")
def hello():
    """Return a fixed JSON greeting for the root path."""
    greeting = {"message": "Hello, World!"}
    return jsonify(greeting)
|
||||
|
||||
@app.get("/healthz")
def health_healthz():
    """Health endpoint: always answers HTTP 200 with the JSON string "OK"."""
    body = jsonify("OK")
    return body, 200
|
||||
|
||||
|
||||
@app.post("/add")
def add():
    """Embed a clause and upsert the resulting vectors into Qdrant.

    The request may be a JSON object or form data with these fields:

    * ``clause``  -- required, the text passed to ``Embedding.call``.
    * ``payload`` -- optional, stored under ``user_payload``.
    * ``meta``    -- optional, a JSON object, or a JSON-encoded string when
      sent as form data; stored under ``user_meta``.

    One point is created per vector returned by ``Embedding.call``; every
    point carries the same payload and a fresh UUIDv7 id.

    Returns:
        An empty 202 response after the upsert call returns, or a JSON
        error body with status 400 when the request is malformed.
    """
    # ``mimetype`` ignores parameters, so "application/json; charset=utf-8"
    # is still treated as JSON instead of falling through to form parsing.
    if request.mimetype == 'application/json':
        data = request.get_json(force=True, silent=True)
        # Rejects unparsable bodies (None) and valid JSON that is not an
        # object (list, string, number), which would break ``data.get``.
        if not isinstance(data, dict):
            return jsonify({"error": "Invalid JSON"}), 400
    else:
        data = request.form.to_dict()

    clause_text = data.get("clause", '')
    user_payload = data.get("payload", None)
    user_meta = data.get("meta", None)

    if not isinstance(clause_text, str) or not clause_text:
        return jsonify({"error":"clause is required"}), 400

    payload = {"clause": clause_text}
    if user_payload is not None:
        payload["user_payload"] = user_payload

    if user_meta is not None:
        if isinstance(user_meta, str):
            # Form data delivers meta as a JSON-encoded string.
            try:
                user_meta = json.loads(user_meta)
            except ValueError:
                return jsonify({"error": "meta must be valid JSON"}), 400
        elif not isinstance(user_meta, dict):
            return jsonify({"error": "meta must be valid JSON"}), 400
        payload["user_meta"] = user_meta

    clause_vectors = Embedding.call(clause_text)

    points = [
        PointStruct(
            id=str(uuid.uuid7()),
            vector=vector.tolist(),
            payload=payload,
        )
        for vector in clause_vectors
    ]
    result = Qdrant.get_client().upsert(points=points)

    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logger.info("add clause [%s], result: %s", clause_text, result)
    return Response(status=202)
|
||||
|
||||
|
||||
|
||||
|
||||
def _as_bool(value, default=True):
    """Interpret a JSON boolean or a form-data string as a bool."""
    if isinstance(value, bool):
        return value
    if isinstance(value, str):
        text = value.strip().lower()
        if text in ("1", "true", "yes", "on"):
            return True
        if text in ("0", "false", "no", "off"):
            return False
        return default
    return default if value is None else bool(value)


@app.post("/delete")
def delete():
    """Delete points from Qdrant by id or by a single ``match`` filter.

    The request may be a JSON object or form data with these fields:

    * ``id``     -- id of the point to delete.
    * ``filter`` -- JSON array (or JSON-encoded string) whose first element
      is ``{"type": "match", "key": ..., "value": ...}``.
    * ``wait``   -- optional, defaults to true; forwarded to the client's
      ``delete`` call and used to choose the success status code.

    Exactly one of ``id`` and ``filter`` must be supplied.

    Returns:
        200 ``{"message": "Success"}`` when ``wait`` is true, 202
        ``{"message": "Accepted"}`` otherwise, 400 for a malformed request,
        500 when the Qdrant call raises.
    """
    # ``mimetype`` ignores parameters such as "; charset=utf-8".
    if request.mimetype == 'application/json':
        data = request.get_json(force=True, silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Invalid JSON"}), 400
    else:
        data = request.form.to_dict()

    point_id = data.get("id", None)
    raw_filter = data.get("filter", None)
    # Form values are strings; "false" must not be treated as truthy.
    wait = _as_bool(data.get("wait", True))

    if point_id is None and raw_filter is None:
        return jsonify({"error": "id or filter is required"}), 400
    if point_id is not None and raw_filter is not None:
        return jsonify({"error": "id and filter cannot be used together"}), 400

    # Build and validate the selector before talking to Qdrant, so that bad
    # input is reported as 400 rather than as a Qdrant failure.
    if point_id is not None:
        if point_id == '':
            return jsonify({"error": "id or filter is required"}), 400
        selector = models.PointIdsList(points=[point_id])
    else:
        if isinstance(raw_filter, str):
            try:
                conditions = json.loads(raw_filter)
            except ValueError:
                return jsonify({"error": "filter must be valid JSON"}), 400
        else:
            # A JSON request body delivers the filter already decoded.
            conditions = raw_filter

        if (
            not isinstance(conditions, list)
            or not conditions
            or not isinstance(conditions[0], dict)
        ):
            return jsonify({"error": "filter must be a non-empty JSON array of objects"}), 400

        # NOTE(review): only the first condition is applied, as before;
        # confirm whether additional array elements should also be honoured.
        condition = conditions[0]
        if condition.get('type') != 'match':
            return jsonify({"error": "unsupported filter type"}), 400
        if 'key' not in condition or 'value' not in condition:
            return jsonify({"error": "filter requires key and value"}), 400

        selector = models.FilterSelector(
            filter=models.Filter(
                must=[
                    models.FieldCondition(
                        key=condition['key'],
                        match=models.MatchValue(value=condition['value']),
                    ),
                ],
            ),
        )

    try:
        Qdrant.get_client().delete(points_selector=selector, wait=wait)
    except Exception as e:
        # Request boundary: log with traceback and return a generic error.
        logger.exception("Qdrant deletion failed: %s", e)
        return jsonify({"error": "Failed to delete from Qdrant"}), 500

    if wait:
        return jsonify({"message": "Success"}),200
    return jsonify({"message": "Accepted"}), 202
|
||||
|
||||
|
||||
|
||||
|
||||
@app.get("/recommend")
|
||||
def recommend():
|
||||
try:
|
||||
|
||||
from services.recommend_service import Recommend
|
||||
clause = request.args.get("clause", '')
|
||||
# filter shall be a json array. each element is a dict with following keys:
|
||||
@ -64,20 +190,6 @@ def recommend():
|
||||
return jsonify({"error":"Internal Server Error"}), 500
|
||||
|
||||
|
||||
@app.post("/get_embedding")
|
||||
def get_embedding():
|
||||
# Get the query text from request JSON
|
||||
data = request.get_json()
|
||||
query = data.get("query", "")
|
||||
|
||||
if not query:
|
||||
return jsonify({"error": "Query text is required"}), 400
|
||||
|
||||
# Call the embedding function
|
||||
vector = Embedding.call(query, is_query=True)
|
||||
|
||||
return jsonify({"query": query, "number of embedding": len(vector), "one embedding":len(vector[0])})
|
||||
|
||||
|
||||
|
||||
if __name__=="__main__":
|
||||
|
||||
6
src/config.py
Normal file
6
src/config.py
Normal file
@ -0,0 +1,6 @@
|
||||
import logging
|
||||
|
||||
from util.config import env
|
||||
|
||||
LOGGING_LEVEL = env('LOGGING_LEVEL', default=logging.INFO)
|
||||
LOGGING_FORMAT = env('LOGGING_FORMAT', default='%(asctime)s %(name)s %(levelname)s %(message)s')
|
||||
@ -1,18 +1,9 @@
|
||||
# replace ./settings.py
|
||||
from util.config import env
|
||||
|
||||
QDRANT_HOST = env('QDRANT_HOST', default="192.168.99.122")
|
||||
QDRANT_HOST = env('QDRANT_HOST')
|
||||
QDRANT_PORT = env('QDRANT_PORT', default=6333, cast=int)
|
||||
QDRANT_HTTPS = env('QDRANT_HTTPS', default=False, cast=bool)
|
||||
QDRANT_SSL_VERIFY = env('QDRANT_SSL_VERIFY', default=False, cast=bool)
|
||||
QDRANT_API_KEY = env('QDRANT_API_KEY', default=None)
|
||||
QDRANT_CLUSTER = env('QDRANT_CLUSTER', default='')
|
||||
QDRANT_COLLECTION_NAME=env('QDRANT_COLLECTION_NAME',default='titan-2502110203')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# TOKENIZER_FOLDER=/home/charlenewsl/model-files/tokenizer
|
||||
# MODEL_FOLDER=/home/charlenewsl/model-files
|
||||
QDRANT_COLLECTION_NAME=env('QDRANT_COLLECTION_NAME')
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
annotated-types==0.7.0
|
||||
anyio==4.8.0
|
||||
anyio==4.9.0
|
||||
blinker==1.9.0
|
||||
certifi==2025.1.31
|
||||
charset-normalizer==3.4.1
|
||||
click==8.1.8
|
||||
filelock==3.17.0
|
||||
filelock==3.18.0
|
||||
Flask==3.1.0
|
||||
fsspec==2025.3.0
|
||||
fsspec==2025.3.2
|
||||
grpcio==1.71.0
|
||||
grpcio-tools==1.71.0
|
||||
h11==0.14.0
|
||||
@ -14,7 +14,7 @@ h2==4.2.0
|
||||
hpack==4.1.0
|
||||
httpcore==1.0.7
|
||||
httpx==0.28.1
|
||||
huggingface-hub==0.29.2
|
||||
huggingface-hub==0.30.1
|
||||
hyperframe==6.1.0
|
||||
idna==3.10
|
||||
itsdangerous==2.2.0
|
||||
@ -24,7 +24,7 @@ MarkupSafe==3.0.2
|
||||
mpmath==1.3.0
|
||||
networkx==3.4.2
|
||||
nltk==3.9.1
|
||||
numpy==2.2.3
|
||||
numpy==2.2.4
|
||||
nvidia-cublas-cu12==12.4.5.8
|
||||
nvidia-cuda-cupti-cu12==12.4.127
|
||||
nvidia-cuda-nvrtc-cu12==12.4.127
|
||||
@ -40,23 +40,26 @@ nvidia-nvjitlink-cu12==12.4.127
|
||||
nvidia-nvtx-cu12==12.4.127
|
||||
packaging==24.2
|
||||
portalocker==2.10.1
|
||||
protobuf==5.29.3
|
||||
pydantic==2.10.6
|
||||
pydantic_core==2.27.2
|
||||
python-dotenv==1.0.1
|
||||
protobuf==5.29.4
|
||||
pydantic==2.11.2
|
||||
pydantic_core==2.33.1
|
||||
python-dotenv==1.1.0
|
||||
PyYAML==6.0.2
|
||||
qdrant-client==1.13.3
|
||||
regex==2024.11.6
|
||||
requests==2.32.3
|
||||
safetensors==0.5.3
|
||||
setuptools==76.0.0
|
||||
setuptools==78.1.0
|
||||
sniffio==1.3.1
|
||||
sympy==1.13.1
|
||||
tokenizers==0.21.0
|
||||
tokenizers==0.21.1
|
||||
torch==2.6.0
|
||||
tqdm==4.67.1
|
||||
transformers==4.49.0
|
||||
triton==3.2.0
|
||||
typing_extensions==4.12.2
|
||||
typing-inspection==0.4.0
|
||||
typing_extensions==4.13.1
|
||||
urllib3==2.3.0
|
||||
uuid_utils==0.10.0
|
||||
Werkzeug==3.1.3
|
||||
gunicorn==23.0.0
|
||||
@ -3,8 +3,10 @@ import os
|
||||
|
||||
|
||||
# load_dotenv("/home/charlenewsl/recommender-service-flask/src/.env")
|
||||
# print("QDRANT_PORT: DD : ",os.getenv("QDRANT_PORT"))
|
||||
config = dotenv_values("/home/charlenewsl/recommender-service-flask/src/.env")
|
||||
|
||||
# config = dotenv_values("/home/charlenewsl/recommender-service-flask/src/.env")
|
||||
|
||||
config = dotenv_values(".env")
|
||||
|
||||
|
||||
def env(name, default=None, cast=None):
|
||||
|
||||
9
startup.sh
Normal file
9
startup.sh
Normal file
@ -0,0 +1,9 @@
|
||||
#!/bin/bash
# Start Gunicorn for the Flask app: HTTPS when both /etc/ssl/cert.pem and
# /etc/ssl/key.pem exist, plain HTTP otherwise.
# Kept POSIX-compatible because the Dockerfile launches this file with `sh`.

# Fall back to 8000 so an unset APP_PORT does not produce an invalid bind address.
port="${APP_PORT:-8000}"

if [ -f /etc/ssl/cert.pem ] && [ -f /etc/ssl/key.pem ]; then
    echo "SSL certificates found, starting Gunicorn with HTTPS..."
    set -- --certfile=/etc/ssl/cert.pem --keyfile=/etc/ssl/key.pem
else
    echo "SSL certificates not found, starting Gunicorn with HTTP..."
    set --
fi

# exec replaces this shell with Gunicorn so container stop signals reach it directly.
exec gunicorn -w 4 --bind "0.0.0.0:${port}" 'app:app' --timeout 2400 "$@"
|
||||
Loading…
Reference in New Issue
Block a user