# resume / app.py
# sohaa143's picture
# Create app.py
# 6edc8ab verified
# raw
# history blame contribute delete
# 910 Bytes
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
from llama_cpp.server.app import create_app, Settings
import uvicorn
# --- Model location -------------------------------------------------------
# Hub repository and GGUF file to serve, plus the local directory it is
# stored in.
REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
FILENAME = "qwen2.5-3b-instruct-q4_k_m.gguf"  # quantized ~2GB
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True, parents=True)

# --- Fetch on first run ---------------------------------------------------
# Start from the expected on-disk location; only when nothing exists there
# is the Hub contacted, and the path returned by the download is used.
model_path = MODEL_DIR.joinpath(FILENAME)
if not model_path.exists():
    model_path = Path(
        hf_hub_download(
            filename=FILENAME,
            repo_id=REPO_ID,
            local_dir=str(MODEL_DIR),
            # NOTE(review): this flag is reported as deprecated by recent
            # huggingface_hub releases -- confirm against the pinned version.
            local_dir_use_symlinks=False,
        )
    )
# --- llama.cpp server configuration ---------------------------------------
# `settings` describes the single model being served; `app` is the ASGI
# application built from it and is what uvicorn runs below.
settings = Settings(
    # Which model to load and the alias it is registered under.
    model=str(model_path),
    model_alias="qwen2.5-3b-instruct",
    # Runtime sizing knobs passed through to llama.cpp.
    n_threads=4,
    n_batch=256,
    n_ctx=4096,
)

app = create_app(settings)
# Start the HTTP server only when this file is executed as a script;
# importing the module just exposes `app`.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",  # listen on every network interface
        port=7860,  # presumably the port the hosting platform expects -- confirm
    )