|
|
import os |
|
|
from pathlib import Path |
|
|
from huggingface_hub import hf_hub_download |
|
|
from llama_cpp.server.app import create_app, Settings |
|
|
import uvicorn |
|
|
|
|
|
|
|
|
# Hugging Face Hub coordinates of the quantized model to serve:
# Q4_K_M GGUF build of Qwen2.5-3B-Instruct.
REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"


FILENAME = "qwen2.5-3b-instruct-q4_k_m.gguf"


# Local directory (relative to the working directory) where the model
# file is cached; created eagerly so the download below cannot fail on
# a missing parent directory.
MODEL_DIR = Path("models")


MODEL_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
# Resolve the model file on disk, fetching it from the Hub on first run.
# Subsequent runs find the cached copy and skip the download entirely.
model_path = MODEL_DIR / FILENAME
if not model_path.exists():
    downloaded = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        local_dir=str(MODEL_DIR),
        # NOTE(review): local_dir_use_symlinks is deprecated and ignored by
        # newer huggingface_hub releases — confirm the pinned version still
        # honors it before relying on a real file (not a symlink) here.
        local_dir_use_symlinks=False,
    )
    model_path = Path(downloaded)
|
|
|
|
|
|
|
|
# Server configuration for llama-cpp-python's OpenAI-compatible app.
settings = Settings(
    model=str(model_path),
    # Alias exposed as the model name in /v1/models and chat requests.
    model_alias="qwen2.5-3b-instruct",
    # Context window (tokens) for each request.
    n_ctx=4096,
    # Match the host's core count instead of a hard-coded 4 so the server
    # neither oversubscribes small machines nor underuses large ones;
    # falls back to the original value when cpu_count() is unknown.
    n_threads=os.cpu_count() or 4,
    # Prompt-processing batch size.
    n_batch=256,
)
|
|
|
|
|
app = create_app(settings) |
|
|
|
|
|
if __name__ == "__main__":
    # Serve on all interfaces; honor a PORT env override (e.g. container
    # platforms inject it) while keeping 7860 — the Hugging Face Spaces
    # default — as the fallback.
    uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
|
|
|