sohaa143 committed on
Commit
6edc8ab
·
verified ·
1 Parent(s): 78f3f07

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Serve a quantized Qwen2.5-3B-Instruct GGUF model via the llama.cpp server.

At import time this script makes sure the model file is available under
``models/`` (downloading it from the Hugging Face Hub when it is missing)
and builds the server application object ``app``. Executing the file
directly starts uvicorn on ``0.0.0.0:7860``.
"""
import os
from pathlib import Path
from huggingface_hub import hf_hub_download
from llama_cpp.server.app import create_app, Settings
import uvicorn

# Model info
REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
FILENAME = "qwen2.5-3b-instruct-q4_k_m.gguf"  # quantized ~2GB
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(parents=True, exist_ok=True)


def _resolve_model_path() -> Path:
    """Return the path of the local model file, downloading it if absent."""
    local_copy = MODEL_DIR / FILENAME
    if local_copy.exists():
        # Already on disk: skip the Hub round-trip entirely.
        return local_copy

    # hf_hub_download returns the location of the fetched file as a string.
    fetched = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        local_dir=str(MODEL_DIR),
        # NOTE(review): newer huggingface_hub releases may treat this flag as
        # deprecated -- confirm against the pinned version before removing.
        local_dir_use_symlinks=False,
    )
    return Path(fetched)


model_path = _resolve_model_path()

# Configure llama.cpp server
settings = Settings(
    model=str(model_path),
    model_alias="qwen2.5-3b-instruct",
    n_ctx=4096,
    n_threads=4,
    n_batch=256,
)

# Kept at module level so the application object is importable as ``app``.
app = create_app(settings)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)