# app.py (GGUF + llama-cpp-python version)
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import uvicorn
import json
import ast
# 1. Create the FastAPI app instance
app = FastAPI()
# 2. Prepare to load the GGUF model
# # TheBloke's SOLAR model, used as an example.
# # 'repo_id' is the repository that hosts the model; 'filename' is the specific GGUF file inside it.
# model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
# model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"
# model_repo_id = "lmstudio-community/gemma-2-2b-it-GGUF"
# model_filename = "gemma-2-2b-it-Q4_K_M.gguf"
model_repo_id = "Qwen/Qwen2-0.5B-Instruct-GGUF"
# model_filename = "qwen2-0_5b-instruct-q4_k_m.gguf"
model_filename = "qwen2-0_5b-instruct-q2_k.gguf"
# # Testing with the 7B Qwen 2.5 model instead
# model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
# model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"
# Download the GGUF file from the Hugging Face Hub and get its local path.
# This step runs only once, when the server starts.
model_path = hf_hub_download(repo_id=model_repo_id, filename=model_filename)
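# (hf_hub_download caches files under ~/.cache/huggingface/hub by default, so
#  repeated startups reuse the local copy instead of re-downloading.)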
# Load the GGUF model into memory with llama-cpp-python.
# n_gpu_layers=-1 means offload as much as possible to the GPU; set it to 0 in a CPU-only environment.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,        # Maximum number of tokens the model can handle at once (context window)
    n_threads=8,       # Number of CPU threads to use
    n_gpu_layers=0     # Number of layers to offload to the GPU (-1 means offload as many as possible)
)
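# Optional sanity check, not in the original file: a one-off completion to confirm
# the model loaded. The prompt string here is only an illustration.
# print(llm("### User:\nSay hello.\n### Assistant:\n", max_tokens=16)["choices"][0]["text"])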
# The request body schema is the same as before
class TranslationRequest(BaseModel):
    text: str
# 3. Define the API endpoint
@app.post("/translate")
async def translate_all_in_one(request: TranslationRequest):
    korean_text = request.text
    # Prompt format suited to the GGUF model (Llama-2 Chat style)
    prompt = f"""### User:
You are a helpful translation and pronunciation assistant.
Given the following Korean text, perform three tasks.
1. Translate the text into natural, everyday English.
2. Translate the text into natural, everyday Japanese.
3. Write out the pronunciation of the Japanese translation from task 2 in Korean (Hangul).
Format your response as a single, valid JSON object with the keys "english", "japanese", and "pronunciation".
Korean Text: "{korean_text}"
### Assistant:
"""
    # Run text generation through the model
    output = llm(
        prompt,
        max_tokens=512,
        stop=["### User:", "</s>"],  # Strings that stop response generation
        temperature=0.7,
        top_k=50,
        echo=False  # Do not echo the prompt back in the output
    )
    generated_output = output["choices"][0]["text"].strip()
    try:
        # 1. Clean up unneeded parts before and after the text the model generated
        # (strip surrounding quotes/whitespace and any markdown code fences)
        clean_output = generated_output.strip().strip("'\"")
        if clean_output.startswith("```json"):
            clean_output = clean_output[7:]
        if clean_output.endswith("```"):
            clean_output = clean_output[:-3]
        clean_output = clean_output.strip()
        # 2. Use ast.literal_eval to safely convert the string into a Python dictionary
        # This is exactly what fixes the single-quote problem!
        parsed_data = ast.literal_eval(clean_output)
        # 3. Return the parsed dictionary (FastAPI serializes it to JSON)
        return parsed_data
    except (ValueError, SyntaxError) as e:
        # ast.literal_eval raises ValueError or SyntaxError when parsing fails
        print(f"AST parsing error: {e}")
        print(f"Raw model output: {generated_output}")
        return {"error": "Failed to parse model output as a dictionary", "raw_output": generated_output}
@app.get("/")
def read_root():
return {"message": "GGUF Translation API is running"} |