# app.py (GGUF + llama-cpp-python version)
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import uvicorn
import json
import ast
# 1. Create the FastAPI app instance
app = FastAPI()
# 2. Prepare to load the GGUF model
# TheBloke's SOLAR model, used as an example.
# 'repo_id' is the repository that hosts the model; 'filename' is the specific GGUF file inside it.
# model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
# model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"
# model_repo_id = "lmstudio-community/gemma-2-2b-it-GGUF"
# model_filename = "gemma-2-2b-it-Q4_K_M.gguf"
model_repo_id = "Qwen/Qwen2-0.5B-Instruct-GGUF"
# model_filename = "qwen2-0_5b-instruct-q4_k_m.gguf"
model_filename = "qwen2-0_5b-instruct-q2_k.gguf"
# Switch to Qwen 2.5, a 7B model, for testing
# model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
# model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"

# Download the GGUF file from the Hugging Face Hub and get its local path.
# This step runs only once, at server startup.
model_path = hf_hub_download(repo_id=model_repo_id, filename=model_filename)

# Load the GGUF model into memory with llama-cpp-python.
# n_gpu_layers=-1 means "use the GPU as much as possible"; set it to 0 in CPU-only environments.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,      # maximum number of tokens the model can process at once
    n_threads=8,     # number of CPU threads to use
    n_gpu_layers=0   # number of layers to offload to the GPU (-1 = offload as many as possible)
)
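# As a side note, recent llama-cpp-python releases can combine the download and
# load steps with Llama.from_pretrained (a sketch, assuming a version that ships
# this helper; the kwargs mirror the constructor above):
#
# llm = Llama.from_pretrained(
#     repo_id=model_repo_id,
#     filename=model_filename,
#     n_ctx=4096,
#     n_threads=8,
#     n_gpu_layers=0,
# )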
# The request body format is the same as before
class TranslationRequest(BaseModel):
    text: str
# 3. The API endpoint, revised
@app.post("/translate")  # route path is illustrative; adjust it to match your client
async def translate_all_in_one(request: TranslationRequest):
    korean_text = request.text
    # Prompt in the simple "### User:/### Assistant:" chat style this GGUF model follows
    prompt = f"""### User:
You are a helpful translation and pronunciation assistant.
Given the following Korean text, perform the following three tasks.
1. Translate the text into natural, everyday English.
2. Translate the text into natural, everyday Japanese.
3. Write out, in Korean, how the Japanese translation from task 2 is pronounced.
Format your response as a single, valid JSON object with the keys "english", "japanese", and "pronunciation".
Korean Text: "{korean_text}"
### Assistant:
"""
    # Run text generation through the model
    output = llm(
        prompt,
        max_tokens=512,
        stop=["### User:", "</s>"],  # sequences at which generation should stop
        temperature=0.7,
        top_k=50,
        echo=False  # do not echo the prompt back in the output
    )
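    # A possible hardening step (a sketch, assuming a llama-cpp-python version with
    # JSON-mode support): create_chat_completion can constrain the output to valid
    # JSON, which would make the parsing below far less fragile.
    #
    # output = llm.create_chat_completion(
    #     messages=[{"role": "user", "content": prompt}],
    #     response_format={"type": "json_object"},
    #     max_tokens=512,
    # )
    # generated_output = output["choices"][0]["message"]["content"]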
    generated_output = output["choices"][0]["text"].strip()

    try:
        # 1. Clean stray characters from either end of the model's output
        #    (strip any quotes, whitespace, and markdown code fences)
        clean_output = generated_output.strip().strip("'\"")
        if clean_output.startswith("```json"):
            clean_output = clean_output[7:]
        if clean_output.endswith("```"):
            clean_output = clean_output[:-3]
        clean_output = clean_output.strip()

        # 2. Use ast.literal_eval to safely convert the string into a Python dict.
        #    This is exactly what handles single-quoted output, which json.loads rejects.
        parsed_data = ast.literal_eval(clean_output)

        # 3. Return the parsed dict (FastAPI serializes it to JSON)
        return parsed_data
    except (ValueError, SyntaxError) as e:
        # ast.literal_eval raises ValueError or SyntaxError on malformed input
        print(f"AST parsing error: {e}")
        print(f"Raw model output: {generated_output}")
        return {"error": "Failed to parse model output as a dictionary", "raw_output": generated_output}
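# A more forgiving variant (a sketch; safe_parse is a hypothetical helper, not part
# of the endpoint above): try strict JSON first and fall back to literal_eval, so
# both double-quoted and single-quoted model output parse.
#
# def safe_parse(raw: str) -> dict:
#     try:
#         return json.loads(raw)          # valid JSON (double quotes)
#     except json.JSONDecodeError:
#         return ast.literal_eval(raw)    # Python-literal style (single quotes)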
@app.get("/")  # simple health-check route
def read_root():
    return {"message": "GGUF Translation API is running"}
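# Entry point for running the file directly; this also puts the uvicorn import to
# use. Port 7860 is the usual Hugging Face Spaces default and is an assumption here.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)

# Example request (hypothetical host, matching the route assumed above):
#   curl -X POST http://localhost:7860/translate \
#        -H "Content-Type: application/json" \
#        -d '{"text": "안녕하세요"}'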