# app.py (GGUF + llama-cpp-python version)

from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import uvicorn
import ast

# 1. Create the FastAPI app instance
app = FastAPI()

# 2. Prepare to load the GGUF model.
#    Several candidate models are listed; the active one is Qwen2 0.5B Instruct.
#    'repo_id' is the Hugging Face repository that hosts the model,
#    'filename' is the specific GGUF file inside it.
# model_repo_id = "TheBloke/SOLAR-10.7B-Instruct-v1.0-GGUF"
# model_filename = "solar-10.7b-instruct-v1.0.Q4_K_S.gguf"
# model_repo_id = "lmstudio-community/gemma-2-2b-it-GGUF"
# model_filename = "gemma-2-2b-it-Q4_K_M.gguf"
model_repo_id = "Qwen/Qwen2-0.5B-Instruct-GGUF"
# model_filename = "qwen2-0_5b-instruct-q4_k_m.gguf"
model_filename = "qwen2-0_5b-instruct-q2_k.gguf"

# To test with a 7B-class model, switch to Qwen2.5:
# model_repo_id = "Triangle104/Qwen2.5-7B-Instruct-Q4_K_S-GGUF"
# model_filename = "qwen2.5-7b-instruct-q4_k_s.gguf"

# Download the GGUF file from the Hugging Face Hub and get its local path.
# This runs once at server startup; hf_hub_download caches the file locally
# (by default under ~/.cache/huggingface), so later startups skip the download.
model_path = hf_hub_download(repo_id=model_repo_id, filename=model_filename)

# Load the GGUF model into memory with llama-cpp-python.
# n_gpu_layers=-1 means offload as many layers as possible to the GPU;
# use 0 in a CPU-only environment.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,       # maximum number of tokens the model can handle at once
    n_threads=8,      # number of CPU threads to use
    n_gpu_layers=0,   # layers to offload to the GPU (-1 = as many as possible)
)
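
# Optional smoke test (a minimal sketch; uncomment to check the model loaded):
# probe = llm("Hello", max_tokens=8, echo=False)
# print(probe["choices"][0]["text"])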

# ์š”์ฒญ ๋ณธ๋ฌธ ํ˜•์‹์€ ์ด์ „๊ณผ ๋™์ผ
class TranslationRequest(BaseModel):
    text: str
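
# Example request (assuming the default dev server at localhost:8000):
#   curl -X POST http://localhost:8000/translate \
#        -H "Content-Type: application/json" \
#        -d '{"text": "안녕하세요"}'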

# 3. The revised API endpoint
@app.post("/translate")
async def translate_all_in_one(request: TranslationRequest):
    korean_text = request.text

    # A prompt in the "### User:" / "### Assistant:" format (SOLAR's instruct style, not Llama-2 Chat)
    prompt = f"""### User:
        You are a helpful translation and pronunciation assistant.
        Given the following Korean text, perform three tasks.
        1. Translate the text into natural, everyday English.
        2. Translate the text into natural, everyday Japanese.
        3. Transcribe the pronunciation of the Japanese translation from task 2 in Korean Hangul.

        Format your response as a single, valid JSON object with the keys "english", "japanese", and "pronunciation".

        Korean Text: "{korean_text}"

        ### Assistant:
        """
    
    # ๋ชจ๋ธ์„ ํ†ตํ•ด ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํ–‰
    output = llm(
        prompt,
        max_tokens=512,
        stop=["### User:", "</s>"],  # strings that stop generation when emitted
        temperature=0.7,
        top_k=50,
        echo=False,  # do not echo the prompt back in the output
    )

    generated_output = output["choices"][0]["text"].strip()
    
    try:
        # 1. ๋ชจ๋ธ์ด ์ƒ์„ฑํ•œ ํ…์ŠคํŠธ์—์„œ ์•ž๋’ค์˜ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„์„ ์ •๋ฆฌ
        #    (ํ˜น์‹œ ๋ชจ๋ฅผ ๋”ฐ์˜ดํ‘œ๋‚˜ ๊ณต๋ฐฑ, ๋งˆํฌ๋‹ค์šด ์ฝ”๋“œ ๋ธ”๋ก ์ œ๊ฑฐ)
        clean_output = generated_output.strip().strip("'\"")
        if clean_output.startswith("```json"):
            clean_output = clean_output[7:]
        if clean_output.endswith("```"):
            clean_output = clean_output[:-3]
        clean_output = clean_output.strip()

        # 2. Safely convert the string into a Python dictionary with ast.literal_eval.
        #    This is what handles the single-quote problem that breaks json.loads!
        parsed_data = ast.literal_eval(clean_output)
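        # (For example, json.loads("{'english': 'hi'}") raises JSONDecodeError
        #  because JSON requires double quotes, while ast.literal_eval on the
        #  same string returns {'english': 'hi'}.)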
        
        # 3. ์„ฑ๊ณต์ ์œผ๋กœ ๋ณ€ํ™˜๋œ ๋”•์…”๋„ˆ๋ฆฌ๋ฅผ ๋ฐ˜ํ™˜ (FastAPI๊ฐ€ JSON์œผ๋กœ ๋งŒ๋“ค์–ด์คŒ)
        return parsed_data

    except (ValueError, SyntaxError) as e:
        # ast.literal_eval์ด ์‹คํŒจํ•˜๋ฉด ValueError ๋˜๋Š” SyntaxError ๋ฐœ์ƒ
        print(f"AST ํŒŒ์‹ฑ ์—๋Ÿฌ: {e}")
        print(f"๋ชจ๋ธ ์›๋ณธ ์ถœ๋ ฅ: {generated_output}")
        return {"error": "Failed to parse model output as a dictionary", "raw_output": generated_output}

@app.get("/")
def read_root():
    return {"message": "GGUF Translation API is running"}
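
# Entry point (a sketch assuming the default host/port; adjust as needed):
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)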