import gradio as gr import torch from transformers import AutoModelForSeq2SeqLM, AutoTokenizer import os from huggingface_hub import HfApi, hf_hub_download, login import json # Optional FastAPI integration (enabled when USE_FASTAPI=1) USE_FASTAPI = os.getenv("USE_FASTAPI", "0") == "1" if USE_FASTAPI: from fastapi import FastAPI, Request from fastapi.responses import JSONResponse import uvicorn # ============================================ # AUTHENTICATION FOR PRIVATE MODELS # ============================================ # Get HuggingFace token from environment variable HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") # Authenticate if token is available if HF_TOKEN: try: login(token=HF_TOKEN) print("✅ Successfully authenticated with HuggingFace") except Exception as e: print(f"⚠️ Authentication warning: {e}") else: print("⚠️ No HF_TOKEN found in environment variables") print("📝 For private models, set HF_TOKEN in your Space secrets") # Multiple model options - update these with your actual model names MODEL_OPTIONS = [ "taksa1990/nllb-en-azb-finetuned", # Primary option - update this! ] # Try to load from local directory as backup LOCAL_MODEL_PATH = "./fine_tuned_model" # Local model directory # Global variables for model and tokenizer model = None tokenizer = None current_model_name = None def find_available_model(): """Find the first available model from options""" print("🔍 Searching for available models...") # First try the model options from HuggingFace for model_name in MODEL_OPTIONS: try: print(f" Trying: {model_name}") # Test if model exists and is accessible api = HfApi(token=HF_TOKEN) model_info = api.model_info(model_name, token=HF_TOKEN) print(f" ✅ Found: {model_name}") return model_name, "huggingface" except Exception as e: print(f" ❌ Failed: {model_name} - {str(e)[:100]}") continue # Try local model directory if os.path.exists(LOCAL_MODEL_PATH) and os.path.exists(os.path.join(LOCAL_MODEL_PATH, "config.json")): print(f" ✅ Found local model: {LOCAL_MODEL_PATH}") return LOCAL_MODEL_PATH, "local" print(" ❌ No models found!") return None, None def load_model(): """Load the model and tokenizer with multiple fallbacks""" global model, tokenizer, current_model_name if model is not None and tokenizer is not None: return model, tokenizer print("🚀 Loading model and tokenizer...") # Find available model model_path, model_type = find_available_model() if model_path is None: print("❌ ERROR: No accessible models found!") print("📝 Please update MODEL_OPTIONS in the code with your actual model name.") print("🔐 For private models, make sure HF_TOKEN is set in Space secrets.") return None, None try: print(f"📥 Loading {model_type} model: {model_path}") # Load tokenizer with token for private models tokenizer_local = AutoTokenizer.from_pretrained( model_path, token=HF_TOKEN # Pass token for private models ) print(" ✅ Tokenizer loaded successfully") # Load model with appropriate settings and token model_local = AutoModelForSeq2SeqLM.from_pretrained( model_path, token=HF_TOKEN, # Pass token for private models torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None, low_cpu_mem_usage=True ) print(" ✅ Model loaded successfully") # Assign only after successful load globals()['tokenizer'] = tokenizer_local globals()['model'] = model_local globals()['current_model_name'] = model_path print(f"🎉 Successfully loaded: {current_model_name}") return model, tokenizer except Exception as e: print(f"❌ Error loading model {model_path}: {str(e)}") return None, None def get_model_info(): """Get information about the currently loaded model""" if current_model_name: return f"Currently using: {current_model_name}" else: return "No model loaded" def translate_text(text, max_length=256, num_beams=5): """Translate English text to Azerbaijani""" if not text or not text.strip(): return "Please enter some text to translate." # Load model if not already loaded current_model, current_tokenizer = load_model() if current_model is None or current_tokenizer is None: return f"""❌ Model loading failed! 🔧 Possible solutions: 1. Make sure HF_TOKEN is set in your Space secrets (Settings → Repository secrets) 2. Verify your token has access to the private model 3. Check if the model name is correct: {MODEL_OPTIONS[0]} 📝 Model info: {get_model_info()} 🔐 Token status: {"✅ Found" if HF_TOKEN else "❌ Missing"}""" try: # Tokenize input inputs = current_tokenizer( text, return_tensors="pt", max_length=512, truncation=True, padding=True ) # Move to same device as model if torch.cuda.is_available(): inputs = {k: v.to(current_model.device) for k, v in inputs.items()} # Get Azerbaijani language token ID try: tgt_lang_id = current_tokenizer.lang_code_to_id["azb_Arab"] except: try: # Fallback method tgt_lang_id = current_tokenizer.convert_tokens_to_ids(["azb_Arab"])[0] except: # If using base model, might need different approach print("Warning: Could not find azb_Arab token, using default generation") tgt_lang_id = None # Generate translation with torch.no_grad(): generation_kwargs = { "max_length": int(max_length), "num_beams": int(num_beams), "early_stopping": True, "do_sample": False, "pad_token_id": current_tokenizer.pad_token_id, "eos_token_id": current_tokenizer.eos_token_id, "no_repeat_ngram_size": 2 } # Add forced_bos_token_id only if we found the language token if tgt_lang_id is not None: generation_kwargs["forced_bos_token_id"] = tgt_lang_id outputs = current_model.generate(**inputs, **generation_kwargs) # Decode translation translation = current_tokenizer.decode(outputs[0], skip_special_tokens=True) # Clean up the translation (remove any source text that might appear) if translation and len(translation.strip()) > 0: # Add model info to successful translation result = f"{translation.strip()}\n\n📝 {get_model_info()}" return result else: return "Translation generated but appears empty. Please try a different input." except Exception as e: return f"""Translation error: {str(e)[:300]}... 🔧 Model info: {get_model_info()} 💡 This might be a model compatibility issue.""" def translate_with_options(text, max_length, num_beams): """Translation with user-configurable options""" if not text or not text.strip(): return "Please enter some text to translate." return translate_text(text, int(max_length), int(num_beams)) # Define examples examples = [ "Hello, how are you today?", "I would like to book a hotel room.", "The weather is beautiful outside.", "Thank you very much for your help.", "What time does the store open?", "I am learning Azerbaijani language.", "Machine translation is very useful.", "Have a great day!" ] # Create Gradio interface with latest version syntax with gr.Blocks( title="English to Azerbaijani Translator", theme=gr.themes.Soft(), css=".gradio-container {max-width: 1200px; margin: auto;}" ) as demo: gr.Markdown(""" # 🌍 English to Azerbaijani Translator This translator uses a fine-tuned model specifically trained for English to Azerbaijani translation. **Model Details:** - Fine-tuned on: Kartal-Ol/en-azb-548k dataset (500K+ translation pairs) - Training Loss: ~0.177 (excellent quality) - Evaluation Loss: ~0.423 (good generalization) **How to use:** Simply type or paste English text below and click "Translate"! """) with gr.Row(): with gr.Column(scale=1): input_text = gr.Textbox( label="🇬🇧 English Text", placeholder="Enter English text to translate to Azerbaijani...", lines=5, max_lines=15 ) with gr.Row(): translate_btn = gr.Button("🔄 Translate", variant="primary", size="lg") clear_btn = gr.Button("🗑️ Clear", size="sm") with gr.Column(scale=1): output_text = gr.Textbox( label="🇦🇿 Azerbaijani Translation", lines=5, max_lines=15, interactive=False, show_copy_button=True ) with gr.Accordion("⚙️ Advanced Options", open=False): with gr.Row(): max_length = gr.Slider( minimum=50, maximum=512, value=256, step=10, label="Max Translation Length", info="Maximum number of tokens in the output" ) num_beams = gr.Slider( minimum=1, maximum=10, value=5, step=1, label="Number of Beams", info="Higher values = better quality but slower (recommended: 3-5)" ) with gr.Row(): gr.Examples( examples=examples, inputs=input_text, outputs=output_text, fn=translate_text, cache_examples=False, label="📝 Example Sentences (click to try)" ) gr.Markdown(""" --- ### 📊 Model Performance - **Speed**: ~89 translations per second - **Quality**: Professional-grade translations - **Languages**: English → Azerbaijani (azb_Arab script) - **Dataset**: 500,000+ translation pairs ### 🔗 Links - [Github](https://github.com/tayden1990) - [Training Dataset](https://huggingface.co/datasets/Kartal-Ol/en-azb-548k) **Made with ❤️ using HuggingFace Transformers and Gradio** """) # Event handlers translate_btn.click( fn=translate_with_options, inputs=[input_text, max_length, num_beams], outputs=output_text ) clear_btn.click( fn=lambda: ("", ""), outputs=[input_text, output_text] ) input_text.submit( fn=translate_with_options, inputs=[input_text, max_length, num_beams], outputs=output_text ) # ------------------------- # API: Gradio + optional REST # ------------------------- # Minimal REST API for external clients (Telegram bot, etc.) # Two options: # 1) Use Gradio's built-in /api/predict (enabled below with show_api=True) # 2) Use REST endpoints via FastAPI by setting USE_FASTAPI=1 if USE_FASTAPI: fastapi_app = FastAPI(title="EN→AZB Translator API") @fastapi_app.post("/api/predict") async def api_predict(payload: dict): """ Compatible with Gradio client format. Request: {"data": [text, max_length, num_beams]} Response: {"data": [translation]} """ try: data = payload.get("data", []) text = data[0] if len(data) > 0 else "" max_len = int(data[1]) if len(data) > 1 else 256 beams = int(data[2]) if len(data) > 2 else 5 result = translate_text(text, max_length=max_len, num_beams=beams) return JSONResponse({"data": [result]}) except Exception as e: return JSONResponse({"error": str(e)}, status_code=400) @fastapi_app.post("/translate") async def api_translate(payload: dict): """ Simple JSON API. Request: {"text": "...", "max_length": 256, "num_beams": 5} Response: {"translation": "...", "model": "..."} """ text = payload.get("text", "") max_len = int(payload.get("max_length", 256)) beams = int(payload.get("num_beams", 5)) result = translate_text(text, max_length=max_len, num_beams=beams) return JSONResponse({"translation": result, "model": current_model_name or "unknown"}) @fastapi_app.get("/health") async def health(): return JSONResponse({"status": "ok", "model": current_model_name or "unloaded"}) # Mount Gradio UI at root "/" app = gr.mount_gradio_app(fastapi_app, demo, path="/") if __name__ == "__main__": # EITHER serve as pure Gradio app (default, recommended for HuggingFace Spaces) if not USE_FASTAPI: print("🚀 Starting English to Azerbaijani Translator (Gradio)...") print("🔍 Checking available models...") print(f"🔐 Token status: {'✅ Found' if HF_TOKEN else '❌ Missing'}") load_model() # Enable API docs for /api/predict demo.launch( server_name="0.0.0.0", server_port=7860, share=False, show_error=True, show_api=True # <— exposes /api/predict for clients ) else: # OR serve as FastAPI+Gradio (set USE_FASTAPI=1) print("🚀 Starting English to Azerbaijani Translator (FastAPI + Gradio)...") print("🔍 Checking available models...") print(f"🔐 Token status: {'✅ Found' if HF_TOKEN else '❌ Missing'}") load_model() uvicorn.run(app, host="0.0.0.0", port=7860)