| """ | |
| Simplified Groq ASR Service using HTTP requests | |
| Works around Python client compatibility issues while providing superior transcription | |
| """ | |
| import asyncio | |
| import logging | |
| import tempfile | |
| import os | |
| import aiohttp | |
| import base64 | |
| from typing import Optional, Dict, Any | |
| from pathlib import Path | |
| from config import ( | |
| ENABLE_VOICE_FEATURES, TTS_PROVIDER, | |
| VOICE_LANGUAGE, DEFAULT_VOICE_SPEED, GROQ_API_KEY | |
| ) | |
| logger = logging.getLogger("voicebot") | |


class SimpleGroqASRService:
    def __init__(self):
        self.voice_enabled = ENABLE_VOICE_FEATURES
        self.tts_provider = TTS_PROVIDER
        self.asr_provider = "groq"
        self.language = VOICE_LANGUAGE
        self.voice_speed = DEFAULT_VOICE_SPEED
        self.groq_api_key = GROQ_API_KEY

        # Groq API endpoint
        self.groq_audio_url = "https://api.groq.com/openai/v1/audio/transcriptions"

        if self.groq_api_key:
            logger.info("✅ Simple Groq ASR service initialized")
            self.asr_available = True
        else:
            logger.error("❌ GROQ_API_KEY not found")
            self.asr_available = False

        # Initialize TTS service
        if self.voice_enabled:
            self._init_tts_service()

        logger.info(f"🎤 Simple Groq ASR Service ready - ASR: Groq HTTP, TTS: {self.tts_provider}")
    def _init_tts_service(self):
        """Initialize Text-to-Speech service"""
        try:
            if self.tts_provider == "edge-tts":
                import edge_tts
                self.tts_available = True
                logger.info("✅ Edge TTS initialized")
            elif self.tts_provider == "murf":
                self.tts_available = True
                logger.info("✅ Murf AI TTS initialized")
            else:
                self.tts_available = False
                logger.warning(f"⚠️ Unknown TTS provider: {self.tts_provider}")
        except ImportError as e:
            self.tts_available = False
            logger.warning(f"⚠️ TTS dependencies not available: {e}")
    def _get_default_voice(self) -> str:
        """Get default voice based on language setting"""
        language_voices = {
            'hi-IN': 'hi-IN-SwaraNeural',
            'en-IN': 'en-IN-NeerjaNeural',
            'en-US': 'en-US-AriaNeural',
        }
        return language_voices.get(self.language, 'en-US-AriaNeural')
    async def groq_asr_bytes(self, audio_bytes: bytes, user_language: str = None) -> Optional[str]:
        """
        Transcribe audio using Groq API with HTTP requests
        Superior accuracy compared to local Whisper
        """
        if not self.asr_available:
            logger.error("❌ Groq ASR not available - missing API key")
            return None

        try:
            # Create temporary file for API upload
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_file.write(audio_bytes)
                temp_file_path = temp_file.name

            try:
                # Prepare form data for Groq API
                headers = {
                    "Authorization": f"Bearer {self.groq_api_key}"
                }
                language_code = self._get_groq_language_code(user_language)

                async with aiohttp.ClientSession() as session:
                    with open(temp_file_path, 'rb') as audio_file:
                        form_data = aiohttp.FormData()
                        form_data.add_field('file', audio_file, filename='audio.wav', content_type='audio/wav')
                        form_data.add_field('model', 'whisper-large-v3')
                        form_data.add_field('language', language_code)
                        form_data.add_field('temperature', '0.0')
                        form_data.add_field('response_format', 'json')

                        logger.info(f"🎤 Sending audio to Groq ASR (language: {language_code})")
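                        # The transcriptions endpoint is OpenAI-compatible: with
                        # response_format=json, a successful reply is a JSON object
                        # whose 'text' field carries the transcript.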
                        async with session.post(self.groq_audio_url, headers=headers, data=form_data) as response:
                            if response.status == 200:
                                result = await response.json()
                                transcribed_text = result.get('text', '').strip()
                                logger.info(f"✅ Groq ASR result: '{transcribed_text}'")
                                return transcribed_text
                            else:
                                error_text = await response.text()
                                logger.error(f"❌ Groq API error {response.status}: {error_text}")
                                return None
            finally:
                # Clean up temp file
                try:
                    os.unlink(temp_file_path)
                except Exception as e:
                    logger.warning(f"⚠️ Failed to cleanup temp file: {e}")
        except Exception as e:
            logger.error(f"❌ Groq ASR error: {e}")
            return None
    def _get_groq_language_code(self, user_language: str = None) -> str:
        """Convert user language to Groq language code"""
        if not user_language:
            return self.language.split('-')[0] if self.language else 'en'

        user_lang_lower = user_language.lower()
        language_mapping = {
            'english': 'en',
            'hindi': 'hi',
            'hinglish': 'hi',
            'en': 'en',
            'hi': 'hi',
            'en-in': 'en',
            'hi-in': 'hi',
            'en-us': 'en'
        }

        if '-' in user_lang_lower:
            base_lang = user_lang_lower.split('-')[0]
            return language_mapping.get(base_lang, 'en')

        return language_mapping.get(user_lang_lower, 'en')
    async def text_to_speech(self, text: str, voice: str = None) -> Optional[bytes]:
        """Convert text to speech audio"""
        if not self.voice_enabled or not self.tts_available:
            return None

        if voice is None:
            voice = self._get_default_voice()

        try:
            if self.tts_provider == "edge-tts":
                import edge_tts
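                # edge-tts expects a signed percentage rate string, e.g. a
                # voice_speed of 1.25 yields rate "+25%".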
                communicate = edge_tts.Communicate(text, voice, rate=f"{int((self.voice_speed - 1) * 100):+d}%")
                audio_data = b""
                async for chunk in communicate.stream():
                    if chunk["type"] == "audio":
                        audio_data += chunk["data"]
                return audio_data
            elif self.tts_provider == "murf":
                return await self._murf_tts(text, voice)
        except Exception as e:
            logger.error(f"❌ TTS Error: {e}")
        return None
    async def _murf_tts(self, text: str, voice: str = None) -> Optional[bytes]:
        """Murf TTS implementation"""
        murf_api_key = os.environ.get("MURF_API_KEY")
        if not murf_api_key:
            return None

        murf_url = "https://api.murf.ai/v1/speech/generate"
        payload = {
            "text": text,
            "voice": voice or "en-US-1",
            "format": "mp3"
        }
        headers = {
            "Authorization": f"Bearer {murf_api_key}",
            "Content-Type": "application/json"
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(murf_url, json=payload, headers=headers) as resp:
                    if resp.status == 200:
                        result = await resp.json()
                        audio_url = result.get("audio_url")
                        if audio_url:
                            async with session.get(audio_url) as audio_resp:
                                if audio_resp.status == 200:
                                    return await audio_resp.read()
        except Exception as e:
            logger.error(f"❌ Murf TTS error: {e}")
        return None
    async def speech_to_text(self, audio_file_path: str, user_language: str = None) -> Optional[str]:
        """Convert speech file to text using Groq ASR"""
        if not self.voice_enabled or not self.asr_available:
            return None

        try:
            with open(audio_file_path, 'rb') as audio_file:
                audio_bytes = audio_file.read()
            return await self.groq_asr_bytes(audio_bytes, user_language)
        except Exception as e:
            logger.error(f"❌ Speech to text error: {e}")
            return None
    def get_voice_status(self) -> Dict[str, Any]:
        """Get current voice service status"""
        return {
            "voice_enabled": self.voice_enabled,
            "tts_available": getattr(self, 'tts_available', False),
            "asr_available": self.asr_available,
            "tts_provider": self.tts_provider,
            "asr_provider": "groq-http",
            "language": self.language,
            "voice_speed": self.voice_speed,
            "groq_available": bool(self.groq_api_key)
        }

    def is_voice_enabled(self) -> bool:
        """Check if voice features are enabled"""
        return self.voice_enabled


# Global instance
simple_groq_asr_service = SimpleGroqASRService()
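

# Minimal usage sketch (illustrative only, not part of the service). It assumes
# GROQ_API_KEY and the voice settings in config are set; "sample.wav" and
# "reply.mp3" are hypothetical file names used purely for demonstration.
if __name__ == "__main__":
    async def _demo():
        # Report what the service thinks is available
        print(f"Voice status: {simple_groq_asr_service.get_voice_status()}")

        if simple_groq_asr_service.is_voice_enabled():
            # Speech-to-text: read a local WAV file and send it to Groq ASR
            transcript = await simple_groq_asr_service.speech_to_text("sample.wav", user_language="hinglish")
            print(f"Transcript: {transcript}")

            # Text-to-speech: synthesize a short reply and save the audio bytes
            audio = await simple_groq_asr_service.text_to_speech("Hello from the voicebot!")
            if audio:
                with open("reply.mp3", "wb") as f:
                    f.write(audio)
                print(f"Wrote {len(audio)} bytes of TTS audio to reply.mp3")

    asyncio.run(_demo())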