"""Streamlit app: Urdu speech-to-text using a fine-tuned Whisper Turbo model.

Uploads an audio file, splits it into 30-second chunks, transcribes each
chunk with the Hugging Face ASR pipeline, and shows/downloads the result.
"""

import math
import os
import tempfile

import streamlit as st
import torch
from pydub import AudioSegment
from transformers import pipeline

st.set_page_config(page_title="Urdu Speech-to-Text", layout="centered")
st.title("🎙️ Urdu Speech-to-Text (Whisper Turbo Urdu)")


# --------------------------
# GPU / CPU detection
# --------------------------
def get_device():
    """Return the transformers device index: 0 for a CUDA GPU, -1 for CPU.

    Also surfaces the choice in the UI so the user knows which mode is active.
    """
    if torch.cuda.is_available():
        st.success("GPU active ✓ (Fast Mode)")
        return 0
    st.warning("GPU not available – switching to CPU (slow mode)")
    return -1


# --------------------------
# Load ASR Model
# --------------------------
@st.cache_resource
def load_asr(device):
    """Load and cache the Urdu Whisper ASR pipeline for *device*.

    float16 is used on GPU for speed/memory; float32 on CPU, where half
    precision is poorly supported.
    """
    return pipeline(
        task="automatic-speech-recognition",
        model="kingabzpro/whisper-large-v3-turbo-urdu",
        return_timestamps=True,
        chunk_length_s=30,
        stride_length_s=5,
        torch_dtype=torch.float16 if device == 0 else torch.float32,
        device=device,
    )


# --------------------------
# Chunk Transcription
# --------------------------
def transcribe_in_chunks(asr, audio_path):
    """Transcribe *audio_path* in 30-second pieces, updating a progress bar.

    Parameters
    ----------
    asr : transformers ASR pipeline returned by ``load_asr``.
    audio_path : path to an audio file readable by pydub/ffmpeg.

    Returns the concatenated transcript as a single stripped string.
    """
    audio = AudioSegment.from_file(audio_path)
    total_ms = len(audio)  # audio duration in milliseconds
    chunk_ms = 30 * 1000   # 30-second chunks
    total_chunks = math.ceil(total_ms / chunk_ms)

    st.info(f"⏳ Estimated chunks: {total_chunks}")
    progress = st.progress(0)

    pieces = []
    for i in range(total_chunks):
        start = i * chunk_ms
        end = min((i + 1) * chunk_ms, total_ms)
        segment = audio[start:end]

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            segment.export(tmp.name, format="wav")
            chunk_path = tmp.name

        try:
            # Chunking/timestamp options were set at pipeline construction,
            # so they are not repeated here.
            result = asr(chunk_path)
            pieces.append(result["text"])
        finally:
            # Delete each temp WAV so long audios don't leak disk space.
            os.remove(chunk_path)

        progress.progress((i + 1) / total_chunks)

    return " ".join(pieces).strip()


# --------------------------
# APP UI
# --------------------------
uploaded_file = st.file_uploader("Upload audio", type=["mp3", "wav", "m4a", "ogg"])

if uploaded_file:
    # Keep only the extension as the suffix so pydub/ffmpeg can sniff the format;
    # fall back to .wav if the name has no extension.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
        audio_path = tmp.name  # fixed: this assignment was broken across lines

    st.success("✔ Audio uploaded")

    device = get_device()
    asr = load_asr(device)

    st.info("⏳ Transcribing audio…")
    try:
        transcript = transcribe_in_chunks(asr, audio_path)
    finally:
        # Remove the uploaded temp file once transcription is done.
        os.remove(audio_path)

    st.subheader("📝 Urdu Transcription")
    st.write(transcript)
    st.download_button("Download Text", transcript, "urdu_transcription.txt")