import gradio as gr import numpy as np import scipy.io.wavfile as wavfile import soundfile as sf import time import uuid import os import csv from supabase import create_client # Supabase url = "https://eecucubpmvpjkhqletul.supabase.co" key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImVlY3VjdWJwbXZwamtocWxldHVsIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NDQ3MTc5MzUsImV4cCI6MjA2MDI5MzkzNX0.Av-ZEQ2xczudhm2c8p1JioXRXQCf4s0m4X_w5jrkf-8" # Cẩn thận không chia sẻ publicly! DB = create_client(supabase_url=url, supabase_key=key) def local_storage_set(key, value): value = json.dumps(value, ensure_ascii=False) # Convert to JSON string st_javascript(f"localStorage.setItem('{key}', {value});") # Function to get a value from localStorage def local_storage_get(key): return st_javascript(f"localStorage.getItem('{key}');") def get_transcripts(path: str): samples = [] with open(path, newline='', encoding='utf-8') as csvfile: reader = csv.reader(csvfile) next(reader) for row in reader: samples.append(f"{row[0]} - {row[1]}") return samples def get_provinces(path): provinces = [] with open(path, newline='', encoding='utf-8') as csvfile: reader = csv.reader(csvfile) next(reader) for row in reader: provinces.append(f"{row[0]}") return provinces def save_data(audio, transcript, age, gender, region): if age < 1: return "⚠️ Bạn cần nhập đúng số tuổi." if not transcript or not gender or not region.strip(): return "⚠️ Bạn cần nhập đầy đủ các thông tin: Câu thoại, Giới tính và Tỉnh/TP." if audio is None: return "⚠️ Vui lòng ghi âm trước khi lưu." session_id = str(uuid.uuid4()) audio_array = np.frombuffer(audio[-1], dtype=np.int16) filename = f"recorded_audio_{int(time.time())}.wav" wavfile.write(filename, 44100, audio_array) DB.storage.from_("cs-bucket").upload(file=filename, path=f"upload/{filename}", file_options={"content-type": "audio/wav"}) url = DB.storage.from_("cs-bucket").get_public_url(path=f"upload/{filename}") word = transcript.split("-")[0] transcript_text = transcript.split("-")[1] DB.table("cs-data").insert({ "user_id": session_id, "audio_url": url, "word": word, "transcript_text": transcript_text, "age": age, "gender": gender, "region": region.strip() }).execute() if os.path.exists(filename): os.remove(filename) return "✅ Dữ liệu đã được lưu thành công. Cảm ơn bạn!" def on_submit(audio, sentence_selected, age, gender, region): full_transcript = sentence_to_full.get(sentence_selected, None) if full_transcript is None: return "⚠️ Lỗi: Không tìm thấy câu thoại tương ứng." return save_data(audio, full_transcript, age, gender, region) transcripts = get_transcripts("scripts.csv") provinces = get_provinces("vietnam_provinces.csv") sentence = [] sentence_to_full = {trans.split(" - ")[-1].strip(): trans for trans in transcripts} with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown("""
ability → ờ bi li ti