Spaces:
Sleeping
Sleeping
| from transformers import pipeline, WhisperModel | |
| import gradio as gr | |
| import pandas as pd | |
| import string | |
# Speech-recognition pipeline; word-level timestamps are requested because
# audio2segments looks up the time range of individual words.
pipe = pipeline(
    model="matteocirca/whisper-small-it-2",
    return_timestamps="word",
)

# Module-level cache read and written by audio2segments: the last pipeline
# result and the audio value it was computed from.
segments = {}
current_audio = None
# Translation table that strips ASCII punctuation; built once instead of once
# per transcribed chunk.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

_NO_WORD_HTML = "<html><h1>No Word inserted!</h1></html>"
_NO_AUDIO_HTML = "<html><h1>No Audio provided!</h1></html>"
_TABLE_HEAD = (
    "<table><thead><tr><th>Occurrence n°</th><th>Start</th><th>End</th></tr>"
    "</thead><tbody>"
)


def _normalize_word(text):
    """Return *text* lowercased, with ASCII punctuation and spaces removed."""
    return text.translate(_PUNCTUATION_TABLE).replace(" ", "").lower()


def _render_occurrences(timestamps):
    """Render a sequence of (start, end) timestamps as an HTML table."""
    rows = "".join(
        f"<tr><td>{index}</td><td>{stamp[0]}</td><td>{stamp[1]}</td></tr>"
        for index, stamp in enumerate(timestamps)
    )
    return f"{_TABLE_HEAD}{rows} </tbody></table>"


def audio2segments(audio, word):
    """Transcribe *audio* and list the timestamps at which *word* is spoken.

    The pipeline result is cached in the module-level ``segments`` and
    ``current_audio`` variables, so submitting the same audio value again
    with a different word does not run the model a second time.

    Args:
        audio: Value forwarded to ``pipe`` (the interface supplies a file
            path), or ``None`` when no audio was submitted.
        word: Word to search for. It is compared after lowercasing and
            removing ASCII punctuation and spaces, the same normalisation
            applied to every transcribed chunk.

    Returns:
        A ``(transcript, html)`` tuple. ``html`` is a table with one row per
        occurrence (index, start, end), or a heading explaining that the
        word or the audio is missing.
    """
    global segments, current_audio

    # Normalise the query exactly like the chunks below; otherwise inputs such
    # as "Ciao" or " ciao" could never match the normalised chunk text.
    query = _normalize_word(word or "")

    if audio is None:
        # Nothing to transcribe: do not hand None to the pipeline.
        return "", (_NO_AUDIO_HTML if query else _NO_WORD_HTML)

    if audio != current_audio:
        segments = pipe(audio)
        current_audio = audio

    if not query:
        return segments["text"], _NO_WORD_HTML

    matches = [
        chunk["timestamp"]
        for chunk in segments["chunks"]
        if _normalize_word(chunk["text"]) == query
    ]
    return segments["text"], _render_occurrences(matches)
def clear():
    """Reset the module-level transcription cache.

    Both ``segments`` and ``current_audio`` are reset: clearing only
    ``segments`` would let a later call with the same audio value hit the
    cache and read from an empty result.
    """
    # Without the global declaration the assignment would only bind a local
    # name and leave the cache untouched.
    global segments, current_audio
    segments = {}
    current_audio = None
# Gradio front end: an audio input (upload or microphone, passed to the
# callback as a file path) plus a text box for the word to search; the
# callback's two return values fill a text output and an HTML output.
iface = gr.Interface(
    title="Whisper Small Italian",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
    fn=audio2segments,
    inputs=[
        gr.Audio(type="filepath", sources=["upload", "microphone"]),
        "text",
    ],
    outputs=["text", "html"],
)

iface.launch()