Spaces:

ummtushar
/

Audio2Text

Sleeping

App Files Files Community

ummtushar committed on Feb 6, 2025

Commit

3a5f6fa

verified ·

1 Parent(s): 437996a

Create app.py

Browse files

Files changed (1) hide show

app.py +147 -0

app.py ADDED Viewed

	@@ -0,0 +1,147 @@

import os
import subprocess
import tempfile
import zipfile
from pathlib import Path

import streamlit as st
import whisper
# Media extensions probed for narration, in order of preference when both
# exist for the same media number.
_AUDIO_EXTENSIONS = ('.mp4', '.m4a')


def _extract_media(pptx_path, extract_dir):
    """Extract only the ``ppt/media/`` members of the PPTX (a ZIP archive)."""
    with zipfile.ZipFile(pptx_path, 'r') as archive:
        members = [
            name for name in archive.namelist()
            if name.startswith('ppt/media/')
        ]
        archive.extractall(extract_dir, members=members)


def _find_slide_media(media_path):
    """Return ``{number: path}`` for every ``media<number>.mp4``/``.m4a`` file.

    The directory is scanned rather than probed sequentially from 1, so a gap
    in the numbering does not hide the files that come after it. The result
    is ordered by ascending number.
    """
    numbers = set()
    for name in os.listdir(media_path):
        stem, ext = os.path.splitext(name)
        digits = stem[len('media'):]
        if (stem.startswith('media') and ext in _AUDIO_EXTENSIONS
                and digits.isdecimal()):
            numbers.add(int(digits))

    media_files = {}
    for number in sorted(numbers):
        for ext in _AUDIO_EXTENSIONS:
            candidate = os.path.join(media_path, f'media{number}{ext}')
            if os.path.exists(candidate):
                media_files[number] = candidate
                break
    return media_files


def _convert_to_mp3(media_files, audio_dir):
    """Convert each media file to MP3 with ffmpeg; return ``{number: mp3}``.

    Conversions that fail (ffmpeg missing or exiting non-zero) are reported
    with ``st.error`` and left out of the result, so they are never handed to
    the transcriber.
    """
    progress_bar = st.progress(0)
    status_text = st.empty()
    audio_files = {}
    total = len(media_files)
    for done, (number, media_file) in enumerate(media_files.items(), start=1):
        status_text.text(f'Converting audio from slide {number}...')
        mp3_path = os.path.join(audio_dir, f'temp_{number}.mp3')
        try:
            # Argument list (no shell) plus check=True: a failing ffmpeg
            # raises instead of being silently ignored.
            subprocess.run(
                ['ffmpeg', '-nostdin', '-y', '-loglevel', 'quiet',
                 '-i', media_file, '-vn', '-acodec', 'libmp3lame', mp3_path],
                check=True,
                stdin=subprocess.DEVNULL,
            )
        except (OSError, subprocess.CalledProcessError) as e:
            st.error(f"Error converting slide {number}: {str(e)}")
        else:
            audio_files[number] = mp3_path
        progress_bar.progress(done / total)
    progress_bar.empty()
    status_text.empty()
    return audio_files


def _transcribe(audio_files):
    """Transcribe each MP3 with the Whisper "base" model; return texts."""
    with st.spinner('Loading Whisper model...'):
        model = whisper.load_model("base")
    transcripts = {}
    progress_bar = st.progress(0)
    status_text = st.empty()
    total = len(audio_files)
    for done, (number, audio_file) in enumerate(audio_files.items(), start=1):
        status_text.text(f'Transcribing slide {number}...')
        transcripts[number] = model.transcribe(audio_file)["text"]
        progress_bar.progress(done / total)
    progress_bar.empty()
    status_text.empty()
    return transcripts


def process_pptx(uploaded_file):
    """Transcribe the audio/video clips embedded in an uploaded PPTX.

    The upload is written to a temporary working directory, its
    ``ppt/media`` members are extracted, every ``media<N>.mp4``/``.m4a`` file
    is converted to MP3 with ffmpeg, and each MP3 is transcribed with Whisper.
    The working directory (upload, extracted media, MP3s) is always removed
    on return, including on errors.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the PPTX
            bytes (the value returned by ``st.file_uploader``).

    Returns:
        A dict mapping the media file number ``N`` to its transcribed text.
        NOTE(review): ``N`` is presented to the user as the slide number;
        this assumes media numbering follows slide order, which is not
        verified here. Returns an empty dict when the media folder holds no
        matching audio or every conversion failed, and ``None`` when the
        archive has no ``ppt/media`` folder or is not a valid ZIP file.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        pptx_path = os.path.join(work_dir, 'upload.pptx')
        with open(pptx_path, 'wb') as pptx_file:
            pptx_file.write(uploaded_file.getvalue())

        extract_dir = os.path.join(work_dir, 'extracted')
        with st.spinner('Extracting PPTX contents...'):
            try:
                _extract_media(pptx_path, extract_dir)
            except zipfile.BadZipFile:
                st.error('The uploaded file is not a valid PPTX archive.')
                return None

        media_path = os.path.join(extract_dir, 'ppt', 'media')
        if not os.path.isdir(media_path):
            return None

        media_files = _find_slide_media(media_path)
        if not media_files:
            # Nothing to transcribe: skip the costly model load.
            return {}

        audio_dir = os.path.join(work_dir, 'audio')
        os.mkdir(audio_dir)
        audio_files = _convert_to_mp3(media_files, audio_dir)
        if not audio_files:
            return {}

        # Must run inside the ``with`` so the MP3 files still exist.
        return _transcribe(audio_files)
def main():
    """Render the Streamlit page: upload a PPTX and show its transcripts.

    Uploads larger than 2GB are rejected with an error. Otherwise the file is
    passed to ``process_pptx`` and each returned transcript is rendered under
    a per-slide heading, in ascending slide order. A warning is shown when
    ``process_pptx`` returns nothing.
    """
    st.title('Audio2Text')
    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
    if uploaded_file is None:
        # Nothing uploaded yet; only the title and uploader are shown.
        return

    size_limit = 2 * 1024 * 1024 * 1024  # 2GB
    if uploaded_file.size > size_limit:
        st.error("File size exceeds 2GB limit")
        return

    st.write("Processing... This may take a while depending on the number and length of audio clips.")
    transcripts = process_pptx(uploaded_file)
    if not transcripts:
        st.warning("No audio content found in the PowerPoint file.")
        return

    st.subheader("Transcription Results")
    for slide_num in sorted(transcripts):
        st.markdown(f"**Slide {slide_num}**")
        st.write(transcripts[slide_num])
        st.markdown("---")


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()