Spaces:

solewarrior
/

AIHelp

Running

App Files Community

solewarrior committed on Feb 15

Commit

c279525

verified ·

1 Parent(s): f567e4e

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -69

app.py CHANGED Viewed

@@ -2,9 +2,8 @@ import streamlit as st
 from transformers import pipeline
 import PyPDF2
 import docx
-import textwrap
-# Streamlit Page Config
 st.set_page_config(
     page_title="TextSphere",
     page_icon="🤖",
@@ -12,7 +11,6 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# Footer
 st.markdown("""
     <style>
         .footer {
@@ -30,106 +28,98 @@ st.markdown("""
     </div>
 """, unsafe_allow_html=True)
-# Load Model
 @st.cache_resource
 def load_models():
     try:
-        summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
     except Exception as e:
-        raise RuntimeError(f"Failed to load model: {str(e)}")
-    return summarization_model
-summarization_model = load_models()
-# Function to Extract Text from PDF
-def extract_text_from_pdf(uploaded_pdf):
     try:
-        pdf_reader = PyPDF2.PdfReader(uploaded_pdf)
-        pdf_text = ""
         for page in pdf_reader.pages:
-            text = page.extract_text()
-            if text:
-                pdf_text += text + "\n"
-        if not pdf_text.strip():
-            st.error("No text found in the PDF.")
-            return None
-        return pdf_text
     except Exception as e:
         st.error(f"Error reading the PDF: {e}")
         return None
-# Function to Extract Text from TXT
-def extract_text_from_txt(uploaded_txt):
     try:
-        return uploaded_txt.read().decode("utf-8").strip()
     except Exception as e:
-        st.error(f"Error reading the TXT file: {e}")
         return None
-# Function to Extract Text from DOCX
-def extract_text_from_docx(uploaded_docx):
     try:
-        doc = docx.Document(uploaded_docx)
-        return "\n".join([para.text for para in doc.paragraphs]).strip()
     except Exception as e:
-        st.error(f"Error reading the DOCX file: {e}")
         return None
-# Function to Split Text into 1024-Token Chunks
-def chunk_text(text, max_tokens=1024):
-    return textwrap.wrap(text, width=max_tokens)
-# Sidebar for Task Selection (Default: Text Summarization)
 st.sidebar.title("AI Solutions")
 option = st.sidebar.selectbox(
     "Choose a task",
     ["Text Summarization", "Question Answering", "Text Classification", "Language Translation"],
-    index=0  # Default to "Text Summarization"
 )
-# Text Summarization Task
 if option == "Text Summarization":
-    st.title("📄 Text Summarization")
-    st.markdown("<h4 style='font-size: 20px;'>- because who needs to read the whole document? 🥵</h4>", unsafe_allow_html=True)
-    uploaded_file = st.file_uploader(
-        "Upload a document (PDF, TXT, DOCX) - *Note: Processes only 1024 tokens per chunk*",
-        type=["pdf", "txt", "docx"]
-    )
-    text_to_summarize = ""
     if uploaded_file:
         file_type = uploaded_file.name.split(".")[-1].lower()
-        if file_type == "pdf":
-            text_to_summarize = extract_text_from_pdf(uploaded_file)
-        elif file_type == "txt":
-            text_to_summarize = extract_text_from_txt(uploaded_file)
-        elif file_type == "docx":
-            text_to_summarize = extract_text_from_docx(uploaded_file)
-        else:
-            st.error("Unsupported file format.")
     if st.button("Summarize"):
-        with st.spinner('Summarizing...'):
             try:
                 if text_to_summarize:
-                    chunks = chunk_text(text_to_summarize, max_tokens=1024)
-                    summaries = []
-                    for chunk in chunks:
-                        input_length = len(chunk.split())  # Count words in the chunk
-                        max_summary_length = max(50, input_length // 2)  # Dynamically adjust max_length
-                        summary = summarization_model(chunk, max_length=max_summary_length, min_length=50, do_sample=False)
-                        summaries.append(summary[0]['summary_text'])
-                    final_summary = " ".join(summaries)  # Combine all chunk summaries
-                    st.write("### Summary:")
-                    st.write(final_summary)
                 else:
-                    st.error("Please upload a document first.")
             except Exception as e:
-                st.error(f"Error: {e}")

 from transformers import pipeline
 import PyPDF2
 import docx
+from io import BytesIO
 st.set_page_config(
     page_title="TextSphere",
     page_icon="🤖",
     initial_sidebar_state="expanded"
 )
 st.markdown("""
     <style>
         .footer {
     </div>
 """, unsafe_allow_html=True)
 @st.cache_resource
 def load_models():
     try:
+        summarization_model = pipeline(
+            "summarization",
+            model="facebook/bart-large-cnn"
+        )
     except Exception as e:
+        raise RuntimeError(f"Failed to load models: {str(e)}")
+    return summarization_model
+def extract_text_from_pdf(uploaded_file):
     try:
+        pdf_reader = PyPDF2.PdfReader(uploaded_file)
+        text = ""
         for page in pdf_reader.pages:
+            text += page.extract_text() or ""  # Ensure we avoid NoneType issues
+        return text.strip()
     except Exception as e:
         st.error(f"Error reading the PDF: {e}")
         return None
+def extract_text_from_docx(uploaded_file):
     try:
+        doc = docx.Document(uploaded_file)
+        return "\n".join([para.text for para in doc.paragraphs])
     except Exception as e:
+        st.error(f"Error reading the DOCX: {e}")
         return None
+def extract_text_from_txt(uploaded_file):
     try:
+        return uploaded_file.read().decode("utf-8")
     except Exception as e:
+        st.error(f"Error reading the TXT file: {e}")
         return None
+def extract_text_from_file(uploaded_file, file_type):
+    if file_type == "pdf":
+        return extract_text_from_pdf(uploaded_file)
+    elif file_type == "docx":
+        return extract_text_from_docx(uploaded_file)
+    elif file_type == "txt":
+        return extract_text_from_txt(uploaded_file)
+    return None
+try:
+    summarization_model = load_models()
+except Exception as e:
+    st.error(f"An error occurred while loading models: {e}")
 st.sidebar.title("AI Solutions")
 option = st.sidebar.selectbox(
     "Choose a task",
     ["Text Summarization", "Question Answering", "Text Classification", "Language Translation"],
+    index=0  # Makes Text Summarization the default
 )
 if option == "Text Summarization":
+    st.title("Text Summarization")
+    st.markdown("<h4 style='font-size: 20px;'>- because who needs to read the whole document, anyway? 🥵</h4>", unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("Upload a document (PDF, DOCX, TXT) [Limit: 1024 Tokens]", type=["pdf", "docx", "txt"])
+    text_to_summarize = st.text_area("Enter text to summarize (or leave empty if uploading a file):")
     if uploaded_file:
         file_type = uploaded_file.name.split(".")[-1].lower()
+        text_to_summarize = extract_text_from_file(uploaded_file, file_type)
     if st.button("Summarize"):
+        with st.spinner('Summarizing text...'):
             try:
                 if text_to_summarize:
+                    summary = summarization_model(text_to_summarize[:1024], max_length=300, min_length=50, do_sample=False)
+                    st.write("Summary:", summary[0]['summary_text'])
+                    st.balloons()
                 else:
+                    st.error("Please enter text or upload a document for summarization.")
             except Exception as e:
+                st.error(f"An error occurred: {e}")
+elif option == "Question Answering":
+    st.title("Question Answering")
+    st.write("Coming soon... 🚀")
+elif option == "Text Classification":
+    st.title("Text Classification")
+    st.write("Coming soon... 🚀")
+elif option == "Language Translation":
+    st.title("Language Translation")
+    st.write("Coming soon... 🚀")