Spaces:

Jayesh13
/

BTP_Phase2

Build error

App Files Community

Jayesh13 committed on Apr 14

Commit

3f486f6

verified ·

1 Parent(s): c1cef30

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import os
+os.system("pip install streamlit pandas xlsxwriter openpyxl")  # NOTE(review): shells out to pip each time this script runs; presumably these belong in a requirements.txt - confirm
+import streamlit as st
+import pandas as pd
+import xlsxwriter
+from io import BytesIO
+from collections import Counter
+# Set of 20 standard amino acids
+AMINO_ACIDS = set("ACDEFGHIKLMNPQRSTVWY")
+st.set_page_config(page_title="Amino Acid Percentage Tool", layout="wide")
+st.title("🧬 Amino Acid Percentage Analyzer")
+uploaded_file = st.file_uploader("Upload Excel file (with Entry, Protein Name, Sequence)", type=["xlsx"])
+if uploaded_file and st.button("Analyze File"):
+    df = pd.read_excel(uploaded_file)
+    if len(df.columns) < 3:
+        st.error("The file must have at least three columns: Entry, Protein Name, Sequence")
+    else:
+        entry_col = df.columns[0]
+        name_col = df.columns[1]
+        seq_col = df.columns[2]
+        all_counts = Counter()
+        all_length = 0
+        result_rows = []
+        for _, row in df.iterrows():
+            entry = str(row[entry_col])
+            name = str(row[name_col])
+            sequence = str(row[seq_col]).replace(" ", "").replace("\"", "").strip().upper()
+            sequence = ''.join(filter(lambda c: c in AMINO_ACIDS, sequence))
+            length = len(sequence)
+            if length == 0:
+                continue
+            count = Counter(sequence)
+            all_counts.update(count)
+            all_length += length
+            percentage = {aa: round(count[aa] / length * 100, 2) for aa in AMINO_ACIDS}
+            result_rows.append({"Entry": entry, "Protein Name": name, **percentage})
+        df_result = pd.DataFrame(result_rows)
+        # Calculate overall percentage
+        overall_percentage = {aa: round(all_counts[aa] / all_length * 100, 2) for aa in AMINO_ACIDS}
+        overall_row = {"Entry": "OVERALL", "Protein Name": "ALL SEQUENCES", **overall_percentage}
+        df_result = pd.concat([df_result, pd.DataFrame([overall_row])], ignore_index=True)
+        st.dataframe(df_result)
+        # Export to Excel
+        def to_excel(df):
+            output = BytesIO()
+            workbook = xlsxwriter.Workbook(output, {'in_memory': True})
+            worksheet = workbook.add_worksheet("Amino Acid %")
+            header_format = workbook.add_format({'bold': True, 'bg_color': '#CDEDF6'})
+            for col_num, col_name in enumerate(df.columns):
+                worksheet.write(0, col_num, col_name, header_format)
+            for row_num, row in enumerate(df.itertuples(index=False), start=1):
+                for col_num, value in enumerate(row):
+                    worksheet.write(row_num, col_num, value)
+            workbook.close()
+            output.seek(0)
+            return output
+        excel_file = to_excel(df_result)
+        st.download_button(
+            label="Download Excel Report",
+            data=excel_file,
+            file_name="amino_acid_percentage.xlsx",
+            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+        )