Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,16 +1,3 @@
|
|
| 1 |
-
import os
|
| 2 |
-
os.system("pip install streamlit pandas xlsxwriter openpyxl matplotlib seaborn")
|
| 3 |
-
|
| 4 |
-
import streamlit as st
|
| 5 |
-
import pandas as pd
|
| 6 |
-
import xlsxwriter
|
| 7 |
-
from io import BytesIO
|
| 8 |
-
from collections import Counter
|
| 9 |
-
import matplotlib.pyplot as plt
|
| 10 |
-
import seaborn as sns
|
| 11 |
-
# For pie chart
|
| 12 |
-
# π COMBINED STREAMLIT PROTEIN ANALYSIS TOOL WITH COLORED COMPARISON
|
| 13 |
-
|
| 14 |
import os
|
| 15 |
os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
|
| 16 |
|
|
@@ -20,6 +7,8 @@ import xlsxwriter
|
|
| 20 |
from io import BytesIO
|
| 21 |
from collections import defaultdict
|
| 22 |
import hashlib
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# MongoDB Setup
|
| 25 |
try:
|
|
@@ -190,7 +179,6 @@ st.title("🧬 Protein Analysis Toolkit")
|
|
| 190 |
|
| 191 |
app_choice = st.radio("Choose an option", ["π Protein Repeat Finder", "π Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
|
| 192 |
|
| 193 |
-
|
| 194 |
if app_choice == "π Protein Repeat Finder":
|
| 195 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
| 196 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
|
@@ -224,10 +212,11 @@ if app_choice == "π Protein Repeat Finder":
|
|
| 224 |
st.download_button(
|
| 225 |
label="Download Excel file",
|
| 226 |
data=st.session_state.excel_file,
|
| 227 |
-
file_name="
|
| 228 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 229 |
)
|
| 230 |
|
|
|
|
| 231 |
if st.checkbox("Show Results Table"):
|
| 232 |
rows = []
|
| 233 |
for file_index, file_data in enumerate(st.session_state.all_sequences_data):
|
|
@@ -238,29 +227,29 @@ if app_choice == "π Protein Repeat Finder":
|
|
| 238 |
rows.append(row)
|
| 239 |
result_df = pd.DataFrame(rows)
|
| 240 |
st.dataframe(result_df)
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
|
| 265 |
|
| 266 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
os.system("pip install streamlit pandas xlsxwriter openpyxl pymongo")
|
| 3 |
|
|
|
|
| 7 |
from io import BytesIO
|
| 8 |
from collections import defaultdict
|
| 9 |
import hashlib
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
import seaborn as sns
|
| 12 |
|
| 13 |
# MongoDB Setup
|
| 14 |
try:
|
|
|
|
| 179 |
|
| 180 |
app_choice = st.radio("Choose an option", ["π Protein Repeat Finder", "π Protein Comparator", "🧪 Amino Acid Percentage Analyzer"])
|
| 181 |
|
|
|
|
| 182 |
if app_choice == "π Protein Repeat Finder":
|
| 183 |
analysis_type = st.radio("Select analysis type:", ["Homo", "Hetero", "Both"], index=2)
|
| 184 |
uploaded_files = st.file_uploader("Upload Excel files", accept_multiple_files=True, type=["xlsx"])
|
|
|
|
| 212 |
st.download_button(
|
| 213 |
label="Download Excel file",
|
| 214 |
data=st.session_state.excel_file,
|
| 215 |
+
file_name="Protein_Repeats_Analysis.xlsx",
|
| 216 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
| 217 |
)
|
| 218 |
|
| 219 |
+
# Display results table and repeat cluster visualization
|
| 220 |
if st.checkbox("Show Results Table"):
|
| 221 |
rows = []
|
| 222 |
for file_index, file_data in enumerate(st.session_state.all_sequences_data):
|
|
|
|
| 227 |
rows.append(row)
|
| 228 |
result_df = pd.DataFrame(rows)
|
| 229 |
st.dataframe(result_df)
|
| 230 |
+
|
| 231 |
+
# Repeat Cluster Visualization
|
| 232 |
+
repeat_counts = defaultdict(int)
|
| 233 |
+
for seq_data in st.session_state.all_sequences_data:
|
| 234 |
+
for _, _, freq_dict in seq_data:
|
| 235 |
+
for repeat, count in freq_dict.items():
|
| 236 |
+
repeat_counts[repeat] += count
|
| 237 |
+
|
| 238 |
+
if repeat_counts:
|
| 239 |
+
sorted_repeats = sorted(repeat_counts.items(), key=lambda x: x[1], reverse=True)
|
| 240 |
+
top_n = st.slider("Select number of top repeats to visualize", min_value=5, max_value=50, value=20)
|
| 241 |
+
top_repeats = sorted_repeats[:top_n]
|
| 242 |
+
repeats, counts = zip(*top_repeats)
|
| 243 |
+
|
| 244 |
+
plt.figure(figsize=(12, 6))
|
| 245 |
+
sns.barplot(x=list(repeats), y=list(counts), palette="viridis")
|
| 246 |
+
plt.xticks(rotation=45, ha='right')
|
| 247 |
+
plt.xlabel("Repeats")
|
| 248 |
+
plt.ylabel("Total Frequency")
|
| 249 |
+
plt.title("Top Repeat Clusters Across All Sequences")
|
| 250 |
+
st.pyplot(plt.gcf())
|
| 251 |
+
else:
|
| 252 |
+
st.warning("No repeat data available to visualize. Please upload files first.")
|
| 253 |
|
| 254 |
|
| 255 |
|