Spaces:
Runtime error
Runtime error
| import os | |
| os.environ['HF_HOME'] = '/tmp' | |
| import time | |
| import streamlit as st | |
| import streamlit.components.v1 as components | |
| import pandas as pd | |
| import io | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| import re | |
| import string | |
| import json | |
| from itertools import cycle | |
| from io import BytesIO | |
| import plotly.io as pio | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.decomposition import LatentDirichletAllocation | |
| from gliner import GLiNER | |
| from streamlit_extras.stylable_container import stylable_container | |
| import time # Optional: for simulating database processing | |
st.set_page_config(
    page_title="Premium Dashboard",
    layout="centered",
    initial_sidebar_state="collapsed",
)

# --- Email Retrieval Logic (CRITICAL) ---
# The confirmation link carries the subscriber address in the `user_email`
# URL parameter.
# NOTE(security): a URL parameter is fully controlled by the visitor, so it is
# NOT proof of payment. Before any real entitlement is granted, verify the
# payment server-side (e.g. against the payment provider's transaction record).
query_params = st.query_params
_raw_user_email = (query_params.get("user_email") or "").strip()
# Accept only "local@domain.tld"-shaped values without whitespace or characters
# that are significant in Markdown/HTML, because the value is echoed back to
# the page inside Markdown below. Anything else is treated as "missing".
_EMAIL_PART = r"[^\s@<>\[\]()*`\\]+"
user_email = (
    _raw_user_email
    if re.fullmatch(rf"{_EMAIL_PART}@{_EMAIL_PART}\.{_EMAIL_PART}", _raw_user_email)
    else None
)

# ----------------------------------------
# --- Main Application Logic ---
# ----------------------------------------
st.title("Premium Subscriber Dashboard")
st.markdown("---")

if user_email:
    # Streamlit re-executes the whole script on every widget interaction.
    # Remember which address was already processed in this session so the
    # balloons and the simulated backend delay only happen once.
    _first_confirmation = st.session_state.get("premium_confirmed_email") != user_email

    # STEP 1: Confirmation and Display
    if _first_confirmation:
        st.balloons()
    st.success(f"Payment Confirmed! Welcome to Premium, **{user_email}**! You now have full access. ")
    st.header("Granting Access...")

    # STEP 2: CRITICAL BACKEND PROCESSING
    # This is where the code would connect to the database (like Firestore)
    # and update the user's status to 'Premium'.
    if _first_confirmation:
        with st.spinner(f"Processing subscription for {user_email}..."):
            # --- SIMULATED DATABASE LOGIC START ---
            time.sleep(2)  # Simulate network delay/database write
            # In a real app, you would:
            # 1. Connect to Firestore.
            # 2. Query your users collection to find the user with this email.
            # 3. Update their document: { subscription_status: "active", start_date: current_date }
            # --- SIMULATED DATABASE LOGIC END ---
        st.session_state["premium_confirmed_email"] = user_email
    st.info(f"✅ Your premium access is now permanently linked to **{user_email}**.")

    # STEP 3: Display Premium Features
    st.markdown("""
## Your Exclusive Premium Features
""")
    col1, col2 = st.columns(2)
    with col1:
        st.metric(label="Subscription Status", value="Active (Annual)")
    with col2:
        st.metric(label="Access Tier", value="Unlimited")
    st.button("Access Advanced Reports & Tools", type="primary")
    st.markdown("---")
    st.write("Enjoy your enhanced experience!")
else:
    # Case where the user arrives without a usable 'user_email' parameter
    st.error("Access Denied or Subscription Details Missing.")
    st.markdown("""
It looks like you arrived here without a confirmation link. If you have already paid:
1. Please check the email address you used for payment.
2. Contact support with your PayPal transaction ID for manual activation.

If you have not paid, please return to the free app to upgrade.
""")
# --- Comet ML Imports (Optional/Placeholder) ---
try:
    from comet_ml import Experiment
except ImportError:
    class Experiment:
        """No-op stand-in used when the optional ``comet_ml`` package is missing.

        Every method accepts any positional and keyword arguments and does
        nothing, so call sites written against the real Comet API (which are
        commonly invoked with keyword arguments) keep working without it.
        """

        def __init__(self, *args, **kwargs):
            pass

        def log_parameter(self, *args, **kwargs):
            pass

        def log_table(self, *args, **kwargs):
            pass

        def end(self, *args, **kwargs):
            pass
# --- Fixed Label Definitions and Mappings ---
# Labels predicted in "Fixed Labels" mode.
FIXED_LABELS = [
    "person",
    "country",
    "city",
    "organization",
    "date",
    "time",
    "cardinal",
    "money",
    "position",
]

# Pre-filled value of the custom-label text input.
DEFAULT_CUSTOM_LABELS = "person, location, organization, product, date, time, event"

# Highlight color (hex) for each fixed label.
FIXED_ENTITY_COLOR_MAP = {
    "person": "#10b981",        # Green
    "country": "#3b82f6",       # Blue
    "city": "#4ade80",          # Light Green
    "organization": "#f59e0b",  # Orange
    "date": "#8b5cf6",          # Purple
    "time": "#ec4899",          # Pink
    "cardinal": "#06b6d4",      # Cyan
    "money": "#f43f5e",         # Red
    "position": "#a855f7",      # Violet
}

# --- Fixed Category Mapping ---
# Display category -> fixed labels that belong to it.
FIXED_CATEGORY_MAPPING = {
    "People & Roles": ["person", "organization", "position"],
    "Locations": ["country", "city"],
    "Time & Dates": ["date", "time"],
    "Numbers & Finance": ["money", "cardinal"],
}

# Inverse lookup: fixed label -> display category.
REVERSE_FIXED_CATEGORY_MAPPING = {
    label: category
    for category in FIXED_CATEGORY_MAPPING
    for label in FIXED_CATEGORY_MAPPING[category]
}
# --- Dynamic Color Generator for Custom Labels ---
# Endless iterator over two Plotly qualitative palettes chained together
# (Alphabet first, then Bold) so that many custom labels can be colored.
COLOR_PALETTE = cycle([*px.colors.qualitative.Alphabet, *px.colors.qualitative.Bold])
def extract_label(node_name):
    """Return the label from a node string shaped like 'Text (Label)'.

    The label is whatever sits inside the parenthesised group that ends the
    string; "Unknown" is returned when the string does not end with one.
    """
    found = re.search(r'\(([^)]+)\)$', node_name)
    if found is None:
        return "Unknown"
    return found.group(1)
def remove_trailing_punctuation(text_string):
    """Return *text_string* without any ASCII punctuation at its right end."""
    trailing_chars = string.punctuation
    return text_string.rstrip(trailing_chars)
def get_dynamic_color_map(active_labels, fixed_map):
    """Build a label -> color mapping for the labels currently in use.

    Args:
        active_labels: iterable of label names that need a color.
        fixed_map: dict of predefined label -> color assignments.

    Returns:
        A new dict. When the active labels are exactly the keys of
        ``fixed_map`` it is a copy of ``fixed_map``; otherwise labels found in
        ``fixed_map`` keep their fixed color and every other label receives
        the next color from a freshly restarted palette.
    """
    global COLOR_PALETTE

    # Exact match with the fixed set: hand back a copy so that callers who
    # modify the result cannot corrupt the shared fixed map.
    if set(active_labels) == set(fixed_map):
        return dict(fixed_map)

    # Restart the shared palette so the same label list always yields the
    # same colors, independent of earlier calls.
    COLOR_PALETTE = cycle(px.colors.qualitative.Alphabet + px.colors.qualitative.Bold)

    color_map = {}
    for label in active_labels:
        if label in color_map:
            # A repeated label must not consume another palette color.
            continue
        color_map[label] = fixed_map[label] if label in fixed_map else next(COLOR_PALETTE)
    return color_map
def highlight_entities(text, df_entities, entity_color_map):
    """Render *text* as an HTML block with each entity wrapped in a colored span.

    Args:
        text: the analysed document.
        df_entities: DataFrame with at least 'start', 'end' (character offsets
            into *text*) and 'label' columns.
        entity_color_map: dict label -> CSS color; labels missing from it are
            drawn with '#000000'.

    Returns:
        *text* unchanged when ``df_entities`` is empty; otherwise an HTML
        ``<div>`` string. All document text and labels are HTML-escaped, so
        markup contained in the input is displayed literally instead of being
        interpreted by the browser.
    """
    from html import escape

    if df_entities.empty:
        return text

    # Walk the entities left to right; on equal starts the longer span wins.
    ordered = df_entities.sort_values(by=['start', 'end'], ascending=[True, False]).to_dict('records')

    pieces = []
    cursor = 0  # index in `text` up to which output has been emitted
    for entity in ordered:
        start = max(0, int(entity['start']))
        end = min(len(text), int(entity['end']))
        # Skip empty spans and spans overlapping one already emitted (nested
        # entities): splicing overlapping spans would produce broken markup.
        if end <= start or start < cursor:
            continue
        label = entity['label']
        color = entity_color_map.get(label, '#000000')
        pieces.append(escape(text[cursor:start], quote=False))
        pieces.append(
            f'<span style="background-color: {color}; color: white; padding: 2px 4px; border-radius: 3px; cursor: help;" '
            f'title="{escape(str(label))}">{escape(text[start:end], quote=False)}</span>'
        )
        cursor = end
    pieces.append(escape(text[cursor:], quote=False))

    highlighted_text = "".join(pieces)
    return f'<div style="border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px;">{highlighted_text}</div>'
def perform_topic_modeling(df_entities, num_topics=2, num_top_words=10):
    """Run a small LDA topic model over the unique entity texts.

    Args:
        df_entities: DataFrame with a 'text' column; its unique values are
            used as the documents.
        num_topics: number of LDA components.
        num_top_words: upper bound on words reported per topic.

    Returns:
        DataFrame with columns 'Topic_ID', 'Word', 'Weight', or None when
        there are fewer than two documents, the vocabulary is smaller than
        ``num_topics``, or modeling fails (best effort; the failure is logged).
    """
    import logging

    documents = df_entities['text'].unique().tolist()
    if len(documents) < 2:
        return None

    # NOTE(review): the per-topic word count is capped by the number of
    # documents, not by the vocabulary size — kept as in the original.
    N = min(num_top_words, len(documents))

    try:
        # Step 1: Try aggressive filtering. scikit-learn raises ValueError
        # when the pruning leaves no terms, so treat that as "no features"
        # instead of letting it skip the fallback below.
        try:
            tfidf_vectorizer = TfidfVectorizer(max_df=0.95, min_df=2, stop_words='english', ngram_range=(1, 3))
            tfidf = tfidf_vectorizer.fit_transform(documents)
            tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
        except ValueError:
            tfidf = None
            tfidf_feature_names = []

        # Step 2: Fallback if not enough features
        if len(tfidf_feature_names) < num_topics:
            tfidf_vectorizer = TfidfVectorizer(max_df=1.0, min_df=1, stop_words='english', ngram_range=(1, 3))
            tfidf = tfidf_vectorizer.fit_transform(documents)
            tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
            if len(tfidf_feature_names) < num_topics:
                return None

        lda = LatentDirichletAllocation(n_components=num_topics, max_iter=5, learning_method='online', random_state=42, n_jobs=-1)
        lda.fit(tfidf)

        topic_data_list = []
        for topic_idx, topic in enumerate(lda.components_):
            # Indices of the N heaviest words, heaviest first.
            top_words_indices = topic.argsort()[:-N - 1:-1]
            topic_data_list.extend(
                {
                    'Topic_ID': f'Topic #{topic_idx + 1}',
                    'Word': tfidf_feature_names[i],
                    'Weight': topic[i],
                }
                for i in top_words_indices
            )
        return pd.DataFrame(topic_data_list)
    except Exception:
        # Topic modeling is an optional extra: never let it break the report,
        # but leave a trace of what went wrong.
        logging.getLogger(__name__).warning("Topic modeling failed", exc_info=True)
        return None
def create_topic_word_bubbles(df_topic_data):
    """Build a Plotly bubble chart of the top words of every topic.

    Expects the 'Topic_ID' / 'Word' / 'Weight' columns; returns None for an
    empty frame, otherwise the figure (one bubble per word, sized by weight
    and colored by topic).
    """
    if df_topic_data.empty:
        return None

    bubbles = df_topic_data.rename(columns={'Topic_ID': 'topic','Word': 'word', 'Weight': 'weight'})
    # Spread the bubbles horizontally by row position.
    bubbles['x_pos'] = bubbles.index

    fig = px.scatter(
        bubbles,
        x='x_pos',
        y='weight',
        size='weight',
        color='topic',
        text='word',
        hover_name='word',
        size_max=40,
        title='Topic Word Weights (Bubble Chart)',
        color_discrete_sequence=px.colors.qualitative.Bold,
        labels={'x_pos': 'Entity/Word Index', 'weight': 'Word Weight', 'topic': 'Topic ID'},
        custom_data=['word', 'weight', 'topic'],
    )

    hidden_axis = {'showgrid': False, 'showticklabels': False, 'zeroline': False, 'showline': False}
    fig.update_layout(
        xaxis_title="Entity/Word",
        yaxis_title="Word Weight",
        xaxis=hidden_axis,
        yaxis={'showgrid': True},
        showlegend=True,
        height=600,
        margin=dict(t=50, b=100, l=50, r=10),
        plot_bgcolor='#f9f9f9',
        paper_bgcolor='#f9f9f9',
    )
    fig.update_traces(
        textposition='middle center',
        textfont=dict(color='white', size=10),
        hovertemplate="<b>%{customdata[0]}</b><br>Weight: %{customdata[1]:.3f}<br>Topic: %{customdata[2]}<extra></extra>",
        marker=dict(line=dict(width=1, color='DarkSlateGrey')),
    )
    return fig
def generate_network_graph(df, raw_text, entity_color_map):
    """Build a Plotly node/edge figure of entity co-occurrence.

    Each unique entity text becomes one node placed on a jittered circle
    (the jitter is random, so positions differ between calls). Two nodes are
    connected when both entity texts occur, case-insensitively as substrings,
    in the same sentence of *raw_text*.

    Args:
        df: entity DataFrame with 'text', 'label' and 'score' columns.
        raw_text: the analysed document.
        entity_color_map: dict label -> color; unknown labels use '#cccccc'.

    Returns:
        A ``go.Figure``. With fewer than two unique entities the figure only
        carries an explanatory title.
    """
    from itertools import combinations

    # 1. Prepare Data for Nodes
    # Occurrence count per entity text.
    entity_counts = df['text'].value_counts().reset_index()
    entity_counts.columns = ['text', 'frequency']
    # One row per entity text, keeping the label/score of its best-scoring
    # mention; this also guarantees that 'text' is unique.
    best_mentions = (
        df.sort_values('score', ascending=False)
        .drop_duplicates(subset=['text'])[['text', 'label', 'score']]
    )
    unique_entities = best_mentions.merge(entity_counts, on='text', how='left')
    if unique_entities.shape[0] < 2:
        return go.Figure().update_layout(title="Not enough unique entities for a meaningful graph.")

    # 2. Node Positioning: evenly spaced on a circle plus Gaussian jitter.
    num_nodes = len(unique_entities)
    thetas = np.linspace(0, 2 * np.pi, num_nodes, endpoint=False)
    radius = 10
    unique_entities['x'] = radius * np.cos(thetas) + np.random.normal(0, 0.5, num_nodes)
    unique_entities['y'] = radius * np.sin(thetas) + np.random.normal(0, 0.5, num_nodes)
    pos_map = unique_entities.set_index('text')[['x', 'y']].to_dict('index')

    # 3. Edge Calculation (Co-occurrence)
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?|\!)\s', raw_text)
    # Lower-case every entity once instead of once per sentence.
    lowered_entities = [(entity_text, entity_text.lower()) for entity_text in unique_entities['text']]
    edges = set()
    for sentence in sentences:
        sentence_lower = sentence.lower()
        present = sorted(original for original, lowered in lowered_entities if lowered in sentence_lower)
        # `present` is sorted and duplicate-free, so each pair is already in
        # canonical (a < b) order.
        edges.update(combinations(present, 2))

    # 4. Plotly Figure Generation
    edge_x = []
    edge_y = []
    for n1, n2 in edges:
        # The trailing None breaks the line between consecutive edges.
        edge_x.extend([pos_map[n1]['x'], pos_map[n2]['x'], None])
        edge_y.extend([pos_map[n1]['y'], pos_map[n2]['y'], None])

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines', name='Co-occurrence Edges', showlegend=False))
    fig.add_trace(go.Scatter(
        x=unique_entities['x'], y=unique_entities['y'], mode='markers+text', name='Entities', text=unique_entities['text'], textposition="top center", showlegend=False,
        marker=dict(
            size=unique_entities['frequency'] * 5 + 10,
            color=[entity_color_map.get(label, '#cccccc') for label in unique_entities['label']],
            line_width=1, line_color='black', opacity=0.9
        ),
        textfont=dict(size=10),
        customdata=unique_entities[['label', 'score', 'frequency']],
        hovertemplate=("<b>%{text}</b><br>Label: %{customdata[0]}<br>Score: %{customdata[1]:.2f}<br>Frequency: %{customdata[2]}<extra></extra>")
    ))

    # 5. Legend and Layout
    # One invisible marker trace per label, in order of first appearance,
    # purely to obtain a legend entry.
    for label in unique_entities['label'].drop_duplicates():
        fig.add_trace(go.Scatter(
            x=[None], y=[None], mode='markers',
            marker=dict(size=10, color=entity_color_map.get(label, '#cccccc')),
            name=f"{str(label).capitalize()}", showlegend=True,
        ))

    fig.update_layout(
        title='Entity Co-occurrence Network (Edges = Same Sentence)',
        showlegend=True, hovermode='closest',
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-15, 15]),
        plot_bgcolor='#f9f9f9', paper_bgcolor='#f9f9f9',
        margin=dict(t=50, b=10, l=10, r=10), height=600,
        annotations=[
            dict(
                text="When a line is drawn between two nodes (entities), it means those two entities co-occurred in the same sentence at least once.",
                xref="paper", yref="paper",
                x=0.5, y=0.95,  # Position below the title
                showarrow=False,
                font=dict(size=12, color="gray")
            )
        ]
    )
    return fig
def generate_entity_csv(df):
    """Serialize the entity table to CSV inside an in-memory binary buffer.

    Only the 'text', 'label', 'category', 'score', 'start' and 'end' columns
    are exported (in that order, without the index). The returned ``BytesIO``
    holds UTF-8 bytes and is positioned at the start.
    """
    export_columns = ['text', 'label', 'category', 'score', 'start', 'end']
    csv_bytes = df[export_columns].to_csv(index=False).encode('utf-8')
    return BytesIO(csv_bytes)
# --- HTML REPORT GENERATION FUNCTION ---
def generate_html_report(df, text_input, elapsed_time, df_topic_data, entity_color_map, report_title="Entity and Topic Analysis Report", branding_html=""):
    """Assemble the complete stand-alone HTML report for one analysis run.

    Args:
        df: entity DataFrame with 'text', 'label', 'category', 'score',
            'start' and 'end' columns.
        text_input: the analysed document.
        elapsed_time: processing time in seconds (shown with two decimals).
        df_topic_data: topic DataFrame ('Topic_ID', 'Word', 'Weight') or None.
        entity_color_map: dict label -> color for highlighting and the graph.
        report_title: plain-text title; it is HTML-escaped before insertion.
        branding_html: raw HTML fragment inserted verbatim into the metadata
            box (the caller is responsible for its content).

    Returns:
        The report as one HTML string. plotly.js is referenced once (by the
        first chart) and shared by all following charts.
    """
    from html import escape

    # 1. Generate Visualizations (Plotly HTML)
    # The treemap is the first chart in the document, so it is the only one
    # that emits the plotly.js <script> tag; all later charts reuse it.
    # 1a. Treemap
    fig_treemap = px.treemap(
        df,
        path=[px.Constant("All Entities"), 'category', 'label', 'text'],
        values='score',
        color='label',
        title="Entity Distribution by Category and Label",
        color_discrete_sequence=px.colors.qualitative.Bold
    )
    fig_treemap.update_layout(margin=dict(t=50, l=25, r=25, b=25))
    treemap_html = fig_treemap.to_html(full_html=False, include_plotlyjs='cdn')

    # 1b. Pie Chart
    grouped_counts = df['category'].value_counts().reset_index()
    grouped_counts.columns = ['Category', 'Count']
    color_seq = px.colors.qualitative.Pastel if len(grouped_counts) > 1 else px.colors.sequential.Cividis
    fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=color_seq)
    fig_pie.update_layout(margin=dict(t=50, b=10))
    pie_html = fig_pie.to_html(full_html=False, include_plotlyjs=False)

    # 1c. Bar Chart (Category Count)
    fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=color_seq)
    fig_bar_category.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=100))
    bar_category_html = fig_bar_category.to_html(full_html=False, include_plotlyjs=False)

    # 1d. Bar Chart (Most Frequent Entities)
    word_counts = df['text'].value_counts().reset_index()
    word_counts.columns = ['Entity', 'Count']
    repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
    bar_freq_html = '<p>No entities appear more than once in the text for visualization.</p>'
    if not repeating_entities.empty:
        fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', color='Entity', title='Top 10 Most Frequent Entities', color_discrete_sequence=px.colors.sequential.Viridis)
        fig_bar_freq.update_layout(xaxis={'categoryorder': 'total descending'}, margin=dict(t=50, b=100))
        bar_freq_html = fig_bar_freq.to_html(full_html=False, include_plotlyjs=False)

    # 1e. Network Graph HTML
    network_fig = generate_network_graph(df, text_input, entity_color_map)
    network_html = network_fig.to_html(full_html=False, include_plotlyjs=False)

    # 1f. Topic Modeling Bubble Chart
    topic_charts_html = '<h3>Topic Word Weights (Bubble Chart)</h3>'
    if df_topic_data is not None and not df_topic_data.empty:
        bubble_figure = create_topic_word_bubbles(df_topic_data)
        if bubble_figure is not None:
            bubble_html = bubble_figure.to_html(full_html=False, include_plotlyjs=False, config={"responsive": True})
            topic_charts_html += f'<div class="chart-box">{bubble_html}</div>'
        else:
            topic_charts_html += '<p style="color: red;">Error: Topic modeling data was available but visualization failed.</p>'
    else:
        topic_charts_html += '<div class="chart-box" style="text-align: center; padding: 50px; background-color: #fff; border: 1px dashed #888888;">'
        topic_charts_html += '<p><strong>Topic Modeling requires more unique input.</strong></p>'
        topic_charts_html += '<p>Please enter text containing at least two unique entities to generate the Topic Bubble Chart.</p>'
        topic_charts_html += '</div>'

    # 2. Get Highlighted Text
    highlighted_text_html = highlight_entities(text_input, df, entity_color_map)
    # Tag only the wrapper <div> itself; a blanket str.replace would also
    # rewrite any "div style" that happens to occur in the document text.
    if highlighted_text_html.startswith('<div style'):
        highlighted_text_html = highlighted_text_html.replace("div style", "div class='highlighted-text' style", 1)

    # 3. Entity Tables (Pandas to HTML)
    # Apply color gradient styling to the DataFrame BEFORE converting to HTML
    styled_df = df[['text', 'label', 'score', 'start', 'end', 'category']].style.background_gradient(
        cmap='YlGnBu',
        subset=['score']
    ).format({'score': '{:.4f}'})
    # Styler.to_html has no `index=` / `classes=` options (unknown keywords
    # are only forwarded to the template), so hide the index on the Styler
    # and pass the CSS classes as a table attribute.
    styled_df = styled_df.hide(axis="index") if hasattr(styled_df, "hide") else styled_df.hide_index()
    entity_table_html = styled_df.to_html(table_attributes='class="table table-striped"')

    # 4. Construct the Final HTML
    safe_title = escape(report_title)
    html_content = f"""<!DOCTYPE html><html lang="en"><head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{safe_title}</title>
<style>
body {{ font-family: 'Inter', sans-serif; margin: 0; padding: 20px; background-color: #f4f4f9; color: #333; }}
.container {{ max-width: 1200px; margin: 0 auto; background-color: #ffffff; padding: 30px; border-radius: 12px; box-shadow: 0 4px 12px rgba(0,0,0,0.1); }}
h1 {{ color: #007bff; border-bottom: 3px solid #007bff; padding-bottom: 10px; margin-top: 0; }}
h2 {{ color: #007bff; margin-top: 30px; border-bottom: 1px solid #ddd; padding-bottom: 5px; }}
h3 {{ color: #555; margin-top: 20px; }}
.metadata {{ background-color: #e6f0ff; padding: 15px; border-radius: 8px; margin-bottom: 20px; font-size: 0.9em; }}
.chart-box {{ background-color: #f9f9f9; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.05); min-width: 0; margin-bottom: 20px; }}
table {{ width: 100%; border-collapse: collapse; margin-top: 15px; }}
/* Target the cells generated by pandas styling */
table td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
table th {{ border: 1px solid #ddd; padding: 8px; text-align: left; background-color: #f0f0f0; }}
.highlighted-text {{ border: 1px solid #888888; padding: 15px; border-radius: 5px; background-color: #ffffff; font-family: monospace; white-space: pre-wrap; margin-bottom: 20px; }}
</style>
</head>
<body>
<div class="container">
<h1>{safe_title}</h1>
<div class="metadata">
{branding_html}
<p><strong>Generated on:</strong> {time.strftime('%Y-%m-%d')}</p>
<p><strong>Processing Time:</strong> {elapsed_time:.2f} seconds</p>
</div>
<h2>1. Analyzed Text &amp; Extracted Entities</h2>
<h3>Original Text with Highlighted Entities</h3>
<div class="highlighted-text-container">
{highlighted_text_html}
</div>
<h2>2. Full Extracted Entities Table </h2>
{entity_table_html}
<h2>3. Data Visualizations</h2>
<h3>3.1 Entity Distribution Treemap</h3>
<div class="chart-box">{treemap_html}</div>
<h3>3.2 Comparative Charts (Pie, Category Count, Frequency) - *Stacked Vertically*</h3>
<div class="chart-box">{pie_html}</div>
<div class="chart-box">{bar_category_html}</div>
<h3>3.3 Most Frequent Entities</h3>
<div class="chart-box">{bar_freq_html}</div>
<h3>3.4 Entity Relationship Map (Edges = Same Sentence)</h3>
<div class="chart-box">{network_html}</div>
<h2>4. Topic Modelling</h2>
{topic_charts_html}
</div>
</body>
</html>
"""
    return html_content
def chunk_text(text, max_chunk_size=1500):
    """Cut *text* into consecutive chunks at sentence/paragraph boundaries.

    Returns a list of ``(chunk, offset)`` tuples where ``offset`` is the
    character position of the chunk in *text*. Chunks are filled up to
    ``max_chunk_size`` characters; a single segment longer than that is kept
    whole. Separators stay inside the chunks, so joining them gives *text*.
    """
    # The capture group keeps the separators in the result list.
    pieces = [piece for piece in re.split(r'(\n\n|(?<=[.!?])\s+)', text) if piece]

    result = []
    buffer = ""
    buffer_start = 0
    for piece in pieces:
        still_fits = len(buffer) + len(piece) <= max_chunk_size
        if still_fits or not buffer:
            buffer += piece
            continue
        # Buffer is full: emit it and start the next chunk with this piece.
        result.append((buffer, buffer_start))
        buffer_start += len(buffer)
        buffer = piece

    if buffer:
        result.append((buffer, buffer_start))
    return result
def process_chunked_text(text, labels, model):
    """Run entity prediction chunk by chunk and return document-level entities.

    *text* is split with ``chunk_text`` (chunks of at most 3500 characters),
    ``model.predict_entities(chunk, labels)`` is called per chunk, and each
    returned entity dict has its 'start'/'end' shifted in place by the
    chunk's offset so they index into the full *text*.
    """
    MAX_CHUNK_CHARS = 3500
    collected = []
    for piece, base_offset in chunk_text(text, max_chunk_size=MAX_CHUNK_CHARS):
        for found in model.predict_entities(piece, labels):
            # Chunk-relative -> document-relative positions.
            found['start'] = found['start'] + base_offset
            found['end'] = found['end'] + base_offset
            collected.append(found)
    return collected
# The page was already configured at the top of this script. Streamlit
# versions that allow only a single set_page_config() call per run raise
# StreamlitAPIException here; in that case keep the earlier configuration
# instead of crashing the whole app. Versions that accept repeated calls
# apply the wide layout as before.
from streamlit.errors import StreamlitAPIException

try:
    st.set_page_config(layout="wide", page_title="NER & Topic Report App")
except StreamlitAPIException:
    pass
# --- Conditional Mobile Warning CSS ---
# Injects the global style overrides plus a warning banner that the media
# queries show only on viewports up to 600px wide. The banner is a raw HTML
# block, where Markdown emphasis is not processed, so it uses <strong>.
st.markdown(
    """
<style>
/* FIX: Aggressive theme override to ensure visibility */
body {
background-color: #f0f2f6 !important;
color: #333333 !important;
}
[data-testid="stAppViewBlock"] {
background-color: #ffffff !important;
}
@media (max-width: 600px) {
#mobile-warning-container {
display: block;
background-color: #ffcccc;
color: #cc0000;
padding: 10px;
border-radius: 5px;
text-align: center;
margin-bottom: 20px;
font-weight: bold;
border: 1px solid #cc0000;
}
}
@media (min-width: 601px) {
#mobile-warning-container {
display: none;
}
}
[data-testid="stConfigurableTabs"] button {
color: #333333 !important;
background-color: #f0f0f0;
border: 1px solid #cccccc;
}
[data-testid="stConfigurableTabs"] button[aria-selected="true"] {
color: #FFFFFF !important;
background-color: #007bff;
border-bottom: 2px solid #007bff;
}
.streamlit-expanderHeader {
color: #007bff;
}
</style>
<div id="mobile-warning-container">
⚠️ <strong>Tip for Mobile Users:</strong> For the best viewing experience of the charts and tables, please switch your browser to <strong>"Desktop Site"</strong> view.
</div>
""",
    unsafe_allow_html=True,
)
# Page heading followed by two informational tabs: the embed snippet and
# the usage notes.
st.subheader("Entity and Topic Analysis Report Generator", divider="blue")
tab1, tab2 = st.tabs(["Embed", "Important Notes"])

with tab1, st.expander("Embed"):
    st.write("Use the following code to embed the DataHarvest web app on your website. Feel free to adjust the width and height values to fit your page.")
    # HTML snippet shown to the user for copy/paste.
    code = '''
<iframe
src="https://aiecosystem-dataharvest.hf.space"
frameborder="0"
width="850"
height="450"
></iframe>
'''
    st.code(code, language="html")

with tab2:
    expander = st.expander("**Important Notes**")
    expander.markdown("""
**Named Entities (Fixed Mode):** This DataHarvest web app predicts nine (9) fixed labels: "person", "country", "city", "organization", "date", "time", "cardinal", "money", "position".
**Results:** Results are compiled into a single, comprehensive **HTML report** and a **CSV file** for easy download and sharing.
**How to Use:** Type or paste your text into the text area below, then click the 'Analyze Text' button.
""")

# NOTE(review): the original nesting of this line was lost with the file's
# indentation; it is placed at top level (always visible) — confirm.
# NOTE(review): "[email protected]" looks like a scrubbed e-mail address —
# restore the real contact address.
st.markdown("For any errors or inquiries, please contact us at [[email protected]](mailto:[email protected])")
# --- Model Loading ---
@st.cache_resource(show_spinner=False)
def _load_ner_model_cached(labels_key):
    """Load the GLiNER model once per distinct label tuple and keep it cached."""
    # GLiNER model is loaded with constraints based on the active labels list
    return GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5", nested_ner=True, num_gen_sequences=2, gen_constraints=list(labels_key))


def load_ner_model(labels):
    """Return the GLiNER model for *labels*, loading it only on first use.

    The loaded model is cached across Streamlit reruns (keyed by the label
    sequence), so repeated analyses do not reload it. If loading fails, an
    error is shown in the app and the current script run is stopped, so this
    function does not return in that case.
    """
    try:
        return _load_ner_model_cached(tuple(labels))
    except Exception as e:
        st.error(f"Failed to load NER model. This may be due to a dependency issue or resource limits: {e}")
        st.stop()
# --- LONG DEFAULT TEXT ---
# Sample document pre-filled into the text area on first load. Built from
# adjacent string literals, so the result is one single-line string.
DEFAULT_TEXT = (
    "In June 2024, the founder, Dr. Emily Carter, officially announced a new, expansive partnership between "
    "TechSolutions Inc. and the European Space Agency (ESA). This strategic alliance represents a significant "
    "leap forward for commercial space technology across the entire **European Union**. The agreement, finalized "
    "on Monday in Paris, France, focuses specifically on jointly developing the next generation of the 'Astra' "
    "software platform. This version of the **Astra** platform is critical for processing and managing the vast amounts of data being sent "
    "back from the recent Mars rover mission. This project underscores the ESA's commitment to advancing "
    "space capabilities within the **European Union**. The core team, including lead engineer Marcus Davies, will hold "
    "their first collaborative workshop in Berlin, Germany, on August 15th. The community response on social "
    "media platform X (under the username @TechCEO) was overwhelmingly positive, with many major tech "
    "publications, including Wired Magazine, predicting a major impact on the space technology industry by the "
    "end of the year, further strengthening the technological standing of the **European Union**. The platform is designed to be compatible with both Windows and Linux operating systems. "
    "The initial funding, secured via a Series B round, totaled $50 million. Financial analysts from Morgan Stanley "
    "are closely monitoring the impact on TechSolutions Inc.'s Q3 financial reports, expected to be released to the "
    "general public by October 1st. The goal is to deploy the **Astra** v2 platform before the next solar eclipse event in 2026."
)
# -----------------------------------
# --- Session State Initialization (Custom Label Reinstatement) ---
# Every key is written only when it is not present yet, so values from
# earlier reruns of the same session are preserved.
_SESSION_DEFAULTS = {
    'show_results': False,
    'my_text_area': DEFAULT_TEXT,
    'last_text': "",
    'results_df': pd.DataFrame(),
    'elapsed_time': 0.0,
    'topic_results': None,
    'active_labels_list': FIXED_LABELS,
    'is_custom_mode': "Fixed Labels",  # Re-use for radio
    'custom_labels_input': DEFAULT_CUSTOM_LABELS,
    'num_topics_slider': 5,
    'num_top_words_slider': 10,
    'last_num_topics': None,
    'last_num_top_words': None,
    'last_active_labels': None,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
def clear_text():
    """Button callback: empty the text area and discard the stored results."""
    cleared_state = {
        'my_text_area': "",
        'show_results': False,
        'last_text': "",
        'results_df': pd.DataFrame(),
        'elapsed_time': 0.0,
        'topic_results': None,
    }
    for state_key, cleared_value in cleared_state.items():
        st.session_state[state_key] = cleared_value
| # --- Revised Text Area Input --- | |
| st.markdown("## βοΈ Text Input for Analysis") | |
| word_limit = 2000 | |
| text = st.text_area( | |
| f"Type or paste your text below (max {word_limit} words), and then press Ctrl + Enter", | |
| height=250, | |
| key='my_text_area', | |
| ) | |
| word_count = len(text.split()) | |
| st.markdown(f"**Word count:** {word_count}/{word_limit}") | |
# --- Custom/Fixed label selector ---
# Decides which entity labels are active for this run and publishes them in
# st.session_state.active_labels_list for the analysis step.
st.markdown("---")
st.markdown("### π·οΈ Entity Label Mode Selection")
mode = st.radio(
    "Select Entity Recognition Mode:",
    ["Fixed Labels", "Custom Labels"],
    key='is_custom_mode',
    horizontal=True,
    help="Fixed Labels use a predefined set. Custom Labels let you define your own."
)
if mode == "Fixed Labels":
    active_labels = FIXED_LABELS
    st.info(f"Fixed Labels active: **{', '.join(active_labels)}**")
else:
    # `value=` is deliberately not passed: the widget is keyed, so it already
    # takes its content from st.session_state['custom_labels_input'].
    # Supplying both a default and a session-state value for the same key
    # makes Streamlit emit a duplicate-default warning.
    # NOTE(review): assumes the key is seeded earlier in the script -- confirm.
    custom_labels_input = st.text_input(
        "Enter your custom labels, separated by commas (e.g., product, feature, ticket_id):",
        key='custom_labels_input',
        help="The labels must be non-empty and comma-separated."
    )
    # Normalise each label (trim + lowercase), drop blanks, and remove
    # duplicates while keeping the order in which the user typed them.
    normalized_labels = (label.strip().lower() for label in custom_labels_input.split(','))
    active_labels = list(dict.fromkeys(label for label in normalized_labels if label))
    if active_labels:
        st.info(f"Custom Labels active: **{', '.join(active_labels)}**")
    else:
        # An empty list keeps the Analyze button disabled.
        st.error("Please enter at least one custom label.")
st.session_state.active_labels_list = active_labels
# Snapshot the topic-model settings used for the next analysis run.
current_num_topics = st.session_state.num_topics_slider
current_num_top_words = st.session_state.num_top_words_slider
# --- Action buttons ---
# Two equal-width columns: "Analyze Text" on the left, "Clear text" on the right.
col_results, col_clear = st.columns([1, 1])
# The analyze button is disabled while no labels are active.
run_button = col_results.button(
    "Analyze Text",
    key='run_results',
    use_container_width=True,
    type="primary",
    disabled=not active_labels,
)
# Clearing is handled entirely by the clear_text callback.
col_clear.button("Clear text", on_click=clear_text, use_container_width=True)
# --- Results trigger and processing ---
# Runs when "Analyze Text" is pressed. Validates the input, then either reuses
# the results already held in session state or runs entity extraction and
# topic modelling and stores the fresh results.
if run_button:
    if text.strip() and word_count <= word_limit:
        # 1. Active labels were resolved by the label selector above.
        active_labels = st.session_state.active_labels_list
        previous_labels = st.session_state.last_active_labels or []
        # Re-run only when the text or the label set differs from the last run.
        should_rerun_full_analysis = (
            text.strip() != st.session_state.last_text.strip()
            or set(active_labels) != set(previous_labels)
        )
        if should_rerun_full_analysis:
            # 2. Full analysis. Texts above the threshold are processed in chunks.
            CHUNKING_THRESHOLD = 500
            should_chunk = word_count > CHUNKING_THRESHOLD
            mode_msg = "custom labels" if mode == "Custom Labels" else "fixed labels"
            if should_chunk:
                mode_msg += " with **chunking** for large text"
            with st.spinner(f"Analyzing text with {mode_msg}..."):
                # perf_counter is monotonic, so the measured duration cannot be
                # skewed by system clock adjustments.
                start_time = time.perf_counter()
                # 2a. Load model
                model = load_ner_model(active_labels)
                # 2b. Extract entities
                if should_chunk:
                    all_entities = process_chunked_text(text, active_labels, model)
                else:
                    all_entities = model.predict_entities(text, active_labels)
                end_time = time.perf_counter()
                elapsed_time = end_time - start_time
                # 2c. Prepare DataFrame
                df = pd.DataFrame(all_entities)
                if not df.empty:
                    def map_category(label):
                        """Return the display category for an entity label."""
                        if label in REVERSE_FIXED_CATEGORY_MAPPING:
                            return REVERSE_FIXED_CATEGORY_MAPPING[label]
                        if label in active_labels and label not in FIXED_LABELS:
                            # A label typed by the user that is not one of the fixed ones.
                            return 'User Defined Entities'
                        return 'Other'

                    df['category'] = df['label'].apply(map_category)
                    df['text'] = df['text'].apply(remove_trailing_punctuation)
                    # 2d. Topic modelling on the extracted entities
                    df_topic_data = perform_topic_modeling(
                        df,
                        num_topics=current_num_topics,
                        num_top_words=current_num_top_words,
                    )
                else:
                    # Keep the expected columns on an empty result so code that
                    # reads df['label'] or df['category'] does not raise KeyError.
                    df = pd.DataFrame(columns=['text', 'label', 'score', 'start', 'end', 'category'])
                    df_topic_data = None
            # 3. Save results to session state
            st.session_state.results_df = df
            st.session_state.topic_results = df_topic_data
            st.session_state.elapsed_time = elapsed_time
            st.session_state.last_text = text
            st.session_state.show_results = True
            # Store a copy so the cached label set cannot change behind our back.
            st.session_state.last_active_labels = list(active_labels)
            st.session_state.last_num_topics = current_num_topics
            st.session_state.last_num_top_words = current_num_top_words
        else:
            st.info("Results already calculated for the current text and settings.")
            st.session_state.show_results = True
    elif word_count > word_limit:
        st.error(f"Text too long! Please limit your input to {word_limit} words.")
        st.session_state.show_results = False
    elif not active_labels:
        st.error("Please ensure your custom label input is not empty.")
        st.session_state.show_results = False
    else:
        st.warning("Please enter some text to analyze.")
        st.session_state.show_results = False
# --- Display download links and results ---
# Renders everything derived from the stored analysis: highlighted text,
# per-category tables, charts, the co-occurrence graph, topic modelling
# controls, white-label settings and the CSV/HTML downloads.
if st.session_state.show_results:
    df = st.session_state.results_df
    df_topic_data = st.session_state.topic_results
    # A frame built from zero entities may have no 'label' column at all;
    # guard the lookup so the "no entities" warning below can be shown
    # instead of a KeyError.
    current_labels_in_df = df['label'].unique().tolist() if 'label' in df.columns else []
    entity_color_map = get_dynamic_color_map(current_labels_in_df, FIXED_ENTITY_COLOR_MAP)
    if df.empty:
        st.warning("No entities were found in the provided text with the current label set.")
    else:
        st.subheader("1. Analysis Results", divider="blue")

        def color_score_gradient(df_input):
            """Return a Pandas Styler that shades the 'score' column and shows it with four decimals."""
            return df_input.style.background_gradient(
                cmap='YlGnBu',
                subset=['score']
            ).format(
                {'score': '{:.4f}'}
            )

        # 1. Highlighted text inside an expander
        with st.expander(f"### 1. Analyzed Text with Highlighted Entities ({mode} Mode)", expanded=False):
            st.markdown(
                highlight_entities(st.session_state.last_text, df, entity_color_map),
                unsafe_allow_html=True
            )
        st.markdown(f"**Total Entities Found:** {len(df)}")

        # 2. Detailed entity analysis tabs
        st.markdown("### 2. Detailed Entity Analysis")
        tab_category_details, tab_treemap_viz = st.tabs(["π Entities Grouped by Category", "πΊοΈ Treemap Distribution"])

        # --- Section 2a: detailed tables by category ---
        with tab_category_details:
            st.markdown("#### Detailed Entities Table (Grouped by Category)")
            unique_categories = list(df['category'].unique())
            # Tab order: fixed categories in their defined order, then
            # 'User Defined Entities', then 'Other', then anything left over.
            preferred_order = dict.fromkeys([*FIXED_CATEGORY_MAPPING, 'User Defined Entities', 'Other'])
            ordered_categories = [cat for cat in preferred_order if cat in unique_categories]
            ordered_categories.extend(cat for cat in unique_categories if cat not in preferred_order)

            tabs_category = st.tabs(ordered_categories)
            for category, tab in zip(ordered_categories, tabs_category):
                # Highest-scoring entities first within each category.
                df_category = df[df['category'] == category][['text', 'label', 'score', 'start', 'end']].sort_values(by='score', ascending=False)
                styled_df_category = color_score_gradient(df_category)
                with tab:
                    st.markdown(f"##### {category} Entities ({len(df_category)} total)")
                    if not df_category.empty:
                        st.dataframe(styled_df_category, use_container_width=True)
                    else:
                        st.info(f"No entities of category **{category}** were found in the text.")
            with st.expander("See Glossary of tags"):
                st.write('''- **text**: ['entity extracted from your text data']- **label**: ['label (tag) assigned to a given extracted entity (custom or fixed)']- **category**: ['the grouping category (e.g., "Locations" or "User Defined Entities")']- **score**: ['accuracy score; how accurately a tag has been assigned to a given entity']- **start**: ['index of the start of the corresponding entity']- **end**: ['index of the end of the corresponding entity']''')

        # --- Section 2b: treemap visualization ---
        with tab_treemap_viz:
            st.markdown("#### Treemap: Entity Distribution")
            fig_treemap = px.treemap(
                df,
                path=[px.Constant("All Entities"), 'category', 'label', 'text'],
                values='score',
                color='label',
                color_discrete_sequence=px.colors.qualitative.Bold
            )
            fig_treemap.update_layout(margin=dict(t=10, l=10, r=10, b=10))
            st.plotly_chart(fig_treemap, use_container_width=True)

        # 3. Comparative charts
        st.markdown("---")
        st.markdown("### 3. Comparative Charts")
        col1, col2, col3 = st.columns(3)
        grouped_counts = df['category'].value_counts().reset_index()
        grouped_counts.columns = ['Category', 'Count']
        # A single category gets a sequential palette; several get a qualitative one.
        chart_color_seq = px.colors.qualitative.Pastel if len(grouped_counts) > 1 else px.colors.sequential.Cividis
        with col1:  # Pie chart
            fig_pie = px.pie(grouped_counts, values='Count', names='Category', title='Distribution of Entities by Category', color_discrete_sequence=chart_color_seq)
            fig_pie.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350)
            st.plotly_chart(fig_pie, use_container_width=True)
        with col2:  # Bar chart by category
            st.markdown("#### Entity Count by Category")
            fig_bar_category = px.bar(grouped_counts, x='Category', y='Count', color='Category', title='Total Entities per Category', color_discrete_sequence=chart_color_seq)
            fig_bar_category.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350, showlegend=False)
            st.plotly_chart(fig_bar_category, use_container_width=True)
        with col3:  # Bar chart for most frequent entities
            st.markdown("#### Top 10 Most Frequent Entities")
            word_counts = df['text'].value_counts().reset_index()
            word_counts.columns = ['Entity', 'Count']
            # Only entities that occur more than once are charted.
            repeating_entities = word_counts[word_counts['Count'] > 1].head(10)
            if not repeating_entities.empty:
                fig_bar_freq = px.bar(repeating_entities, x='Entity', y='Count', title='Top 10 Most Frequent Entities', color='Entity', color_discrete_sequence=px.colors.sequential.Viridis)
                fig_bar_freq.update_layout(margin=dict(t=30, b=10, l=10, r=10), height=350, showlegend=False)
                st.plotly_chart(fig_bar_freq, use_container_width=True)
            else:
                st.info("No entities were repeated enough for a Top 10 frequency chart.")

        # 4. Advanced analysis
        st.markdown("---")
        st.markdown("### 4. Advanced Analysis")

        # --- A. Network graph section ---
        with st.expander("π Entity Co-occurrence Network Graph", expanded=True):
            st.plotly_chart(generate_network_graph(df, st.session_state.last_text, entity_color_map), use_container_width=True)

        # --- B. Topic modeling section ---
        st.markdown("---")
        with st.container(border=True):
            st.markdown("#### π‘ Topic Modeling (LDA) Configuration and Results")
            st.markdown("Adjust the settings below and click **'Re-Run Topic Model'** to instantly update the visualization based on the extracted entities.")
            col_slider_topic, col_slider_words, col_rerun_btn = st.columns([1, 1, 0.5])
            with col_slider_topic:
                new_num_topics = st.slider(
                    "Number of Topics",
                    min_value=2,
                    max_value=10,
                    value=st.session_state.num_topics_slider,
                    step=1,
                    key='num_topics_slider_new',
                    help="The number of topics to discover (2 to 10)."
                )
            with col_slider_words:
                new_num_top_words = st.slider(
                    "Number of Top Words",
                    min_value=5,
                    max_value=20,
                    value=st.session_state.num_top_words_slider,
                    step=1,
                    key='num_top_words_slider_new',
                    help="The number of top words to display per topic (5 to 20)."
                )

            def rerun_topic_model():
                """Copy the slider values into the active settings and recompute topics for the stored entities."""
                st.session_state.num_topics_slider = st.session_state.num_topics_slider_new
                st.session_state.num_top_words_slider = st.session_state.num_top_words_slider_new
                if not st.session_state.results_df.empty:
                    df_topic_data_new = perform_topic_modeling(
                        df_entities=st.session_state.results_df,
                        num_topics=st.session_state.num_topics_slider,
                        num_top_words=st.session_state.num_top_words_slider
                    )
                    st.session_state.topic_results = df_topic_data_new
                    st.session_state.last_num_topics = st.session_state.num_topics_slider
                    st.session_state.last_num_top_words = st.session_state.num_top_words_slider

            with col_rerun_btn:
                # Fixed-height spacer placed above the button.
                st.markdown("<div style='height: 38px;'></div>", unsafe_allow_html=True)
                st.button("Re-Run Topic Model", on_click=rerun_topic_model, use_container_width=True, type="primary")
            st.markdown("---")
            st.markdown(f"""
            **Current LDA Parameters:**
            * Topics: **{st.session_state.num_topics_slider}**
            * Top Words: **{st.session_state.num_top_words_slider}**
            """)
            # Re-read the stored topic results before rendering the chart.
            df_topic_data = st.session_state.topic_results
            if df_topic_data is not None and not df_topic_data.empty:
                st.plotly_chart(create_topic_word_bubbles(df_topic_data), use_container_width=True)
                st.markdown("This chart visualizes the key words driving the identified topics, based on extracted entities.")
            else:
                st.info("Topic Modeling requires at least two unique entities with a minimum frequency to perform statistical analysis.")

        # 5. White-label configuration
        st.markdown("---")
        st.markdown("### 5. White-Label Report Configuration π¨")
        default_report_title = "Fixed Entity Analysis Report" if mode == "Fixed Labels" else "Custom Entity Analysis Report"
        custom_report_title = st.text_input(
            "Type Your Report Title (for HTML Report), and then press Enter.",
            value=default_report_title
        )
        custom_branding_text_input = st.text_area(
            "Type Your Brand Name or Tagline (Appears below the title in the report), and then press Enter.",
            value="Analysis powered by My Own Brand",
            key='custom_branding_input',
            help="Enter your brand name or a short tagline. This text will be automatically styled and included below the main title."
        )

        # 6. Downloads
        st.markdown("---")
        st.markdown("### 6. Downloads")
        col_csv, col_html = st.columns(2)

        # CSV download
        csv_buffer = generate_entity_csv(df)
        with col_csv:
            st.download_button(
                label="β¬οΈ Download Entities as CSV",
                data=csv_buffer,
                file_name="ner_entities_report.csv",
                mime="text/csv",
                use_container_width=True
            )

        # HTML download, carrying the white-label title and branding.
        # NOTE(review): the branding text is inserted into the HTML unescaped --
        # confirm whether markup in this field is intended.
        branding_to_pass = f'<p style="font-size: 1.1em; font-weight: 500;">{custom_branding_text_input}</p>'
        html_content = generate_html_report(
            df,
            st.session_state.last_text,
            st.session_state.elapsed_time,
            df_topic_data,
            entity_color_map,
            report_title=custom_report_title,
            branding_html=branding_to_pass
        )
        html_bytes = html_content.encode('utf-8')
        with col_html:
            st.download_button(
                label="β¬οΈ Download Full HTML Report",
                data=html_bytes,
                file_name="ner_topic_full_report.html",
                mime="text/html",
                use_container_width=True
            )