Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -62,7 +62,7 @@ def create_empty_figure(title):
|
|
| 62 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
| 63 |
|
| 64 |
# Function to process and visualize log probs with interactive Plotly plots and pagination
|
| 65 |
-
def visualize_logprobs(json_input, prob_filter=-100000, page_size=100, page=0):
|
| 66 |
try:
|
| 67 |
# Parse the input (handles both JSON and Python dictionaries)
|
| 68 |
data = parse_input(json_input)
|
|
@@ -75,13 +75,13 @@ def visualize_logprobs(json_input, prob_filter=-100000, page_size=100, page=0):
|
|
| 75 |
else:
|
| 76 |
raise ValueError("Input must be a list or dictionary with 'content' key")
|
| 77 |
|
| 78 |
-
# Extract tokens, log probs, and top alternatives, skipping None or non-finite values with fixed filter
|
| 79 |
tokens = []
|
| 80 |
logprobs = []
|
| 81 |
top_alternatives = [] # List to store top 3 log probs (selected token + 2 alternatives)
|
| 82 |
for entry in content:
|
| 83 |
logprob = ensure_float(entry.get("logprob", None))
|
| 84 |
-
if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter:
|
| 85 |
tokens.append(entry["token"])
|
| 86 |
logprobs.append(logprob)
|
| 87 |
# Get top_logprobs, default to empty dict if None
|
|
@@ -105,7 +105,7 @@ def visualize_logprobs(json_input, prob_filter=-100000, page_size=100, page=0):
|
|
| 105 |
if not logprobs or not tokens:
|
| 106 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top 3 Token Log Probabilities"), create_empty_figure("Significant Probability Drops"), 1, 0)
|
| 107 |
|
| 108 |
-
# Paginate data for large inputs
|
| 109 |
total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
|
| 110 |
start_idx = page * page_size
|
| 111 |
end_idx = min((page + 1) * page_size, len(logprobs))
|
|
@@ -151,7 +151,7 @@ def visualize_logprobs(json_input, prob_filter=-100000, page_size=100, page=0):
|
|
| 151 |
table_data = []
|
| 152 |
for i, entry in enumerate(content[start_idx:end_idx]):
|
| 153 |
logprob = ensure_float(entry.get("logprob", None))
|
| 154 |
-
if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter:
|
| 155 |
token = entry["token"]
|
| 156 |
top_logprobs = entry["top_logprobs"]
|
| 157 |
# Ensure all values in top_logprobs are floats
|
|
@@ -277,10 +277,20 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
|
|
| 277 |
current_page_output = gr.Number(label="Current Page", interactive=False)
|
| 278 |
|
| 279 |
def update_page(json_input, current_page, action):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
if action == "prev" and current_page > 0:
|
| 281 |
current_page -= 1
|
| 282 |
elif action == "next":
|
| 283 |
-
total_pages = visualize_logprobs(json_input, -100000, 100, 0)[5] # Get total pages with fixed filter and page size
|
| 284 |
if current_page < total_pages - 1:
|
| 285 |
current_page += 1
|
| 286 |
return gr.update(value=current_page), gr.update(value=total_pages)
|
|
|
|
| 62 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
| 63 |
|
| 64 |
# Function to process and visualize log probs with interactive Plotly plots and pagination
|
| 65 |
+
def visualize_logprobs(json_input, page_size=100, page=0):
|
| 66 |
try:
|
| 67 |
# Parse the input (handles both JSON and Python dictionaries)
|
| 68 |
data = parse_input(json_input)
|
|
|
|
| 75 |
else:
|
| 76 |
raise ValueError("Input must be a list or dictionary with 'content' key")
|
| 77 |
|
| 78 |
+
# Extract tokens, log probs, and top alternatives, skipping None or non-finite values with fixed filter of -100000
|
| 79 |
tokens = []
|
| 80 |
logprobs = []
|
| 81 |
top_alternatives = [] # List to store top 3 log probs (selected token + 2 alternatives)
|
| 82 |
for entry in content:
|
| 83 |
logprob = ensure_float(entry.get("logprob", None))
|
| 84 |
+
if logprob is not None and math.isfinite(logprob) and logprob >= -100000:
|
| 85 |
tokens.append(entry["token"])
|
| 86 |
logprobs.append(logprob)
|
| 87 |
# Get top_logprobs, default to empty dict if None
|
|
|
|
| 105 |
if not logprobs or not tokens:
|
| 106 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top 3 Token Log Probabilities"), create_empty_figure("Significant Probability Drops"), 1, 0)
|
| 107 |
|
| 108 |
+
# Paginate data for large inputs
|
| 109 |
total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
|
| 110 |
start_idx = page * page_size
|
| 111 |
end_idx = min((page + 1) * page_size, len(logprobs))
|
|
|
|
| 151 |
table_data = []
|
| 152 |
for i, entry in enumerate(content[start_idx:end_idx]):
|
| 153 |
logprob = ensure_float(entry.get("logprob", None))
|
| 154 |
+
if logprob is not None and math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry and entry["top_logprobs"] is not None:
|
| 155 |
token = entry["token"]
|
| 156 |
top_logprobs = entry["top_logprobs"]
|
| 157 |
# Ensure all values in top_logprobs are floats
|
|
|
|
| 277 |
current_page_output = gr.Number(label="Current Page", interactive=False)
|
| 278 |
|
| 279 |
def update_page(json_input, current_page, action):
|
| 280 |
+
try:
|
| 281 |
+
# Safely get total_pages by trying to process the data
|
| 282 |
+
result = visualize_logprobs(json_input, 100, 0) # Use fixed page size and page 0
|
| 283 |
+
if isinstance(result[0], str) or result[0] is None: # Check if it's an error message or empty figure
|
| 284 |
+
total_pages = 1 # Default to 1 page if no data
|
| 285 |
+
else:
|
| 286 |
+
total_pages = result[5] # Extract total_pages from the result (index 5)
|
| 287 |
+
except Exception as e:
|
| 288 |
+
logger.error("Failed to calculate total pages: %s", str(e))
|
| 289 |
+
total_pages = 1 # Default to 1 page on error
|
| 290 |
+
|
| 291 |
if action == "prev" and current_page > 0:
|
| 292 |
current_page -= 1
|
| 293 |
elif action == "next":
|
|
|
|
| 294 |
if current_page < total_pages - 1:
|
| 295 |
current_page += 1
|
| 296 |
return gr.update(value=current_page), gr.update(value=total_pages)
|