Tameem7 committed on
Commit e326dc2 · 1 Parent(s): be6e2a8

fix threading issue

Files changed (1): app.py (+8, -41)
app.py CHANGED
@@ -145,23 +145,14 @@ def classify_prompt(prompt: str) -> tuple[str, str]:
     return result_text, label
 
 
-def evaluate_test_set(max_samples: int = None, progress=gr.Progress()) -> str:
-    """Evaluate the model on the test dataset and return metrics.
-
-    Args:
-        max_samples: Maximum number of samples to evaluate. If None, evaluates on full dataset.
-    """
+def evaluate_test_set(progress=gr.Progress()) -> str:
+    """Evaluate the model on the test dataset and return metrics."""
     if trainer is None or test_tokenized is None:
         return "⚠️ Error: Model or test dataset not loaded."
 
-    # Limit dataset size if specified
+    # Use full test dataset
     eval_dataset = test_tokenized
-    if max_samples is not None and max_samples > 0:
-        max_samples = min(max_samples, len(test_tokenized))
-        eval_dataset = test_tokenized.select(range(max_samples))
-        print(f"Evaluating on {max_samples} samples (out of {len(test_tokenized)} total)")
-    else:
-        print(f"Evaluating on full test set ({len(test_tokenized)} samples)")
+    print(f"Evaluating on full test set ({len(test_tokenized)} samples)")
 
     # Ensure tqdm is enabled for progress tracking
     trainer.args.disable_tqdm = False
@@ -209,12 +200,7 @@ def evaluate_test_set(max_samples: int = None, progress=gr.Progress()) -> str:
 
     # Format results
     output = "## Test Set Evaluation Results\n\n"
-
-    # Show dataset size info
-    if max_samples is not None and max_samples < len(test_tokenized):
-        output += f"**Note:** Evaluated on {max_samples} samples (out of {len(test_tokenized)} total)\n\n"
-    else:
-        output += f"**Note:** Evaluated on full test set ({len(test_tokenized)} samples)\n\n"
+    output += f"**Note:** Evaluated on full test set ({len(test_tokenized)} samples)\n\n"
 
     # Main metrics
     output += "### Classification Metrics\n\n"
@@ -418,16 +404,8 @@ with app:
 
     # Tab 2: Test Set Evaluation
     with gr.Tab("📊 Evaluate Test Set"):
-        gr.Markdown("### Evaluate the model on the test dataset")
+        gr.Markdown("### Evaluate the model on the full test dataset")
         gr.Markdown("**Note:** Progress percentage will be shown during evaluation.")
-        gr.Markdown("**Tip:** Limit the number of samples for faster evaluation during testing.")
-
-        max_samples_input = gr.Textbox(
-            label="Maximum samples to evaluate (leave empty for full dataset)",
-            value="",
-            placeholder="Enter a number or leave empty for full dataset",
-            info="Set a limit to evaluate faster. Leave empty to evaluate on the full dataset."
-        )
 
         eval_btn = gr.Button(
             "Run Evaluation",
@@ -436,19 +414,9 @@
         )
         eval_output = gr.Markdown(label="Evaluation Results")
 
-        def run_evaluation(max_samples_str):
+        def run_evaluation():
             """Run evaluation and return result."""
-            # Handle empty string, None, or invalid values as "use full dataset"
-            max_samples_int = None
-            if max_samples_str and max_samples_str.strip():
-                try:
-                    max_samples_int = int(max_samples_str.strip())
-                    if max_samples_int < 1:
-                        max_samples_int = None
-                except (ValueError, TypeError):
-                    max_samples_int = None
-
-            result = evaluate_test_set(max_samples=max_samples_int)
+            result = evaluate_test_set()
             return result
 
         def enable_button():
@@ -460,7 +428,6 @@ with app:
             outputs=eval_btn
        ).then(
             fn=run_evaluation,
-            inputs=max_samples_input,
             outputs=eval_output
         ).then(
             fn=enable_button,
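
For context, the commit leaves the evaluation tab with a three-step event chain: disable the button, run the full-dataset evaluation with no inputs, then re-enable the button. Below is a minimal, self-contained sketch of that pattern, assuming Gradio 4.x; `fake_evaluate` is a stand-in for the app's `evaluate_test_set`, and the two lambdas stand in for the button handlers, which are not fully shown in this diff.

```python
# Sketch of the disable -> evaluate -> re-enable chain kept by this commit.
# Assumption: Gradio 4.x; fake_evaluate stands in for evaluate_test_set().
import time

import gradio as gr


def fake_evaluate() -> str:
    time.sleep(2)  # placeholder for trainer.evaluate() on the full test set
    return "## Test Set Evaluation Results\n\n(placeholder metrics)"


with gr.Blocks() as app:
    eval_btn = gr.Button("Run Evaluation", variant="primary")
    eval_output = gr.Markdown(label="Evaluation Results")

    eval_btn.click(
        fn=lambda: gr.Button(interactive=False),  # grey out while running
        outputs=eval_btn,
    ).then(
        fn=fake_evaluate,  # no inputs: always evaluates the full test set
        outputs=eval_output,
    ).then(
        fn=lambda: gr.Button(interactive=True),  # re-enable when done
        outputs=eval_btn,
    )

if __name__ == "__main__":
    app.launch()
```

Each `.then(...)` step runs only after the previous one finishes, so the button-disabling update reaches the UI before the blocking evaluation starts, and the button comes back regardless of how long the evaluation takes.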