NLarchive commited on
Commit
682c743
·
verified ·
1 Parent(s): 1d203c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +434 -299
app.py CHANGED
@@ -1,11 +1,14 @@
1
- import gradio as gr
 
2
  import requests
3
  import time
4
  import json
5
  import re
6
  import ast
7
- from typing import Dict, List, Union, Optional, Tuple
8
- from urllib.parse import urlparse
 
 
9
 
10
  def parse_huggingface_url(url: str) -> str:
11
  """
@@ -32,16 +35,14 @@ def parse_huggingface_url(url: str) -> str:
32
  if '.hf.space' in base_url:
33
  domain_match = re.search(r'https://([^.]+)\.hf\.space', base_url)
34
  if domain_match:
35
- space_name = domain_match.group(1)
36
- # Convert domain format back to HF spaces format
37
- # Example: {username}-{space-name} -> {username}/{space-name}
38
- parts = space_name.split('-')
39
- if len(parts) >= 2:
40
- username = parts[0]
41
- repo_name = '-'.join(parts[1:])
42
- hf_spaces_url = f"https://huggingface.co/spaces/{username}/{repo_name}"
43
  else:
44
- hf_spaces_url = f"https://huggingface.co/spaces/{space_name}"
45
  else:
46
  hf_spaces_url = "unknown"
47
  else:
@@ -55,14 +56,14 @@ def parse_huggingface_url(url: str) -> str:
55
  # Extract space name and create HF spaces URL
56
  domain_match = re.search(r'https://([^.]+)\.hf\.space', url)
57
  if domain_match:
58
- space_name = domain_match.group(1)
59
- parts = space_name.split('-')
60
- if len(parts) >= 2:
61
- username = parts[0]
62
- repo_name = '-'.join(parts[1:])
63
- hf_spaces_url = f"https://huggingface.co/spaces/{username}/{repo_name}"
64
  else:
65
- hf_spaces_url = f"https://huggingface.co/spaces/{space_name}"
66
  else:
67
  hf_spaces_url = "unknown"
68
 
@@ -73,12 +74,8 @@ def parse_huggingface_url(url: str) -> str:
73
  # Extract username and space name
74
  spaces_match = re.search(r'huggingface\.co/spaces/([^/]+)/([^/?]+)', url)
75
  if spaces_match:
76
- username = spaces_match.group(1)
77
- space_name = spaces_match.group(2)
78
-
79
- # Convert to hf.space domain format
80
- space_domain = f"{username}-{space_name}".lower()
81
- space_url = f"https://{space_domain}.hf.space"
82
  mcp_endpoint = f"{space_url}/gradio_api/mcp/sse"
83
  else:
84
  space_url = "unknown"
@@ -101,11 +98,21 @@ def parse_huggingface_url(url: str) -> str:
101
 
102
  return json.dumps(result, indent=2)
103
 
104
- def parse_huggingface_url_with_summary(url: str) -> tuple:
105
  """Parse URL and return both markdown summary and JSON."""
 
 
 
 
 
 
106
  json_result = parse_huggingface_url(url)
 
 
107
  parsed_info = json.loads(json_result)
108
  md_summary = format_url_summary(parsed_info)
 
 
109
  return md_summary, json_result
110
 
111
  def format_url_summary(parsed_info: dict) -> str:
@@ -117,9 +124,9 @@ def format_url_summary(parsed_info: dict) -> str:
117
  md += "✅ **Status:** Valid URL format\n\n"
118
  md += "## 📋 Extracted URLs\n\n"
119
  if parsed_info['hf_spaces_url'] != "unknown":
120
- md += f"- **HuggingFace Space:** [{parsed_info['hf_spaces_url']}]({parsed_info['hf_spaces_url']})\n"
121
- md += f"- **Space URL:** [{parsed_info['space_url']}]({parsed_info['space_url']})\n"
122
- md += f"- **MCP Endpoint:** [{parsed_info['mcp_endpoint']}]({parsed_info['mcp_endpoint']})\n\n"
123
 
124
  md += "## ⚙️ MCP Client Configuration\n\n"
125
  md += "Copy this configuration for your MCP client:\n\n"
@@ -138,7 +145,7 @@ def format_url_summary(parsed_info: dict) -> str:
138
 
139
  return md
140
 
141
- def check_single_server_health(url: str) -> tuple:
142
  """
143
  Check health of a single MCP server from any URL format.
144
 
@@ -148,6 +155,12 @@ def check_single_server_health(url: str) -> tuple:
148
  Returns:
149
  tuple: (markdown_summary, json_data)
150
  """
 
 
 
 
 
 
151
  parsed_info = json.loads(parse_huggingface_url(url))
152
 
153
  if not parsed_info["is_valid"]:
@@ -169,10 +182,11 @@ def check_single_server_health(url: str) -> tuple:
169
  }
170
 
171
  # Test 1: Check space URL health
 
172
  if parsed_info["space_url"] != "unknown":
173
  start_time = time.time()
174
  try:
175
- response = requests.get(parsed_info["space_url"], timeout=10)
176
  response_time = round((time.time() - start_time) * 1000, 2)
177
 
178
  results["space_health"] = {
@@ -192,6 +206,7 @@ def check_single_server_health(url: str) -> tuple:
192
  }
193
 
194
  # Test 2: Check MCP endpoint health
 
195
  start_time = time.time()
196
  try:
197
  response = requests.get(parsed_info["mcp_endpoint"], timeout=8, stream=True)
@@ -213,6 +228,8 @@ def check_single_server_health(url: str) -> tuple:
213
  "error": str(e)
214
  }
215
 
 
 
216
  # Determine overall status
217
  space_ok = results["space_health"] is None or results["space_health"]["accessible"]
218
  mcp_ok = results["mcp_health"]["accessible"]
@@ -229,6 +246,7 @@ def check_single_server_health(url: str) -> tuple:
229
  # Generate markdown summary
230
  md = format_health_summary(results)
231
 
 
232
  return md, json.dumps(results, indent=2)
233
 
234
  def format_health_summary(results: dict) -> str:
@@ -250,7 +268,7 @@ def format_health_summary(results: dict) -> str:
250
  sh = results["space_health"]
251
  status_icon = "✅" if sh["accessible"] else "❌"
252
  md += f"## 🌐 Space Health {status_icon}\n\n"
253
- md += f"- **URL:** [{sh['url']}]({sh['url']})\n"
254
  md += f"- **Status Code:** {sh.get('status_code', 'N/A')}\n"
255
  md += f"- **Response Time:** {sh['response_time_ms']}ms\n"
256
  if "error" in sh:
@@ -261,7 +279,7 @@ def format_health_summary(results: dict) -> str:
261
  mh = results["mcp_health"]
262
  status_icon = "✅" if mh["accessible"] else "❌"
263
  md += f"## 🔧 MCP Endpoint Health {status_icon}\n\n"
264
- md += f"- **URL:** [{mh['url']}]({mh['url']})\n"
265
  md += f"- **Status Code:** {mh.get('status_code', 'N/A')}\n"
266
  md += f"- **Response Time:** {mh['response_time_ms']}ms\n"
267
  if "error" in mh:
@@ -299,21 +317,13 @@ def extract_functions_from_source(source_code: str) -> List[Tuple[str, str, List
299
 
300
  for node in ast.walk(tree):
301
  if isinstance(node, ast.FunctionDef):
302
- # Skip private functions and common non-tool functions
303
- if node.name.startswith('_') or node.name in ['main', 'launch', 'setup']:
304
- continue
305
-
306
- # Get function name
307
  func_name = node.name
 
308
 
309
- # Get docstring
310
- docstring = ast.get_docstring(node, clean=True) or ""
311
-
312
- # Get parameters (exclude 'self' for methods)
313
  parameters = []
314
  for arg in node.args.args:
315
- if arg.arg != 'self':
316
- parameters.append(arg.arg)
317
 
318
  functions.append((func_name, docstring, parameters))
319
 
@@ -323,7 +333,7 @@ def extract_functions_from_source(source_code: str) -> List[Tuple[str, str, List
323
 
324
  return functions
325
 
326
- def discover_server_tools(url: str) -> tuple:
327
  """
328
  Discover available MCP tools from a server.
329
 
@@ -333,50 +343,49 @@ def discover_server_tools(url: str) -> tuple:
333
  Returns:
334
  tuple: (markdown_summary, json_data)
335
  """
 
 
 
 
 
 
336
  parsed_info = json.loads(parse_huggingface_url(url))
337
 
338
  if not parsed_info["is_valid"]:
339
  result = {
340
  "original_url": url,
341
  "status": "invalid_url",
342
- "error": "Could not parse URL format",
343
- "tools": [],
344
- "tool_count": 0
345
  }
346
- md = "# ❌ Tools Discovery Failed\n\nCould not parse URL format. Please check the URL."
347
  return md, json.dumps(result, indent=2)
348
 
349
  tools = []
350
  discovery_methods = []
351
 
352
  # Method: Analyze app.py source code
 
353
  try:
 
354
  if parsed_info["hf_spaces_url"] != "unknown":
355
- app_py_url = f"{parsed_info['hf_spaces_url']}/raw/main/app.py"
356
- response = requests.get(app_py_url, timeout=8)
357
- discovery_methods.append(f"App.py source code: HTTP {response.status_code}")
358
-
359
  if response.status_code == 200:
360
- source_code = response.text
361
-
362
- # Use AST to extract functions
363
- functions = extract_functions_from_source(source_code)
364
-
365
- if functions:
366
- discovery_methods.append(f"Found {len(functions)} functions in source code")
367
-
368
- for func_name, docstring, parameters in functions:
369
- tool_data = {
370
- "name": func_name,
371
- "description": docstring if docstring else "No description available",
372
- "parameters": parameters,
373
- "parameter_count": len(parameters)
374
- }
375
- tools.append(tool_data)
376
- else:
377
- discovery_methods.append("No functions found in source code")
378
  except Exception as e:
379
- discovery_methods.append(f"Source code analysis failed: {str(e)}")
 
 
380
 
381
  # Prepare result
382
  result = {
@@ -390,12 +399,12 @@ def discover_server_tools(url: str) -> tuple:
390
  }
391
 
392
  if not tools:
393
- result["error"] = "No tools could be discovered"
394
- result["note"] = "Check if the space has functions with proper definitions"
395
 
396
  # Generate markdown summary
397
  md = format_tools_summary(result)
398
 
 
399
  return md, json.dumps(result, indent=2)
400
 
401
  def format_tools_summary(result: dict) -> str:
@@ -405,52 +414,27 @@ def format_tools_summary(result: dict) -> str:
405
  if result["status"] == "success":
406
  md += f"✅ **Status:** Found {result['tool_count']} tools\n\n"
407
 
408
- if result["tools"]:
409
- md += "## 🛠️ Available Tools\n\n"
410
- for i, tool in enumerate(result["tools"], 1):
411
- md += f"### {i}. `{tool['name']}`\n\n"
412
- if tool["description"].strip():
413
- # Clean up description and limit length
414
- desc = tool["description"][:200] + "..." if len(tool["description"]) > 200 else tool["description"]
415
- md += f"**Description:** {desc}\n\n"
416
- else:
417
- md += "**Description:** *No description available*\n\n"
418
-
419
- if tool["parameters"]:
420
- md += f"**Parameters:** `{', '.join(tool['parameters'])}`\n\n"
421
- else:
422
- md += "**Parameters:** *None*\n\n"
423
-
424
- md += "## 📋 Quick Reference\n\n"
425
- md += f"**Tool Names:** {', '.join([f'`{name}`' for name in result['tool_names']])}\n\n"
426
-
427
- md += "## ⚙️ MCP Client Configuration\n\n"
428
- md += "Add this to your MCP client config:\n\n"
429
- md += "```json\n"
430
- md += "{\n"
431
- md += ' "mcpServers": {\n'
432
- md += ' "gradio_server": {\n'
433
- md += f' "url": "{result["mcp_endpoint"]}"\n'
434
- md += ' }\n'
435
- md += ' }\n'
436
- md += "}\n"
437
- md += "```\n"
438
 
439
  else:
440
  md += "❌ **Status:** No tools found\n\n"
441
- if "error" in result:
442
- md += f"**Error:** {result['error']}\n\n"
443
- if "note" in result:
444
- md += f"**Note:** {result['note']}\n\n"
445
 
446
  if result.get("discovery_methods"):
447
- md += "## 🔍 Discovery Methods\n\n"
448
  for method in result["discovery_methods"]:
449
  md += f"- {method}\n"
450
 
451
  return md
452
 
453
- def monitor_multiple_servers(urls_text: str) -> tuple:
454
  """
455
  Monitor health and tools of multiple MCP servers simultaneously.
456
 
@@ -460,45 +444,63 @@ def monitor_multiple_servers(urls_text: str) -> tuple:
460
  Returns:
461
  tuple: (markdown_summary, json_data)
462
  """
 
 
463
  if not urls_text.strip():
464
- result = {"error": "No URLs provided", "servers": []}
465
- md = "# ❌ No URLs Provided\n\nPlease provide at least one URL to monitor."
 
 
 
 
466
  return md, json.dumps(result, indent=2)
467
 
 
468
  urls = [url.strip() for url in urls_text.strip().split('\n') if url.strip()]
469
 
470
  if not urls:
471
- result = {"error": "No valid URLs found", "servers": []}
472
- md = "# ❌ No Valid URLs Found\n\nPlease check your URL format."
 
 
 
 
473
  return md, json.dumps(result, indent=2)
474
 
475
  results = []
476
 
477
- for url in urls:
 
 
 
478
  try:
479
- # Get health info
 
480
  _, health_json = check_single_server_health(url)
481
- health_info = json.loads(health_json)
482
 
483
- # Get tools info
484
  _, tools_json = discover_server_tools(url)
485
- tools_info = json.loads(tools_json)
486
 
487
- # Combine information
488
  server_result = {
489
  "url": url,
490
- "health": health_info,
491
- "tools": tools_info
 
492
  }
493
-
494
  results.append(server_result)
495
 
496
  except Exception as e:
 
497
  results.append({
498
  "url": url,
499
- "error": str(e)
 
 
500
  })
501
 
 
 
502
  final_result = {
503
  "servers": results,
504
  "total_servers": len(urls),
@@ -508,6 +510,7 @@ def monitor_multiple_servers(urls_text: str) -> tuple:
508
  # Generate markdown summary
509
  md = format_multiple_servers_summary(final_result)
510
 
 
511
  return md, json.dumps(final_result, indent=2)
512
 
513
  def format_multiple_servers_summary(result: dict) -> str:
@@ -520,45 +523,18 @@ def format_multiple_servers_summary(result: dict) -> str:
520
  total_tools = 0
521
 
522
  for i, server in enumerate(result["servers"], 1):
523
- md += f"## {i}. Server Report\n\n"
524
- md += f"**URL:** `{server['url']}`\n\n"
525
-
526
- if "error" in server:
527
- md += f"❌ **Error:** {server['error']}\n\n"
528
- continue
529
-
530
- # Health summary
531
- health = server.get("health", {})
532
- overall_status = health.get("overall_status", "unknown")
533
- status_icons = {
534
- "healthy": "🟢",
535
- "mcp_only": "🟡",
536
- "space_only": "🟠",
537
- "unreachable": "🔴"
538
- }
539
- icon = status_icons.get(overall_status, "❓")
540
-
541
- md += f"**Health:** {icon} {overall_status.replace('_', ' ').title()}\n"
542
-
543
- if overall_status == "healthy":
544
  healthy_count += 1
545
 
546
- # MCP endpoint response time
547
- mcp_health = health.get("mcp_health", {})
548
- if mcp_health.get("response_time_ms"):
549
- md += f"**MCP Response Time:** {mcp_health['response_time_ms']}ms\n"
550
-
551
- # Tools summary
552
- tools = server.get("tools", {})
553
- tool_count = tools.get("tool_count", 0)
554
- total_tools += tool_count
555
-
556
- if tool_count > 0:
557
- md += f"**Tools:** {tool_count} available (`{', '.join(tools.get('tool_names', []))}`)\n"
558
- else:
559
- md += f"**Tools:** None found\n"
560
 
561
- md += "\n"
 
 
 
 
562
 
563
  # Overall summary
564
  md += "## 📈 Summary\n\n"
@@ -566,96 +542,211 @@ def format_multiple_servers_summary(result: dict) -> str:
566
  md += f"- **Total Tools Available:** {total_tools}\n"
567
 
568
  if healthy_count > 0:
569
- md += f"- **Success Rate:** {(healthy_count/result['total_servers']*100):.1f}%\n"
570
 
571
  return md
572
 
573
- def validate_mcp_endpoint(url: str) -> tuple:
574
  """
575
- Validate that a URL is a working MCP endpoint.
576
 
577
  Args:
578
- url (str): URL to validate as MCP endpoint
579
 
580
  Returns:
581
  tuple: (markdown_summary, json_data)
582
  """
 
 
 
 
 
 
583
  parsed_info = json.loads(parse_huggingface_url(url))
584
 
585
  validation_result = {
586
  "original_url": url,
587
  "is_valid_mcp": False,
588
- "connection_config": None
 
 
 
 
589
  }
590
 
591
- if not parsed_info["is_valid"]:
592
- validation_result["error"] = "Invalid URL format"
593
- md = "# ❌ MCP Validation Failed\n\nInvalid URL format. Please check the URL."
594
  return md, json.dumps(validation_result, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
 
596
- # Test MCP endpoint
597
  try:
598
- response = requests.get(parsed_info["mcp_endpoint"], timeout=8, stream=True)
 
599
 
 
 
 
 
600
  if response.status_code == 200:
601
- validation_result["is_valid_mcp"] = True
602
- validation_result["connection_config"] = {
603
- "mcp_endpoint": parsed_info["mcp_endpoint"],
604
- "mcp_client_config": {
605
  "mcpServers": {
606
- "gradio_server": {
607
- "url": parsed_info["mcp_endpoint"]
608
  }
609
  }
610
  }
611
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  else:
613
- validation_result["error"] = f"MCP endpoint returned HTTP {response.status_code}"
614
- except Exception as e:
615
- validation_result["error"] = f"Cannot connect to MCP endpoint: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
616
 
617
  # Generate markdown summary
618
  md = format_validation_summary(validation_result)
619
 
 
620
  return md, json.dumps(validation_result, indent=2)
621
 
622
  def format_validation_summary(result: dict) -> str:
623
  """Generate markdown summary for MCP validation results."""
624
  md = f"# ✅ MCP Endpoint Validation\n\n"
625
- md += f"**URL:** `{result['original_url']}`\n\n"
 
 
 
 
626
 
627
  if result["is_valid_mcp"]:
628
- md += "🟢 **Status:** Valid MCP Endpoint\n\n"
629
- md += "This URL is a working MCP endpoint and can be used with MCP clients.\n\n"
630
 
631
- md += "## ⚙️ Configuration\n\n"
632
- md += "Copy this configuration for your MCP client:\n\n"
 
 
 
 
 
 
 
 
 
 
633
  md += "```json\n"
634
- config = result["connection_config"]["mcp_client_config"]
635
- md += json.dumps(config, indent=2)
636
- md += "\n```\n\n"
637
-
638
- md += "## 📁 Configuration File Locations\n\n"
639
- md += "Save the configuration in the appropriate location for your MCP client:\n\n"
640
- md += "### Claude Desktop\n"
641
- md += "- **macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`\n"
642
- md += "- **Windows:** `%APPDATA%\\Claude\\claude_desktop_config.json`\n\n"
643
- md += "### Cursor IDE\n"
644
- md += "- **macOS:** `~/Library/Application Support/Cursor/User/globalStorage/anysphere.cursor-mcp/settings.json`\n"
645
- md += "- **Windows:** `%APPDATA%\\Cursor\\User\\globalStorage\\anysphere.cursor-mcp\\settings.json`\n\n"
646
- md += "### Cline (VS Code Extension)\n"
647
- md += "- Configure through VS Code settings or the Cline extension settings\n\n"
648
- md += "### Other MCP Clients\n"
649
- md += "- Refer to your specific MCP client's documentation for configuration file location\n"
650
 
651
  else:
652
- md += "❌ **Status:** Invalid MCP Endpoint\n\n"
653
- if "error" in result:
654
- md += f"**Error:** {result['error']}\n\n"
655
- md += "This URL is not a valid MCP endpoint. Please check the URL or ensure the server supports MCP.\n"
656
-
 
 
 
 
 
 
 
 
 
 
657
  return md
658
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
659
  # Default URLs for testing
660
  DEFAULT_URLS = """https://huggingface.co/spaces/abidlabs/mcp-tools"""
661
 
@@ -665,125 +756,169 @@ with gr.Blocks(title="🚀 MCP Server Health Monitor") as demo:
665
  gr.Markdown("Monitor and analyze Hugging Face Spaces configured as MCP servers")
666
 
667
  with gr.Tabs():
668
- # Multi-Server Monitor Tab
669
- with gr.Tab("📊 Multi-Server Monitor"):
670
- gr.Markdown("### Monitor multiple MCP servers simultaneously")
671
 
672
- with gr.Column():
673
- multi_input = gr.Textbox(
674
- label="Server URLs (one per line)",
675
- lines=5,
676
- value=DEFAULT_URLS,
677
- placeholder="Enter HF Space URLs, one per line\nExample:\nhttps://huggingface.co/spaces/abidlabs/mcp-tools\nhttps://user-space.hf.space"
678
  )
679
- multi_submit = gr.Button("🔍 Monitor Servers", variant="primary", size="lg")
680
-
681
- with gr.Column():
682
- multi_md = gr.Markdown(label="Summary Report")
683
-
684
- with gr.Column():
685
- multi_json = gr.JSON(label="Detailed JSON Data")
686
 
687
- multi_submit.click(
688
- fn=monitor_multiple_servers,
689
- inputs=multi_input,
690
- outputs=[multi_md, multi_json]
 
 
 
691
  )
692
 
693
- # Single Server Health Tab
694
- with gr.Tab("🏥 Single Server Health"):
695
- gr.Markdown("### Check health of a single MCP server")
696
 
697
- with gr.Column():
698
- single_input = gr.Textbox(
699
- label="Server URL",
700
- value="https://huggingface.co/spaces/abidlabs/mcp-tools",
701
- placeholder="Enter HF Space URL (any format supported)"
702
  )
703
- single_submit = gr.Button("🩺 Check Health", variant="primary", size="lg")
704
-
705
- with gr.Column():
706
- single_md = gr.Markdown(label="Health Summary")
707
-
708
- with gr.Column():
709
- single_json = gr.JSON(label="Detailed JSON Data")
710
 
711
- single_submit.click(
712
- fn=check_single_server_health,
713
- inputs=single_input,
714
- outputs=[single_md, single_json]
 
 
 
715
  )
716
 
717
- # Tools Discovery Tab
718
- with gr.Tab("🔧 Tools Discovery"):
719
  gr.Markdown("### Discover available MCP tools from a server")
720
 
721
- with gr.Column():
722
- tools_input = gr.Textbox(
723
  label="Server URL",
724
- value="https://abidlabs-mcp-tools.hf.space",
725
- placeholder="Enter HF Space URL to analyze for MCP tools"
726
  )
727
- tools_submit = gr.Button("🛠️ Discover Tools", variant="primary", size="lg")
728
-
729
- with gr.Column():
730
- tools_md = gr.Markdown(label="Tools Summary")
731
-
732
- with gr.Column():
733
- tools_json = gr.JSON(label="Detailed JSON Data")
734
 
735
- tools_submit.click(
736
- fn=discover_server_tools,
737
- inputs=tools_input,
738
- outputs=[tools_md, tools_json]
739
  )
740
 
741
- # MCP Validator Tab
742
- with gr.Tab(" MCP Validator"):
743
- gr.Markdown("### Validate if a URL is a working MCP endpoint")
744
 
745
- with gr.Column():
746
- validator_input = gr.Textbox(
747
- label="URL to Validate",
748
- value="https://huggingface.co/spaces/abidlabs/mcp-tools",
749
- placeholder="Enter URL to validate as MCP endpoint"
750
- )
751
- validator_submit = gr.Button(" Validate Endpoint", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
752
 
753
- with gr.Column():
754
- validator_md = gr.Markdown(label="Validation Summary")
 
 
 
 
 
 
 
 
 
 
 
755
 
756
- with gr.Column():
757
- validator_json = gr.JSON(label="Detailed JSON Data")
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
- validator_submit.click(
760
- fn=validate_mcp_endpoint,
761
- inputs=validator_input,
762
- outputs=[validator_md, validator_json]
763
  )
764
 
765
- # URL Parser Tab
766
- with gr.Tab("🔍 URL Parser"):
767
- gr.Markdown("### Parse and analyze HuggingFace URL formats")
768
 
769
- with gr.Column():
770
- parser_input = gr.Textbox(
771
- label="URL to Parse",
772
- value="https://huggingface.co/spaces/abidlabs/mcp-tools",
773
- placeholder="Enter any HF Space URL format to parse"
774
  )
775
- parser_submit = gr.Button("🔍 Parse URL", variant="primary", size="lg")
776
-
777
- with gr.Column():
778
- parser_md = gr.Markdown(label="Parsing Summary")
779
-
780
- with gr.Column():
781
- parser_json = gr.JSON(label="Detailed JSON Data")
782
 
783
- parser_submit.click(
784
- fn=parse_huggingface_url_with_summary,
785
- inputs=parser_input,
786
- outputs=[parser_md, parser_json]
787
  )
788
 
789
  if __name__ == "__main__":
 
1
+ from typing import Dict, List, Union, Optional, Tuple
2
+ from urllib.parse import urlparse
3
  import requests
4
  import time
5
  import json
6
  import re
7
  import ast
8
+ import gradio as gr
9
+
10
+ # Import the scraper function
11
+ from scrape_and_monitor_hf_mcp_servers import scrape_and_monitor_hf_mcp_servers, SORT_OPTIONS
12
 
13
  def parse_huggingface_url(url: str) -> str:
14
  """
 
35
  if '.hf.space' in base_url:
36
  domain_match = re.search(r'https://([^.]+)\.hf\.space', base_url)
37
  if domain_match:
38
+ space_identifier = domain_match.group(1)
39
+ # Try to split username-spacename
40
+ if '-' in space_identifier:
41
+ parts = space_identifier.split('-', 1)
42
+ username, space_name = parts[0], parts[1]
43
+ hf_spaces_url = f"https://huggingface.co/spaces/{username}/{space_name}"
 
 
44
  else:
45
+ hf_spaces_url = "unknown"
46
  else:
47
  hf_spaces_url = "unknown"
48
  else:
 
56
  # Extract space name and create HF spaces URL
57
  domain_match = re.search(r'https://([^.]+)\.hf\.space', url)
58
  if domain_match:
59
+ space_identifier = domain_match.group(1)
60
+ # Try to split username-spacename
61
+ if '-' in space_identifier:
62
+ parts = space_identifier.split('-', 1)
63
+ username, space_name = parts[0], parts[1]
64
+ hf_spaces_url = f"https://huggingface.co/spaces/{username}/{space_name}"
65
  else:
66
+ hf_spaces_url = "unknown"
67
  else:
68
  hf_spaces_url = "unknown"
69
 
 
74
  # Extract username and space name
75
  spaces_match = re.search(r'huggingface\.co/spaces/([^/]+)/([^/?]+)', url)
76
  if spaces_match:
77
+ username, space_name = spaces_match.groups()
78
+ space_url = f"https://{username}-{space_name}.hf.space"
 
 
 
 
79
  mcp_endpoint = f"{space_url}/gradio_api/mcp/sse"
80
  else:
81
  space_url = "unknown"
 
98
 
99
  return json.dumps(result, indent=2)
100
 
101
+ def parse_huggingface_url_with_summary(url: str, progress=gr.Progress()) -> tuple:
102
  """Parse URL and return both markdown summary and JSON."""
103
+ progress(0, desc="🔍 Starting URL parsing...")
104
+
105
+ if not url.strip():
106
+ return "# ❌ No URL Provided\n\nPlease enter a URL to parse.", "{}"
107
+
108
+ progress(0.3, desc="🔍 Analyzing URL format...")
109
  json_result = parse_huggingface_url(url)
110
+
111
+ progress(0.7, desc="🔍 Generating summary...")
112
  parsed_info = json.loads(json_result)
113
  md_summary = format_url_summary(parsed_info)
114
+
115
+ progress(1.0, desc="✅ URL parsing complete!")
116
  return md_summary, json_result
117
 
118
  def format_url_summary(parsed_info: dict) -> str:
 
124
  md += "✅ **Status:** Valid URL format\n\n"
125
  md += "## 📋 Extracted URLs\n\n"
126
  if parsed_info['hf_spaces_url'] != "unknown":
127
+ md += f"- **HF Spaces URL:** `{parsed_info['hf_spaces_url']}`\n"
128
+ md += f"- **Space URL:** `{parsed_info['space_url']}`\n"
129
+ md += f"- **MCP Endpoint:** `{parsed_info['mcp_endpoint']}`\n\n"
130
 
131
  md += "## ⚙️ MCP Client Configuration\n\n"
132
  md += "Copy this configuration for your MCP client:\n\n"
 
145
 
146
  return md
147
 
148
+ def check_single_server_health(url: str, progress=gr.Progress()) -> tuple:
149
  """
150
  Check health of a single MCP server from any URL format.
151
 
 
155
  Returns:
156
  tuple: (markdown_summary, json_data)
157
  """
158
+ progress(0, desc="🏥 Starting health check...")
159
+
160
+ if not url.strip():
161
+ return "# ❌ No URL Provided\n\nPlease enter a URL to check.", "{}"
162
+
163
+ progress(0.1, desc="🏥 Parsing URL...")
164
  parsed_info = json.loads(parse_huggingface_url(url))
165
 
166
  if not parsed_info["is_valid"]:
 
182
  }
183
 
184
  # Test 1: Check space URL health
185
+ progress(0.3, desc="🌐 Checking space accessibility...")
186
  if parsed_info["space_url"] != "unknown":
187
  start_time = time.time()
188
  try:
189
+ response = requests.get(parsed_info["space_url"], timeout=8)
190
  response_time = round((time.time() - start_time) * 1000, 2)
191
 
192
  results["space_health"] = {
 
206
  }
207
 
208
  # Test 2: Check MCP endpoint health
209
+ progress(0.6, desc="🔧 Checking MCP endpoint...")
210
  start_time = time.time()
211
  try:
212
  response = requests.get(parsed_info["mcp_endpoint"], timeout=8, stream=True)
 
228
  "error": str(e)
229
  }
230
 
231
+ progress(0.9, desc="📊 Analyzing results...")
232
+
233
  # Determine overall status
234
  space_ok = results["space_health"] is None or results["space_health"]["accessible"]
235
  mcp_ok = results["mcp_health"]["accessible"]
 
246
  # Generate markdown summary
247
  md = format_health_summary(results)
248
 
249
+ progress(1.0, desc="✅ Health check complete!")
250
  return md, json.dumps(results, indent=2)
251
 
252
  def format_health_summary(results: dict) -> str:
 
268
  sh = results["space_health"]
269
  status_icon = "✅" if sh["accessible"] else "❌"
270
  md += f"## 🌐 Space Health {status_icon}\n\n"
271
+ md += f"- **URL:** `{sh['url']}`\n"
272
  md += f"- **Status Code:** {sh.get('status_code', 'N/A')}\n"
273
  md += f"- **Response Time:** {sh['response_time_ms']}ms\n"
274
  if "error" in sh:
 
279
  mh = results["mcp_health"]
280
  status_icon = "✅" if mh["accessible"] else "❌"
281
  md += f"## 🔧 MCP Endpoint Health {status_icon}\n\n"
282
+ md += f"- **URL:** `{mh['url']}`\n"
283
  md += f"- **Status Code:** {mh.get('status_code', 'N/A')}\n"
284
  md += f"- **Response Time:** {mh['response_time_ms']}ms\n"
285
  if "error" in mh:
 
317
 
318
  for node in ast.walk(tree):
319
  if isinstance(node, ast.FunctionDef):
 
 
 
 
 
320
  func_name = node.name
321
+ docstring = ast.get_docstring(node) or "No docstring available"
322
 
323
+ # Extract parameter names
 
 
 
324
  parameters = []
325
  for arg in node.args.args:
326
+ parameters.append(arg.arg)
 
327
 
328
  functions.append((func_name, docstring, parameters))
329
 
 
333
 
334
  return functions
335
 
336
+ def discover_server_tools(url: str, progress=gr.Progress()) -> tuple:
337
  """
338
  Discover available MCP tools from a server.
339
 
 
343
  Returns:
344
  tuple: (markdown_summary, json_data)
345
  """
346
+ progress(0, desc="🛠️ Starting tools discovery...")
347
+
348
+ if not url.strip():
349
+ return "# ❌ No URL Provided\n\nPlease enter a URL to discover tools.", "{}"
350
+
351
+ progress(0.1, desc="🛠️ Parsing URL...")
352
  parsed_info = json.loads(parse_huggingface_url(url))
353
 
354
  if not parsed_info["is_valid"]:
355
  result = {
356
  "original_url": url,
357
  "status": "invalid_url",
358
+ "error": "Could not parse URL format"
 
 
359
  }
360
+ md = "# ❌ Tools Discovery Failed\n\nCould not parse URL format."
361
  return md, json.dumps(result, indent=2)
362
 
363
  tools = []
364
  discovery_methods = []
365
 
366
  # Method: Analyze app.py source code
367
+ progress(0.3, desc="🛠️ Fetching source code...")
368
  try:
369
+ # Try to get app.py from HF spaces
370
  if parsed_info["hf_spaces_url"] != "unknown":
371
+ app_url = f"{parsed_info['hf_spaces_url']}/raw/main/app.py"
372
+ progress(0.5, desc="🛠️ Analyzing app.py...")
373
+ response = requests.get(app_url, timeout=10)
 
374
  if response.status_code == 200:
375
+ progress(0.7, desc="🛠️ Extracting functions...")
376
+ functions = extract_functions_from_source(response.text)
377
+ for func_name, docstring, params in functions:
378
+ tools.append({
379
+ "name": func_name,
380
+ "description": docstring,
381
+ "parameters": params,
382
+ "source": "app.py_analysis"
383
+ })
384
+ discovery_methods.append("Analyzed app.py source code")
 
 
 
 
 
 
 
 
385
  except Exception as e:
386
+ discovery_methods.append(f"Failed to analyze app.py: {str(e)}")
387
+
388
+ progress(0.9, desc="🛠️ Preparing results...")
389
 
390
  # Prepare result
391
  result = {
 
399
  }
400
 
401
  if not tools:
402
+ result["message"] = "No tools discovered. Server may not expose MCP tools or may be private."
 
403
 
404
  # Generate markdown summary
405
  md = format_tools_summary(result)
406
 
407
+ progress(1.0, desc="✅ Tools discovery complete!")
408
  return md, json.dumps(result, indent=2)
409
 
410
  def format_tools_summary(result: dict) -> str:
 
414
  if result["status"] == "success":
415
  md += f"✅ **Status:** Found {result['tool_count']} tools\n\n"
416
 
417
+ md += "## 🛠️ Available Tools\n\n"
418
+ for i, tool in enumerate(result["tools"], 1):
419
+ md += f"### {i}. {tool['name']}\n"
420
+ md += f"**Description:** {tool['description'][:200]}{'...' if len(tool['description']) > 200 else ''}\n"
421
+ md += f"**Parameters:** {', '.join(tool['parameters'])}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
 
423
  else:
424
  md += "❌ **Status:** No tools found\n\n"
425
+ md += "This could mean:\n"
426
+ md += "- The server doesn't expose MCP tools\n"
427
+ md += "- The server is private or requires authentication\n"
428
+ md += "- The server is not running\n\n"
429
 
430
  if result.get("discovery_methods"):
431
+ md += "## 🔍 Discovery Methods Used\n\n"
432
  for method in result["discovery_methods"]:
433
  md += f"- {method}\n"
434
 
435
  return md
436
 
437
+ def monitor_multiple_servers(urls_text: str, progress=gr.Progress()) -> tuple:
438
  """
439
  Monitor health and tools of multiple MCP servers simultaneously.
440
 
 
444
  Returns:
445
  tuple: (markdown_summary, json_data)
446
  """
447
+ progress(0, desc="📊 Starting multi-server monitoring...")
448
+
449
  if not urls_text.strip():
450
+ result = {
451
+ "error": "No URLs provided",
452
+ "servers": [],
453
+ "total_servers": 0
454
+ }
455
+ md = "# ❌ No URLs Provided\n\nPlease enter URLs to monitor."
456
  return md, json.dumps(result, indent=2)
457
 
458
+ progress(0.1, desc="📊 Parsing URL list...")
459
  urls = [url.strip() for url in urls_text.strip().split('\n') if url.strip()]
460
 
461
  if not urls:
462
+ result = {
463
+ "error": "No valid URLs found",
464
+ "servers": [],
465
+ "total_servers": 0
466
+ }
467
+ md = "# ❌ No Valid URLs\n\nPlease check the URL format."
468
  return md, json.dumps(result, indent=2)
469
 
470
  results = []
471
 
472
+ for i, url in enumerate(urls, 1):
473
+ progress_pct = 0.1 + (i / len(urls)) * 0.8 # Reserve 10% for setup, 10% for final processing
474
+ progress(progress_pct, desc=f"📊 Checking server {i}/{len(urls)}: {url[:50]}...")
475
+
476
  try:
477
+ print(f"🔍 Checking server {i}/{len(urls)}: {url}")
478
+
479
  _, health_json = check_single_server_health(url)
480
+ health_data = json.loads(health_json)
481
 
 
482
  _, tools_json = discover_server_tools(url)
483
+ tools_data = json.loads(tools_json)
484
 
 
485
  server_result = {
486
  "url": url,
487
+ "health": health_data,
488
+ "tools": tools_data,
489
+ "combined_status": health_data.get("overall_status", "unknown")
490
  }
 
491
  results.append(server_result)
492
 
493
  except Exception as e:
494
+ print(f"❌ Error checking {url}: {str(e)}")
495
  results.append({
496
  "url": url,
497
+ "health": {"error": str(e)},
498
+ "tools": {"error": str(e)},
499
+ "combined_status": "error"
500
  })
501
 
502
+ progress(0.95, desc="📊 Generating report...")
503
+
504
  final_result = {
505
  "servers": results,
506
  "total_servers": len(urls),
 
510
  # Generate markdown summary
511
  md = format_multiple_servers_summary(final_result)
512
 
513
+ progress(1.0, desc="✅ Multi-server monitoring complete!")
514
  return md, json.dumps(final_result, indent=2)
515
 
516
  def format_multiple_servers_summary(result: dict) -> str:
 
523
  total_tools = 0
524
 
525
  for i, server in enumerate(result["servers"], 1):
526
+ status = server.get("combined_status", "unknown")
527
+ if status == "healthy":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  healthy_count += 1
529
 
530
+ tools_count = server.get("tools", {}).get("tool_count", 0)
531
+ total_tools += tools_count
 
 
 
 
 
 
 
 
 
 
 
 
532
 
533
+ status_icon = "🟢" if status == "healthy" else "🔴"
534
+ md += f"## {status_icon} Server {i}\n\n"
535
+ md += f"**URL:** `{server['url']}`\n"
536
+ md += f"**Status:** {status.replace('_', ' ').title()}\n"
537
+ md += f"**Tools Found:** {tools_count}\n\n"
538
 
539
  # Overall summary
540
  md += "## 📈 Summary\n\n"
 
542
  md += f"- **Total Tools Available:** {total_tools}\n"
543
 
544
  if healthy_count > 0:
545
+ md += f"- **Success Rate:** {round(healthy_count/result['total_servers']*100, 1)}%\n"
546
 
547
  return md
548
 
549
def validate_mcp_endpoint(url: str, progress=gr.Progress()) -> tuple:
    """
    Validate that a URL is a working MCP endpoint by checking its schema.

    The URL is resolved to an MCP endpoint with ``parse_huggingface_url``. The
    schema URL is derived by replacing the trailing ``/sse`` of that endpoint
    with ``/schema`` and is fetched with a 10 second timeout. The endpoint is
    reported as valid when the request returns HTTP 200 with a JSON body.

    Args:
        url (str): URL to validate as MCP endpoint (can be space URL or direct MCP endpoint)
        progress: Gradio progress tracker, called with a fraction and a ``desc`` label.

    Returns:
        tuple: (markdown_summary, json_data)
    """
    progress(0, desc="✅ Starting MCP validation...")

    # Treat None like an empty textbox instead of failing on ``.strip()``.
    if not url or not url.strip():
        return "# ❌ No URL Provided\n\nPlease enter a URL to validate.", "{}"

    progress(0.1, desc="✅ Parsing URL...")
    parsed_info = json.loads(parse_huggingface_url(url))
    mcp_endpoint = parsed_info.get("mcp_endpoint")

    validation_result = {
        "original_url": url,
        "is_valid_mcp": False,
        "mcp_endpoint_url": mcp_endpoint,
        "mcp_schema_url": None,
        "connection_config": None,
        "error": None,
        "schema_details": None
    }

    # A missing or empty endpoint is handled like "unknown"; otherwise the
    # ``endswith`` check further down would raise on None.
    if not parsed_info.get("is_valid") or not mcp_endpoint or mcp_endpoint == "unknown":
        validation_result["error"] = "Invalid URL format or could not determine MCP endpoint."
        md = f"# ❌ Invalid URL\n\nCould not parse the provided URL format to find an MCP endpoint: `{url}`"
        return md, json.dumps(validation_result, indent=2)

    progress(0.3, desc="✅ Building schema URL...")

    # Construct schema URL from MCP SSE endpoint
    # Example: https://user-space.hf.space/gradio_api/mcp/sse -> https://user-space.hf.space/gradio_api/mcp/schema
    if not mcp_endpoint.endswith("/sse"):
        # If it's not an SSE endpoint, we might not be able to reliably find the schema
        validation_result["error"] = f"MCP endpoint does not end with /sse, cannot determine schema URL: {mcp_endpoint}"
        md = f"# ⚠️ MCP Validation Warning\n\nCould not determine schema URL from MCP endpoint: `{mcp_endpoint}`. Validation might be incomplete."
        return md, json.dumps(validation_result, indent=2)

    mcp_schema_url = mcp_endpoint[:-len("/sse")] + "/schema"
    validation_result["mcp_schema_url"] = mcp_schema_url

    print(f"ℹ️ Validating MCP: Original URL='{url}', Endpoint='{mcp_endpoint}', Schema='{mcp_schema_url}'")

    progress(0.5, desc="✅ Testing schema endpoint...")

    # Test MCP schema endpoint
    try:
        headers = {'User-Agent': 'MCP-Validator/1.0'}
        response = requests.get(mcp_schema_url, timeout=10, headers=headers)

        validation_result["schema_http_status"] = response.status_code

        progress(0.7, desc="✅ Processing schema response...")

        if response.status_code == 200:
            try:
                schema_data = response.json()
            except ValueError:
                # ValueError is the common base of the JSON decode errors a
                # response body can raise, whichever decoder is in use.
                validation_result["error"] = "Schema endpoint returned 200 OK, but response is not valid JSON."
                print(f"❌ MCP Schema JSON decode error for {mcp_schema_url}")
            else:
                # Summarise the schema BEFORE flagging the endpoint as valid so
                # the result can never be both "valid" and carrying an error.
                if isinstance(schema_data, dict) and "tools" in schema_data:
                    tools = schema_data["tools"]
                elif isinstance(schema_data, list):  # Sometimes schema is a list of tools
                    tools = schema_data
                else:
                    tools = None

                if isinstance(tools, list):
                    schema_details = {
                        "tool_count": len(tools),
                        # Entries that are not dicts have no name to report.
                        "tool_names": [
                            tool.get("name") if isinstance(tool, dict) else None
                            for tool in tools
                        ]
                    }
                else:
                    schema_details = "Schema format not recognized or no tools found."

                validation_result["schema_details"] = schema_details
                validation_result["connection_config"] = {
                    "mcpServers": {
                        "gradio_server": {  # Default key, user might change
                            "url": mcp_endpoint
                        }
                    }
                }
                validation_result["is_valid_mcp"] = True
                print(f"✅ MCP Schema valid for {mcp_schema_url}")
        elif response.status_code in (401, 403):
            validation_result["error"] = f"Schema endpoint access denied (HTTP {response.status_code}). Private space may require auth token."
            print(f"⚠️ MCP Schema access denied for {mcp_schema_url} (HTTP {response.status_code})")
        else:
            validation_result["error"] = f"Schema endpoint returned HTTP {response.status_code}."
            print(f"❌ MCP Schema request failed for {mcp_schema_url} (HTTP {response.status_code})")

    except requests.exceptions.Timeout:
        validation_result["error"] = f"Request to schema endpoint timed out: {mcp_schema_url}"
        print(f"❌ MCP Schema request timeout for {mcp_schema_url}")
    except requests.exceptions.RequestException as e:
        validation_result["error"] = f"Request to schema endpoint failed: {str(e)}"
        print(f"❌ MCP Schema request failed for {mcp_schema_url}: {str(e)}")
    except Exception as e_gen:
        # Last-resort boundary: the UI should show a report, not a traceback.
        validation_result["error"] = f"An unexpected error occurred during validation: {str(e_gen)}"
        print(f"❌ Unexpected error during MCP validation for {mcp_schema_url}: {str(e_gen)}")

    progress(0.9, desc="✅ Generating validation report...")

    # Generate markdown summary
    md = format_validation_summary(validation_result)

    progress(1.0, desc="✅ MCP validation complete!")
    return md, json.dumps(validation_result, indent=2)
666
 
667
  def format_validation_summary(result: dict) -> str:
668
  """Generate markdown summary for MCP validation results."""
669
  md = f"# ✅ MCP Endpoint Validation\n\n"
670
+ md += f"**Original URL:** `{result['original_url']}`\n"
671
+ if result.get('mcp_endpoint_url'):
672
+ md += f"**Attempted MCP Endpoint:** `{result['mcp_endpoint_url']}`\n"
673
+ if result.get('mcp_schema_url'):
674
+ md += f"**Attempted MCP Schema URL:** `{result['mcp_schema_url']}`\n\n"
675
 
676
  if result["is_valid_mcp"]:
677
+ md += "## **Status: Valid MCP Endpoint**\n\n"
678
+ md += "The server appears to be a functional MCP endpoint based on schema accessibility.\n\n"
679
 
680
+ if result.get("schema_details"):
681
+ md += "### 📋 Schema Details:\n"
682
+ if isinstance(result["schema_details"], dict):
683
+ md += f"- **Tools Found:** {result['schema_details'].get('tool_count', 'N/A')}\n"
684
+ if result['schema_details'].get('tool_names'):
685
+ md += f"- **Tool Names:** {', '.join(filter(None, result['schema_details']['tool_names']))}\n"
686
+ else:
687
+ md += f"- {result['schema_details']}\n"
688
+ md += "\n"
689
+
690
+ md += "### 🔧 Configuration for MCP Client\n\n"
691
+ md += "You can likely use the following configuration (ensure the key like `gradio_server` is appropriate for your client):\n"
692
  md += "```json\n"
693
+ md += json.dumps(result["connection_config"], indent=2)
694
+ md += "\n```\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
 
696
  else:
697
+ md += "## ❌ **Status: Invalid or Inaccessible MCP Endpoint**\n\n"
698
+ if result.get("error"):
699
+ md += f"**Reason:** {result['error']}\n\n"
700
+ else:
701
+ md += "Could not confirm MCP functionality.\n\n"
702
+
703
+ md += "### 💡 Troubleshooting Tips:\n"
704
+ md += "- Ensure the URL is correct and the Hugging Face Space is running.\n"
705
+ md += "- Verify the Space has `mcp_server=True` in its `launch()` method (if it's a Gradio app).\n"
706
+ md += "- For private Spaces, your MCP client might need an `Authorization: Bearer <HF_TOKEN>` header.\n"
707
+ md += "- Check the Space logs for any errors if you own the Space.\n"
708
+
709
+ if result.get("schema_http_status"):
710
+ md += f"\n**Schema HTTP Status:** {result['schema_http_status']}\n"
711
+
712
  return md
713
 
714
def scrape_hf_spaces_with_progress(max_pages: int, sort_by: str, progress=gr.Progress()) -> tuple:
    """Wrapper function for scraping with progress feedback.

    Args:
        max_pages (int): Page limit forwarded to the scraper.
        sort_by (str): Key into ``SORT_OPTIONS``; unknown keys fall back to
            ``"relevance"``.
        progress: Gradio progress tracker, called with a fraction (or None)
            and a ``desc`` label.

    Returns:
        tuple: The two values returned by ``scrape_and_monitor_hf_mcp_servers``.
    """
    progress(0, desc="🕷️ Starting HuggingFace scraping...")

    # Validate sort option
    if sort_by not in SORT_OPTIONS:
        sort_by = "relevance"

    progress(0.1, desc=f"🕷️ Using sort method: {SORT_OPTIONS[sort_by]['label']}")

    # NOTE(review): this callback is not passed to the scraper below, so it is
    # never invoked. Wire it in once the scraper accepts a progress callback.
    def progress_callback(message):
        """Forward a scraper status message, with a fraction when one can be derived."""
        lowered = message.lower()
        fraction = None
        if "page" in lowered:
            # Matches "page N" or "page N/M" in the status message.
            match = re.search(r'page (\d+)(/(\d+))?', lowered)
            if match:
                current = int(match.group(1))
                total = int(match.group(3)) if match.group(3) else max_pages
                # A zero total gives no meaningful fraction; report the
                # message without one instead of dividing by zero.
                if total:
                    fraction = min(0.9, (current / total) * 0.8 + 0.1)  # Reserve 10% for final processing
        progress(fraction, desc=f"🕷️ {message}")

    # Call the scraper with the page limit and sort option
    md, json_data = scrape_and_monitor_hf_mcp_servers(max_pages, sort_by)

    progress(1.0, desc="✅ HuggingFace scraping complete!")
    return md, json_data
749
+
750
  # Default URLs for testing
751
  DEFAULT_URLS = """https://huggingface.co/spaces/abidlabs/mcp-tools"""
752
 
 
756
  gr.Markdown("Monitor and analyze Hugging Face Spaces configured as MCP servers")
757
 
758
  with gr.Tabs():
759
+ # Tab 1: Single Server Health Check
760
+ with gr.Tab("🏥 Single Server Health"):
761
+ gr.Markdown("### Check the health of a single MCP server")
762
 
763
+ with gr.Row():
764
+ single_url = gr.Textbox(
765
+ label="Server URL",
766
+ placeholder="Enter any HF Space URL format...",
767
+ value="https://huggingface.co/spaces/abidlabs/mcp-tools"
 
768
  )
769
+ check_health_btn = gr.Button("Check Health", variant="primary")
 
 
 
 
 
 
770
 
771
+ health_output = gr.Markdown(label="Health Report")
772
+ health_json = gr.JSON(label="Detailed Results", visible=False)
773
+
774
+ check_health_btn.click(
775
+ check_single_server_health,
776
+ inputs=[single_url],
777
+ outputs=[health_output, health_json]
778
  )
779
 
780
+ # Tab 2: URL Parser
781
+ with gr.Tab("🔍 URL Parser"):
782
+ gr.Markdown("### Parse and validate HuggingFace Space URLs")
783
 
784
+ with gr.Row():
785
+ parse_url = gr.Textbox(
786
+ label="URL to Parse",
787
+ placeholder="Enter any HF Space URL format...",
788
+ value="https://huggingface.co/spaces/abidlabs/mcp-tools"
789
  )
790
+ parse_btn = gr.Button("Parse URL", variant="primary")
 
 
 
 
 
 
791
 
792
+ parse_output = gr.Markdown(label="Parsing Results")
793
+ parse_json = gr.JSON(label="JSON Output", visible=False)
794
+
795
+ parse_btn.click(
796
+ parse_huggingface_url_with_summary,
797
+ inputs=[parse_url],
798
+ outputs=[parse_output, parse_json]
799
  )
800
 
801
+ # Tab 3: Tools Discovery
802
+ with gr.Tab("🛠️ Tools Discovery"):
803
  gr.Markdown("### Discover available MCP tools from a server")
804
 
805
+ with gr.Row():
806
+ tools_url = gr.Textbox(
807
  label="Server URL",
808
+ placeholder="Enter HF Space URL...",
809
+ value="https://huggingface.co/spaces/abidlabs/mcp-tools"
810
  )
811
+ discover_btn = gr.Button("Discover Tools", variant="primary")
812
+
813
+ tools_output = gr.Markdown(label="Tools Report")
814
+ tools_json = gr.JSON(label="Tools Data", visible=False)
 
 
 
815
 
816
+ discover_btn.click(
817
+ discover_server_tools,
818
+ inputs=[tools_url],
819
+ outputs=[tools_output, tools_json]
820
  )
821
 
822
+ # Tab 4: Multi-Server Monitor
823
+ with gr.Tab("📊 Multi-Server Monitor"):
824
+ gr.Markdown("### Monitor multiple MCP servers simultaneously")
825
 
826
+ multi_urls = gr.Textbox(
827
+ label="Server URLs (one per line)",
828
+ placeholder="Enter multiple URLs, one per line...",
829
+ lines=8,
830
+ value=DEFAULT_URLS
831
+ )
832
+ monitor_btn = gr.Button("Monitor All Servers", variant="primary")
833
+
834
+ multi_output = gr.Markdown(label="Multi-Server Report")
835
+ multi_json = gr.JSON(label="Detailed Results", visible=False)
836
+
837
+ monitor_btn.click(
838
+ monitor_multiple_servers,
839
+ inputs=[multi_urls],
840
+ outputs=[multi_output, multi_json]
841
+ )
842
+
843
+ # Tab 5: HF Spaces Scraper (ENHANCED)
844
+ with gr.Tab("🕷️ HF Spaces Scraper"):
845
+ gr.Markdown("### Discover MCP servers on HuggingFace Spaces")
846
+ gr.Markdown("Scrape HuggingFace to find all spaces tagged with 'mcp-server' using different sorting methods")
847
+
848
+ with gr.Row():
849
+ with gr.Column(scale=1):
850
+ max_pages = gr.Slider(
851
+ minimum=1,
852
+ maximum=50, # Increased from 100 to 50 for reasonable limits
853
+ value=5, # Increased default from 3 to 5
854
+ step=1,
855
+ label="Maximum Pages to Scrape",
856
+ info="Each page contains ~24 spaces. Total pages available: ~48+"
857
+ )
858
+
859
+ # Create dropdown for sort options
860
+ sort_choices = [(SORT_OPTIONS[key]["label"], key) for key in SORT_OPTIONS.keys()]
861
+ sort_dropdown = gr.Dropdown(
862
+ choices=sort_choices,
863
+ value="relevance",
864
+ label="Sort Method",
865
+ info="Choose how to sort the search results"
866
+ )
867
 
868
+ with gr.Column(scale=1):
869
+ scrape_btn = gr.Button("🕷️ Scrape HF Spaces", variant="primary", size="lg")
870
+
871
+ # Add info about sort methods and pagination
872
+ with gr.Accordion("ℹ️ Scraping Information", open=False):
873
+ gr.Markdown("""
874
+ **Sort Methods Explained:**
875
+
876
+ - **🎯 Relevance (Default):** HuggingFace's default relevance ranking
877
+ - **📈 Trending:** Currently popular and active spaces
878
+ - **❤️ Most Likes:** Spaces with the highest community appreciation
879
+ - **🆕 Recently Created:** Newest spaces, great for discovering latest tools
880
+ - **🔄 Recently Updated:** Recently modified spaces, likely actively maintained
881
 
882
+ **Pagination Information:**
883
+ - Each page contains approximately 24 spaces
884
+ - Current total: 48+ pages available (and growing!)
885
+ - The scraper will automatically stop if it encounters 3 consecutive empty pages
886
+ - Different sort methods may reveal different sets of MCP servers
887
+
888
+ **Tips:**
889
+ - Start with 5-10 pages for a good sample
890
+ - Try multiple sort methods for comprehensive discovery
891
+ - Higher page counts will take longer but find more servers
892
+ """)
893
+
894
+ scrape_output = gr.Markdown(label="Scraping Results")
895
+ scrape_json = gr.JSON(label="Scraped Data", visible=False)
896
 
897
+ scrape_btn.click(
898
+ scrape_hf_spaces_with_progress,
899
+ inputs=[max_pages, sort_dropdown],
900
+ outputs=[scrape_output, scrape_json]
901
  )
902
 
903
+ # Tab 6: MCP Validator
904
+ with gr.Tab(" MCP Validator"):
905
+ gr.Markdown("### Validate MCP endpoint connectivity")
906
 
907
+ with gr.Row():
908
+ validate_url = gr.Textbox(
909
+ label="URL to Validate",
910
+ placeholder="Enter URL to validate as MCP endpoint...",
911
+ value="https://abidlabs-mcp-tools.hf.space/gradio_api/mcp/sse"
912
  )
913
+ validate_btn = gr.Button("Validate Endpoint", variant="primary")
914
+
915
+ validate_output = gr.Markdown(label="Validation Results")
916
+ validate_json = gr.JSON(label="Validation Data", visible=False)
 
 
 
917
 
918
+ validate_btn.click(
919
+ validate_mcp_endpoint,
920
+ inputs=[validate_url],
921
+ outputs=[validate_output, validate_json]
922
  )
923
 
924
  if __name__ == "__main__":