Spaces:
Running
on
Zero
Running
on
Zero
File size: 8,629 Bytes
752474d 55d584b 752474d 55d584b 752474d 55d584b a0dbf73 752474d 5bcb8ba 752474d 5bcb8ba 752474d 5bcb8ba 752474d 5bcb8ba 752474d 5bcb8ba a0dbf73 752474d 55d584b bfcb0d4 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d 5bcb8ba 55d584b 752474d 55d584b 752474d 55d584b 752474d 5bcb8ba 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d 55d584b 752474d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
#!/usr/bin/env python3
"""
Gradio interface for Warbler CDA on HuggingFace Spaces.
Provides a web UI for the FractalStat RAG system with GPU acceleration.
"""
import gradio as gr
import time
# Import Warbler CDA components
from warbler_cda.retrieval_api import RetrievalAPI, RetrievalQuery, RetrievalMode
from warbler_cda.embeddings import EmbeddingProviderFactory
from warbler_cda.fractalstat_rag_bridge import FractalStatRAGBridge
from warbler_cda.semantic_anchors import SemanticAnchorGraph
from warbler_cda.pack_loader import PackLoader
# Initialize the system.
# Everything below runs at import time, so the retrieval stack is built once,
# before the Gradio UI further down is constructed.
print("🚀 Initializing Warbler CDA...")

# Create embedding provider (will use sentence-transformers with GPU if available)
embedding_provider = EmbeddingProviderFactory.get_default_provider()
print(f"✅ Embedding provider: {embedding_provider.get_provider_info()['provider_id']}")

# Create semantic anchors (required by RetrievalAPI)
semantic_anchors = SemanticAnchorGraph(embedding_provider=embedding_provider)
print("✅ Semantic anchors initialized")

# Create FractalStat bridge
fractalstat_bridge = FractalStatRAGBridge()
print("✅ FractalStat bridge initialized")

# Create RetrievalAPI with proper components; hybrid scoring is switched on
# here so individual queries can opt into it.
api = RetrievalAPI(
    semantic_anchors=semantic_anchors,
    embedding_provider=embedding_provider,
    fractalstat_bridge=fractalstat_bridge,
    config={"enable_fractalstat_hybrid": True},
)
print("✅ RetrievalAPI initialized")
# Load packs: discover local documents, fall back to a download, and finally
# to a few built-in sample documents so the app always has something to query.
print("📚 Loading Warbler packs...")
pack_loader = PackLoader()
documents = pack_loader.discover_documents()

# If no packs found, try to download them
if not documents:
    print("⚠️ No packs found locally. Attempting to download from HuggingFace...")
    try:
        # Imported lazily: the ingestor is only needed on this fallback path.
        from warbler_cda.utils.hf_warbler_ingest import HFWarblerIngestor

        ingestor = HFWarblerIngestor(packs_dir=pack_loader.packs_dir, verbose=True)
        # Download a small demo dataset for deployment
        print("📦 Downloading warbler-pack-hf-prompt-report...")
        success = ingestor.ingest_dataset("prompt-report")
        if success:
            # Reload after download
            documents = pack_loader.discover_documents()
            print(f"✅ Downloaded {len(documents)} documents")
        else:
            print("❌ Failed to download dataset, using sample documents...")
            documents = []
    except Exception as e:
        # Deliberately broad: any download/ingest failure must not stop the
        # app from starting; the sample documents below are used instead.
        print(f"⚠️ Could not download packs: {e}")
        print("Using sample documents instead...")
        documents = []

if not documents:
    # Fallback to sample documents
    sample_docs = [
        {"id": "sample1", "content": "FractalStat is an 8-dimensional addressing system for intelligent retrieval.", "metadata": {}},
        {"id": "sample2", "content": "Semantic search finds documents by meaning, not just keywords.", "metadata": {}},
        {"id": "sample3", "content": "Bob the Skeptic validates results to prevent bias and hallucinations.", "metadata": {}},
    ]
    for doc in sample_docs:
        api.add_document(doc["id"], doc["content"], doc["metadata"])
    print(f"✅ Loaded {len(sample_docs)} sample documents")
else:
    print(f"✅ Found {len(documents)} documents")
    # Ingest documents; "metadata" is optional on discovered documents.
    for doc in documents:
        api.add_document(
            doc_id=doc["id"],
            content=doc["content"],
            metadata=doc.get("metadata", {}),
        )

print(f"🎉 Warbler CDA ready with {api.get_context_store_size()} documents!")
def query_warbler(query_text: str, max_results: int = 5, use_hybrid: bool = True) -> str:
    """Query the Warbler CDA system and format the results as Markdown.

    Args:
        query_text: Free-text query. Empty, whitespace-only or ``None`` input
            returns a prompt message without touching the retrieval API.
        max_results: Maximum number of results to request and display. A float
            (as a UI slider may deliver) is truncated to ``int`` because the
            value is used as a slice bound.
        use_hybrid: When true, requests FractalStat hybrid scoring and adds the
            semantic / FractalStat breakdown under each result.

    Returns:
        A Markdown string with the query, result count, elapsed time, assembly
        quality score and up to ``max_results`` result snippets.
    """
    if not query_text or not query_text.strip():
        return "Please enter a query."

    if isinstance(max_results, float):
        max_results = int(max_results)

    snippet_limit = 300  # characters of content shown per result

    # perf_counter is monotonic, so the elapsed time cannot go negative if the
    # wall clock is adjusted mid-query.
    started = time.perf_counter()

    # Create query
    query = RetrievalQuery(
        query_id=f"gradio_{int(time.time())}",
        mode=RetrievalMode.SEMANTIC_SIMILARITY,
        semantic_query=query_text,
        max_results=max_results,
        fractalstat_hybrid=use_hybrid,
    )

    # Execute query
    assembly = api.retrieve_context(query)
    elapsed_ms = (time.perf_counter() - started) * 1000

    # Format results; collect fragments and join once at the end.
    parts = [
        "## Query Results\n\n",
        f"**Query:** {query_text}\n\n",
        f"**Found:** {len(assembly.results)} results in {elapsed_ms:.0f}ms\n\n",
        f"**Quality Score:** {assembly.assembly_quality:.3f}\n\n",
    ]

    if assembly.results:
        parts.append("### Top Results\n\n")
        for i, result in enumerate(assembly.results[:max_results], 1):
            snippet = result.content[:snippet_limit]
            if len(result.content) > snippet_limit:
                # Only mark the snippet as truncated when text was actually cut.
                snippet += "..."
            parts.append(f"**{i}. Score: {result.relevance_score:.3f}**\n\n")
            parts.append(f"{snippet}\n\n")
            if use_hybrid:
                parts.append(f"- Semantic: {result.semantic_similarity:.3f}\n")
                parts.append(f"- FractalStat: {result.fractalstat_resonance:.3f}\n\n")
            parts.append("---\n\n")
    else:
        parts.append("No results found.\n")

    return "".join(parts)
def get_system_stats() -> str:
    """Render the document count and retrieval metrics as a Markdown summary.

    Returns:
        A Markdown string listing total documents, total queries, cache hit
        rate and average quality, read from the module-level ``api``.
    """
    snapshot = api.get_retrieval_metrics()

    # Fragments are evaluated top to bottom, then joined once.
    sections = [
        "## System Statistics\n\n",
        f"**Total Documents:** {api.get_context_store_size():,}\n\n",
        f"**Total Queries:** {snapshot['retrieval_metrics']['total_queries']}\n\n",
        f"**Cache Hit Rate:** {snapshot['cache_performance']['hit_rate']:.1%}\n\n",
        f"**Avg Quality:** {snapshot['system_health']['average_quality']:.3f}\n\n",
    ]
    return "".join(sections)
# Create Gradio interface

# Markdown shown at the top of the page. Kept as flush-left module-level text
# so the literal carries no accidental leading indentation; blank lines
# separate the Markdown blocks so paragraphs do not run together.
_INTRO_MARKDOWN = """
# 🐦 Warbler CDA - FractalStat RAG System

Semantic retrieval with 8D FractalStat multi-dimensional addressing.

**Features:**
- 2.6M+ documents from arXiv, education, fiction, and more
- Hybrid semantic + FractalStat scoring
- Bob the Skeptic bias detection
- Narrative coherence analysis
"""

# Markdown shown on the "About" tab.
_ABOUT_MARKDOWN = """
## About Warbler CDA

Warbler CDA is a production-ready RAG system featuring:
- **8D FractalStat Addressing**: Multi-dimensional intelligence for superior retrieval
- **Semantic Anchors**: Persistent memory with provenance tracking
- **Bob the Skeptic**: Automatic bias detection and validation
- **Narrative Coherence**: Quality analysis beyond simple similarity

### Performance
- 84% test coverage with 587 passing tests
- 9-28s query response time
- 0.88 average relevance score
- 75-83% narrative coherence

### Links
- [Source Code](https://gitlab.com/tiny-walnut-games/the-seed)
- [Documentation](https://gitlab.com/tiny-walnut-games/the-seed/-/tree/main/warbler-cda-package)
- [Performance Report](https://gitlab.com/tiny-walnut-games/the-seed/-/blob/main/warbler-cda-package/WARBLER_CDA_PERFORMANCE_REPORT.md)
"""

with gr.Blocks(title="Warbler CDA - FractalStat RAG") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Query tab: inputs in the left column, rendered results in the right.
    with gr.Tab("Query"):
        with gr.Row():
            with gr.Column():
                query_input = gr.Textbox(
                    label="Query",
                    placeholder="Enter your search query...",
                    lines=2,
                )
                max_results = gr.Slider(
                    minimum=1,
                    maximum=20,
                    value=5,
                    step=1,
                    label="Max Results",
                )
                use_hybrid = gr.Checkbox(
                    label="Enable FractalStat Hybrid Scoring",
                    value=True,
                )
                query_btn = gr.Button("Search", variant="primary")
            with gr.Column():
                results_output = gr.Markdown(label="Results")

        # Input order must match query_warbler's positional parameters.
        query_btn.click(  # pylint: disable=E1101
            fn=query_warbler,
            inputs=[query_input, max_results, use_hybrid],
            outputs=results_output,
        )

        # Each example row is [query text, max results, hybrid flag].
        gr.Examples(
            examples=[
                ["hello world", 5, True],
                ["rotation dynamics of Saturn's moons", 5, True],
                ["anything about machine learning", 10, False],
            ],
            inputs=[query_input, max_results, use_hybrid],
        )

    # Stats tab: populated once on page load and again on demand.
    with gr.Tab("System Stats"):
        stats_output = gr.Markdown()
        stats_btn = gr.Button("Refresh Stats")
        stats_btn.click(fn=get_system_stats, outputs=stats_output)  # pylint: disable=E1101
        demo.load(fn=get_system_stats, outputs=stats_output)  # pylint: disable=E1101

    with gr.Tab("About"):
        gr.Markdown(_ABOUT_MARKDOWN)
if __name__ == "__main__":
    # Only start the server when run as a script, not when imported.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)
|