tinyllama-chat

Running

App Files Community

NeoPy committed 5 days ago

Commit

f7da735

verified ·

1 Parent(s): cb15ef3

Update app.py

Browse files

Files changed (1) hide show

app.py +150 -97

app.py CHANGED Viewed

@@ -1,44 +1,110 @@
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from threading import Thread
 from sentence_transformers import SentenceTransformer, util
 # --- CONFIGURATION ---
-# Loading the tokenizer and model from Hugging Face's model hub.
 print("Loading TinyLlama...")
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
-# Loading the Embedding model for RAG
 print("Loading Embedding Model...")
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
-# using CUDA for an optimal experience
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.to(device)
 # --- GLOBAL STATE FOR RAG ---
 KNOWLEDGE_CHUNKS = []
 KNOWLEDGE_EMBEDDINGS = None
 RAG_ENABLED = False
-# System content - Define the assistant's personality and capabilities
 DEFAULT_SYSTEM_PROMPT = """You are TinyLlama, a friendly and helpful AI assistant.
-You are based on the TinyLlama-1.1B-Chat model and you excel at providing clear,
-concise answers to various questions."""
 SYSTEM_CONTENT = DEFAULT_SYSTEM_PROMPT
-# Defining a custom stopping criteria class for the model's text generation.
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [2]  # IDs of tokens where the generation should stop.
         for stop_id in stop_ids:
-            if input_ids[0][-1] == stop_id:  # Checking if the last generated token is a stop token.
                 return True
         return False
 # --- RAG FUNCTIONS ---
 def process_knowledge_base(text_content):
@@ -49,15 +115,14 @@ def process_knowledge_base(text_content):
         RAG_ENABLED = False
         return "Knowledge base cleared.", False
-    # 1. Simple Chunking (Split by paragraphs or roughly by max characters)
-    # For a real app, use a proper text splitter (like RecursiveCharacterTextSplitter)
     raw_chunks = text_content.split('\n\n')
     chunks = [chunk.strip() for chunk in raw_chunks if len(chunk.strip()) > 20]
     if not chunks:
         return "No valid text found to process.", False
-    # 2. Create Embeddings
     try:
         embeddings = embedder.encode(chunks, convert_to_tensor=True)
@@ -65,27 +130,21 @@ def process_knowledge_base(text_content):
         KNOWLEDGE_EMBEDDINGS = embeddings
         RAG_ENABLED = True
-        return f"Successfully indexed {len(chunks)} text chunks.", True
     except Exception as e:
         return f"Error creating embeddings: {str(e)}", False
 def retrieve_context(query, top_k=3):
-    """Finds relevant chunks for the query."""
     if not RAG_ENABLED or KNOWLEDGE_EMBEDDINGS is None:
         return ""
-    # Encode user query
     query_embedding = embedder.encode(query, convert_to_tensor=True)
-    # Compute Cosine Similarity
     cos_scores = util.cos_sim(query_embedding, KNOWLEDGE_EMBEDDINGS)[0]
-    # Get top_k results
     top_results = torch.topk(cos_scores, k=min(top_k, len(KNOWLEDGE_CHUNKS)))
     retrieved_text = []
     for score, idx in zip(top_results[0], top_results[1]):
-        if score > 0.3: # Threshold to ensure relevance
             retrieved_text.append(KNOWLEDGE_CHUNKS[idx])
     return "\n\n".join(retrieved_text)
@@ -93,30 +152,23 @@ def retrieve_context(query, top_k=3):
 # --- PREDICTION FUNCTION ---
 def predict(message, history, system_content=None):
-    # Use custom system content if provided, otherwise use default
     current_system_content = system_content if system_content else SYSTEM_CONTENT
-    # --- RAG LOGIC ---
     context = ""
     if RAG_ENABLED:
         retrieved = retrieve_context(message)
         if retrieved:
-            context = f"\nUse the following context to answer the user's question:\n{retrieved}\n"
-            # We modify the prompt to include the context
             message = f"{context}\nQuestion: {message}"
-    # -----------------
     history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
-    # Formatting the input for the model with system content
     system_prompt = f"<|system|>\n{current_system_content}</s>"
     conversation = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
                         for item in history_transformer_format])
     messages = system_prompt + conversation
-    # Tokenize
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
@@ -132,103 +184,104 @@ def predict(message, history, system_content=None):
         stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()  # Starting the generation in a separate thread.
     partial_message = ""
     for new_token in streamer:
         partial_message += new_token
-        if '</s>' in partial_message:  # Breaking the loop if the stop token is generated.
             break
         yield partial_message
-# --- UI HANDLERS ---
-def update_system_content(system_content):
-    global SYSTEM_CONTENT
-    if system_content.strip():
-        SYSTEM_CONTENT = system_content
-        return "System content updated successfully!"
-    else:
-        return "Please enter valid system content."
-def reset_system_content():
-    global SYSTEM_CONTENT
-    SYSTEM_CONTENT = DEFAULT_SYSTEM_PROMPT
-    return DEFAULT_SYSTEM_PROMPT, "System content reset to default!"
 # --- GRADIO INTERFACE ---
-with gr.Blocks(title="TinyLlama ChatBot + RAG") as demo:
-    gr.Markdown("# 🦙 TinyLlama RAG ChatBot")
-    gr.Markdown("Chat with TinyLlama-1.1B. Use the **RAG settings** to add your own context (Knowledge Base).")
     with gr.Row():
         # Left Column: Chat
         with gr.Column(scale=2):
-            gr.Markdown("### 💬 Chat Interface")
             chat_interface = gr.ChatInterface(
                 predict,
-                examples=['How to cook a fish?', 'Who is the president of US now?', 'Explain quantum computing simply'],
-                cache_examples=False
             )
-        # Right Column: Settings & RAG
         with gr.Column(scale=1):
-            # RAG Section
-            with gr.Accordion("📚 RAG / Knowledge Base", open=True):
-                gr.Markdown("Paste text below to give the AI specific knowledge.")
                 kb_input = gr.Textbox(
-                    label="Reference Text",
-                    lines=8,
-                    placeholder="Paste an article, email, or documentation here...",
-                    info="The AI will search this text to answer your questions."
                 )
                 with gr.Row():
-                    process_btn = gr.Button("Build Knowledge Base", variant="primary")
                     rag_status = gr.Checkbox(label="RAG Active", interactive=False, value=False)
-                kb_output = gr.Textbox(label="Status", interactive=False)
-            # System Prompt Section
-            with gr.Accordion("⚙️ System Personality", open=False):
-                system_content_input = gr.Textbox(
-                    label="System Content",
-                    value=SYSTEM_CONTENT,
-                    lines=4
-                )
-                with gr.Row():
-                    update_btn = gr.Button("Update System")
-                    reset_btn = gr.Button("Reset")
-                system_status = gr.Textbox(label="Status", interactive=False)
-            gr.Markdown("### ℹ️ About")
-            gr.Markdown("""
-            **Model:** TinyLlama-1.1B
-            **RAG:** sentence-transformers (all-MiniLM-L6-v2)
-            **How to use RAG:**
-            1. Paste text into 'Reference Text'.
-            2. Click 'Build Knowledge Base'.
-            3. Ask questions about that text.
-            """)
-    # Event Handlers
-    process_btn.click(
-        process_knowledge_base,
-        inputs=[kb_input],
-        outputs=[kb_output, rag_status]
     )
-    update_btn.click(
-        update_system_content,
-        inputs=[system_content_input],
-        outputs=[system_status]
     )
-    reset_btn.click(
-        reset_system_content,
-        outputs=[system_content_input, system_status]
     )
 if __name__ == "__main__":
-    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    StoppingCriteria,
+    StoppingCriteriaList,
+    TextIteratorStreamer,
+    BlipProcessor,
+    BlipForConditionalGeneration
+)
 from threading import Thread
 from sentence_transformers import SentenceTransformer, util
+import requests
+from bs4 import BeautifulSoup
+from PIL import Image
 # --- CONFIGURATION ---
+# 1. LLM: TinyLlama
 print("Loading TinyLlama...")
 tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
 model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+# 2. Embedding Model: For Text RAG
 print("Loading Embedding Model...")
 embedder = SentenceTransformer('all-MiniLM-L6-v2')
+# 3. Vision Model: BLIP (for Image to Text)
+# We use this to convert images into text descriptions so TinyLlama can "read" them.
+print("Loading Vision Model (BLIP)...")
+vision_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+vision_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+# Device Setup
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.to(device)
+vision_model = vision_model.to(device)
 # --- GLOBAL STATE FOR RAG ---
 KNOWLEDGE_CHUNKS = []
 KNOWLEDGE_EMBEDDINGS = None
 RAG_ENABLED = False
+# System content
 DEFAULT_SYSTEM_PROMPT = """You are TinyLlama, a friendly and helpful AI assistant.
+You are based on the TinyLlama-1.1B-Chat model."""
 SYSTEM_CONTENT = DEFAULT_SYSTEM_PROMPT
 class StopOnTokens(StoppingCriteria):
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
+        stop_ids = [2]
         for stop_id in stop_ids:
+            if input_ids[0][-1] == stop_id:
                 return True
         return False
+# --- NEW TOOL FUNCTIONS ---
+def scrape_wikifandom(url):
+    """Scrapes text content from a WikiFandom page."""
+    if "fandom.com" not in url:
+        return "Error: Please provide a valid URL containing 'fandom.com'"
+    try:
+        headers = {'User-Agent': 'Mozilla/5.0'}
+        response = requests.get(url, headers=headers)
+        if response.status_code != 200:
+            return f"Error: Failed to fetch page (Status {response.status_code})"
+        soup = BeautifulSoup(response.content, 'html.parser')
+        # Fandom usually puts the main article text in 'mw-parser-output'
+        content_div = soup.find('div', class_='mw-parser-output')
+        if not content_div:
+            # Fallback for some wiki layouts
+            content_div = soup.find('div', id='content')
+        if not content_div:
+            return "Error: Could not parse content from this Fandom page."
+        # Extract paragraphs
+        paragraphs = content_div.find_all('p')
+        text_content = "\n\n".join([p.get_text() for p in paragraphs if len(p.get_text()) > 50])
+        return text_content
+    except Exception as e:
+        return f"Error scraping URL: {str(e)}"
+def process_image_to_text(image):
+    """Generates a caption for an image using BLIP."""
+    if image is None:
+        return ""
+    try:
+        # Prepare image
+        inputs = vision_processor(image, return_tensors="pt").to(device)
+        # Generate caption
+        out = vision_model.generate(**inputs, max_new_tokens=50)
+        caption = vision_processor.decode(out[0], skip_special_tokens=True)
+        return f"Image Context: The user uploaded an image that shows {caption}."
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 # --- RAG FUNCTIONS ---
 def process_knowledge_base(text_content):
         RAG_ENABLED = False
         return "Knowledge base cleared.", False
+    # Chunking
     raw_chunks = text_content.split('\n\n')
     chunks = [chunk.strip() for chunk in raw_chunks if len(chunk.strip()) > 20]
     if not chunks:
         return "No valid text found to process.", False
+    # Create Embeddings
     try:
         embeddings = embedder.encode(chunks, convert_to_tensor=True)
         KNOWLEDGE_EMBEDDINGS = embeddings
         RAG_ENABLED = True
+        return f"Indexed {len(chunks)} chunks. RAG Ready.", True
     except Exception as e:
         return f"Error creating embeddings: {str(e)}", False
 def retrieve_context(query, top_k=3):
     if not RAG_ENABLED or KNOWLEDGE_EMBEDDINGS is None:
         return ""
     query_embedding = embedder.encode(query, convert_to_tensor=True)
     cos_scores = util.cos_sim(query_embedding, KNOWLEDGE_EMBEDDINGS)[0]
     top_results = torch.topk(cos_scores, k=min(top_k, len(KNOWLEDGE_CHUNKS)))
     retrieved_text = []
     for score, idx in zip(top_results[0], top_results[1]):
+        if score > 0.25: # Slightly lower threshold for broader context
             retrieved_text.append(KNOWLEDGE_CHUNKS[idx])
     return "\n\n".join(retrieved_text)
 # --- PREDICTION FUNCTION ---
 def predict(message, history, system_content=None):
     current_system_content = system_content if system_content else SYSTEM_CONTENT
     context = ""
     if RAG_ENABLED:
         retrieved = retrieve_context(message)
         if retrieved:
+            context = f"\nUse this context to answer:\n{retrieved}\n"
             message = f"{context}\nQuestion: {message}"
     history_transformer_format = history + [[message, ""]]
     stop = StopOnTokens()
     system_prompt = f"<|system|>\n{current_system_content}</s>"
     conversation = "</s>".join(["</s>".join(["\n<|user|>:" + item[0], "\n<|assistant|>:" + item[1]])
                         for item in history_transformer_format])
     messages = system_prompt + conversation
     model_inputs = tokenizer([messages], return_tensors="pt").to(device)
     streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
         stopping_criteria=StoppingCriteriaList([stop])
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
+    t.start()
     partial_message = ""
     for new_token in streamer:
         partial_message += new_token
+        if '</s>' in partial_message:
             break
         yield partial_message
+# --- UI LOGIC ---
+def add_fandom_content(url, current_text):
+    """Fetches fandom content and appends it to the textbox."""
+    scraped_text = scrape_wikifandom(url)
+    if scraped_text.startswith("Error"):
+        return current_text, scraped_text # Return error in status
+    new_text = (current_text + "\n\n" + scraped_text).strip()
+    return new_text, "Fandom content added to Knowledge Base text area."
+def add_image_content(image, current_text):
+    """Analyzes image and appends description to textbox."""
+    description = process_image_to_text(image)
+    if description.startswith("Error"):
+        return current_text, description
+    new_text = (current_text + "\n\n" + description).strip()
+    return new_text, "Image analysis added. RAG now knows what this image looks like."
 # --- GRADIO INTERFACE ---
+with gr.Blocks(title="TinyLlama Multi-Source RAG") as demo:
+    gr.Markdown("# 🦙 TinyLlama RAG (WikiFandom + Images)")
+    gr.Markdown("Chat with TinyLlama. Build a knowledge base from text, WikiFandom URLs, or Images.")
     with gr.Row():
         # Left Column: Chat
         with gr.Column(scale=2):
             chat_interface = gr.ChatInterface(
                 predict,
+                examples=['Who is in the image?', 'Tell me about the wiki page'],
             )
+        # Right Column: Tools
         with gr.Column(scale=1):
+            # --- RAG INPUTS ---
+            with gr.Accordion("📚 Knowledge Sources", open=True):
+                # Main Text Area (Where all data ends up)
                 kb_input = gr.Textbox(
+                    label="Compiled Knowledge Base",
+                    lines=6,
+                    placeholder="Data from Wiki or Images will appear here...",
+                    interactive=True
                 )
+                with gr.Tab("🔗 WikiFandom"):
+                    url_input = gr.Textbox(label="Fandom URL", placeholder="https://starwars.fandom.com/wiki/Luke_Skywalker")
+                    scrape_btn = gr.Button("Scrape & Add Text")
+                with gr.Tab("🖼️ Image Support"):
+                    img_input = gr.Image(type="pil", label="Upload Image")
+                    img_btn = gr.Button("Analyze & Add Description")
+                # Build Button
                 with gr.Row():
+                    process_btn = gr.Button("⚡ Build Knowledge Base", variant="primary")
                     rag_status = gr.Checkbox(label="RAG Active", interactive=False, value=False)
+                status_output = gr.Textbox(label="Status", interactive=False)
+            # System Prompt
+            with gr.Accordion("⚙️ System Settings", open=False):
+                system_content_input = gr.Textbox(value=SYSTEM_CONTENT, lines=2, label="System Prompt")
+    # --- EVENT HANDLERS ---
+    # 1. Scrape Fandom -> Append to Textbox
+    scrape_btn.click(
+        add_fandom_content,
+        inputs=[url_input, kb_input],
+        outputs=[kb_input, status_output]
     )
+    # 2. Analyze Image -> Append to Textbox
+    img_btn.click(
+        add_image_content,
+        inputs=[img_input, kb_input],
+        outputs=[kb_input, status_output]
     )
+    # 3. Build RAG Index
+    process_btn.click(
+        process_knowledge_base,
+        inputs=[kb_input],
+        outputs=[status_output, rag_status]
     )
 if __name__ == "__main__":
+    demo.launch()