import gradio as gr
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import InferenceClient
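# Assumed Space dependencies (requirements.txt): gradio, langchain-community,
# langchain-text-splitters, fastembed, faiss-cpu, pypdf, huggingface_hub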
# --- 1. Model Setup using HF Inference Client ---
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")
# Use InferenceClient directly instead of LangChain wrapper
client = InferenceClient(token=HF_TOKEN)
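# No model is bound to the client here; each text_generation call below names
# its target model, and the serverless Inference API routes the request.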

# --- 2. The Core Logic ---
def generate_question_paper(pdf_file, difficulty, num_questions):
    if not pdf_file:
        return "❌ Please upload a PDF file first."
    if not HF_TOKEN:
        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
    try:
        # A. Load PDF. Depending on the Gradio version, gr.File may pass a
        # filepath string or a tempfile-like object with a .name attribute.
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        if not pages:
            return "❌ Error: Could not extract text from PDF. Please ensure it's a valid PDF with text content."
        # B. Split Text
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(pages)
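        # The 100-character overlap keeps text that straddles a chunk boundary
        # intact in at least one chunk, which helps retrieval quality.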
        # C. Vector Store (FAISS)
        embeddings = FastEmbedEmbeddings()
        vector_store = FAISS.from_documents(chunks, embeddings)
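        # FastEmbed computes embeddings locally with a small ONNX model, so
        # indexing needs no GPU and no API calls; FAISS holds the index in memory.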
        # D. Retrieve Context
        retriever = vector_store.as_retriever(search_kwargs={"k": 7})
        context_docs = retriever.invoke("Key concepts and definitions")
        context_text = "\n\n".join([doc.page_content for doc in context_docs])
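        # k=7 chunks of ~1000 characters each keeps the stuffed prompt
        # comfortably within Mistral 7B's context window.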
        # E. Create Prompt
        prompt = f"""You are an expert academic examiner. Create a formal Question Paper based ONLY on the context provided below.

CONTEXT:
{context_text}

INSTRUCTIONS:
- Difficulty: {difficulty}
- Total Questions: {num_questions}
- Format:
  Section A: Multiple Choice Questions (MCQs)
  Section B: Short Answer Questions
  Section C: Long Answer/Essay Questions
- Provide the Answer Key for MCQs at the very end.

Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
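        # This is the simple "stuff" pattern: all retrieved chunks are packed
        # into a single prompt rather than summarized in stages.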
        # F. Generate using Mistral (openly licensed, so no gated-access approval is needed)
        response = client.text_generation(
            prompt,
            model="mistralai/Mistral-7B-Instruct-v0.3",
            max_new_tokens=2000,
            temperature=0.7,
        )
        return response
    except Exception as e:
        return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDF is valid and contains text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"

# --- 3. The UI ---
with gr.Blocks(title="AI Question Paper Generator", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# 📄 AI Question Paper Generator")
    gr.Markdown("Powered by **Mistral 7B** via the Hugging Face Inference API")
    gr.Markdown("⚡ Fast • 🎯 Accurate • 📚 Context-Aware")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📄 Upload Study Material (PDF)",
                file_types=[".pdf"]
            )
            with gr.Group():
                difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"],
                    label="🎚️ Difficulty Level",
                    value="Medium"
                )
                num_questions = gr.Slider(
                    5, 20, value=10, step=1,
                    label="📊 Total Questions"
                )
            btn = gr.Button("✨ Generate Question Paper", variant="primary", size="lg")
            gr.Markdown("""
            ### 📝 Instructions:
            1. Upload a PDF containing study material
            2. Select a difficulty level
            3. Choose the number of questions
            4. Click Generate!
            """)
        with gr.Column(scale=2):
            output = gr.Markdown(label="Generated Question Paper")

    btn.click(
        fn=generate_question_paper,
        inputs=[pdf_input, difficulty, num_questions],
        outputs=output
    )

    gr.Markdown("""
    ---
    **Note:** Set `HF_TOKEN` in your Space's Settings → Repository secrets.
    Get your token from https://huggingface.co/settings/tokens
    """)

if __name__ == "__main__":
    demo.launch()