import gradio as gr
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import InferenceClient
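# Assumed Space dependencies (requirements.txt): gradio, langchain-community,
# langchain-text-splitters, fastembed, faiss-cpu, pypdf, huggingface_hub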
# --- 1. Model Setup using HF Inference Client ---
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")
# Use InferenceClient directly instead of LangChain wrapper
client = InferenceClient(token=HF_TOKEN)
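# No model is bound to the client here; each text_generation call below names
# its target model, and the serverless Inference API routes the request.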

# --- 2. The Core Logic ---
def generate_question_paper(pdf_file, difficulty, num_questions):
    if not pdf_file:
        return "❌ Please upload a PDF file first."
    if not HF_TOKEN:
        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
    try:
        # A. Load PDF. Depending on the Gradio version, gr.File may pass a
        # filepath string or a tempfile-like object with a .name attribute.
        pdf_path = pdf_file.name if hasattr(pdf_file, "name") else str(pdf_file)
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        if not pages:
            return "❌ Error: Could not extract text from PDF. Please ensure it's a valid PDF with text content."
        # B. Split Text
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(pages)
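        # The 100-character overlap keeps text that straddles a chunk boundary
        # intact in at least one chunk, which helps retrieval quality.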
        # C. Vector Store (FAISS)
        embeddings = FastEmbedEmbeddings()
        vector_store = FAISS.from_documents(chunks, embeddings)
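        # FastEmbed computes embeddings locally with a small ONNX model, so
        # indexing needs no GPU and no API calls; FAISS holds the index in memory.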
        # D. Retrieve Context
        retriever = vector_store.as_retriever(search_kwargs={"k": 7})
        context_docs = retriever.invoke("Key concepts and definitions")
        context_text = "\n\n".join([doc.page_content for doc in context_docs])
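        # k=7 chunks of ~1000 characters each keeps the stuffed prompt
        # comfortably within Mistral 7B's context window.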
        # E. Create Prompt
        prompt = f"""You are an expert academic examiner. Create a formal Question Paper based ONLY on the context provided below.

CONTEXT:
{context_text}

INSTRUCTIONS:
- Difficulty: {difficulty}
- Total Questions: {num_questions}
- Format:
  Section A: Multiple Choice Questions (MCQs)
  Section B: Short Answer Questions
  Section C: Long Answer/Essay Questions
- Provide the Answer Key for MCQs at the very end.

Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
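        # This is the simple "stuff" pattern: all retrieved chunks are packed
        # into a single prompt rather than summarized in stages.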
        # F. Generate using Mistral (openly licensed, so no gated-access approval is needed)
        response = client.text_generation(
            prompt,
            model="mistralai/Mistral-7B-Instruct-v0.3",
            max_new_tokens=2000,
            temperature=0.7,
        )
        return response
    except Exception as e:
        return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDF is valid and contains text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"

# --- 3. The UI ---
with gr.Blocks(title="AI Question Paper Generator", theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# 📄 AI Question Paper Generator")
    gr.Markdown("Powered by **Mistral 7B** via the Hugging Face Inference API")
    gr.Markdown("⚡ Fast • 🎯 Accurate • 📚 Context-Aware")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📄 Upload Study Material (PDF)",
                file_types=[".pdf"]
            )
            with gr.Group():
                difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"],
                    label="🎚️ Difficulty Level",
                    value="Medium"
                )
                num_questions = gr.Slider(
                    5, 20, value=10, step=1,
                    label="📊 Total Questions"
                )
            btn = gr.Button("✨ Generate Question Paper", variant="primary", size="lg")
            gr.Markdown("""
            ### 📝 Instructions:
            1. Upload a PDF containing study material
            2. Select a difficulty level
            3. Choose the number of questions
            4. Click Generate!
            """)
        with gr.Column(scale=2):
            output = gr.Markdown(label="Generated Question Paper")

    btn.click(
        fn=generate_question_paper,
        inputs=[pdf_input, difficulty, num_questions],
        outputs=output
    )

    gr.Markdown("""
    ---
    **Note:** Set `HF_TOKEN` in your Space's Settings → Repository secrets.
    Get your token from https://huggingface.co/settings/tokens
    """)

if __name__ == "__main__":
    demo.launch()