import gradio as gr
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import InferenceClient
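# Pipeline overview: load PDFs -> split into chunks -> embed into a FAISS
# index -> retrieve the most relevant chunks -> prompt an LLM to write the
# question paper(s). Everything is per-request; nothing is persisted.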
# --- 1. Model Setup using HF Inference Client ---
HF_TOKEN = os.environ.get("HF_TOKEN", "")
if not HF_TOKEN:
    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")

# Use InferenceClient directly instead of a LangChain wrapper. The timeout is
# set here because chat_completion has no per-call timeout parameter.
client = InferenceClient(token=HF_TOKEN, timeout=120)  # 2 minute timeout per request
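# No default model is pinned at construction; the chat_completion call below
# names its model explicitly via the `model` argument.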
# --- 2. The Core Logic ---
def generate_question_paper(
    pdf_files,
    mcq_difficulty, mcq_count,
    short_difficulty, short_count,
    long_difficulty, long_count,
    num_sets,
    progress=gr.Progress()
):
    # Add timeout protection
    import time
    start_time = time.time()

    if not pdf_files or len(pdf_files) == 0:
        return "❌ Please upload at least one PDF file."
    if len(pdf_files) > 5:
        return "❌ Error: Maximum 5 PDF files allowed."
    if not HF_TOKEN:
        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."

    total_questions = mcq_count + short_count + long_count
    if total_questions == 0:
        return "❌ Please specify at least one question."
    try:
        # A. Load all PDFs
        progress(0, desc=f"📄 PDF file(s) uploaded, accessing {len(pdf_files)} file(s)...")
        all_pages = []
        for idx, pdf_file in enumerate(pdf_files):
            current_progress = 0.05 + (idx * 0.1 / len(pdf_files))
            progress(current_progress,
                     desc=f"📄 Accessing PDF {idx + 1}/{len(pdf_files)}: {pdf_file.name.split('/')[-1][:30]}...")
            loader = PyPDFLoader(pdf_file.name)
            pages = loader.load()
            if not pages:
                return f"❌ Error: Could not extract text from {pdf_file.name}. Please ensure it's a valid PDF with text content."
            all_pages.extend(pages)
        progress(0.15, desc=f"✅ PDF loaded successfully! Extracted {len(all_pages)} pages from {len(pdf_files)} file(s)")
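        # Note: PyPDFLoader reads embedded text only (no OCR), so scanned or
        # image-only PDFs may load "successfully" yet contain empty page text
        # that the emptiness check above does not catch.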
        # B. Split Text
        progress(0.20, desc="📝 Extracting text content from PDFs...")
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(all_pages)
        progress(0.30, desc=f"✅ Text extracted successfully! Created {len(chunks)} text chunks, preparing embeddings...")
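        # The 100-character overlap means adjacent chunks share a little text,
        # so a sentence that straddles a chunk boundary usually survives intact
        # in at least one of them.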
        # C. Vector Store (FAISS)
        progress(0.35, desc="🧠 Generating embeddings for content understanding...")
        embeddings = FastEmbedEmbeddings()
        progress(0.40, desc="🧠 Creating knowledge base from embeddings...")
        vector_store = FAISS.from_documents(chunks, embeddings)
        progress(0.50, desc="✅ Knowledge base created successfully! Analyzing content for key concepts...")
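        # The FAISS index is held in memory and rebuilt from scratch on every
        # request; fine for a demo, though caching it per upload would save
        # time on repeated runs over the same PDFs.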
        # D. Retrieve Context (more chunks for multiple PDFs)
        progress(0.55, desc="🔍 Identifying key concepts and topics from content...")
        retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
        context_docs = retriever.invoke("Key concepts, definitions, and important topics")
        context_text = "\n\n".join([doc.page_content for doc in context_docs])
        progress(0.60, desc=f"✅ Analysis complete! Found {len(context_docs)} key sections. Activating AI model...")
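        # k is capped at len(chunks) so short documents never ask the index
        # for more chunks than it actually holds.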
        # E. Generate all sets
        all_outputs = []
        for set_num in range(1, num_sets + 1):
            progress(0.65 + (set_num - 1) * 0.30 / num_sets,
                     desc=f"🤖 AI Model activated! Preparing to generate Set {set_num}/{num_sets}...")

            # Create Prompt for this set
            sections = []
            answer_key_instructions = []
            if mcq_count > 0:
                sections.append(f"""Section A: Multiple Choice Questions (MCQs) - {mcq_count} questions
Difficulty: {mcq_difficulty}
Create {mcq_count} MCQs with 4 options each (A, B, C, D). Mark the correct answer clearly.""")
                answer_key_instructions.append("MCQ Answer Key")
            if short_count > 0:
                sections.append(f"""Section B: Short Answer Questions - {short_count} questions
Difficulty: {short_difficulty}
Create {short_count} short answer questions (2-3 marks each, expected answer: 2-3 sentences).""")
            if long_count > 0:
                sections.append(f"""Section C: Long Answer/Essay Questions - {long_count} questions
Difficulty: {long_difficulty}
Create {long_count} long answer questions (5-10 marks each, expected answer: detailed explanation).""")

            sections_text = "\n\n".join(sections)
            answer_key_text = "\n".join([f"- {key}" for key in answer_key_instructions])
            prompt = f"""You are an expert academic examiner. Create a formal Question Paper based ONLY on the context provided below.

CONTEXT:
{context_text}

INSTRUCTIONS:
Create Question Paper Set {set_num} of {num_sets}

{sections_text}

FORMAT REQUIREMENTS:
- Start with "QUESTION PAPER - SET {set_num}"
- Include proper section headers
- Number all questions sequentially within each section
- For MCQs: Provide 4 options (A, B, C, D)
- At the end, provide:
{answer_key_text}

Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
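            # Context size note: with k capped at 10 chunks of ~1,000
            # characters each, the inlined context stays around 2-3K tokens,
            # comfortably inside Llama 3.2's context window.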
            progress(0.70 + (set_num - 1) * 0.30 / num_sets,
                     desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... 0%")

            # F. Generate using chat completion
            messages = [{"role": "user", "content": prompt}]
            response = ""
            token_count = 0
            max_tokens = 2500  # Increased for longer papers
            last_update_time = time.time()
            try:
                for message in client.chat_completion(
                    messages=messages,
                    model="meta-llama/Llama-3.2-3B-Instruct",
                    max_tokens=max_tokens,
                    temperature=0.7,
                    stream=True,  # stream tokens so progress can update live
                ):
                    # Check total timeout
                    if time.time() - start_time > 300:  # 5 minute total timeout
                        return f"⏱️ Request timeout. Please try with:\n- Fewer PDF files\n- Fewer questions\n- Fewer sets\n\nPartial output:\n{response}"
                    if hasattr(message, 'choices') and len(message.choices) > 0:
                        if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
                            response += message.choices[0].delta.content or ""
                            token_count += 1
                            # Update progress every 50 tokens to reduce overhead
                            if token_count % 50 == 0 or time.time() - last_update_time > 2:
                                # Calculate progress within this set (70-95% range divided by number of sets)
                                set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
                                set_range = 0.25 / num_sets  # 25% of total progress for generation
                                generation_progress = min(token_count / max_tokens, 1.0)
                                current_progress = set_start + (generation_progress * set_range)
                                percentage = int(generation_progress * 100)
                                # Update with dynamic percentage
                                progress(current_progress,
                                         desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... {percentage}%")
                                last_update_time = time.time()
            except Exception as e:
                # Return any partial text rather than discarding it
                if response:
                    return f"⚠️ Generation interrupted: {str(e)}\n\nPartial output for Set {set_num}:\n{response}"
                else:
                    raise e
            progress(0.70 + set_num * 0.30 / num_sets,
                     desc=f"✅ Set {set_num}/{num_sets} generated successfully!")
            all_outputs.append(response)

        progress(1.0, desc=f"✅ All {num_sets} Question Paper(s) Generated Successfully! 🎉")

        # Combine all sets, separated by a divider line (the parentheses matter:
        # the whole separator joins the sets, not just the trailing "\n\n")
        final_output = ("\n\n" + "=" * 80 + "\n\n").join(all_outputs)
        return final_output
    except Exception as e:
        return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"
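# Local smoke test (a sketch, not part of the app): the core function can be
# exercised without the UI. `_FakeUpload` is a hypothetical stand-in for the
# file wrapper Gradio passes in; only a `.name` attribute is needed here.
#
#   class _FakeUpload:
#       name = "sample.pdf"  # path to a real text-bearing PDF on disk
#
#   print(generate_question_paper(
#       [_FakeUpload()], "Medium", 5, "Medium", 3, "Medium", 2, 1,
#       progress=lambda *args, **kwargs: None,  # stub out Gradio progress tracking
#   ))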
# --- 3. The UI ---
with gr.Blocks(title="AI Question Paper Generator") as demo:
    gr.Markdown("# 📝 AI Question Paper Generator Pro")
    gr.Markdown("Powered by **Fine-Tuned Llama 3.2 3B**")
    gr.Markdown("⚡ Fast • 🎯 Accurate • 📚 Multi-PDF Support • 🎲 Multiple Sets")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📚 Upload Study Materials (PDF) - Max 5 files",
                file_types=[".pdf"],
                file_count="multiple"
            )

            gr.Markdown("### 🎲 Number of Question Paper Sets")
            num_sets = gr.Slider(
                1, 3, value=1, step=1,
                label="📑 Generate multiple unique sets"
            )

            gr.Markdown("### 📝 Section A: Multiple Choice Questions")
            with gr.Group():
                mcq_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"],
                    label="🎚️ MCQ Difficulty",
                    value="Medium"
                )
                mcq_count = gr.Slider(
                    0, 20, value=5, step=1,
                    label="🔢 Number of MCQs"
                )

            gr.Markdown("### ✍️ Section B: Short Answer Questions")
            with gr.Group():
                short_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"],
                    label="🎚️ Short Answer Difficulty",
                    value="Medium"
                )
                short_count = gr.Slider(
                    0, 15, value=3, step=1,
                    label="🔢 Number of Short Answer Questions"
                )

            gr.Markdown("### 📖 Section C: Long Answer Questions")
            with gr.Group():
                long_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"],
                    label="🎚️ Long Answer Difficulty",
                    value="Medium"
                )
                long_count = gr.Slider(
                    0, 10, value=2, step=1,
                    label="🔢 Number of Long Answer Questions"
                )

            btn = gr.Button("✨ Generate Question Paper(s)", variant="primary", size="lg")

            gr.Markdown("""
            ### 📋 Instructions:
            1. Upload 1-5 PDF files containing study material
            2. Choose number of sets to generate (1-3)
            3. Configure each section:
               - Set difficulty level
               - Set number of questions
            4. Click Generate!

            **Note:** Set any section to 0 questions to exclude it.
            """)

        with gr.Column(scale=2):
            output = gr.Markdown(
                label="Generated Question Paper(s)",
                value="📄 Upload PDF files and configure settings to generate question papers..."
            )
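    # The positional order of this inputs list must match the parameter order
    # of generate_question_paper above; Gradio maps them one-to-one.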
    btn.click(
        fn=generate_question_paper,
        inputs=[
            pdf_input,
            mcq_difficulty, mcq_count,
            short_difficulty, short_count,
            long_difficulty, long_count,
            num_sets
        ],
        outputs=output,
        show_progress="full"
    )
    gr.Markdown("""
    ---
    **Features:**
    - ✅ Multiple PDF support (up to 5 files)
    - ✅ Separate difficulty control for each question type
    - ✅ Customizable question count per section
    - ✅ Generate 1-3 unique question paper sets
    - ✅ Automatic answer key generation for MCQs
    - ✅ Queue system for concurrent users

    **Performance Tips:**
    - For faster results: Use 1-2 PDFs, fewer questions, single set
    - If timeout occurs: Reduce number of questions or sets
    - Queue position will be shown when multiple users are active
    """)
if __name__ == "__main__":
    demo.queue(
        max_size=20,  # Maximum queue size
        default_concurrency_limit=2  # Allow 2 concurrent users
    )
    demo.launch(
        show_error=True,
        share=False
    )