import gradio as gr
import os
import time
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import FAISS
from huggingface_hub import InferenceClient
from langchain_core.prompts import ChatPromptTemplate

# --- 1. Model Setup using HF Inference Client ---
HF_TOKEN = os.environ.get("HF_TOKEN", "")
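# HF_TOKEN is read from the environment: on a Hugging Face Space it is injected
# from Repository secrets; locally it can be set with `export HF_TOKEN=...`.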

if not HF_TOKEN:
    print("⚠️ Warning: HF_TOKEN not set. The app may not work properly.")

# Use InferenceClient directly instead of a LangChain wrapper. The per-request
# timeout is configured on the client itself, since chat_completion() does not
# accept a timeout argument.
client = InferenceClient(token=HF_TOKEN, timeout=120)  # 2 minute timeout per request

# --- 2. The Core Logic ---
def generate_question_paper(
    pdf_files, 
    mcq_difficulty, mcq_count,
    short_difficulty, short_count,
    long_difficulty, long_count,
    num_sets,
    progress=gr.Progress()
):
    # Record the start time so the whole request can be capped by a total timeout.
    start_time = time.time()
    
    if not pdf_files:
        return "❌ Please upload at least one PDF file."
    
    if len(pdf_files) > 5:
        return "❌ Error: Maximum 5 PDF files allowed."
    
    if not HF_TOKEN:
        return "❌ Error: HF_TOKEN not configured. Please add your Hugging Face token in Space Settings > Repository secrets."
    
    total_questions = mcq_count + short_count + long_count
    if total_questions == 0:
        return "❌ Please specify at least one question."
    
    try:
        # A. Load all PDFs
        progress(0, desc=f"📄 PDF file(s) uploaded, accessing {len(pdf_files)} file(s)...")
        all_pages = []
        
        for idx, pdf_file in enumerate(pdf_files):
            current_progress = 0.05 + (idx * 0.1 / len(pdf_files))
            progress(current_progress,
                    desc=f"📂 Accessing PDF {idx + 1}/{len(pdf_files)}: {os.path.basename(pdf_file.name)[:30]}...")
            loader = PyPDFLoader(pdf_file.name)
            pages = loader.load()
            
            if not pages:
                return f"❌ Error: Could not extract text from {pdf_file.name}. Please ensure it's a valid PDF with text content."
            
            all_pages.extend(pages)
        
        progress(0.15, desc=f"✅ PDF loaded successfully! Extracted {len(all_pages)} pages from {len(pdf_files)} file(s)")
        
        # B. Split Text
        progress(0.20, desc="📝 Extracting text content from PDFs...")
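        # RecursiveCharacterTextSplitter falls back through separators (paragraphs,
        # then lines, then words), so ~1000-character chunks break at natural
        # boundaries and the 100-character overlap preserves context across them.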
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        chunks = text_splitter.split_documents(all_pages)
        progress(0.30, desc=f"✅ Text extracted successfully! Created {len(chunks)} text chunks, preparing embeddings...")
        
        # C. Vector Store (FAISS)
        progress(0.35, desc="🧠 Generating embeddings for content understanding...")
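        # FastEmbed computes embeddings locally with a small ONNX model, so this
        # step needs no API token; only text generation calls the HF Inference API.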
        embeddings = FastEmbedEmbeddings()
        progress(0.40, desc="🧠 Creating knowledge base from embeddings...")
        vector_store = FAISS.from_documents(chunks, embeddings)
        progress(0.50, desc="✅ Knowledge base created successfully! Analyzing content for key concepts...")
        
        # D. Retrieve Context (more chunks for multiple PDFs)
        progress(0.55, desc="🔍 Identifying key concepts and topics from content...")
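        # Cap k at the chunk count so retrieval never asks for more chunks than
        # exist in very short documents.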
        retriever = vector_store.as_retriever(search_kwargs={"k": min(10, len(chunks))})
        context_docs = retriever.invoke("Key concepts, definitions, and important topics")
        context_text = "\n\n".join([doc.page_content for doc in context_docs])
        progress(0.60, desc=f"✅ Analysis complete! Found {len(context_docs)} key sections. Activating AI model...")
        
        # E. Generate all sets
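        # Every set draws on the same retrieved context; variety between sets comes
        # from the prompt ("Set {n} of {m}") and temperature=0.7 sampling.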
        all_outputs = []
        
        for set_num in range(1, num_sets + 1):
            progress(0.65 + (set_num - 1) * 0.30 / num_sets,
                    desc=f"🤖 AI Model activated! Preparing to generate Set {set_num}/{num_sets}...")
            
            # Create Prompt for this set
            sections = []
            answer_key_instructions = []
            
            if mcq_count > 0:
                sections.append(f"""Section A: Multiple Choice Questions (MCQs) - {mcq_count} questions
Difficulty: {mcq_difficulty}
Create {mcq_count} MCQs with 4 options each (A, B, C, D). Mark the correct answer clearly.""")
                answer_key_instructions.append("MCQ Answer Key")
            
            if short_count > 0:
                sections.append(f"""Section B: Short Answer Questions - {short_count} questions
Difficulty: {short_difficulty}
Create {short_count} short answer questions (2-3 marks each, expected answer: 2-3 sentences).""")
            
            if long_count > 0:
                sections.append(f"""Section C: Long Answer/Essay Questions - {long_count} questions
Difficulty: {long_difficulty}
Create {long_count} long answer questions (5-10 marks each, expected answer: detailed explanation).""")
            
            sections_text = "\n\n".join(sections)
            # Fall back to a placeholder so the prompt's "provide:" line is never
            # left dangling when no MCQ answer key is requested.
            answer_key_text = "\n".join(f"- {key}" for key in answer_key_instructions) or "- (no answer key required)"
            
            prompt = f"""You are an expert academic examiner. Create a formal Question Paper based ONLY on the context provided below.

CONTEXT:
{context_text}

INSTRUCTIONS:
Create Question Paper Set {set_num} of {num_sets}

{sections_text}

FORMAT REQUIREMENTS:
- Start with "QUESTION PAPER - SET {set_num}"
- Include proper section headers
- Number all questions sequentially within each section
- For MCQs: Provide 4 options (A, B, C, D)
- At the end, provide:
{answer_key_text}

Do not output conversational text. Output ONLY the exam paper in a well-formatted structure."""
            
            progress(0.70 + (set_num - 1) * 0.30 / num_sets, 
                    desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... 0%")
            
            # F. Generate using chat completion
            messages = [{"role": "user", "content": prompt}]
            
            response = ""
            token_count = 0
            max_tokens = 2500  # Increased for longer papers
            last_update_time = time.time()
            
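            # Stream the response so the progress bar can update as tokens arrive
            # and any partial output survives an interruption.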
            try:
                for message in client.chat_completion(
                    messages=messages,
                    model="meta-llama/Llama-3.2-3B-Instruct",
                    max_tokens=max_tokens,
                    temperature=0.7,
                    stream=True,
                ):
                    # Check total timeout
                    if time.time() - start_time > 300:  # 5 minute total timeout
                        return f"⏱️ Request timeout. Please try with:\n- Fewer PDF files\n- Fewer questions\n- Fewer sets\n\nPartial output:\n{response}"
                    
                    if hasattr(message, 'choices') and len(message.choices) > 0:
                        if hasattr(message.choices[0], 'delta') and hasattr(message.choices[0].delta, 'content'):
                            response += message.choices[0].delta.content or ""
                            token_count += 1
                            
                            # Update progress every 50 tokens to reduce overhead
                            if token_count % 50 == 0 or time.time() - last_update_time > 2:
                                # Calculate progress within this set (70-95% range divided by number of sets)
                                set_start = 0.70 + (set_num - 1) * 0.30 / num_sets
                                set_range = 0.25 / num_sets  # 25% of total progress for generation
                                generation_progress = min((token_count / max_tokens), 1.0)
                                current_progress = set_start + (generation_progress * set_range)
                                percentage = int(generation_progress * 100)
                                
                                # Update with dynamic percentage
                                progress(current_progress, 
                                        desc=f"✍️ Generating Question Paper Set {set_num}/{num_sets}... {percentage}%")
                                last_update_time = time.time()
            
            except Exception as e:
                if response:
                    return f"⚠️ Generation interrupted: {str(e)}\n\nPartial output for Set {set_num}:\n{response}"
                else:
                    raise e
            
            progress(0.70 + set_num * 0.30 / num_sets,
                    desc=f"✅ Set {set_num}/{num_sets} generated successfully!")
            
            all_outputs.append(response)
        
        progress(1.0, desc=f"✅ All {num_sets} Question Paper(s) Generated Successfully! 🎉")
        
        # Combine all sets, separated by a divider line between papers.
        final_output = ("\n\n" + "=" * 80 + "\n\n").join(all_outputs)
        return final_output

    except Exception as e:
        return f"❌ Error: {str(e)}\n\nPlease check:\n1. PDFs are valid and contain text\n2. HF_TOKEN is correctly set in Space secrets\n3. Try again or contact support"

# --- 3. The UI ---
with gr.Blocks(title="AI Question Paper Generator") as demo:
    gr.Markdown("# πŸ“„ AI Question Paper Generator Pro")
    gr.Markdown("Powered by **Fine-Tuned Llama 3.2 3B**")
    gr.Markdown("⚑ Fast β€’ 🎯 Accurate β€’ πŸ“š Multi-PDF Support β€’ 🎲 Multiple Sets")
    
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="πŸ“„ Upload Study Materials (PDF) - Max 5 files",
                file_types=[".pdf"],
                file_count="multiple"
            )
            
            gr.Markdown("### 🎲 Number of Question Paper Sets")
            num_sets = gr.Slider(
                1, 3, value=1, step=1,
                label="πŸ“‹ Generate multiple unique sets"
            )
            
            gr.Markdown("### πŸ“ Section A: Multiple Choice Questions")
            with gr.Group():
                mcq_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"], 
                    label="🎚️ MCQ Difficulty", 
                    value="Medium"
                )
                mcq_count = gr.Slider(
                    0, 20, value=5, step=1, 
                    label="πŸ“Š Number of MCQs"
                )
            
            gr.Markdown("### ✍️ Section B: Short Answer Questions")
            with gr.Group():
                short_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"], 
                    label="🎚️ Short Answer Difficulty", 
                    value="Medium"
                )
                short_count = gr.Slider(
                    0, 15, value=3, step=1, 
                    label="πŸ“Š Number of Short Answer Questions"
                )
            
            gr.Markdown("### πŸ“– Section C: Long Answer Questions")
            with gr.Group():
                long_difficulty = gr.Radio(
                    ["Easy", "Medium", "Hard"], 
                    label="🎚️ Long Answer Difficulty", 
                    value="Medium"
                )
                long_count = gr.Slider(
                    0, 10, value=2, step=1, 
                    label="πŸ“Š Number of Long Answer Questions"
                )
            
            btn = gr.Button("✨ Generate Question Paper(s)", variant="primary", size="lg")
            
            gr.Markdown("""
            ### 📝 Instructions:
            1. Upload 1-5 PDF files containing study material
            2. Choose number of sets to generate (1-3)
            3. Configure each section:
               - Set difficulty level
               - Set number of questions
            4. Click Generate!
            
            **Note:** Set any section to 0 questions to exclude it.
            """)
        
        with gr.Column(scale=2):
            output = gr.Markdown(
                label="Generated Question Paper(s)",
                value="πŸ‘‹ Upload PDF files and configure settings to generate question papers..."
            )

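    # The `inputs` list must match generate_question_paper's parameter order;
    # Gradio passes component values to the function positionally.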
    btn.click(
        fn=generate_question_paper,
        inputs=[
            pdf_input, 
            mcq_difficulty, mcq_count,
            short_difficulty, short_count,
            long_difficulty, long_count,
            num_sets
        ],
        outputs=output,
        show_progress="full"
    )
    
    gr.Markdown("""
    ---  
    **Features:**
    - ✅ Multiple PDF support (up to 5 files)
    - ✅ Separate difficulty control for each question type
    - ✅ Customizable question count per section
    - ✅ Generate 1-3 unique question paper sets
    - ✅ Automatic answer key generation for MCQs
    - ✅ Queue system for concurrent users
    
    **Performance Tips:**
    - For faster results: Use 1-2 PDFs, fewer questions, single set
    - If timeout occurs: Reduce number of questions or sets
    - Queue position will be shown when multiple users are active
    """)

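# Queue requests beyond the concurrency limit so long-running generations don't
# exhaust the Space's resources; max_size bounds how many requests may wait.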
if __name__ == "__main__":
    demo.queue(
        max_size=20,  # Maximum queue size
        default_concurrency_limit=2  # Allow 2 concurrent users
    )
    demo.launch(
        show_error=True,
        share=False
    )