Spaces:
Running
Running
| """Skill extraction from CV text using LLM.""" | |
| from __future__ import annotations | |
| from typing import Any, Dict, List | |
| from LLM.llm_models import cv_analyzer_model # Using CV-specific model (same as orchestrator) | |
| import json | |
| import re | |
def _print_terminal_log(action: str, details: str = "") -> None:
    """Print a timestamped CV-analyzer log line to the terminal.

    The line has the form ``[HH:MM:SS] [CV ANALYZER] <action>`` and, when
    ``details`` is non-empty, `` :: <details>`` is appended.

    Args:
        action: Short label for the step being logged.
        details: Optional extra text for the log line.
    """
    # Imported locally so this helper does not rely on a module-level
    # ``datetime`` import being present.
    from datetime import datetime

    timestamp = datetime.now().strftime("%H:%M:%S")
    line = f"[{timestamp}] [CV ANALYZER] {action}"
    if details:
        line = f"{line} :: {details}"
    print(line)
# Keys of the skills structure whose values are lists of strings.
_SKILL_LIST_FIELDS = (
    "technical_skills",
    "soft_skills",
    "recent_roles",
    "education",
    "certifications",
    "domain_expertise",
)

# Every key the prompt asks the model to return.
_SKILL_KNOWN_FIELDS = frozenset(_SKILL_LIST_FIELDS) | {"experience_years", "summary"}


def _empty_skills(summary: str = "") -> Dict[str, Any]:
    """Return the skills structure with no data and the given summary."""
    return {
        "technical_skills": [],
        "soft_skills": [],
        "experience_years": "unknown",
        "recent_roles": [],
        "education": [],
        "certifications": [],
        "domain_expertise": [],
        "summary": summary,
    }


def _build_cv_prompt(cv_text: str) -> str:
    """Build the user prompt that asks the model for structured CV data."""
    return f"""Analyze the following CV/Resume text and extract ALL relevant information in a structured format.

CV TEXT:
{cv_text}

Please extract and organize the following information:
1. TECHNICAL SKILLS: Programming languages, frameworks, tools, technologies
2. SOFT SKILLS: Communication, leadership, teamwork, problem-solving, etc.
3. PROFESSIONAL EXPERIENCE: Years of experience, job titles, companies
4. EDUCATION: Degrees, certifications, institutions
5. DOMAIN EXPERTISE: Industries, specific domains (e.g., Finance, Healthcare, AI/ML)

Return your analysis in the following JSON-like structure:
{{
    "technical_skills": ["skill1", "skill2", ...],
    "soft_skills": ["skill1", "skill2", ...],
    "experience_years": <number or "unknown">,
    "recent_roles": ["role1", "role2", ...],
    "education": ["degree1", "degree2", ...],
    "certifications": ["cert1", "cert2", ...],
    "domain_expertise": ["domain1", "domain2", ...],
    "summary": "A brief 2-3 sentence summary of the candidate's profile"
}}

Be thorough and extract as many relevant skills as possible. If information is not available, use empty arrays or "unknown"."""


def _response_to_text(response: Any) -> str:
    """Return the model response as a string (empty string for ``None``)."""
    content = response.content if hasattr(response, "content") else response
    if content is None:
        return ""
    return content if isinstance(content, str) else str(content)


def _parse_first_json_object(text: str) -> Dict[str, Any]:
    """Decode the first JSON object in ``text`` that carries a known field.

    Scans forward from each ``{`` so that surrounding prose, markdown code
    fences, or stray braces after the object do not break parsing.

    Raises:
        ValueError: If no such JSON object can be decoded.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            candidate = None
        # Requiring a known field avoids returning a nested fragment when
        # the enclosing object itself is malformed.
        if isinstance(candidate, dict) and _SKILL_KNOWN_FIELDS & candidate.keys():
            return candidate
        start = text.find("{", start + 1)
    raise ValueError("no JSON object found in model response")


def _normalise_skills(parsed: Dict[str, Any]) -> Dict[str, Any]:
    """Fill in missing fields and coerce list fields to real lists."""
    result = _empty_skills()
    result.update(parsed)  # extra keys supplied by the model are kept
    for field in _SKILL_LIST_FIELDS:
        value = result[field]
        if value is None:
            result[field] = []
        elif isinstance(value, str):
            # A bare string would otherwise be iterated character by character.
            result[field] = [value] if value.strip() else []
        elif not isinstance(value, list):
            result[field] = [value]
    if result["experience_years"] is None or result["experience_years"] == "":
        result["experience_years"] = "unknown"
    if result["summary"] is None:
        result["summary"] = ""
    return result


def extract_skills_from_cv_text(cv_text: str, log_callback=None) -> Dict[str, Any]:
    """
    Extract skills and relevant information from CV text using LLM.

    Args:
        cv_text: The extracted text content from a CV
        log_callback: Optional callable invoked as ``log_callback(step, payload)``
            with a step label and a dict of details at each stage.

    Returns:
        Dictionary with the keys ``technical_skills``, ``soft_skills``,
        ``experience_years``, ``recent_roles``, ``education``,
        ``certifications``, ``domain_expertise`` and ``summary``.
        If the model reply contains no usable JSON, the lists are empty and
        ``summary`` holds the first 500 characters of the reply.
        On any failure (including empty ``cv_text``) the same structure is
        returned with an additional ``error`` key; no exception propagates.
    """

    def notify(step: str, payload: Dict[str, Any]) -> None:
        if log_callback:
            log_callback(step, payload)

    try:
        # Reject empty input up front instead of asking the model to analyse nothing.
        if not isinstance(cv_text, str) or not cv_text.strip():
            raise ValueError("CV text is empty; nothing to analyze")

        _print_terminal_log("Starting AI skill extraction from CV text")
        notify("AI Skill Extraction", {"status": "Initializing LLM model..."})

        # Use the CV analyzer model (same provider as orchestrator - HF/Gemini)
        model = cv_analyzer_model
        _print_terminal_log("LLM Initialized", f"Model ready, CV length: {len(cv_text)} chars")
        notify("AI Analysis", {"status": "Sending CV to AI for analysis", "cv_length": len(cv_text)})

        messages = [
            {
                "role": "system",
                "content": "You are an expert HR analyst specializing in CV/Resume analysis and skill extraction. Extract information accurately and comprehensively.",
            },
            {
                "role": "user",
                "content": _build_cv_prompt(cv_text),
            },
        ]
        notify("LLM Request", {"message_count": len(messages), "model": "cv_analyzer_model"})
        _print_terminal_log("Sending request to AI", "Waiting for skill extraction...")
        response = model.generate(messages=messages)

        # The reply may be a ChatMessage-like object or a plain value.
        response_text = _response_to_text(response)
        _print_terminal_log("AI Response Received", f"Response length: {len(response_text)} chars")
        notify("AI Response Received", {"response_length": len(response_text)})

        # Extract JSON from response (handles markdown code blocks and prose).
        notify("Parsing AI Response", {"status": "Extracting structured data from AI response"})
        try:
            parsed = _parse_first_json_object(response_text)
        except ValueError:
            notify("JSON Extraction Failed", {"status": "No JSON found, using fallback structure"})
            # Fallback: basic structure carrying the start of the raw response.
            skills_data = _empty_skills(summary=response_text[:500])
        else:
            notify("JSON Extraction", {"status": "Found JSON in response, parsing..."})
            _print_terminal_log("Parsing JSON response", "Extracting structured skill data...")
            skills_data = _normalise_skills(parsed)
            tech_count = len(skills_data["technical_skills"])
            soft_count = len(skills_data["soft_skills"])
            _print_terminal_log(
                "Skills Extracted Successfully",
                f"Technical: {tech_count}, Soft: {soft_count}, Total: {tech_count + soft_count}",
            )
            notify(
                "Skills Parsed Successfully",
                {
                    "technical_skills": tech_count,
                    "soft_skills": soft_count,
                    "total_skills": tech_count + soft_count,
                },
            )

        _print_terminal_log("✅ CV Analysis Complete", "All skills successfully extracted and structured")
        notify("✅ Extraction Complete", {"status": "CV processing finished successfully"})
        return skills_data
    except Exception as e:
        # Boundary handler: callers receive an error structure, never an exception.
        error_msg = str(e)
        _print_terminal_log(f"❌ ERROR: {type(e).__name__}", error_msg)
        notify("❌ AI Extraction Error", {"error": error_msg, "type": type(e).__name__})
        return {
            "error": error_msg,
            **_empty_skills(summary=f"Failed to extract skills: {error_msg}"),
        }
def _escape_text(value: Any) -> str:
    """Return ``value`` as text that is safe to embed in HTML."""
    # Imported locally so this block does not need a module-level import.
    from html import escape

    return escape(str(value))


def _display_items(value: Any) -> List[str]:
    """Coerce a skills field into a list of HTML-escaped, non-blank strings."""
    if value is None:
        return []
    # A bare string (or other scalar) is one item, not a sequence of characters.
    items = value if isinstance(value, (list, tuple, set)) else [value]
    return [
        _escape_text(item)
        for item in items
        if item is not None and str(item).strip()
    ]


def _chip_section(title: str, items: List[str], color_var: str) -> List[str]:
    """Render a titled section of pill-style chips; empty when no items."""
    if not items:
        return []
    chips = [
        f'<span style="background: var(--bg-panel); border: 1px solid var(--border-dim); '
        f'padding: 6px 12px; border-radius: 4px; color: var({color_var}); font-weight: 600; '
        f'font-size: 13px;">{item}</span>'
        for item in items
    ]
    return [
        '<div style="margin-bottom: 20px;">',
        f'<h3 style="color: var(--arc-yellow); margin-bottom: 12px;">{title}</h3>',
        '<div style="display: flex; flex-wrap: wrap; gap: 8px;">',
        *chips,
        '</div></div>',
    ]


def _bullet_group(label: str, items: List[str]) -> List[str]:
    """Render a bold label followed by a bullet list; empty when no items."""
    if not items:
        return []
    return [
        f'<p style="color: var(--text-main); margin: 8px 0;"><strong>{label}:</strong></p>',
        '<ul style="color: var(--text-dim); margin-top: 4px;">',
        *(f'<li>{item}</li>' for item in items),
        '</ul>',
    ]


def format_skills_for_display(skills_data: Dict[str, Any]) -> str:
    """
    Format extracted skills data into HTML for display in Gradio.

    All values taken from ``skills_data`` are HTML-escaped before being
    embedded, because they originate from CV text and model output.

    Args:
        skills_data: Dictionary containing extracted skills. If it has an
            ``error`` key, an error panel showing ``summary`` is returned.

    Returns:
        HTML string for display
    """
    if "error" in skills_data:
        message = _escape_text(skills_data.get("summary") or "Unknown error")
        return f"""
        <div style="padding: 20px; background: var(--bg-card); border: 1px solid var(--arc-red); border-radius: 4px;">
            <h3 style="color: var(--arc-red); margin-top: 0;">⚠️ Error Extracting Skills</h3>
            <p style="color: var(--text-dim);">{message}</p>
        </div>
        """

    html_parts = [
        '<div style="padding: 24px; background: var(--bg-card); border: 1px solid var(--border-bright); border-radius: 4px; margin-top: 20px;">',
        '<h2 style="color: var(--arc-orange); margin-top: 0; display: flex; align-items: center; gap: 12px;">',
        '<span style="font-size: 32px;">📄</span> CV ANALYSIS COMPLETE',
        '</h2>',
    ]

    # Summary
    summary = skills_data.get("summary")
    if summary:
        html_parts.append('<div style="background: var(--bg-panel); padding: 16px; border-left: 4px solid var(--arc-cyan); margin-bottom: 24px;">')
        html_parts.append(f'<p style="color: var(--text-main); margin: 0; line-height: 1.6;">{_escape_text(summary)}</p>')
        html_parts.append('</div>')

    # Technical and soft skills
    html_parts.extend(_chip_section(
        "💻 TECHNICAL SKILLS", _display_items(skills_data.get("technical_skills")), "--arc-green"))
    html_parts.extend(_chip_section(
        "🤝 SOFT SKILLS", _display_items(skills_data.get("soft_skills")), "--arc-cyan"))

    # Experience & Roles
    years = skills_data.get("experience_years")
    # Explicit check so that a legitimate value of 0 years is still shown.
    has_years = years is not None and str(years).strip() != ""
    roles = _display_items(skills_data.get("recent_roles"))
    if has_years or roles:
        html_parts.append('<div style="margin-bottom: 20px;">')
        html_parts.append('<h3 style="color: var(--arc-yellow); margin-bottom: 12px;">💼 EXPERIENCE</h3>')
        if has_years:
            html_parts.append(f'<p style="color: var(--text-main); margin: 8px 0;"><strong>Years:</strong> {_escape_text(years)}</p>')
        html_parts.extend(_bullet_group("Recent Roles", roles))
        html_parts.append('</div>')

    # Education
    education = _display_items(skills_data.get("education"))
    certifications = _display_items(skills_data.get("certifications"))
    if education or certifications:
        html_parts.append('<div style="margin-bottom: 20px;">')
        html_parts.append('<h3 style="color: var(--arc-yellow); margin-bottom: 12px;">🎓 EDUCATION & CERTIFICATIONS</h3>')
        html_parts.extend(_bullet_group("Education", education))
        html_parts.extend(_bullet_group("Certifications", certifications))
        html_parts.append('</div>')

    # Domain Expertise
    html_parts.extend(_chip_section(
        "🎯 DOMAIN EXPERTISE", _display_items(skills_data.get("domain_expertise")), "--arc-orange"))

    html_parts.append('</div>')
    return ''.join(html_parts)