# EastSync-AI / utils / cv_training_cost.py
# Daniel Tatar
# CV reader + matching project (#13)
# 07273d8
"""Training cost estimation for CV project matching."""
from __future__ import annotations

from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Union

from smolagents import CodeAgent

from LLM.llm_models import websearch_model
from agents.websearch_agent import WebSearchAgent
def _print_terminal_log(action: str, details: str = "") -> None:
    """Print a timestamped ``[TRAINING COST]`` log line to stdout.

    Args:
        action: Short label for the event being logged.
        details: Optional extra text; when non-empty it is appended to the
            line after a ``::`` separator.
    """
    timestamp = datetime.now().strftime("%H:%M:%S")
    line = f"[{timestamp}] [TRAINING COST] {action}"
    if details:
        line = f"{line} :: {details}"
    print(line)
# Upper bound on how many missing skills are estimated per call; any further
# entries in the input are ignored.
_MAX_SKILLS_PER_ESTIMATE = 10


def _resolve_skill_name(skill: Any) -> Any:
    """Return the skill name for an entry that is a name or a dict with 'skill_name'."""
    if isinstance(skill, dict):
        # A dict without 'skill_name' falls back to the dict itself.
        return skill.get('skill_name', skill)
    return skill


def _default_training_plan(skill_name: str) -> Dict[str, Any]:
    """Build the default-estimate training plan entry for one skill name."""
    return {
        "skill": skill_name,
        "title": f"{skill_name} - Professional Training",
        "cost": estimate_default_cost(skill_name),
        "duration_hours": estimate_default_duration(skill_name),
        "provider": "Estimated",
        "source": "default",
    }


def estimate_training_costs_for_skills(
    missing_skills: List[Union[str, Dict[str, Any]]],
    project_name: str,
    log_callback: Optional[Callable[[str, Dict[str, Any]], Any]] = None
) -> List[Dict[str, Any]]:
    """
    Estimate training costs for missing skills using default estimates.

    No web search is performed: every plan is produced by
    ``estimate_default_cost`` and ``estimate_default_duration``. Only the
    first ``_MAX_SKILLS_PER_ESTIMATE`` entries of ``missing_skills`` are
    estimated.

    Args:
        missing_skills: Skills the candidate is missing. Each entry is either
            a skill name or a dict carrying the name under ``'skill_name'``.
        project_name: Name of the project (used only in log messages).
        log_callback: Optional callable invoked as ``log_callback(title, data)``
            with a string title and a dict of details.

    Returns:
        One plan dict per estimated skill with the keys ``skill``, ``title``,
        ``cost``, ``duration_hours``, ``provider`` and ``source``. An empty
        list when ``missing_skills`` is empty.
    """
    if not missing_skills:
        return []

    _print_terminal_log(
        "Estimating Training Costs",
        f"Searching for {len(missing_skills)} missing skills for {project_name}"
    )
    if log_callback:
        log_callback("💰 Training Cost Analysis", {
            "project": project_name,
            "missing_skills": len(missing_skills)
        })

    try:
        # Don't use WebSearchAgent - it has the same singleton callback issues.
        # Default estimates are used instead (fast and reliable).
        _print_terminal_log("Using Default Estimates", "Skipping websearch to avoid callback issues")

        training_plans = []
        for skill in missing_skills[:_MAX_SKILLS_PER_ESTIMATE]:
            skill_name = _resolve_skill_name(skill)
            _print_terminal_log(f"Estimating: {skill_name}", "Using default estimates")
            if log_callback:
                log_callback(f"💰 Estimating: {skill_name}", {"status": "Calculating training cost"})
            training_plans.append(_default_training_plan(skill_name))

        _print_terminal_log("✅ Cost Estimation Complete", f"Generated {len(training_plans)} training plans")
        if log_callback:
            total_cost = sum(plan['cost'] for plan in training_plans)
            total_hours = sum(plan['duration_hours'] for plan in training_plans)
            log_callback("✅ Training Costs Calculated", {
                "total_plans": len(training_plans),
                "total_cost": f"${total_cost:,.2f}",
                "total_hours": total_hours
            })
        return training_plans
    except Exception as e:
        # Best-effort boundary: report the failure, then still hand back the
        # default estimates without any further progress logging.
        _print_terminal_log(f"❌ ERROR: {type(e).__name__}", str(e))
        if log_callback:
            try:
                log_callback("❌ Training Cost Error", {"error": str(e)})
            except Exception as callback_error:
                # A failing callback may be what brought us here; it must not
                # prevent the fallback result from being returned.
                _print_terminal_log(
                    f"❌ ERROR: {type(callback_error).__name__}",
                    str(callback_error)
                )
        # If the estimators themselves failed above, the same error is raised
        # again from here.
        return [
            _default_training_plan(_resolve_skill_name(skill))
            for skill in missing_skills[:_MAX_SKILLS_PER_ESTIMATE]
        ]
# Keyword tiers used to grade a skill name, checked in this order; the first
# tier with a keyword occurring anywhere in the lowercased name wins.
_SKILL_TIER_KEYWORDS = (
    # High-complexity skills
    ("high", ('architect', 'senior', 'lead', 'advanced', 'expert')),
    # Medium-complexity technical skills
    ("medium", ('programming', 'development', 'engineering', 'framework', 'platform')),
    # Soft skills and basic technical skills
    ("basic", ('communication', 'management', 'leadership', 'teamwork', 'basic')),
)

# Lower-tier keywords that contain a high-tier keyword as a substring
# ('lead' inside 'leadership'). They are blanked out before the high-tier
# check; otherwise "Leadership" would be graded high-complexity and the
# 'leadership' soft-skill keyword could never match.
_SHADOWED_KEYWORDS = ('leadership',)

# Default cost and duration (hours) per tier; the fallbacks apply when no
# keyword matches.
_TIER_COST = {"high": 500.0, "medium": 200.0, "basic": 100.0}
_FALLBACK_COST = 150.0
_TIER_HOURS = {"high": 40, "medium": 20, "basic": 8}
_FALLBACK_HOURS = 16


def _skill_tier(skill: str) -> str:
    """Return 'high', 'medium' or 'basic' for a skill name, or '' if no keyword matches."""
    lowered = skill.lower()
    unshadowed = lowered
    for keyword in _SHADOWED_KEYWORDS:
        unshadowed = unshadowed.replace(keyword, ' ')
    for tier, keywords in _SKILL_TIER_KEYWORDS:
        # Only the high tier has keywords that are shadowed by longer ones.
        haystack = unshadowed if tier == "high" else lowered
        if any(keyword in haystack for keyword in keywords):
            return tier
    return ""


def estimate_default_cost(skill: str) -> float:
    """Estimate default training cost based on skill complexity.

    Args:
        skill: Skill name; matching is case-insensitive and substring-based.

    Returns:
        500.0 for high-complexity, 200.0 for medium-complexity, 100.0 for
        soft/basic skills, and 150.0 when no keyword matches.
    """
    return _TIER_COST.get(_skill_tier(skill), _FALLBACK_COST)


def estimate_default_duration(skill: str) -> int:
    """Estimate default training duration based on skill complexity.

    Args:
        skill: Skill name; matching is case-insensitive and substring-based.

    Returns:
        40 for high-complexity, 20 for medium-complexity, 8 for soft/basic
        skills, and 16 when no keyword matches.
    """
    return _TIER_HOURS.get(_skill_tier(skill), _FALLBACK_HOURS)
def calculate_total_training_cost(matched_projects: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Calculate total training costs across all matched projects.

    Missing or ``None`` values for ``'training_plans'``, ``'cost'`` and
    ``'duration_hours'`` are treated as empty / zero instead of raising.

    Args:
        matched_projects: List of matched projects; each project's
            ``'training_plans'`` entry is read as a list of plan dicts with
            ``'cost'`` and ``'duration_hours'``.

    Returns:
        Dictionary with ``total_projects``, ``total_cost``, ``total_hours``
        and ``average_cost_per_project``. All values are zero when
        ``matched_projects`` is empty.
    """
    projects = matched_projects or []
    project_count = len(projects)

    total_cost = 0.0
    total_hours = 0
    for project in projects:
        # `or []` / `or 0` also cover keys that are present but set to None.
        for plan in project.get('training_plans') or []:
            total_cost += plan.get('cost') or 0
            total_hours += plan.get('duration_hours') or 0

    return {
        "total_projects": project_count,
        "total_cost": total_cost,
        "total_hours": total_hours,
        "average_cost_per_project": total_cost / project_count if project_count else 0.0,
    }