Spaces:

MCP-1st-Birthday
/

EastSync-AI

Running

EastSync-AI / utils /cv_training_cost.py

Daniel Tatar

Cv reader + matching project (#13)

07273d8 15 days ago

6.63 kB

	"""Training cost estimation for CV project matching."""

	from __future__ import annotations

	from typing import Any, Dict, List
	from smolagents import CodeAgent
	from LLM.llm_models import websearch_model
	from agents.websearch_agent import WebSearchAgent


	def _print_terminal_log(action: str, details: str = ""):
	"""Print formatted log to terminal."""
	timestamp = __import__('datetime').datetime.now().strftime("%H:%M:%S")
	if details:
	print(f"[{timestamp}] [TRAINING COST] {action} :: {details}")
	else:
	print(f"[{timestamp}] [TRAINING COST] {action}")


	def estimate_training_costs_for_skills(
	    missing_skills: List[Any],
	    project_name: str,
	    log_callback=None,
	    *,
	    max_skills: int = 10,
	) -> List[Dict[str, Any]]:
	    """
	    Estimate training costs for missing skills using default estimates.

	    No web search is performed: every plan is produced by
	    ``estimate_default_cost`` / ``estimate_default_duration`` and is tagged
	    with ``"provider": "Estimated"`` and ``"source": "default"``.

	    Args:
	        missing_skills: Skills the candidate is missing. Each entry is either
	            a skill name or a dict carrying a ``skill_name`` key; any other
	            value is converted with ``str()``.
	        project_name: Name of the project (used only in log output).
	        log_callback: Optional callable invoked as
	            ``log_callback(title, payload_dict)`` for progress reporting.
	        max_skills: Maximum number of leading entries of ``missing_skills``
	            to estimate (default 10). ``None`` removes the limit.

	    Returns:
	        List of training plan dicts with the keys ``skill``, ``title``,
	        ``cost``, ``duration_hours``, ``provider`` and ``source``. Empty when
	        ``missing_skills`` is empty.
	    """
	    if not missing_skills:
	        return []

	    _print_terminal_log(
	        "Estimating Training Costs",
	        f"Searching for {len(missing_skills)} missing skills for {project_name}"
	    )

	    if log_callback:
	        log_callback("💰 Training Cost Analysis", {
	            "project": project_name,
	            "missing_skills": len(missing_skills)
	        })

	    # Only the first ``max_skills`` entries are estimated.
	    selected_skills = missing_skills[:max_skills]

	    try:
	        # Don't use WebSearchAgent - it has the same singleton callback issues.
	        # Instead, use default estimates (fast and reliable).
	        _print_terminal_log("Using Default Estimates", "Skipping websearch to avoid callback issues")

	        training_plans = []
	        for skill in selected_skills:
	            skill_name = _resolve_skill_name(skill)

	            _print_terminal_log(f"Estimating: {skill_name}", "Using default estimates")

	            if log_callback:
	                log_callback(f"💰 Estimating: {skill_name}", {"status": "Calculating training cost"})

	            training_plans.append(_build_default_plan(skill_name))

	        _print_terminal_log("✅ Cost Estimation Complete", f"Generated {len(training_plans)} training plans")

	        if log_callback:
	            total_cost = sum(plan['cost'] for plan in training_plans)
	            total_hours = sum(plan['duration_hours'] for plan in training_plans)
	            log_callback("✅ Training Costs Calculated", {
	                "total_plans": len(training_plans),
	                "total_cost": f"${total_cost:,.2f}",
	                "total_hours": total_hours
	            })

	        return training_plans

	    except Exception as e:
	        # Best-effort boundary: a failure while logging or reporting must not
	        # cost the caller its estimates, so report it and rebuild the plans.
	        _print_terminal_log(f"❌ ERROR: {type(e).__name__}", str(e))
	        if log_callback:
	            log_callback("❌ Training Cost Error", {"error": str(e)})

	        # Return default estimates for the same selection of skills.
	        return [_build_default_plan(_resolve_skill_name(skill)) for skill in selected_skills]


	def _resolve_skill_name(skill: Any) -> str:
	    """Return the skill name for one raw ``missing_skills`` entry as a string.

	    Dict entries contribute their ``skill_name`` value. Anything that is not
	    already a string (including a dict lacking that key) is converted with
	    ``str()`` so the estimators, which lower-case the name, do not fail.
	    """
	    name = skill.get('skill_name', skill) if isinstance(skill, dict) else skill
	    return name if isinstance(name, str) else str(name)


	def _build_default_plan(skill_name: str) -> Dict[str, Any]:
	    """Build one default-estimate training plan dict for ``skill_name``."""
	    return {
	        "skill": skill_name,
	        "title": f"{skill_name} - Professional Training",
	        "cost": estimate_default_cost(skill_name),
	        "duration_hours": estimate_default_duration(skill_name),
	        "provider": "Estimated",
	        "source": "default"
	    }


	# Keyword tiers, checked in order; the first tier with a keyword found as a
	# substring of the lower-cased skill name wins.
	# Each entry: (keywords, shadowed_phrases, cost, duration_hours).
	# ``shadowed_phrases`` are blanked out before matching that tier because they
	# contain one of its keywords but belong to a later tier.
	_SKILL_ESTIMATE_TIERS = (
	    # High-complexity skills. 'lead' is a substring of 'leadership', which is
	    # listed as a soft skill below, so it must not trigger this tier.
	    (('architect', 'senior', 'lead', 'advanced', 'expert'), ('leadership',), 500.0, 40),
	    # Medium-complexity technical skills
	    (('programming', 'development', 'engineering', 'framework', 'platform'), (), 200.0, 20),
	    # Soft skills and basic technical skills
	    (('communication', 'management', 'leadership', 'teamwork', 'basic'), (), 100.0, 8),
	)

	# Estimate used when no tier keyword matches: (cost, duration_hours).
	_FALLBACK_SKILL_ESTIMATE = (150.0, 16)


	def _lookup_default_estimate(skill: str) -> tuple:
	    """Return ``(cost, duration_hours)`` for the first tier matching ``skill``."""
	    skill_lower = skill.lower()
	    for keywords, shadowed_phrases, cost, hours in _SKILL_ESTIMATE_TIERS:
	        haystack = skill_lower
	        for phrase in shadowed_phrases:
	            haystack = haystack.replace(phrase, ' ')
	        if any(term in haystack for term in keywords):
	            return cost, hours
	    return _FALLBACK_SKILL_ESTIMATE


	def estimate_default_cost(skill: str) -> float:
	    """Estimate default training cost based on skill complexity.

	    Matching is by case-insensitive keyword substring: high-complexity
	    keywords give 500.0, medium-complexity technical keywords 200.0, soft or
	    basic skill keywords 100.0, and anything else 150.0.

	    Args:
	        skill: Skill name to classify.

	    Returns:
	        The estimated cost as a float.
	    """
	    return _lookup_default_estimate(skill)[0]


	def estimate_default_duration(skill: str) -> int:
	    """Estimate default training duration based on skill complexity.

	    Uses the same keyword tiers as ``estimate_default_cost``: 40 hours for
	    high-complexity keywords, 20 for medium-complexity technical keywords,
	    8 for soft or basic skill keywords, and 16 otherwise.

	    Args:
	        skill: Skill name to classify.

	    Returns:
	        The estimated duration in hours as an int.
	    """
	    return _lookup_default_estimate(skill)[1]


	def calculate_total_training_cost(matched_projects: List[Dict[str, Any]]) -> Dict[str, Any]:
	    """
	    Calculate total training costs across all matched projects.

	    Each project's ``training_plans`` list is read, and the ``cost`` and
	    ``duration_hours`` of every plan are summed. A missing or ``None``
	    ``training_plans`` counts as no plans; a missing or ``None`` ``cost`` /
	    ``duration_hours`` counts as zero.

	    Args:
	        matched_projects: List of matched projects with training plans

	    Returns:
	        Dictionary with the keys ``total_projects``, ``total_cost``,
	        ``total_hours`` and ``average_cost_per_project``. All values are zero
	        when ``matched_projects`` is empty.
	    """
	    if not matched_projects:
	        return {
	            "total_projects": 0,
	            "total_cost": 0.0,
	            "total_hours": 0,
	            "average_cost_per_project": 0.0
	        }

	    total_cost = 0.0
	    total_hours = 0

	    for project in matched_projects:
	        # ``or`` also covers keys that are present but explicitly None.
	        for plan in project.get('training_plans') or []:
	            total_cost += plan.get('cost') or 0
	            total_hours += plan.get('duration_hours') or 0

	    project_count = len(matched_projects)
	    return {
	        "total_projects": project_count,
	        "total_cost": total_cost,
	        "total_hours": total_hours,
	        # project_count is non-zero here: empty input returned early above.
	        "average_cost_per_project": total_cost / project_count
	    }