Spaces:

mateenahmed
/

Video_Virality

Sleeping

Video_Virality / app /pipeline /frame_analysis.py

github-actions[bot]

Automated deployment from GitHub Actions

8b7ae7a about 1 month ago

7.57 kB

	import os
	import re
	import json
	import base64
	import openai
	from pathlib import Path
	import google.generativeai as genai
	from app.utils.logging import get_logger
	from config import make_path, OPENAI_API_KEY, GEMINI_API_KEY, DATA_DIR


	class FrameAnalyzer:
	def __init__(self, video_path: str, openai_api_key: str = "", save_dir: str = 'processed/frame-analysis'):
	# ✅ Set OpenAI key (explicit or from environment)

	# print(openai_api_key)

	if openai_api_key:
	openai.api_key = openai_api_key
	else:
	import os
	openai.api_key = os.getenv("OPENAI_API_KEY")

	self.video_path = Path(video_path)
	self.frames_dir = DATA_DIR / 'interim' / 'frames' / f'{self.video_path.stem}_'
	self.save_path = make_path(save_dir, video_path, 'frame_analysis', 'json')
	self.save_path.parent.mkdir(parents=True, exist_ok=True)

	log_file = f'{self.video_path.stem}_log.txt'
	self.logger = get_logger('frame_analysis', log_file)

	@staticmethod
	def encode_image(path: Path) -> str:
	with open(path, 'rb') as f:
	return base64.b64encode(f.read()).decode('utf-8')

	@staticmethod
	def extract_json(text: str) -> dict:
	try:
	return json.loads(text)
	except json.JSONDecodeError:
	pass

	match = re.search(r'```json\s(\{.?\})\s*```', text, re.DOTALL)
	if match:
	return json.loads(match.group(1))

	match = re.search(r'(\{.*?\})', text, re.DOTALL)
	if match:
	return json.loads(match.group(1))

	raise ValueError('No valid JSON found in GPT response')

	def gpt_analyze(self, frame_path: Path, prev_path: Path, next_path: Path) -> dict:
	prompt = """
	You are an expert video content strategist. Analyze this video frame and surrounding context.
	Determine if the lighting is poor or intentionally low for creative reasons.

	Output JSON only:
	{
	lighting: 0-100,
	is_artistic_dark: true\|false,
	composition: 0-100,
	has_text: true\|false,
	text: "string",
	hook_strength: 0-100
	}
	"""

	images = [
	{'type': 'image_url', 'image_url': {'url': f'data:image/jpeg;base64,{self.encode_image(p)}'}}
	for p in [prev_path, frame_path, next_path] if p.exists()
	]

	response = openai.chat.completions.create(
	model='gpt-4o-mini',
	messages=[
	{'role': 'user', 'content': [{'type': 'text', 'text': prompt}] + images}
	],
	temperature=0.2,
	max_tokens=400,
	)
	return self.extract_json(response.choices[0].message.content)

	def analyze(self) -> dict:
	results = {}
	all_frames = sorted(self.frames_dir.glob('_scene_.jpg'))
	center_frames = [f for f in all_frames if '_prev' not in f.name and '_next' not in f.name]

	for frame in center_frames:
	prev = frame.with_name(frame.name.replace('.jpg', '_prev.jpg'))
	next_ = frame.with_name(frame.name.replace('.jpg', '_next.jpg'))

	self.logger.info('Analyzing frame: %s', frame.name)
	try:
	result = self.gpt_analyze(frame, prev, next_)
	results[frame.name] = result
	except Exception as e:
	self.logger.error('LLM analysis failed on %s: %s', frame.name, e)
	results[frame.name] = {'error': str(e)}

	with open(self.save_path, 'w', encoding='utf-8') as f:
	json.dump(results, f, indent=2)

	self.logger.info('Frame analysis saved to %s', self.save_path)
	return results

	class HookAnalyzer:
	def __init__(self, video_path: str, gemini_api_key: str = ""):
	self.video_path = Path(video_path)
	self.frames_dir = Path('data/interim/frames') / f'{self.video_path.stem}_'
	self.audio_json = make_path('processed/audio-analysis', video_path, 'audio_analysis', 'json')
	self.output_json = make_path('processed/hook-analysis', video_path, 'hook_analysis', 'json')
	self.logger = get_logger('hook_analysis', f'{self.video_path.stem}_log.txt')

	# ✅ Set Gemini key (explicit or from environment)
	if gemini_api_key:
	genai.configure(api_key=gemini_api_key)
	else:
	genai.configure(api_key=os.getenv("GEMINI_API_KEY", ""))
	self.model = genai.GenerativeModel('gemini-2.5-pro')

	def _encode_image(self, path: Path) -> bytes:
	with open(path, 'rb') as f:
	return f.read()

	def _load_audio_summary(self) -> dict:
	with open(self.audio_json, 'r', encoding='utf-8') as f:
	return json.load(f)

	def _gemini_hook_alignment(self, audio_summary: dict, frames: list[Path]) -> dict:
	parts = [{'mime_type': 'image/jpeg', 'data': self._encode_image(f)} for f in frames if f.exists()]
	text = f"""You are a virality analyst. Analyze the opening visuals and tone:
	- Does the audio mood match the expressions and visuals?
	- Are viewers likely to be hooked in the first few seconds?

	Audio Summary: {json.dumps(audio_summary)}

	Give JSON only:
	{{
	"hook_alignment_score": 0-100,
	"facial_sync": "good\|ok\|poor\|none",
	"comment": "short summary"
	}}"""

	try:
	response = self.model.generate_content([text] + parts)
	raw_text = getattr(response, 'text', '').strip()
	self.logger.debug("Gemini raw response: %s", raw_text)
	if not raw_text:
	raise ValueError("Gemini response was empty.")

	raw_text = (
	raw_text
	.replace('```json\n', '')
	.replace('\n```', '')
	.replace('```json', '')
	.replace('```', '')
	)

	return json.loads(raw_text)
	except json.JSONDecodeError as e:
	self.logger.error("❌ Failed to parse Gemini response as JSON: %s", e)
	self.logger.debug("Gemini response was: %r", getattr(response, 'text', '<<NO TEXT>>'))
	return {
	"hook_alignment_score": -1,
	"facial_sync": "none",
	"comment": "Invalid JSON response from Gemini"
	}
	except Exception as e:
	error_msg = str(e)
	self.logger.error("❌ Gemini API call failed: %s", e)

	# Check if it's an API key error - if so, raise it to stop the pipeline
	if any(keyword in error_msg.lower() for keyword in ["api_key", "invalid", "401", "403", "authentication", "unauthorized"]):
	raise ValueError(f"Invalid Gemini API key: {error_msg}") from e

	# For other errors, return defaults
	return {
	"hook_alignment_score": -1,
	"facial_sync": "none",
	"comment": f"Gemini API error: {error_msg}"
	}

	def analyze(self) -> dict:
	audio_summary = self._load_audio_summary()
	frames = sorted(self.frames_dir.glob('_scene_.jpg'))[:3]
	result = self._gemini_hook_alignment(audio_summary, frames)

	self.output_json.parent.mkdir(parents=True, exist_ok=True)
	with open(self.output_json, 'w', encoding='utf-8') as f:
	json.dump(result, f, indent=2)

	self.logger.info('Hook analysis saved to %s', self.output_json)
	return result