Spaces:
Sleeping
Sleeping
File size: 8,269 Bytes
8b7ae7a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 |
import openai
import json
from pathlib import Path
from app.utils.logging import get_logger
from config import make_path, OPENAI_API_KEY
class VideoReport:
    """Generate a virality/quality report for a short-form video.

    Loads the per-stage analysis JSON files (audio, frame, hook) produced
    earlier in the pipeline, asks an OpenAI chat model for sub-scores,
    combines them into a weighted 0-100 total, and writes the final report
    to ``reports/<video>/final_report.json``.
    """

    def __init__(self, video_path: str, openai_api_key: str = ""):
        """Resolve artifact paths and preload the upstream analyses.

        Args:
            video_path: Path to the source video; its stem names every
                derived artifact (logs, report file).
            openai_api_key: Explicit API key. When empty, falls back to
                the ``OPENAI_API_KEY`` environment variable.
        """
        # Set OpenAI key (explicit argument wins, then the environment).
        if openai_api_key:
            openai.api_key = openai_api_key
        else:
            import os
            openai.api_key = os.getenv("OPENAI_API_KEY", "")
        self.video_path = Path(video_path)
        # Locations of the upstream analysis artifacts and the final report.
        self.audio_json = make_path('processed/audio-analysis', video_path, 'audio_analysis', 'json')
        self.frame_json = make_path('processed/frame-analysis', video_path, 'frame_analysis', 'json')
        self.hook_json = make_path('processed/hook-analysis', video_path, 'hook_analysis', 'json')
        self.output_json = make_path('reports', video_path, 'final_report', 'json')
        log_filename = f'{self.video_path.stem}_log.txt'
        self.logger = get_logger(name='video_report', log_file=log_filename)
        # Missing/corrupt files degrade to {} so report generation still runs.
        self.audio_analysis = self.load_json(self.audio_json)
        self.frame_analysis = self.load_json(self.frame_json)
        self.hook_analysis = self.load_json(self.hook_json)

    def load_json(self, path: Path) -> dict:
        """Load a JSON file, returning {} when unreadable or invalid.

        Narrowed from a bare ``except Exception`` so programming errors are
        no longer silently swallowed; I/O and parse failures are logged and
        degrade to an empty analysis (best-effort behavior preserved).
        """
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            self.logger.warning("Could not load %s: %s", path, e)
            return {}

    def extract_matrices(self) -> dict:
        """Collect behavioral metrics from the upstream analyses.

        Absent keys default to "unknown" so downstream scoring never raises
        on incomplete analysis files.
        """
        return {
            "tone": self.audio_analysis.get("tone", "unknown"),
            "emotion": self.audio_analysis.get("emotion", "unknown"),
            "pace": self.audio_analysis.get("pace", "unknown"),
            "facial_sync": self.hook_analysis.get("facial_sync", "unknown")
        }

    def prepare_prompt(self) -> str:
        """Assemble the evaluation prompt from the loaded analyses.

        Sections for audio/frame/hook analysis are included only when the
        corresponding JSON loaded non-empty; the rubric and the required
        output format always appear.
        """
        prompt_sections = []
        prompt_sections.append("""
You are an expert evaluator trained to assess the **virality potential and content quality** of short-form video ads (e.g., TikToks, Reels). You are provided with:
- A sequence of scene-selected **frames**
- A full **audio transcription**
- Detailed **audio statistics**
- And other meta-data of videos
Your task is to analyze the video and assign the **five scores** with weighted importance. Follow the criteria and format strictly.
---
### π― Scores to Judge (Each 0β100)
You must evaluate the following sub-categories:
- `hook`: Does the video grab attention in the first 3 seconds? A good hook is **surprising, emotional, funny, or visually intense**. A poor hook is **slow, random, or bland**.
- `visuals`: Are visuals high-resolution, diverse, and relevant to the message? Good visuals are **intentional and professionally framed**. Poor visuals are **static, noisy, or irrelevant**.
- `audio`: Is the audio clean, engaging, and well-synced? Quality audio has **clarity, proper levels, and supports the visuals**. Poor audio is **distracting, flat, or off-sync**.
- `engagement`: Does the video maintain interest? Strong pacing, emotional depth, or thought-provoking content improves this. Weak pacing or meaningless content hurts it.
- `visual_diversity`: Does the video use **multiple camera angles, transitions, or visual styles**? A lack of variation makes it feel stale.
---
### π Scoring Enforcement Guidelines
- Be **strict**: Low-effort content should fall well below 50
- Be **realistic**: Reward polish, creativity, clarity, and emotional impact
- Only videos with **clear intent and great execution** should reach 80+
- Penalize poor hooks, bland visuals, unclear audio, or meaningless structure
- Ensure your scores reflect meaningful differences between videos β **don't cluster everything around 60**
---
""")
        if self.audio_analysis:
            prompt_sections.append("Audio Analysis:\n" + json.dumps(self.audio_analysis, indent=2))
        if self.frame_analysis:
            prompt_sections.append("\nFrame Analysis:\n" + json.dumps(self.frame_analysis, indent=2))
        if self.hook_analysis:
            prompt_sections.append("\nHook Alignment Analysis:\n" + json.dumps(self.hook_analysis, indent=2))
        matrices = self.extract_matrices()
        prompt_sections.append("\nHere are extracted behavioral/performance matrices:\n" + json.dumps(matrices, indent=2))
        prompt_sections.append(f"""
### π€ Output Format (JSON Only β No Comments or Explanations):
{{
"video_name": "{self.video_path.stem}",
"scores": {{
"hook": 0,
"visuals": 0,
"audio": 0,
"engagement": 0,
"visual_diversity": 0
}},
"matrices": {{
"tone": "",
"emotion": "",
"pace": "",
"facial_sync": ""
}},
"summary": "",
"suggestions": [
"Specific improvement 1",
"Specific improvement 2",
"Specific improvement 3",
... more if required
]
}}
""")
        return "\n".join(prompt_sections)

    def query_llm(self, prompt: str) -> dict:
        """Ask the chat model to score the video; return its parsed JSON.

        On any failure (network, API, JSON parse) a zeroed fallback report
        is returned instead of raising, so the pipeline always completes.
        """
        try:
            response = openai.chat.completions.create(
                model='gpt-4o',
                messages=[
                    {"role": "system", "content": "You are a professional short-video quality evaluator."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
            )
            reply = response.choices[0].message.content.strip()
            # Models sometimes wrap the JSON in markdown fences; strip them.
            cleaned = reply.replace('```json', '').replace('```', '')
            return json.loads(cleaned)
        except Exception as e:
            # Broad catch is deliberate: any API or parse error degrades to
            # a zeroed report rather than aborting report generation.
            self.logger.error("LLM generation failed: %s", e)
            return {
                "scores": {
                    "hook": 0,
                    "visuals": 0,
                    "audio": 0,
                    "engagement": 0,
                    "visual_diversity": 0
                },
                "matrices": self.extract_matrices(),
                "summary": "Failed to generate report.",
                "suggestions": ["Try again", "Check input files", "Verify OpenAI key"]
            }

    def compute_virality_score(self, result: dict) -> int:
        """Combine LLM sub-scores into a clamped 0-100 total.

        Weighted base score plus small bonuses/penalties derived from the
        behavioral matrices. Missing "scores"/"matrices" keys default to
        empty (previously a partial LLM response raised KeyError).
        """
        # Weights sum to 1.0, so a uniform sub-score maps to itself.
        weights = {
            'hook': 0.18,
            'visuals': 0.20,
            'audio': 0.25,
            'engagement': 0.27,
            'visual_diversity': 0.10
        }
        sub_scores = result.get("scores", {})
        base_score = sum(sub_scores.get(key, 0) * weights[key] for key in weights)
        bonus = 0
        matrices = result.get("matrices", {})
        if matrices.get("emotion") in ["joy", "inspiration"]:
            bonus += 6
        if matrices.get("tone") in ["funny", "relatable"]:
            bonus += 6
        if matrices.get("facial_sync") in ["ok", "good"]:
            bonus += 4
        if sub_scores.get("hook", 0) <= 30:
            bonus -= 6
        if sub_scores.get("audio", 0) < 40:
            bonus -= 5
        if matrices.get("facial_sync") == "none":
            bonus -= 5
        # round() rather than int(): float weight sums like 65.999999999999986
        # would otherwise truncate a point off the total. Clamp to [0, 100].
        return max(0, min(100, round(base_score + bonus)))

    def generate(self) -> dict:
        """Run the full pipeline: prompt -> LLM -> score -> save report."""
        self.logger.info("Preparing prompt for LLM...")
        prompt = self.prepare_prompt()
        self.logger.info("Querying LLM for report generation...")
        result = self.query_llm(prompt)
        total_score = self.compute_virality_score(result)
        final_output = {
            "video_name": self.video_path.stem,
            "total_score": total_score,
            **result  # LLM fields: scores, matrices, summary, suggestions
        }
        self.logger.info("Saving final report...")
        self.output_json.parent.mkdir(parents=True, exist_ok=True)
        with open(self.output_json, 'w', encoding='utf-8') as f:
            json.dump(final_output, f, indent=2)
        self.logger.info("Report successfully generated at %s", self.output_json)
        return final_output
|