File size: 8,271 Bytes
ad2cb5b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import openai
import json
from pathlib import Path
from files.utils.logging import get_logger
from config import make_path, OPENAI_API_KEY


class VideoReport:
    def __init__(self, video_path: str, openai_api_key: str = ""):
        """Configure the OpenAI key, derive artefact paths and load prior analyses.

        Args:
            video_path: Path of the video this report describes.
            openai_api_key: Key assigned to ``openai.api_key``. When empty,
                the ``OPENAI_API_KEY`` environment variable is used instead,
                defaulting to an empty string if that is unset too.
        """
        import os

        # An explicit key wins; otherwise fall back to the environment.
        openai.api_key = openai_api_key or os.getenv("OPENAI_API_KEY", "")

        self.video_path = Path(video_path)

        # Locations of the three upstream analysis files and of the report
        # this class writes; all are derived from the video path by make_path.
        self.audio_json = make_path(
            'processed/audio-analysis', video_path, 'audio_analysis', 'json'
        )
        self.frame_json = make_path(
            'processed/frame-analysis', video_path, 'frame_analysis', 'json'
        )
        self.hook_json = make_path(
            'processed/hook-analysis', video_path, 'hook_analysis', 'json'
        )
        self.output_json = make_path('reports', video_path, 'final_report', 'json')

        # One log file per video, named after the video's stem.
        self.logger = get_logger(
            name='video_report',
            log_file=f'{self.video_path.stem}_log.txt',
        )

        # Read whatever analyses are available up front.
        self.audio_analysis = self.load_json(self.audio_json)
        self.frame_analysis = self.load_json(self.frame_json)
        self.hook_analysis = self.load_json(self.hook_json)

    def load_json(self, path: Path):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except Exception:
            return {}

    def extract_matrices(self):
        return {
            "tone": self.audio_analysis.get("tone", "unknown"),
            "emotion": self.audio_analysis.get("emotion", "unknown"),
            "pace": self.audio_analysis.get("pace", "unknown"),
            "facial_sync": self.hook_analysis.get("facial_sync", "unknown")
        }

    def prepare_prompt(self) -> str:
        prompt_sections = []
        prompt_sections.append(f"""
        You are an expert evaluator trained to assess the **virality potential and content quality** of short-form video ads (e.g., TikToks, Reels). You are provided with:

        - A sequence of scene-selected **frames**
        - A full **audio transcription**
        - Detailed **audio statistics**
        - And other meta-data of videos
                                
        Your task is to analyze the video and assign the **five scores** with weighted importance. Follow the criteria and format strictly.
                                
        ---
                                
        ### 🎯 Scores to Judge (Each 0–100)

        You must evaluate the following sub-categories:

        - `hook`: Does the video grab attention in the first 3 seconds? A good hook is **surprising, emotional, funny, or visually intense**. A poor hook is **slow, random, or bland**.
        
        - `visuals`: Are visuals high-resolution, diverse, and relevant to the message? Good visuals are **intentional and professionally framed**. Poor visuals are **static, noisy, or irrelevant**.
        
        - `audio`: Is the audio clean, engaging, and well-synced? Quality audio has **clarity, proper levels, and supports the visuals**. Poor audio is **distracting, flat, or off-sync**.
        
        - `engagement`: Does the video maintain interest? Strong pacing, emotional depth, or thought-provoking content improves this. Weak pacing or meaningless content hurts it.
        
        - `visual_diversity`: Does the video use **multiple camera angles, transitions, or visual styles**? A lack of variation makes it feel stale.

        ---
                                
        ### πŸ“Œ Scoring Enforcement Guidelines

        - Be **strict**: Low-effort content should fall well below 50  
        - Be **realistic**: Reward polish, creativity, clarity, and emotional impact  
        - Only videos with **clear intent and great execution** should reach 80+  
        - Penalize poor hooks, bland visuals, unclear audio, or meaningless structure  
        - Ensure your scores reflect meaningful differences between videos β€” **don't cluster everything around 60**
        
        ---
        """)

        if self.audio_analysis:
            prompt_sections.append("Audio Analysis:\n" + json.dumps(self.audio_analysis, indent=2))
        if self.frame_analysis:
            prompt_sections.append("\nFrame Analysis:\n" + json.dumps(self.frame_analysis, indent=2))
        if self.hook_analysis:
            prompt_sections.append("\nHook Alignment Analysis:\n" + json.dumps(self.hook_analysis, indent=2))

        matrices = self.extract_matrices()
        prompt_sections.append("\nHere are extracted behavioral/performance matrices:\n" + json.dumps(matrices, indent=2))

        prompt_sections.append(f"""
        ### πŸ“€ Output Format (JSON Only β€” No Comments or Explanations):
        {{
        "video_name": "{self.video_path.stem}",
        "scores": {{
            "hook": 0,
            "visuals": 0,
            "audio": 0,
            "engagement": 0,
            "visual_diversity": 0
        }},
        "matrices": {{
            "tone": "",
            "emotion": "",
            "pace": "",
            "facial_sync": ""
        }},
        "summary": "",
        "suggestions": [
            "Specific improvement 1",
            "Specific improvement 2",
            "Specific improvement 3",
            ... more if required
        ]
        }}
        """)
        return "\n".join(prompt_sections)

    def query_llm(self, prompt: str) -> dict:
        try:
            response = openai.chat.completions.create(
                model='gpt-4o',
                messages=[
                    {"role": "system", "content": "You are a professional short-video quality evaluator."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
            )
            reply = response.choices[0].message.content.strip()
            cleaned = reply.replace('```json', '').replace('```', '')
            result = json.loads(cleaned)
            return result
        except Exception as e:
            self.logger.error(f"LLM generation failed: {e}")
            return {
                "scores": {
                    "hook": 0,
                    "visuals": 0,
                    "audio": 0,
                    "engagement": 0,
                    "visual_diversity": 0
                },
                "matrices": self.extract_matrices(),
                "summary": "Failed to generate report.",
                "suggestions": ["Try again", "Check input files", "Verify OpenAI key"]
            }
    
    def compute_virality_score(self, result):
        """Combine the sub-scores and matrices into one 0-100 virality score.

        The base is a weighted sum of the five sub-scores (hook 18%, visuals
        20%, audio 25%, engagement 27%, visual_diversity 10%). Adjustments:

        * +6 if emotion is "joy" or "inspiration"
        * +6 if tone is "funny" or "relatable"
        * +4 if facial_sync is "ok" or "good"
        * -6 if the hook score is 30 or lower
        * -5 if the audio score is below 40
        * -5 if facial_sync is "none"

        Model output is not trusted to be well-formed: a missing or
        non-numeric sub-score counts as 0, a missing or non-dict ``scores`` /
        ``matrices`` entry counts as empty, and text labels are compared
        case-insensitively with surrounding whitespace ignored.

        Args:
            result: Report dict, normally the one returned by ``query_llm``.

        Returns:
            The final score as an int, truncated toward zero and clamped to
            the range 0-100.
        """
        import math

        weights = {
            'hook': 0.18,
            'visuals': 0.20,
            'audio': 0.25,
            'engagement': 0.27,
            'visual_diversity': 0.10
        }

        def as_number(value):
            """Return *value* as a finite float, or 0.0 if that is impossible."""
            try:
                number = float(value)
            except (TypeError, ValueError):
                return 0.0
            return number if math.isfinite(number) else 0.0

        raw_scores = result.get("scores")
        if not isinstance(raw_scores, dict):
            raw_scores = {}
        matrices = result.get("matrices")
        if not isinstance(matrices, dict):
            matrices = {}

        def label(name):
            """Return the matrix label *name*, normalised when it is text."""
            value = matrices.get(name)
            # Only strings are normalised, so a JSON null is never mistaken
            # for the literal label "none".
            return value.strip().lower() if isinstance(value, str) else value

        sub_scores = {key: as_number(raw_scores.get(key)) for key in weights}
        base_score = sum(sub_scores[key] * weights[key] for key in weights)

        bonus = 0
        facial_sync = label("facial_sync")

        if label("emotion") in ("joy", "inspiration"):
            bonus += 6
        if label("tone") in ("funny", "relatable"):
            bonus += 6
        if facial_sync in ("ok", "good"):
            bonus += 4

        if sub_scores["hook"] <= 30:
            bonus -= 6
        if sub_scores["audio"] < 40:
            bonus -= 5
        if facial_sync == "none":
            bonus -= 5

        return max(0, min(100, int(base_score + bonus)))

    def generate(self) -> dict:
        self.logger.info("Preparing prompt for LLM...")
        prompt = self.prepare_prompt()

        self.logger.info("Querying LLM for report generation...")
        result = self.query_llm(prompt)
        total_score = self.compute_virality_score(result)
        final_output = {
            "video_name": self.video_path.stem,
            "total_score":  total_score,
            **result  
        }
        self.logger.info("Saving final report...")
        self.output_json.parent.mkdir(parents=True, exist_ok=True)
        with open(self.output_json, 'w', encoding='utf-8') as f:
            json.dump(final_output, f, indent=2)

        self.logger.info("Report successfully generated at %s", self.output_json)
        return final_output