# amro-alasri's picture
# Upload folder using huggingface_hub
# e669ccc verified
# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Visualization module for Depth Anything 3 Gradio app.
This module handles visualization updates, navigation, and measurement functionality.
"""
import os
from typing import Any, Dict, List, Optional, Tuple
import cv2
import gradio as gr
import numpy as np
class VisualizationHandler:
    """
    Handles visualization updates and navigation for the Gradio app.

    Every public method receives ``processed_data``, a dictionary with one entry
    per view. The methods of this class read the following keys of a view entry:

    - ``"image"``: the processed image of the view (array).
    - ``"depth_image"``: path of a combined visualization image whose right half
      is the depth visualization.
    - ``"depth"``: depth map indexed as ``depth[row, column]``.
    - ``"intrinsics"``: 3x3 camera intrinsic matrix, or None.

    Views are addressed by their position in the dictionary; the selector labels
    shown to the user are ``"View 1"``, ``"View 2"``, ...
    """

    def __init__(self):
        """Initialize the visualization handler (it keeps no state)."""

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_view_index(selector_value: Any) -> int:
        """
        Convert a selector label such as ``"View 3"`` into a zero-based index.

        Args:
            selector_value: Current value of a view selector

        Returns:
            The zero-based view index, or 0 when the label is missing or malformed
        """
        try:
            return int(selector_value.split()[1]) - 1
        except (AttributeError, IndexError, TypeError, ValueError):
            return 0

    @staticmethod
    def _to_uint8(image: np.ndarray) -> np.ndarray:
        """
        Return a new uint8 array holding ``image``.

        Non-uint8 data whose maximum is <= 1.0 is treated as [0, 1] data and scaled
        by 255. Values are clipped to [0, 255] before the cast so that
        out-of-range values saturate instead of wrapping around.
        """
        if image.dtype == np.uint8:
            return image.copy()
        # ``image.size`` guards ``max()``, which raises on empty arrays.
        if image.size and image.max() <= 1.0:
            image = image * 255
        return np.clip(image, 0, 255).astype(np.uint8)

    @staticmethod
    def _in_bounds(point: Tuple[int, int], width: int, height: int) -> bool:
        """Return True when the ``(x, y)`` point lies inside a ``width`` x ``height`` grid."""
        return 0 <= point[0] < width and 0 <= point[1] < height

    @staticmethod
    def _measure_failure(message: str) -> List:
        """Build the 4-element result ``measure`` returns when it cannot measure."""
        return [None, None, [], message]

    @staticmethod
    def _load_depth_right_half(depth_image_path: Optional[str]) -> Optional[np.ndarray]:
        """
        Load the combined depth visualization and return its right half as RGB.

        Returns None when the path is empty, the file does not exist, or the
        image cannot be read or converted.
        """
        if not depth_image_path or not os.path.exists(depth_image_path):
            return None
        try:
            depth_combined = cv2.imread(depth_image_path)
            # cv2.imread reports an unreadable file by returning None rather than
            # raising, so this must be checked before the color conversion.
            if depth_combined is None:
                print(f"Error extracting depth right half: cannot read {depth_image_path}")
                return None
            depth_combined = cv2.cvtColor(depth_combined, cv2.COLOR_BGR2RGB)
            width = depth_combined.shape[1]
            return depth_combined[:, width // 2 :]
        except Exception as e:
            print(f"Error extracting depth right half: {e}")
            return None

    @staticmethod
    def _depth_text(points: List, depth: Optional[np.ndarray]) -> str:
        """Return one markdown bullet per point with the depth value under it."""
        lines = []
        for number, (u, v) in enumerate(points, start=1):
            if depth is not None and 0 <= v < depth.shape[0] and 0 <= u < depth.shape[1]:
                lines.append(f"- **P{number} depth: {depth[v, u]:.2f}m**\n")
            else:
                lines.append(
                    f"- **P{number}: Click position ({u}, {v}) - No depth information**\n"
                )
        return "".join(lines)

    def _distance_text(
        self,
        point1: Tuple[int, int],
        point2: Tuple[int, int],
        depth: Optional[np.ndarray],
        intrinsics: Optional[np.ndarray],
    ) -> str:
        """
        Return the markdown line describing the distance between two pixels.

        With intrinsics, both pixels are back-projected with the pinhole model
        (``x = (u - cx) * d / fx``, ``y = (v - cy) * d / fy``, ``z = d``) and the
        Euclidean distance between the two 3D points is reported. Without
        intrinsics a rough estimate (pixel distance * average depth / 1000) is
        reported and labelled as such.
        """
        unavailable = "- **Distance: Unable to calculate 3D distance**"
        if depth is None:
            return unavailable
        depth_height, depth_width = depth.shape[0], depth.shape[1]
        if not (
            self._in_bounds(point1, depth_width, depth_height)
            and self._in_bounds(point2, depth_width, depth_height)
        ):
            return unavailable

        try:
            (u1, v1), (u2, v2) = point1, point2
            d1 = depth[v1, u1]
            d2 = depth[v2, u2]

            if intrinsics is None:
                # Fallback to a simplified calculation if no intrinsics
                pixel_distance = np.sqrt((u1 - u2) ** 2 + (v1 - v2) ** 2)
                avg_depth = (d1 + d2) / 2
                scale_factor = avg_depth / 1000  # Rough scaling factor
                estimated_3d_distance = pixel_distance * scale_factor
                return (
                    f"- **Distance: {estimated_3d_distance:.2f}m "
                    "(estimated, no intrinsics)**"
                )

            fx, fy = intrinsics[0, 0], intrinsics[1, 1]  # focal lengths
            cx, cy = intrinsics[0, 2], intrinsics[1, 2]  # principal point
            p1_3d = np.array([(u1 - cx) * d1 / fx, (v1 - cy) * d1 / fy, d1])
            p2_3d = np.array([(u2 - cx) * d2 / fx, (v2 - cy) * d2 / fy, d2])
            distance_3d = np.linalg.norm(p1_3d - p2_3d)
            return f"- **Distance: {distance_3d:.2f}m**"
        except Exception as e:
            print(f"Distance computation error: {e}")
            return f"- **Distance computation error: {e}**"

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def update_view_selectors(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[gr.Dropdown, gr.Dropdown]:
        """
        Update view selector dropdowns based on available views.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (depth_view_selector, measure_view_selector); both offer one
            "View N" choice per view (a single "View 1" when there is no data) and
            select the first choice
        """
        if not processed_data:
            choices = ["View 1"]
        else:
            choices = [f"View {i + 1}" for i in range(len(processed_data))]
        return (
            gr.Dropdown(choices=choices, value=choices[0]),  # depth_view_selector
            gr.Dropdown(choices=choices, value=choices[0]),  # measure_view_selector
        )

    def get_view_data_by_index(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Optional[Dict[str, Any]]:
        """
        Get view data by index, handling bounds.

        Args:
            processed_data: Processed data dictionary
            view_index: Position of the view to get; an out-of-range index selects
                the first view

        Returns:
            View data dictionary, or None when there is no data
        """
        if not processed_data:
            return None
        view_keys = list(processed_data)
        if not 0 <= view_index < len(view_keys):
            view_index = 0
        return processed_data[view_keys[view_index]]

    def update_depth_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Optional[str]:
        """
        Update depth view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            The view's "depth_image" value (path to the depth visualization image),
            or None when the view or the value is missing
        """
        view_data = self.get_view_data_by_index(processed_data, view_index)
        if view_data is None:
            return None
        return view_data.get("depth_image")

    def navigate_depth_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector_value: str,
        direction: int,
    ) -> Tuple[str, Optional[str]]:
        """
        Navigate depth view (direction: -1 for previous, +1 for next).

        Navigation wraps around at both ends of the view list.

        Args:
            processed_data: Processed data dictionary
            current_selector_value: Current selector value
            direction: Direction to navigate (-1 for previous, +1 for next)

        Returns:
            Tuple of (new_selector_value, depth_vis)
        """
        if not processed_data:
            return "View 1", None

        current_view = self._parse_view_index(current_selector_value)
        new_view = (current_view + direction) % len(processed_data)
        return f"View {new_view + 1}", self.update_depth_view(processed_data, new_view)

    def update_measure_view(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
    ) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Update measure view for a specific view index.

        Args:
            processed_data: Processed data dictionary
            view_index: Index of the view to update

        Returns:
            Tuple of (measure_image, depth_right_half, measure_points).
            measure_image is a fresh uint8 copy of the view's image,
            depth_right_half is the right half of the depth visualization (None
            when it cannot be loaded) and measure_points is always an empty list.
            (None, None, []) is returned when the view or its image is missing.
        """
        view_data = self.get_view_data_by_index(processed_data, view_index)
        if view_data is None or view_data.get("image") is None:
            return None, None, []  # image, depth_right_half, measure_points

        image = self._to_uint8(view_data["image"])
        depth_right_half = self._load_depth_right_half(view_data.get("depth_image"))
        return image, depth_right_half, []

    def navigate_measure_view(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        current_selector_value: str,
        direction: int,
    ) -> Tuple[str, Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Navigate measure view (direction: -1 for previous, +1 for next).

        Navigation wraps around at both ends of the view list.

        Args:
            processed_data: Processed data dictionary
            current_selector_value: Current selector value
            direction: Direction to navigate (-1 for previous, +1 for next)

        Returns:
            Tuple of (new_selector_value, measure_image, depth_right_half,
            measure_points)
        """
        if not processed_data:
            return "View 1", None, None, []

        current_view = self._parse_view_index(current_selector_value)
        new_view = (current_view + direction) % len(processed_data)
        measure_image, depth_right_half, measure_points = self.update_measure_view(
            processed_data, new_view
        )
        return f"View {new_view + 1}", measure_image, depth_right_half, measure_points

    def populate_visualization_tabs(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[Optional[str], Optional[np.ndarray], Optional[np.ndarray], List]:
        """
        Populate the depth and measure tabs with the first view.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (depth_vis, measure_img, depth_right_half, measure_points)
        """
        if not processed_data:
            return None, None, None, []

        depth_vis = self.update_depth_view(processed_data, 0)
        measure_img, depth_right_half, _ = self.update_measure_view(processed_data, 0)
        return depth_vis, measure_img, depth_right_half, []

    def reset_measure(
        self, processed_data: Optional[Dict[int, Dict[str, Any]]]
    ) -> Tuple[Optional[np.ndarray], List, str]:
        """
        Reset measure points.

        Args:
            processed_data: Processed data dictionary

        Returns:
            Tuple of (image, measure_points, text): the first view's image (None
            when unavailable), an empty point list and an empty text
        """
        if not processed_data:
            return None, [], ""

        # Return the first view image
        first_view = next(iter(processed_data.values()))
        image = first_view.get("image") if first_view else None
        return image, [], ""

    def measure(
        self,
        processed_data: Optional[Dict[int, Dict[str, Any]]],
        measure_points: List,
        current_view_selector: str,
        event: gr.SelectData,
    ) -> List:
        """
        Handle measurement on images.

        The clicked point is added to the current points and all points are drawn
        on a copy of the selected view's image. Once two points are present, a
        line is drawn between them, their distance is reported and the returned
        point list is emptied so that the next click starts a new measurement.

        Args:
            processed_data: Processed data dictionary
            measure_points: List of current measure points; it is not modified
            current_view_selector: Current view selector value
            event: Gradio select event; ``event.index`` holds the clicked (x, y)

        Returns:
            List of [image, depth_right_half, measure_points, text]. On any
            failure the list is [None, None, [], error_message], so the result
            always has four elements.
        """
        try:
            print(f"Measure function called with selector: {current_view_selector}")
            if not processed_data:
                return self._measure_failure("No data available")

            # Use the currently selected view; fall back to the first view when
            # the selector is malformed or out of range.
            current_view_index = self._parse_view_index(current_view_selector)
            if not 0 <= current_view_index < len(processed_data):
                current_view_index = 0
            print(f"Using view index: {current_view_index}")

            current_view = self.get_view_data_by_index(processed_data, current_view_index)
            if current_view is None:
                return self._measure_failure("No view data available")

            point2d = (int(event.index[0]), int(event.index[1]))
            print(f"Clicked point: {point2d}")
            # Build a new list instead of appending to the caller's list, so a
            # failure below cannot leave a stale point behind in it.
            points = [tuple(p) for p in (measure_points or [])]
            points.append(point2d)

            # update_measure_view already returns a fresh uint8 copy of the image.
            image, depth_right_half, _ = self.update_measure_view(
                processed_data, current_view_index
            )
            if image is None:
                return self._measure_failure("No image available")
            height, width = image.shape[:2]

            # Draw circles for points
            try:
                for p in points:
                    if self._in_bounds(p, width, height):
                        image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2)
            except Exception as e:
                print(f"Drawing error: {e}")
                return self._measure_failure(f"Drawing error: {e}")

            # Get depth information from processed_data
            depth = current_view.get("depth")
            try:
                depth_text = self._depth_text(points, depth)
            except Exception as e:
                print(f"Depth text error: {e}")
                depth_text = f"Error computing depth: {e}\n"

            if len(points) != 2:
                print(f"Single point measurement: {depth_text}")
                return [image, depth_right_half, points, depth_text]

            try:
                point1, point2 = points
                if self._in_bounds(point1, width, height) and self._in_bounds(
                    point2, width, height
                ):
                    image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2)

                distance_text = self._distance_text(
                    point1, point2, depth, current_view.get("intrinsics")
                )
                text = depth_text + distance_text
                print(f"Measurement complete: {text}")
                # The pair is complete: return an empty point list.
                return [image, depth_right_half, [], text]
            except Exception as e:
                print(f"Final measurement error: {e}")
                return self._measure_failure(f"Measurement error: {e}")
        except Exception as e:
            print(f"Overall measure function error: {e}")
            return self._measure_failure(f"Measure function error: {e}")