Spaces:

amro-alasri
/

anycoder-ca9149bd

Runtime error

App Files Files Community

anycoder-ca9149bd / depth_anything_3 /app /modules /visualization.py

amro-alasri

Upload folder using huggingface_hub

e669ccc verified 26 days ago

raw

history blame contribute delete

17.1 kB

	# Copyright (c) 2025 ByteDance Ltd. and/or its affiliates
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	"""
	Visualization module for Depth Anything 3 Gradio app.

	This module handles visualization updates, navigation, and measurement functionality.
	"""

	import os
	from typing import Any, Dict, List, Optional, Tuple
	import cv2
	import gradio as gr
	import numpy as np


	class VisualizationHandler:
	"""
	Handles visualization updates and navigation for the Gradio app.
	"""

	def __init__(self):
	"""Initialize the visualization handler."""

	def update_view_selectors(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]]
	) -> Tuple[gr.Dropdown, gr.Dropdown]:
	"""
	Update view selector dropdowns based on available views.

	Args:
	processed_data: Processed data dictionary

	Returns:
	Tuple of (depth_view_selector, measure_view_selector)
	"""
	if processed_data is None or len(processed_data) == 0:
	choices = ["View 1"]
	else:
	num_views = len(processed_data)
	choices = [f"View {i + 1}" for i in range(num_views)]

	return (
	gr.Dropdown(choices=choices, value=choices[0]), # depth_view_selector
	gr.Dropdown(choices=choices, value=choices[0]), # measure_view_selector
	)

	def get_view_data_by_index(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
	) -> Optional[Dict[str, Any]]:
	"""
	Get view data by index, handling bounds.

	Args:
	processed_data: Processed data dictionary
	view_index: Index of the view to get

	Returns:
	View data dictionary or None
	"""
	if processed_data is None or len(processed_data) == 0:
	return None

	view_keys = list(processed_data.keys())
	if view_index < 0 or view_index >= len(view_keys):
	view_index = 0

	return processed_data[view_keys[view_index]]

	def update_depth_view(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
	) -> Optional[str]:
	"""
	Update depth view for a specific view index.

	Args:
	processed_data: Processed data dictionary
	view_index: Index of the view to update

	Returns:
	Path to depth visualization image or None
	"""
	view_data = self.get_view_data_by_index(processed_data, view_index)
	if view_data is None or view_data.get("depth_image") is None:
	return None

	# Return the depth visualization image directly
	return view_data["depth_image"]

	def navigate_depth_view(
	self,
	processed_data: Optional[Dict[int, Dict[str, Any]]],
	current_selector_value: str,
	direction: int,
	) -> Tuple[str, Optional[str]]:
	"""
	Navigate depth view (direction: -1 for previous, +1 for next).

	Args:
	processed_data: Processed data dictionary
	current_selector_value: Current selector value
	direction: Direction to navigate (-1 for previous, +1 for next)

	Returns:
	Tuple of (new_selector_value, depth_vis)
	"""
	if processed_data is None or len(processed_data) == 0:
	return "View 1", None

	# Parse current view number
	try:
	current_view = int(current_selector_value.split()[1]) - 1
	except: # noqa
	current_view = 0

	num_views = len(processed_data)
	new_view = (current_view + direction) % num_views

	new_selector_value = f"View {new_view + 1}"
	depth_vis = self.update_depth_view(processed_data, new_view)

	return new_selector_value, depth_vis

	def update_measure_view(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]], view_index: int
	) -> Tuple[Optional[np.ndarray], Optional[np.ndarray], List]:
	"""
	Update measure view for a specific view index.

	Args:
	processed_data: Processed data dictionary
	view_index: Index of the view to update

	Returns:
	Tuple of (measure_image, depth_right_half, measure_points)
	"""
	view_data = self.get_view_data_by_index(processed_data, view_index)
	if view_data is None:
	return None, None, [] # image, depth_right_half, measure_points

	# Get the processed (resized) image
	if "image" in view_data and view_data["image"] is not None:
	image = view_data["image"].copy()
	else:
	return None, None, []

	# Ensure image is in uint8 format
	if image.dtype != np.uint8:
	if image.max() <= 1.0:
	image = (image * 255).astype(np.uint8)
	else:
	image = image.astype(np.uint8)

	# Extract right half of the depth visualization (pure depth part)
	depth_image_path = view_data.get("depth_image", None)
	depth_right_half = None

	if depth_image_path and os.path.exists(depth_image_path):
	try:
	# Load the combined depth visualization image
	depth_combined = cv2.imread(depth_image_path)
	depth_combined = cv2.cvtColor(depth_combined, cv2.COLOR_BGR2RGB)
	if depth_combined is not None:
	height, width = depth_combined.shape[:2]
	# Extract right half (depth visualization part)
	depth_right_half = depth_combined[:, width // 2 :]
	except Exception as e:
	print(f"Error extracting depth right half: {e}")

	return image, depth_right_half, []

	def navigate_measure_view(
	self,
	processed_data: Optional[Dict[int, Dict[str, Any]]],
	current_selector_value: str,
	direction: int,
	) -> Tuple[str, Optional[np.ndarray], Optional[str], List]:
	"""
	Navigate measure view (direction: -1 for previous, +1 for next).

	Args:
	processed_data: Processed data dictionary
	current_selector_value: Current selector value
	direction: Direction to navigate (-1 for previous, +1 for next)

	Returns:
	Tuple of (new_selector_value, measure_image, depth_image_path, measure_points)
	"""
	if processed_data is None or len(processed_data) == 0:
	return "View 1", None, None, []

	# Parse current view number
	try:
	current_view = int(current_selector_value.split()[1]) - 1
	except: # noqa
	current_view = 0

	num_views = len(processed_data)
	new_view = (current_view + direction) % num_views

	new_selector_value = f"View {new_view + 1}"
	measure_image, depth_right_half, measure_points = self.update_measure_view(
	processed_data, new_view
	)

	return new_selector_value, measure_image, depth_right_half, measure_points

	def populate_visualization_tabs(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]]
	) -> Tuple[Optional[str], Optional[np.ndarray], Optional[str], List]:
	"""
	Populate the depth and measure tabs with processed data.

	Args:
	processed_data: Processed data dictionary

	Returns:
	Tuple of (depth_vis, measure_img, depth_image_path, measure_points)
	"""
	if processed_data is None or len(processed_data) == 0:
	return None, None, None, []

	# Use update function to get depth visualization
	depth_vis = self.update_depth_view(processed_data, 0)
	measure_img, depth_right_half, _ = self.update_measure_view(processed_data, 0)

	return depth_vis, measure_img, depth_right_half, []

	def reset_measure(
	self, processed_data: Optional[Dict[int, Dict[str, Any]]]
	) -> Tuple[Optional[np.ndarray], List, str]:
	"""
	Reset measure points.

	Args:
	processed_data: Processed data dictionary

	Returns:
	Tuple of (image, measure_points, text)
	"""
	if processed_data is None or len(processed_data) == 0:
	return None, [], ""

	# Return the first view image
	first_view = list(processed_data.values())[0]
	return first_view["image"], [], ""

	def measure(
	self,
	processed_data: Optional[Dict[int, Dict[str, Any]]],
	measure_points: List,
	current_view_selector: str,
	event: gr.SelectData,
	) -> List:
	"""
	Handle measurement on images.

	Args:
	processed_data: Processed data dictionary
	measure_points: List of current measure points
	current_view_selector: Current view selector value
	event: Gradio select event

	Returns:
	List of [image, depth_right_half, measure_points, text]
	"""
	try:
	print(f"Measure function called with selector: {current_view_selector}")

	if processed_data is None or len(processed_data) == 0:
	return [None, [], "No data available"]

	# Use the currently selected view instead of always using the first view
	try:
	current_view_index = int(current_view_selector.split()[1]) - 1
	except: # noqa
	current_view_index = 0

	print(f"Using view index: {current_view_index}")

	# Get view data safely
	if current_view_index < 0 or current_view_index >= len(processed_data):
	current_view_index = 0

	view_keys = list(processed_data.keys())
	current_view = processed_data[view_keys[current_view_index]]

	if current_view is None:
	return [None, [], "No view data available"]

	point2d = event.index[0], event.index[1]
	print(f"Clicked point: {point2d}")

	measure_points.append(point2d)

	# Get image and depth visualization
	image, depth_right_half, _ = self.update_measure_view(
	processed_data, current_view_index
	)
	if image is None:
	return [None, [], "No image available"]

	image = image.copy()

	# Ensure image is in uint8 format for proper cv2 operations
	try:
	if image.dtype != np.uint8:
	if image.max() <= 1.0:
	# Image is in [0, 1] range, convert to [0, 255]
	image = (image * 255).astype(np.uint8)
	else:
	# Image is already in [0, 255] range
	image = image.astype(np.uint8)
	except Exception as e:
	print(f"Image conversion error: {e}")
	return [None, [], f"Image conversion error: {e}"]

	# Draw circles for points
	try:
	for p in measure_points:
	if 0 <= p[0] < image.shape[1] and 0 <= p[1] < image.shape[0]:
	image = cv2.circle(image, p, radius=5, color=(255, 0, 0), thickness=2)
	except Exception as e:
	print(f"Drawing error: {e}")
	return [None, [], f"Drawing error: {e}"]

	# Get depth information from processed_data
	depth_text = ""
	try:
	for i, p in enumerate(measure_points):
	if (
	current_view["depth"] is not None
	and 0 <= p[1] < current_view["depth"].shape[0]
	and 0 <= p[0] < current_view["depth"].shape[1]
	):
	d = current_view["depth"][p[1], p[0]]
	depth_text += f"- P{i + 1} depth: {d:.2f}m\n"
	else:
	depth_text += f"- P{i + 1}: Click position ({p[0]}, {p[1]}) - No depth information\n" # noqa: E501
	except Exception as e:
	print(f"Depth text error: {e}")
	depth_text = f"Error computing depth: {e}\n"

	if len(measure_points) == 2:
	try:
	point1, point2 = measure_points
	# Draw line
	if (
	0 <= point1[0] < image.shape[1]
	and 0 <= point1[1] < image.shape[0]
	and 0 <= point2[0] < image.shape[1]
	and 0 <= point2[1] < image.shape[0]
	):
	image = cv2.line(image, point1, point2, color=(255, 0, 0), thickness=2)

	# Compute 3D distance using depth information and camera intrinsics
	distance_text = "- Distance: Unable to calculate 3D distance"
	if (
	current_view["depth"] is not None
	and 0 <= point1[1] < current_view["depth"].shape[0]
	and 0 <= point1[0] < current_view["depth"].shape[1]
	and 0 <= point2[1] < current_view["depth"].shape[0]
	and 0 <= point2[0] < current_view["depth"].shape[1]
	):
	try:
	# Get depth values at the two points
	d1 = current_view["depth"][point1[1], point1[0]]
	d2 = current_view["depth"][point2[1], point2[0]]

	# Convert 2D pixel coordinates to 3D world coordinates
	if current_view["intrinsics"] is not None:
	# Get camera intrinsics
	K = current_view["intrinsics"] # 3x3 intrinsic matrix
	fx, fy = K[0, 0], K[1, 1] # focal lengths
	cx, cy = K[0, 2], K[1, 2] # principal point

	# Convert pixel coordinates to normalized camera coordinates
	# Point 1: (u1, v1) -> (x1, y1, z1)
	u1, v1 = point1[0], point1[1]
	x1 = (u1 - cx) * d1 / fx
	y1 = (v1 - cy) * d1 / fy
	z1 = d1

	# Point 2: (u2, v2) -> (x2, y2, z2)
	u2, v2 = point2[0], point2[1]
	x2 = (u2 - cx) * d2 / fx
	y2 = (v2 - cy) * d2 / fy
	z2 = d2

	# Calculate 3D Euclidean distance
	p1_3d = np.array([x1, y1, z1])
	p2_3d = np.array([x2, y2, z2])
	distance_3d = np.linalg.norm(p1_3d - p2_3d)

	distance_text = f"- Distance: {distance_3d:.2f}m"
	else:
	# Fallback to simplified calculation if no intrinsics
	pixel_distance = np.sqrt(
	(point1[0] - point2[0]) 2 + (point1[1] - point2[1]) 2
	)
	avg_depth = (d1 + d2) / 2
	scale_factor = avg_depth / 1000 # Rough scaling factor
	estimated_3d_distance = pixel_distance * scale_factor
	distance_text = f"- Distance: {estimated_3d_distance:.2f}m (estimated, no intrinsics)" # noqa: E501

	except Exception as e:
	print(f"Distance computation error: {e}")
	distance_text = f"- Distance computation error: {e}"

	measure_points = []
	text = depth_text + distance_text
	print(f"Measurement complete: {text}")
	return [image, depth_right_half, measure_points, text]
	except Exception as e:
	print(f"Final measurement error: {e}")
	return [None, [], f"Measurement error: {e}"]
	else:
	print(f"Single point measurement: {depth_text}")
	return [image, depth_right_half, measure_points, depth_text]

	except Exception as e:
	print(f"Overall measure function error: {e}")
	return [None, [], f"Measure function error: {e}"]