# VITRA/data/tools/utils_moge.py
# Uploaded by arnoldland -- "Initial commit" (aae3ba1)
import cv2
import numpy as np
import torch
from thirdparty.MoGe.moge.model.v2 import MoGeModel as MoGeModelV2
class MogePipeline:
    """
    Inference pipeline for MoGeModelV2 to estimate the horizontal Field of View (FoV).
    """

    def __init__(
        self,
        model_name: str = "Ruicheng/moge-2-vitl",
        device: torch.device = torch.device("cuda"),
    ):
        """
        Initializes the pipeline and loads the MoGe model.

        Args:
            model_name (str): Path or name of the pre-trained MoGe model.
            device (torch.device): Device to load the model onto (e.g., 'cuda').
        """
        self.device = device
        self.model = MoGeModelV2.from_pretrained(model_name).to(device)
        # The pipeline is inference-only: switch to evaluation mode so that any
        # train-mode-dependent layers (dropout, stochastic depth, ...) are disabled.
        self.model.eval()

    def infer(self, input_image: np.ndarray) -> float:
        """
        Performs inference to estimate the horizontal FoV from an image.

        Args:
            input_image (np.ndarray): The input image (H, W, 3) in BGR format.

        Returns:
            float: The estimated horizontal FoV in degrees.

        Raises:
            ValueError: If ``input_image`` is not a non-empty NumPy array
                (for example, the ``None`` returned by a failed ``cv2.imread``).
        """
        # Fail early with a readable message instead of an opaque OpenCV error.
        if not isinstance(input_image, np.ndarray) or input_image.size == 0:
            raise ValueError(
                "input_image must be a non-empty numpy array in BGR format "
                "(got None or an empty/non-array value; check that the image was loaded)."
            )

        input_image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

        # Scale to [0, 1] and reorder HWC -> CHW before handing the image to the model.
        input_tensor = torch.tensor(
            input_image_rgb / 255.0,
            dtype=torch.float32,
            device=self.device,
        ).permute(2, 0, 1)

        # Run model inference at a low resolution level.
        # NOTE(review): presumably chosen to keep token usage small since only the
        # intrinsics are needed -- confirm against the MoGe documentation.
        output = self.model.infer(input_tensor, resolution_level=1)
        intrinsics = output['intrinsics'].cpu().numpy()

        return self._horizontal_fov_deg(intrinsics)

    @staticmethod
    def _horizontal_fov_deg(intrinsics: np.ndarray) -> float:
        """
        Converts a camera intrinsics matrix into a horizontal FoV in degrees.

        Uses FoV_x = 2 * arctan(cx / fx) with fx = intrinsics[0, 0] and
        cx = intrinsics[0, 2].

        NOTE(review): this treats cx as half of the image width expressed in the
        same units as fx (i.e. a centered principal point) -- confirm against the
        intrinsics convention used by MoGe.

        Args:
            intrinsics (np.ndarray): Intrinsics matrix indexed as [row, col].

        Returns:
            float: Horizontal FoV in degrees, as a builtin Python float.
        """
        fov_x_rad = 2 * np.arctan(intrinsics[0, 2] / intrinsics[0, 0])
        # Convert the NumPy scalar to a builtin float to honor the annotated
        # return type (and keep the value JSON-serializable).
        return float(np.rad2deg(fov_x_rad))