|
|
import cv2 |
|
|
import numpy as np |
|
|
import torch |
|
|
|
|
|
from thirdparty.MoGe.moge.model.v2 import MoGeModel as MoGeModelV2 |
|
|
|
|
|
|
|
|
class MogePipeline:
    """
    Inference pipeline for MoGeModelV2 to estimate the horizontal Field of View (FoV).
    """

    def __init__(
        self,
        model_name: str = "Ruicheng/moge-2-vitl",
        device: torch.device = torch.device("cuda"),
    ):
        """
        Initializes the pipeline and loads the MoGe model.

        Args:
            model_name (str): Path or name of the pre-trained MoGe model.
            device (torch.device): Device to load the model onto (e.g., 'cuda').
        """
        self.device = device
        # nn.Module instances start in training mode; switch to eval mode so
        # any train-only layers are disabled for inference.
        self.model = MoGeModelV2.from_pretrained(model_name).to(device).eval()

    @staticmethod
    def _fov_x_degrees(intrinsics: np.ndarray) -> float:
        """
        Converts a camera intrinsics matrix into a horizontal FoV in degrees.

        Computes ``2 * arctan(K[0, 2] / K[0, 0])``.

        NOTE(review): this is the full horizontal FoV only if K[0, 2] is half
        of the image width in the same units as K[0, 0] (i.e. a centered
        principal point) -- confirm against the model's intrinsics convention.

        Args:
            intrinsics (np.ndarray): Intrinsics matrix; only entries [0, 0]
                and [0, 2] are read.

        Returns:
            float: The horizontal FoV in degrees, as a built-in Python float.
        """
        fov_x_rad = 2 * np.arctan(intrinsics[0, 2] / intrinsics[0, 0])
        # np.rad2deg yields a NumPy scalar (e.g. np.float32); convert so the
        # result matches the annotated return type.
        return float(np.rad2deg(fov_x_rad))

    def infer(self, input_image: np.ndarray) -> float:
        """
        Performs inference to estimate the horizontal FoV from an image.

        Args:
            input_image (np.ndarray): The input image (H, W, 3) in BGR format.
                Pixel values are divided by 255.0 before inference
                (presumably 8-bit input -- verify against callers).

        Returns:
            float: The estimated horizontal FoV in degrees.
        """
        input_image_rgb = cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB)

        # Scale pixel values and reorder (H, W, C) -> (C, H, W) for the model.
        input_tensor = torch.tensor(
            input_image_rgb / 255.0,
            dtype=torch.float32,
            device=self.device,
        ).permute(2, 0, 1)

        output = self.model.infer(input_tensor, resolution_level=1)

        # Assumes 'intrinsics' is a single 2-D matrix for this unbatched
        # input -- TODO confirm against the MoGe infer() output format.
        intrinsics = output['intrinsics'].cpu().numpy()

        return self._fov_x_degrees(intrinsics)