from ultralytics import YOLO import cv2 import numpy as np model = YOLO("boxes.pt") cap = cv2.VideoCapture("test.mp4") pts_src = np.array([[0, 129], [1275, 303], [1274, 601], [3, 294]], dtype=np.float32) width = int(np.linalg.norm(pts_src[0] - pts_src[1])) height = int(np.linalg.norm(pts_src[0] - pts_src[3])) pts_dst = np.array([[0, 0], [width, 0], [width, height], [0, height]], dtype=np.float32) M = cv2.getPerspectiveTransform(pts_src, pts_dst) M_inv = cv2.getPerspectiveTransform(pts_dst, pts_src) print(f"📍 Fixed tilted ROI defined ({width}x{height})") while cap.isOpened(): success, frame = cap.read() if not success: break cv2.polylines(frame, [pts_src.astype(int)], True, (255, 0, 0), 2) roi_frame = cv2.warpPerspective(frame, M, (width, height)) results = model(roi_frame, imgsz=640, conf=0.05, verbose=False) boxes = results[0].boxes class_names = model.names for box, cls_id in zip(boxes.xyxy, boxes.cls): class_name = class_names[int(cls_id)] if class_name == "box": x1, y1, x2, y2 = map(int, box[:4]) pts_box = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32) pts_box_transformed = cv2.perspectiveTransform(np.array([pts_box]), M_inv)[0].astype(int) cv2.polylines(frame, [pts_box_transformed], True, (0, 255, 0), 1) label = "BOX" (text_w, text_h), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 1) x_text, y_text = pts_box_transformed[0] cv2.rectangle(frame, (x_text, y_text - text_h - 8), (x_text + text_w + 5, y_text), (0, 255, 0), -1) cv2.putText(frame, label, (x_text + 2, y_text - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 1, cv2.LINE_AA) frame_small = cv2.resize(frame, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_LINEAR) cv2.imshow("Logistics Counter (Fixed Tilted ROI)", frame_small) if cv2.waitKey(1) & 0xFF == ord("q"): break cap.release() cv2.destroyAllWindows()