# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import nms
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, InstanceList, MultiConfig, OptInstanceList

from .guided_anchor_head import GuidedAnchorHead


@MODELS.register_module()
class GARPNHead(GuidedAnchorHead):
    """Guided-Anchor-based RPN head.

    Args:
        in_channels (int): Number of channels in the input feature map.
        num_classes (int): Number of classes. Defaults to 1.
        init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or
            list[dict]): Initialization config dict. Defaults to a normal
            initialization for ``Conv2d`` layers, with a prior-probability
            bias for ``conv_loc``.
    """

    def __init__(self,
                 in_channels: int,
                 num_classes: int = 1,
                 init_cfg: MultiConfig = dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=dict(
                         type='Normal',
                         name='conv_loc',
                         std=0.01,
                         bias_prob=0.01)),
                 **kwargs) -> None:
        super().__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            init_cfg=init_cfg,
            **kwargs)

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        super()._init_layers()

    def forward_single(
            self, x: Tensor) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """Forward feature of a single scale level.

        Args:
            x (Tensor): Feature map of a single scale level.

        Returns:
            tuple[Tensor]: ``cls_score``, ``bbox_pred``, ``shape_pred`` and
            ``loc_pred`` of the single scale level.
        """
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        (cls_score, bbox_pred, shape_pred,
         loc_pred) = super().forward_single(x)
        return cls_score, bbox_pred, shape_pred, loc_pred
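
    # Shape sketch for ``forward_single`` (illustrative only; the exact
    # channel counts depend on the head config, e.g. ``feat_channels`` and
    # the number of square anchors per location):
    #   x          (N, in_channels, H, W) --rpn_conv + ReLU--> (N, feat_channels, H, W)
    #   cls_score  (N, num_anchors * cls_out_channels, H, W)
    #   bbox_pred  (N, num_anchors * 4, H, W)
    #   shape_pred (N, num_anchors * 2, H, W)
    #   loc_pred   (N, 1, H, W)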

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            shape_preds: List[Tensor],
            loc_preds: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the detection
        head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            shape_preds (list[Tensor]): Shape predictions for each scale
                level with shape (N, num_anchors * 2, H, W).
            loc_preds (list[Tensor]): Location predictions for each scale
                level with shape (N, 1, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: A dictionary of loss components.
        """
        losses = super().loss_by_feat(
            cls_scores,
            bbox_preds,
            shape_preds,
            loc_preds,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)
        return dict(
            loss_rpn_cls=losses['loss_cls'],
            loss_rpn_bbox=losses['loss_bbox'],
            loss_anchor_shape=losses['loss_shape'],
            loss_anchor_loc=losses['loss_loc'])
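
    # A hypothetical consumer of the dict returned above (mmengine's runner
    # performs an equivalent reduction internally via ``parse_losses``):
    # each value may be a list of per-level tensors, so flatten before
    # reducing.
    #
    #   losses = head.loss_by_feat(...)
    #   total = sum(
    #       v.mean() for loss in losses.values()
    #       for v in (loss if isinstance(loss, (list, tuple)) else [loss]))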

    def _predict_by_feat_single(self,
                                cls_scores: List[Tensor],
                                bbox_preds: List[Tensor],
                                mlvl_anchors: List[Tensor],
                                mlvl_masks: List[Tensor],
                                img_meta: dict,
                                cfg: ConfigType,
                                rescale: bool = False) -> InstanceData:
        """Transform a single image's features extracted from the head into
        bbox results.

        Args:
            cls_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas from
                all scale levels of a single image, each item has shape
                (num_priors * 4, H, W).
            mlvl_anchors (list[Tensor]): Each element in the list is
                the anchors of a single level in the feature pyramid. It has
                shape (num_priors, 4).
            mlvl_masks (list[Tensor]): Each element in the list is the
                location masks of a single level.
            img_meta (dict): Image meta info.
            cfg (:obj:`ConfigDict` or dict): Test / postprocessing
                configuration. If None, ``test_cfg`` is used.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            :obj:`InstanceData`: Detection results of each image
            after the post process.
            Each item usually contains the following keys.

            - scores (Tensor): Classification scores, has a shape
              (num_instance, ).
            - labels (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes (Tensor): Has a shape (num_instances, 4), the last
              dimension 4 arranged as (x1, y1, x2, y2).
        """
        cfg = self.test_cfg if cfg is None else cfg
        cfg = copy.deepcopy(cfg)
        assert cfg.nms.get('type', 'nms') == 'nms', \
            'GARPNHead only supports naive nms.'

        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            anchors = mlvl_anchors[idx]
            mask = mlvl_masks[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            # if no location is kept, end.
            if mask.sum() == 0:
                continue
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # note that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                scores = rpn_cls_score.softmax(dim=1)[:, :-1]
            # filter scores, bbox_pred w.r.t. mask.
            # anchors are filtered in get_anchors() beforehand.
            scores = scores[mask]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,
                                                                   4)[mask, :]
            if scores.dim() == 0:
                rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
                anchors = anchors.unsqueeze(0)
                scores = scores.unsqueeze(0)
            # filter anchors, bbox_pred, scores w.r.t. scores
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            # get proposals w.r.t. anchors and rpn_bbox_pred
            proposals = self.bbox_coder.decode(
                anchors, rpn_bbox_pred, max_shape=img_meta['img_shape'])
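            # ``bbox_coder.decode`` applies the predicted (dx, dy, dw, dh)
            # deltas to the square anchors and clips the resulting boxes to
            # ``img_shape`` (assuming the default DeltaXYWHBBoxCoder; other
            # coders may behave differently).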
            # filter out too small bboxes
            if cfg.min_bbox_size >= 0:
                w = proposals[:, 2] - proposals[:, 0]
                h = proposals[:, 3] - proposals[:, 1]
                valid_mask = (w > cfg.min_bbox_size) & \
                             (h > cfg.min_bbox_size)
                if not valid_mask.all():
                    proposals = proposals[valid_mask]
                    scores = scores[valid_mask]

            # NMS in current level
            proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)
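            # ``mmcv.ops.nms`` returns ``(dets, inds)``; ``dets`` has shape
            # (num_kept, 5) with the score appended as the 5th column and is
            # sorted by score in descending order, so the truncation below
            # keeps the highest-scoring proposals of this level.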
            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)
        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.get('nms_across_levels', False):
            # NMS across multi levels
            proposals, _ = nms(proposals[:, :4], proposals[:, -1],
                               cfg.nms.iou_threshold)
            proposals = proposals[:cfg.max_per_img, :]
        else:
            scores = proposals[:, 4]
            num = min(cfg.max_per_img, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]

        bboxes = proposals[:, :-1]
        scores = proposals[:, -1]
        if rescale:
            assert img_meta.get('scale_factor') is not None
            bboxes /= bboxes.new_tensor(img_meta['scale_factor']).repeat(
                (1, 2))

        results = InstanceData()
        results.bboxes = bboxes
        results.scores = scores
        results.labels = scores.new_zeros(scores.size(0), dtype=torch.long)
        return results
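

# A minimal smoke-test sketch (not part of the upstream file). It assumes
# mmdet/mmcv are installed and that ``GuidedAnchorHead`` supplies default
# anchor-generator, coder and loss configs; the deformable/masked conv ops
# used by the parent head may require a CUDA build of mmcv.
if __name__ == '__main__':
    head = GARPNHead(in_channels=256)
    # One dummy FPN level; real configs typically use five levels.
    feats = [torch.randn(1, 256, 32, 32)]
    # ``forward`` maps ``forward_single`` over levels, so each output is a
    # list with one tensor per level.
    cls_scores, bbox_preds, shape_preds, loc_preds = head(feats)
    print(cls_scores[0].shape, bbox_preds[0].shape,
          shape_preds[0].shape, loc_preds[0].shape)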