# UniAD2.0_R101_nuScenes/config/base_track_map.py
# Uploader avatar: Zizizi-hao
# UniAD V2.0 training config file
# Revision 39bf596 (verified)
# ---------------------------------------------------------------------------
# Shared dataset settings.
# ---------------------------------------------------------------------------

# Six-element range.
# NOTE(review): presumably [x_min, y_min, z_min, x_max, y_max, z_max] --
# confirm ordering and units against the dataset/model code.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# Ten category names, in the order used by the train/test pipelines.
class_names = [
    'car',
    'truck',
    'construction_vehicle',
    'bus',
    'trailer',
    'barrier',
    'motorcycle',
    'bicycle',
    'pedestrian',
    'traffic_cone',
]

dataset_type = 'NuScenesE2EDataset'
data_root = 'data/nuscenes/'

# Camera and "external" flags are on; lidar, radar and map flags are off.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=True)

# Files are read through the 'disk' backend.
file_client_args = dict(backend='disk')
# Training pipeline: ten transform configs, listed in the order they appear.
train_pipeline = [
    # Multi-view image loading through the 'disk' backend, to_float32 enabled.
    dict(
        type='LoadMultiViewImageFromFilesInCeph',
        to_float32=True,
        file_client_args=dict(backend='disk'),
        img_root=''),
    # Configured with defaults only; present in the training pipeline but not
    # in the test pipeline of this file.
    dict(type='PhotoMetricDistortionMultiViewImage'),
    # Requests 3D boxes, labels, future annotations and instance indices;
    # attribute labels are switched off.
    dict(
        type='LoadAnnotations3D_E2E',
        with_bbox_3d=True,
        with_label_3d=True,
        with_attr_label=False,
        with_future_anns=True,
        with_ins_inds_3d=True,
        ins_inds_add_1=True),
    # NOTE(review): each *bound looks like [start, stop, step] -- confirm
    # against GenerateOccFlowLabels.
    dict(
        type='GenerateOccFlowLabels',
        grid_conf=dict(
            xbound=[-50.0, 50.0, 0.5],
            ybound=[-50.0, 50.0, 0.5],
            zbound=[-10.0, 10.0, 20.0]),
        ignore_index=255,
        only_vehicle=True,
        filter_invisible=False),
    # Object filtering by range, then by class name.
    dict(
        type='ObjectRangeFilterTrack',
        point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]),
    dict(
        type='ObjectNameFilterTrack',
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
        ]),
    # Per-channel mean with unit std; to_rgb is off.
    dict(
        type='NormalizeMultiviewImage',
        mean=[103.53, 116.28, 123.675],
        std=[1.0, 1.0, 1.0],
        to_rgb=False),
    dict(type='PadMultiViewImage', size_divisor=32),
    dict(
        type='DefaultFormatBundle3D',
        class_names=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
        ]),
    # The 31 keys collected for training.
    dict(
        type='CustomCollect3D',
        keys=[
            'gt_bboxes_3d', 'gt_labels_3d', 'gt_inds', 'img', 'timestamp',
            'l2g_r_mat', 'l2g_t', 'gt_fut_traj', 'gt_fut_traj_mask',
            'gt_past_traj', 'gt_past_traj_mask', 'gt_sdc_bbox', 'gt_sdc_label',
            'gt_sdc_fut_traj', 'gt_sdc_fut_traj_mask', 'gt_lane_labels',
            'gt_lane_bboxes', 'gt_lane_masks', 'gt_segmentation',
            'gt_instance', 'gt_centerness', 'gt_offset', 'gt_flow',
            'gt_backward_flow', 'gt_occ_has_invalid_frame',
            'gt_occ_img_is_valid', 'gt_future_boxes', 'gt_future_labels',
            'sdc_planning', 'sdc_planning_mask', 'command'
        ])
]
# Test pipeline: six transform configs. Unlike the training list it contains
# no photometric distortion and no object range/name filters.
test_pipeline = [
    dict(
        type='LoadMultiViewImageFromFilesInCeph',
        to_float32=True,
        file_client_args=dict(backend='disk'),
        img_root=''),
    # Per-channel mean with unit std; to_rgb is off.
    dict(
        type='NormalizeMultiviewImage',
        mean=[103.53, 116.28, 123.675],
        std=[1.0, 1.0, 1.0],
        to_rgb=False),
    dict(type='PadMultiViewImage', size_divisor=32),
    # Box, label and instance-index loading are off; future annotations stay
    # on.
    dict(
        type='LoadAnnotations3D_E2E',
        with_bbox_3d=False,
        with_label_3d=False,
        with_attr_label=False,
        with_future_anns=True,
        with_ins_inds_3d=False,
        ins_inds_add_1=True),
    dict(
        type='GenerateOccFlowLabels',
        grid_conf=dict(
            xbound=[-50.0, 50.0, 0.5],
            ybound=[-50.0, 50.0, 0.5],
            zbound=[-10.0, 10.0, 20.0]),
        ignore_index=255,
        only_vehicle=True,
        filter_invisible=False),
    # Single scale (1600, 900), no flip; wraps the format + collect steps.
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1600, 900),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone'
                ],
                with_label=False),
            # The 18 keys collected at test time.
            dict(
                type='CustomCollect3D',
                keys=[
                    'img', 'timestamp', 'l2g_r_mat', 'l2g_t', 'gt_lane_labels',
                    'gt_lane_bboxes', 'gt_lane_masks', 'gt_segmentation',
                    'gt_instance', 'gt_centerness', 'gt_offset', 'gt_flow',
                    'gt_backward_flow', 'gt_occ_has_invalid_frame',
                    'gt_occ_img_is_valid', 'sdc_planning', 'sdc_planning_mask',
                    'command'
                ])
        ])
]
# Point-based pipeline: four transform configs that only collect 'points'.
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=dict(backend='disk')),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=dict(backend='disk')),
    # NOTE(review): this class order differs from the one used in the
    # train/test pipelines of this file (e.g. 'trailer' precedes 'bus' here).
    # with_label is False; confirm whether the order matters to consumers.
    dict(
        type='DefaultFormatBundle3D',
        class_names=[
            'car', 'truck', 'trailer', 'bus', 'construction_vehicle',
            'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
        ],
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]
# Dataset/dataloader settings for the train, val and test splits.
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=8,
    # ----------------------------------------------------------------- train
    train=dict(
        type='NuScenesE2EDataset',
        data_root='data/nuscenes/',
        ann_file='data/infos/nuscenes_infos_temporal_train.pkl',
        # Ten-step training pipeline, written out inline.
        pipeline=[
            dict(
                type='LoadMultiViewImageFromFilesInCeph',
                to_float32=True,
                file_client_args=dict(backend='disk'),
                img_root=''),
            dict(type='PhotoMetricDistortionMultiViewImage'),
            dict(
                type='LoadAnnotations3D_E2E',
                with_bbox_3d=True,
                with_label_3d=True,
                with_attr_label=False,
                with_future_anns=True,
                with_ins_inds_3d=True,
                ins_inds_add_1=True),
            dict(
                type='GenerateOccFlowLabels',
                grid_conf=dict(
                    xbound=[-50.0, 50.0, 0.5],
                    ybound=[-50.0, 50.0, 0.5],
                    zbound=[-10.0, 10.0, 20.0]),
                ignore_index=255,
                only_vehicle=True,
                filter_invisible=False),
            dict(
                type='ObjectRangeFilterTrack',
                point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]),
            dict(
                type='ObjectNameFilterTrack',
                classes=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone'
                ]),
            dict(
                type='NormalizeMultiviewImage',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False),
            dict(type='PadMultiViewImage', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone'
                ]),
            dict(
                type='CustomCollect3D',
                keys=[
                    'gt_bboxes_3d', 'gt_labels_3d', 'gt_inds', 'img',
                    'timestamp', 'l2g_r_mat', 'l2g_t', 'gt_fut_traj',
                    'gt_fut_traj_mask', 'gt_past_traj', 'gt_past_traj_mask',
                    'gt_sdc_bbox', 'gt_sdc_label', 'gt_sdc_fut_traj',
                    'gt_sdc_fut_traj_mask', 'gt_lane_labels', 'gt_lane_bboxes',
                    'gt_lane_masks', 'gt_segmentation', 'gt_instance',
                    'gt_centerness', 'gt_offset', 'gt_flow',
                    'gt_backward_flow', 'gt_occ_has_invalid_frame',
                    'gt_occ_img_is_valid', 'gt_future_boxes',
                    'gt_future_labels', 'sdc_planning', 'sdc_planning_mask',
                    'command'
                ])
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=True),
        test_mode=False,
        box_type_3d='LiDAR',
        file_client_args=dict(backend='disk'),
        use_valid_flag=True,
        patch_size=[102.4, 102.4],
        canvas_size=(200, 200),
        bev_size=(200, 200),
        # queue_length is set only on the train split in this dict.
        queue_length=5,
        predict_steps=12,
        past_steps=4,
        fut_steps=4,
        use_nonlinear_optimizer=True,
        occ_receptive_field=3,
        occ_n_future=6,
        occ_filter_invalid_sample=False),
    # ------------------------------------------------------------------- val
    val=dict(
        type='NuScenesE2EDataset',
        data_root='data/nuscenes/',
        ann_file='data/infos/nuscenes_infos_temporal_val.pkl',
        # Six-step test-style pipeline, written out inline.
        pipeline=[
            dict(
                type='LoadMultiViewImageFromFilesInCeph',
                to_float32=True,
                file_client_args=dict(backend='disk'),
                img_root=''),
            dict(
                type='NormalizeMultiviewImage',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False),
            dict(type='PadMultiViewImage', size_divisor=32),
            dict(
                type='LoadAnnotations3D_E2E',
                with_bbox_3d=False,
                with_label_3d=False,
                with_attr_label=False,
                with_future_anns=True,
                with_ins_inds_3d=False,
                ins_inds_add_1=True),
            dict(
                type='GenerateOccFlowLabels',
                grid_conf=dict(
                    xbound=[-50.0, 50.0, 0.5],
                    ybound=[-50.0, 50.0, 0.5],
                    zbound=[-10.0, 10.0, 20.0]),
                ignore_index=255,
                only_vehicle=True,
                filter_invisible=False),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1600, 900),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=[
                            'car', 'truck', 'construction_vehicle', 'bus',
                            'trailer', 'barrier', 'motorcycle', 'bicycle',
                            'pedestrian', 'traffic_cone'
                        ],
                        with_label=False),
                    dict(
                        type='CustomCollect3D',
                        keys=[
                            'img', 'timestamp', 'l2g_r_mat', 'l2g_t',
                            'gt_lane_labels', 'gt_lane_bboxes',
                            'gt_lane_masks', 'gt_segmentation', 'gt_instance',
                            'gt_centerness', 'gt_offset', 'gt_flow',
                            'gt_backward_flow', 'gt_occ_has_invalid_frame',
                            'gt_occ_img_is_valid', 'sdc_planning',
                            'sdc_planning_mask', 'command'
                        ])
                ])
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=True),
        test_mode=True,
        box_type_3d='LiDAR',
        file_client_args=dict(backend='disk'),
        patch_size=[102.4, 102.4],
        canvas_size=(200, 200),
        bev_size=(200, 200),
        predict_steps=12,
        past_steps=4,
        fut_steps=4,
        use_nonlinear_optimizer=True,
        samples_per_gpu=1,
        eval_mod=['det', 'track', 'map'],
        occ_receptive_field=3,
        occ_n_future=6,
        occ_filter_invalid_sample=False),
    # ------------------------------------------------------------------ test
    # Uses the same annotation file and pipeline as val. It does not set
    # samples_per_gpu, occ_receptive_field or occ_filter_invalid_sample, and
    # lists eval_mod in a different order.
    test=dict(
        type='NuScenesE2EDataset',
        data_root='data/nuscenes/',
        ann_file='data/infos/nuscenes_infos_temporal_val.pkl',
        pipeline=[
            dict(
                type='LoadMultiViewImageFromFilesInCeph',
                to_float32=True,
                file_client_args=dict(backend='disk'),
                img_root=''),
            dict(
                type='NormalizeMultiviewImage',
                mean=[103.53, 116.28, 123.675],
                std=[1.0, 1.0, 1.0],
                to_rgb=False),
            dict(type='PadMultiViewImage', size_divisor=32),
            dict(
                type='LoadAnnotations3D_E2E',
                with_bbox_3d=False,
                with_label_3d=False,
                with_attr_label=False,
                with_future_anns=True,
                with_ins_inds_3d=False,
                ins_inds_add_1=True),
            dict(
                type='GenerateOccFlowLabels',
                grid_conf=dict(
                    xbound=[-50.0, 50.0, 0.5],
                    ybound=[-50.0, 50.0, 0.5],
                    zbound=[-10.0, 10.0, 20.0]),
                ignore_index=255,
                only_vehicle=True,
                filter_invisible=False),
            dict(
                type='MultiScaleFlipAug3D',
                img_scale=(1600, 900),
                pts_scale_ratio=1,
                flip=False,
                transforms=[
                    dict(
                        type='DefaultFormatBundle3D',
                        class_names=[
                            'car', 'truck', 'construction_vehicle', 'bus',
                            'trailer', 'barrier', 'motorcycle', 'bicycle',
                            'pedestrian', 'traffic_cone'
                        ],
                        with_label=False),
                    dict(
                        type='CustomCollect3D',
                        keys=[
                            'img', 'timestamp', 'l2g_r_mat', 'l2g_t',
                            'gt_lane_labels', 'gt_lane_bboxes',
                            'gt_lane_masks', 'gt_segmentation', 'gt_instance',
                            'gt_centerness', 'gt_offset', 'gt_flow',
                            'gt_backward_flow', 'gt_occ_has_invalid_frame',
                            'gt_occ_img_is_valid', 'sdc_planning',
                            'sdc_planning_mask', 'command'
                        ])
                ])
        ],
        classes=[
            'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
            'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
        ],
        modality=dict(
            use_lidar=False,
            use_camera=True,
            use_radar=False,
            use_map=False,
            use_external=True),
        test_mode=True,
        box_type_3d='LiDAR',
        file_client_args=dict(backend='disk'),
        patch_size=[102.4, 102.4],
        canvas_size=(200, 200),
        bev_size=(200, 200),
        predict_steps=12,
        past_steps=4,
        fut_steps=4,
        occ_n_future=6,
        use_nonlinear_optimizer=True,
        eval_mod=['det', 'map', 'track']),
    # -------------------------------------------------------------- samplers
    shuffler_sampler=dict(type='DistributedGroupSampler'),
    nonshuffler_sampler=dict(type='DistributedSampler'))
# Evaluation settings: interval of 6, an inline six-step pipeline, and the
# 'uniad' planning evaluation strategy.
evaluation = dict(
    interval=6,
    pipeline=[
        dict(
            type='LoadMultiViewImageFromFilesInCeph',
            to_float32=True,
            file_client_args=dict(backend='disk'),
            img_root=''),
        dict(
            type='NormalizeMultiviewImage',
            mean=[103.53, 116.28, 123.675],
            std=[1.0, 1.0, 1.0],
            to_rgb=False),
        dict(type='PadMultiViewImage', size_divisor=32),
        # Box, label and instance-index loading are off; future annotations
        # stay on.
        dict(
            type='LoadAnnotations3D_E2E',
            with_bbox_3d=False,
            with_label_3d=False,
            with_attr_label=False,
            with_future_anns=True,
            with_ins_inds_3d=False,
            ins_inds_add_1=True),
        dict(
            type='GenerateOccFlowLabels',
            grid_conf=dict(
                xbound=[-50.0, 50.0, 0.5],
                ybound=[-50.0, 50.0, 0.5],
                zbound=[-10.0, 10.0, 20.0]),
            ignore_index=255,
            only_vehicle=True,
            filter_invisible=False),
        # Single scale (1600, 900), no flip; wraps the format + collect steps.
        dict(
            type='MultiScaleFlipAug3D',
            img_scale=(1600, 900),
            pts_scale_ratio=1,
            flip=False,
            transforms=[
                dict(
                    type='DefaultFormatBundle3D',
                    class_names=[
                        'car', 'truck', 'construction_vehicle', 'bus',
                        'trailer', 'barrier', 'motorcycle', 'bicycle',
                        'pedestrian', 'traffic_cone'
                    ],
                    with_label=False),
                dict(
                    type='CustomCollect3D',
                    keys=[
                        'img', 'timestamp', 'l2g_r_mat', 'l2g_t',
                        'gt_lane_labels', 'gt_lane_bboxes', 'gt_lane_masks',
                        'gt_segmentation', 'gt_instance', 'gt_centerness',
                        'gt_offset', 'gt_flow', 'gt_backward_flow',
                        'gt_occ_has_invalid_frame', 'gt_occ_img_is_valid',
                        'sdc_planning', 'sdc_planning_mask', 'command'
                    ])
            ])
    ],
    planning_evaluation_strategy='uniad')
# ---------------------------------------------------------------------------
# Runtime settings.
# ---------------------------------------------------------------------------
checkpoint_config = dict(interval=1)
# Two logger hooks (text and TensorBoard) with an interval of 10.
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = 'projects/work_dirs/stage1_track_map/base_track_map/'
load_from = 'ckpts/bevformer_r101_dcn_24ep.pth'
resume_from = None
workflow = [('train', 1)]
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# ---------------------------------------------------------------------------
# Scalar hyper-parameters kept at module level.
# ---------------------------------------------------------------------------
voxel_size = [0.2, 0.2, 8]
patch_size = [102.4, 102.4]
# Per-channel mean with unit std; to_rgb is off.
img_norm_cfg = dict(
    mean=[103.53, 116.28, 123.675],
    std=[1.0, 1.0, 1.0],
    to_rgb=False)
_dim_ = 256
_pos_dim_ = 128
_ffn_dim_ = 512
_num_levels_ = 4
# Both BEV sizes are 200; bev_h_ is bound first, then bev_w_.
bev_h_ = bev_w_ = 200
_feed_dim_ = 512
_dim_half_ = 128
canvas_size = (200, 200)
queue_length = 5
predict_steps = 12
predict_modes = 6
fut_steps = 4
past_steps = 4
use_nonlinear_optimizer = True
# occ_n_future_max equals the larger of the two values above it.
occ_n_future = 4
occ_n_future_plan = 6
occ_n_future_max = 6
planning_steps = 6
use_col_optim = True
planning_evaluation_strategy = 'uniad'
# NOTE(review): each *bound looks like [start, stop, step] -- confirm against
# the code that consumes this grid config.
occflow_grid_conf = dict(
    xbound=[-50.0, 50.0, 0.5],
    ybound=[-50.0, 50.0, 0.5],
    zbound=[-10.0, 10.0, 20.0])
train_gt_iou_threshold = 0.3
# Model definition: a 'UniAD' config with an image backbone and neck, tracking
# arguments (qim_args / mem_args / loss_cfg), a BEV track head, a segmentation
# head, and a top-level train_cfg.
model = dict(
    type='UniAD',
    gt_iou_threshold=0.3,
    queue_length=5,
    use_grid_mask=True,
    video_test_mode=True,
    num_query=900,
    num_classes=10,
    pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
    # -------------------------------------------------------- image backbone
    # ResNet depth 101; frozen_stages equals num_stages (4); DCNv2 is enabled
    # on the last two entries of stage_with_dcn.
    img_backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=4,
        norm_cfg=dict(type='BN2d', requires_grad=False),
        norm_eval=True,
        style='caffe',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, False, True, True)),
    # ------------------------------------------------------------ image neck
    # FPN taking three input widths and producing four 256-channel outputs.
    img_neck=dict(
        type='FPN',
        in_channels=[512, 1024, 2048],
        out_channels=256,
        start_level=0,
        add_extra_convs='on_output',
        num_outs=4,
        relu_before_extra_convs=True),
    freeze_img_backbone=True,
    freeze_img_neck=False,
    freeze_bn=False,
    score_thresh=0.4,
    filter_score_thresh=0.35,
    # ------------------------------------------------------ tracking settings
    qim_args=dict(
        qim_type='QIMBase',
        merger_dropout=0,
        update_query_pos=True,
        fp_ratio=0.3,
        random_drop=0.1),
    mem_args=dict(
        memory_bank_type='MemoryBank',
        memory_bank_score_thresh=0.0,
        memory_bank_len=4),
    # Ten code weights: eight at 1.0 followed by two at 0.2.
    loss_cfg=dict(
        type='ClipMatcher',
        num_classes=10,
        weight_dict=None,
        code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        assigner=dict(
            type='HungarianAssigner3DTrack',
            cls_cost=dict(type='FocalLossCost', weight=2.0),
            reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
        loss_past_traj_weight=0.0),
    # -------------------------------------------------------- BEV track head
    pts_bbox_head=dict(
        type='BEVFormerTrackHead',
        bev_h=200,
        bev_w=200,
        num_query=900,
        num_classes=10,
        in_channels=256,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=False,
        past_steps=4,
        fut_steps=4,
        transformer=dict(
            type='PerceptionTransformer',
            rotate_prev_bev=True,
            use_shift=True,
            use_can_bus=True,
            embed_dims=256,
            # Six encoder layers, each: self-attn -> norm -> cross-attn ->
            # norm -> ffn -> norm (per operation_order).
            encoder=dict(
                type='BEVFormerEncoder',
                num_layers=6,
                pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
                num_points_in_pillar=4,
                return_intermediate=False,
                transformerlayers=dict(
                    type='BEVFormerLayer',
                    attn_cfgs=[
                        dict(
                            type='TemporalSelfAttention',
                            embed_dims=256,
                            num_levels=1),
                        dict(
                            type='SpatialCrossAttention',
                            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
                            deformable_attention=dict(
                                type='MSDeformableAttention3D',
                                embed_dims=256,
                                num_points=8,
                                num_levels=4),
                            embed_dims=256)
                    ],
                    feedforward_channels=512,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm'))),
            # Six decoder layers with intermediate outputs returned.
            decoder=dict(
                type='DetectionTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        dict(
                            type='CustomMSDeformableAttention',
                            embed_dims=256,
                            num_levels=1)
                    ],
                    feedforward_channels=512,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        # post_center_range extends pc_range by 10 in x/y and spans [-10, 10]
        # on the third axis.
        bbox_coder=dict(
            type='NMSFreeCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
            max_num=300,
            voxel_size=[0.2, 0.2, 8],
            num_classes=10),
        positional_encoding=dict(
            type='LearnedPositionalEncoding',
            num_feats=128,
            row_num_embed=200,
            col_num_embed=200),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=0.25),
        # GIoU loss is configured with a weight of 0.0.
        loss_iou=dict(type='GIoULoss', loss_weight=0.0)),
    # ----------------------------------------------------- segmentation head
    # Four classes: three "things" plus one "stuff".
    seg_head=dict(
        type='PansegformerHead',
        bev_h=200,
        bev_w=200,
        canvas_size=(200, 200),
        pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
        num_query=300,
        num_classes=4,
        num_things_classes=3,
        num_stuff_classes=1,
        in_channels=2048,
        sync_cls_avg_factor=True,
        as_two_stage=False,
        with_box_refine=True,
        transformer=dict(
            type='SegDeformableTransformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=dict(
                        type='MultiScaleDeformableAttention',
                        embed_dims=256,
                        num_levels=4),
                    feedforward_channels=512,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DeformableDetrTransformerDecoder',
                num_layers=6,
                return_intermediate=True,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1),
                        dict(
                            type='MultiScaleDeformableAttention',
                            embed_dims=256,
                            num_levels=4)
                    ],
                    feedforward_channels=512,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding',
            num_feats=128,
            normalize=True,
            offset=-0.5),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=2.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0),
        loss_mask=dict(type='DiceLoss', loss_weight=2.0),
        # Mask heads: four decoder layers for "thing", six (with self_attn)
        # for "stuff".
        thing_transformer_head=dict(
            type='SegMaskHead', d_model=256, nhead=8, num_decoder_layers=4),
        stuff_transformer_head=dict(
            type='SegMaskHead',
            d_model=256,
            nhead=8,
            num_decoder_layers=6,
            self_attn=True),
        # Two assigners: one without and one with a mask (Dice) cost.
        train_cfg=dict(
            assigner=dict(
                type='HungarianAssigner',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(
                    type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0)),
            assigner_with_mask=dict(
                type='HungarianAssigner_multi_info',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(
                    type='BBoxL1Cost', weight=5.0, box_format='xywh'),
                iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0),
                mask_cost=dict(type='DiceCost', weight=2.0)),
            sampler=dict(type='PseudoSampler'),
            sampler_with_mask=dict(type='PseudoSampler_segformer'))),
    # ------------------------------------------------------- top-level train
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=[0.2, 0.2, 8],
            point_cloud_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0],
            out_size_factor=4,
            assigner=dict(
                type='HungarianAssigner3D',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
                iou_cost=dict(type='IoUCost', weight=0.0),
                pc_range=[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]))))
# ---------------------------------------------------------------------------
# Annotation files. The val and test entries point at the same .pkl file.
# ---------------------------------------------------------------------------
info_root = 'data/infos/'
ann_file_train = 'data/infos/nuscenes_infos_temporal_train.pkl'
ann_file_val = 'data/infos/nuscenes_infos_temporal_val.pkl'
ann_file_test = 'data/infos/nuscenes_infos_temporal_val.pkl'

# ---------------------------------------------------------------------------
# Optimisation and schedule.
# ---------------------------------------------------------------------------
# AdamW with lr 2e-4; parameters matched by the 'img_backbone' custom key get
# an lr multiplier of 0.1.
optimizer = dict(
    type='AdamW',
    lr=0.0002,
    paramwise_cfg=dict(
        custom_keys=dict(
            img_backbone=dict(lr_mult=0.1))),
    weight_decay=0.01)
# Gradient clipping with max_norm 35 and norm_type 2.
optimizer_config = dict(
    grad_clip=dict(max_norm=35, norm_type=2))
# 'CosineAnnealing' policy with a 500-iteration linear warmup starting at
# one third (1.0 / 3 == 0.3333333333333333).
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=0.001)
# total_epochs and the runner's max_epochs are both 6.
total_epochs = 6
runner = dict(type='EpochBasedRunner', max_epochs=6)

# ---------------------------------------------------------------------------
# Miscellaneous.
# ---------------------------------------------------------------------------
find_unused_parameters = True
logger_name = 'mmdet'
gpu_ids = range(0, 1)