def Task(self):
  """Returns the parent task params tuned for Pedestrian/Cyclist detection.

  Starts from the params produced by the parent class's `Task()` and
  overrides: the learning rate, the GIN cell featurizer, the per-class loss
  weights, the focal loss settings, the per-class NMS thresholds, and the
  output decoder's filtering and AP metric weights.

  Returns:
    The params object obtained from the parent `Task()`, updated in place.
  """
  p = super(StarNetPedCycModel0704, self).Task()
  p.train.learning_rate = 7e-4

  builder = starnet.Builder()
  builder.linear_params_init = py_utils.WeightInit.KaimingUniformFanInRelu()

  # Featurizer architecture.
  gin_layer_sizes = [32, 256, 512, 256, 256, 128]
  num_laser_features = 1
  # Each layer expects as input 2 * dims of the previous layer's output. We
  # assume a middle layer that is 2 * dim_out wide.
  gin_layers = []
  for dim_in, dim_out in zip(gin_layer_sizes[:-1], gin_layer_sizes[1:]):
    gin_layers.append([dim_in * 2, dim_out * 2, dim_out])

  p.cell_feature_dims = sum(gin_layer_sizes)
  # Disable BN on first layer.
  p.cell_featurizer = builder.GINFeaturizerV2(
      'feat',
      gin_layer_sizes[0],
      gin_layers,
      num_laser_features,
      fc_use_bn=False)
  p.anchor_projected_feature_dims = 512

  # KITTI_CLASS_NAMES is used as a sequence: a class's position in it is the
  # index into every per-class list below.
  class_names = kitti_input_generator.KITTILabelExtractor.KITTI_CLASS_NAMES
  num_classes = len(class_names)
  pedestrian_idx = class_names.index('Pedestrian')
  cyclist_idx = class_names.index('Cyclist')

  # Loss params: only Pedestrian and Cyclist get a non-zero loss weight.
  loss_weights = [0.] * num_classes
  loss_weights[pedestrian_idx] = 3.5
  loss_weights[cyclist_idx] = 3.25
  p.per_class_loss_weight = loss_weights
  p.focal_loss_alpha = 0.9
  p.focal_loss_gamma = 1.25

  # Decoding / NMS params. Classes other than Pedestrian and Cyclist keep an
  # IoU threshold of 0.0 and a score threshold of 1.0.
  p.use_oriented_per_class_nms = True
  p.max_nms_boxes = 1024

  iou_thresholds = [0.0] * num_classes
  iou_thresholds[cyclist_idx] = 0.49
  iou_thresholds[pedestrian_idx] = 0.32
  p.nms_iou_threshold = iou_thresholds

  score_thresholds = [1.0] * num_classes
  score_thresholds[cyclist_idx] = 0.11
  score_thresholds[pedestrian_idx] = 0.23
  p.nms_score_threshold = score_thresholds

  decoder_p = p.output_decoder
  decoder_p.filter_predictions_outside_frustum = True
  decoder_p.truncation_threshold = 0.65

  # Equally weight pedestrian and cyclist moderate classes.
  # NOTE(review): the position of each class inside these weight vectors is
  # defined by the AP metric, not by this method -- confirm against it.
  decoder_p.ap_metric.metric_weights = {
      'easy': np.array([0.0, 0.0, 0.0]),
      'moderate': np.array([0.0, 1.0, 1.0]),
      'hard': np.array([0.0, 0.0, 0.0]),
  }

  return p
def Task(self):
  """Returns the parent task params tuned for Car detection.

  Starts from the params produced by the parent class's `Task()` and
  overrides: the GIN cell featurizer, the learning rate and focal loss
  settings, the per-class loss weights, the per-class NMS thresholds, and
  the output decoder's filtering options.

  Returns:
    The params object obtained from the parent `Task()`, updated in place.
  """
  p = super(StarNetCarModel0701, self).Task()

  # Builder configuration.
  builder = starnet.Builder()
  builder.linear_params_init = py_utils.WeightInit.KaimingUniformFanInRelu()

  gin_layer_sizes = [32, 256, 512, 256, 256, 128]
  num_laser_features = 1
  # Each layer expects as input 2 * dims of the previous layer's output. We
  # assume a middle layer that is 2 * dim_out wide.
  gin_layers = []
  for dim_in, dim_out in zip(gin_layer_sizes[:-1], gin_layer_sizes[1:]):
    gin_layers.append([dim_in * 2, dim_out * 2, dim_out])

  p.cell_feature_dims = sum(gin_layer_sizes)
  p.cell_featurizer = builder.GINFeaturizerV2(
      name='feat',
      fc_dims=gin_layer_sizes[0],
      mlp_dims=gin_layers,
      num_laser_features=num_laser_features,
      fc_use_bn=False)
  p.anchor_projected_feature_dims = 512

  # Loss and training params.
  p.train.learning_rate = 0.001 / 2.  # Divide by batch size.
  p.focal_loss_alpha = 0.2
  p.focal_loss_gamma = 3.0

  # KITTI_CLASS_NAMES is used as a sequence: a class's position in it is the
  # index into every per-class list below.
  class_names = kitti_input_generator.KITTILabelExtractor.KITTI_CLASS_NAMES
  num_classes = len(class_names)
  car_idx = class_names.index('Car')

  # Only the Car class contributes to the loss.
  loss_weights = [0.] * num_classes
  loss_weights[car_idx] = 1.
  p.per_class_loss_weight = loss_weights

  # Decoding / NMS params. Classes other than Car keep an IoU threshold of
  # 0.0 and a score threshold of 1.0.
  p.use_oriented_per_class_nms = True
  p.max_nms_boxes = 512

  iou_thresholds = [0.0] * num_classes
  iou_thresholds[car_idx] = 0.0831011
  p.nms_iou_threshold = iou_thresholds

  score_thresholds = [1.0] * num_classes
  score_thresholds[car_idx] = 0.321310
  p.nms_score_threshold = score_thresholds

  decoder_p = p.output_decoder
  decoder_p.truncation_threshold = 0.65
  decoder_p.filter_predictions_outside_frustum = True

  return p
def Task(self):
  """Builds the StarNet `ModelV2` task params for the Waymo dataset.

  Creates `starnet.ModelV2` params sized by the number of class names that
  `WaymoMetadata` reports, installs a GIN cell featurizer and the Waymo Open
  Dataset decoder, and sets the NMS defaults, the optimizer, EMA, the
  exponential learning-rate schedule and the loss weights.

  Reads `NUM_ANCHOR_BBOX_OFFSETS`, `NUM_ANCHOR_BBOX_ROTATIONS`,
  `NUM_ANCHOR_BBOX_DIMENSIONS`, `GIN_HIDDEN_DIMS` and `NUM_GIN_LAYERS` from
  `self`, and calls `self.Train()` to obtain the train input params.

  Returns:
    The configured `starnet.ModelV2` params.
  """
  num_classes = len(waymo_metadata.WaymoMetadata().ClassNames())
  p = starnet.ModelV2.Params(
      num_classes,
      num_anchor_bboxes_offsets=self.NUM_ANCHOR_BBOX_OFFSETS,
      num_anchor_bboxes_rotations=self.NUM_ANCHOR_BBOX_ROTATIONS,
      num_anchor_bboxes_dimensions=self.NUM_ANCHOR_BBOX_DIMENSIONS,
      num_laser_features=3)
  p.name = 'starnet'

  # Update the Point Cloud Featurizer architecture.
  starnet_builder = starnet.Builder()
  starnet_builder.linear_params_init = (
      py_utils.WeightInit.KaimingUniformFanInRelu())

  hidden_dims = self.GIN_HIDDEN_DIMS
  num_gin_layers = self.NUM_GIN_LAYERS
  # Every GIN layer uses the same [input, middle, output] dims. List
  # repetition makes all entries refer to one shared inner list.
  gin_layer_dims = [hidden_dims * 2, hidden_dims * 4, hidden_dims]
  gin_layers = [gin_layer_dims] * num_gin_layers
  p.cell_featurizer = starnet_builder.GINFeaturizerV2(
      'feat',
      num_laser_features=3,
      fc_dims=hidden_dims,
      mlp_dims=gin_layers,
      fc_use_bn=False)
  p.cell_feature_dims = hidden_dims * (num_gin_layers + 1)

  # Decoding / NMS params.
  p.output_decoder = waymo_decoder.WaymoOpenDatasetDecoder.Params()
  p.max_nms_boxes = 512
  p.use_oriented_per_class_nms = True
  # Note: Sub-classes need to set nms_iou_threshold and nms_score_threshold
  # appropriately.
  p.nms_iou_threshold = [0.0] * num_classes
  # TODO(jngiam): 1.1 for untrained classes is needed to avoid an issue
  # with boxutils error.
  p.nms_score_threshold = [1.1] * num_classes

  # Training params.
  tp = p.train
  tp.optimizer = optimizer.Adam.Params()
  tp.clip_gradient_norm_to_value = 5
  # To be tuned.
  tp.l2_regularizer_weight = 1e-8

  # Train set uses a smaller decoding set, so we can safely eval over the
  # entire input.
  p.eval.samples_per_summary = 0

  # Derive trainer-side cluster params from a copy of the current cluster's.
  train_cluster_p = cluster_factory.Current().params.Copy()
  train_cluster_p.job = 'trainer_client'
  train_cluster_p.mode = 'sync'
  # When running a decoding only job, there are no trainer workers, so we set
  # worker replicas to 1 as a dummy value.
  if train_cluster_p.worker.replicas <= 0:
    train_cluster_p.worker.replicas = 1

  # Set learning rate and schedule.
  # NOTE(review): self.Train() is evaluated under the trainer cluster params,
  # presumably so the input params match what the trainer uses -- confirm.
  with cluster_factory.Cluster(train_cluster_p):
    train_input_p = self.Train()

  # Adapted from V1 tuning.
  tp.ema_decay = 0.99
  # TODO(b/148537111): consider setting this to True.
  tp.ema_decay_moving_vars = False
  tp.learning_rate = 0.001
  lr_util.SetExponentialLR(
      train_p=tp,
      train_input_p=train_input_p,
      exp_start_epoch=5,
      total_epoch=75)

  # Loss weights.
  p.dimension_loss_weight = .3
  p.location_loss_weight = 3.
  p.loss_weight_classification = 1.
  p.loss_weight_localization = 3.
  p.rotation_loss_weight = 0.3

  return p