Example No. 1
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict must contains the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True

        hidden_size = config.fc_hidden
        target_class = config.num_classes
        # self.resnet = resnet18(num_classes=hidden_size)
        #self.resnet = ResNet(block=BasicBlock, layers=[1, 1, 1, 1], num_classes=hidden_size)
        # self.resnet = ResNet(config.in_channels, 18)
        self.fpn = FPN([256] * 4, 4)

        self.fpn_seq = FPN([128, 128, 128, 70], 4)
        # The CNN feature maps have a total of 228 dimensions.
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(hidden_size, target_class)
        self.num_classes = config.num_classes
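
A minimal usage sketch for the snippet above (hedged: BertFPNClassifier is an illustrative stand-in for the class whose __init__ is shown, FPN comes from the surrounding project, and the config is assumed to be an attribute namespace rather than a plain dict, since the code reads config.bert_model_path with dot access):

# Usage sketch: BertFPNClassifier is a stand-in name for the class whose
# __init__ is shown above; FPN is assumed to come from the project.
from types import SimpleNamespace

config = SimpleNamespace(
    bert_model_path='bert-base-chinese',  # HF model name or local path
    fc_hidden=768,                        # input size of the final Linear layer
    num_classes=2,
    dropout=0.1,
)
model = BertFPNClassifier(config)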
Example No. 2
    def __init__(self, name_scope, is_test=False):
        super(ResNet, self).__init__(name_scope)
        self.conv1 = fluid.dygraph.Conv2D(name_scope + "_conv1",
                                          num_filters=64,
                                          filter_size=7,
                                          stride=2,
                                          padding=3,
                                          dilation=1)
        self.bn1 = fluid.dygraph.BatchNorm(name_scope + "_bn1",
                                           64,
                                           act="relu",
                                           is_test=is_test)
        self.maxPooling = fluid.dygraph.Pool2D(name_scope + "_maxpooling",
                                               pool_size=2,
                                               pool_stride=2,
                                               pool_type='max')

        self.block1 = Make_layer(name_scope + "_block1",
                                 64,
                                 layernums=3,
                                 stride=1,
                                 is_test=is_test)
        self.block2 = Make_layer(name_scope + "_block2",
                                 128,
                                 layernums=4,
                                 stride=2,
                                 is_test=is_test)
        self.block3 = Make_layer(name_scope + "_block3",
                                 256,
                                 layernums=4,
                                 stride=2,
                                 is_test=is_test)
        self.block4 = Make_layer(name_scope + "_block4",
                                 256,
                                 layernums=6,
                                 stride=2,
                                 is_test=is_test)
        self.block5 = Make_layer(name_scope + "_block5",
                                 256,
                                 layernums=6,
                                 stride=2,
                                 is_test=is_test)
        self.block6 = Make_layer(name_scope + "_block6",
                                 256,
                                 layernums=4,
                                 stride=2,
                                 is_test=is_test)  # the last block's output already includes the activation
        self.fpn = FPN(name_scope + "_FPN", is_test=is_test)
Example No. 3
    def __build_model(self):
        if self.graph is None:
            self.graph = tf.Graph()
        # self.strategy = tf.distribute.MirroredStrategy()
        # with self.strategy.scope():
        with self.graph.as_default():

                self.is_training = tf.placeholder(tf.bool, shape=(), name='is_training')

                img_size_1 = 370
                img_size_2 = 1224
                c_dim = 3
                self.train_inputs_rgb = tf.placeholder(tf.float32, 
                                                    [None, img_size_1, img_size_2, c_dim], 
                                                    name='train_inputs_rgb')

                if self.params['use_fv']:
                    c_dim = 64
                    self.train_inputs_fv_lidar = tf.placeholder(tf.float32, 
                                                        [None, img_size_1, img_size_2, c_dim], 
                                                        name='train_inputs_fv_lidar')

                img_size_1 = 512
                img_size_2 = 448
                c_dim = 32
                self.train_inputs_lidar = tf.placeholder(tf.float32, 
                                    [None, img_size_1, img_size_2, c_dim], 
                                    name='train_inputs_lidar')
                self.label_weights = tf.placeholder(tf.float32, shape=(None, 128, 112, 2, 1))  # per-anchor label weights

                self.y_true = tf.placeholder(tf.float32, shape=(None, 128, 112, 2, 9)) # target

                self.y_true_img = tf.placeholder(tf.float32, shape=(None, 24, 78, 2)) # target

                self.Tr_velo_to_cam = tf.placeholder(tf.float32, shape=(None, 4, 4))
                self.R0_rect = tf.placeholder(tf.float32, shape=(None, 4, 4))
                self.P3 = tf.placeholder(tf.float32, shape=(None, 3, 4))
                self.shift_h =  tf.placeholder(tf.float32, shape=[None, 1])
                self.shift_w = tf.placeholder(tf.float32, shape=[None, 1])

                self.train_fusion_rgb = tf.placeholder(tf.bool, shape=())
                if self.params['use_fv']:
                    self.train_fusion_fv_lidar = tf.placeholder(tf.bool, shape=())

                with tf.variable_scope("image_branch"):
                    self.cnn = ResNetBuilder().build(branch=self.CONST.IMAGE_BRANCH, img_height=370, img_width=1224, img_channels=3)
                    self.cnn.build_model(self.train_inputs_rgb, is_training=self.is_training)
                    with tf.variable_scope("image_head"): 
                        fpn_images = FPN(self.cnn.res_groups, "fpn_rgb", is_training=self.is_training)

                        last_features_layer_image = fpn_images[2]

                        for i in range(self.params['res_blocks_image']):
                            last_features_layer_image = resblock(last_features_layer_image, 192, scope='fpn_res_'+str(i))

                        self.detection_layer = conv(last_features_layer_image, 2, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out')

                self.model_loss_img = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y_true_img, logits=self.detection_layer))
            
                head_only_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        "image_branch/image_head")

                self.opt_img = tf.train.AdamOptimizer(1e-3).minimize(self.model_loss_img, var_list=head_only_vars)

                img_only_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        "image_branch")

                self.opt_img_all = tf.train.AdamOptimizer(1e-4).minimize(self.model_loss_img, var_list=img_only_vars)


                self.equality = tf.where(self.y_true_img >= 0.5, tf.equal(tf.cast(tf.sigmoid(self.detection_layer) >= 0.5, tf.float32), self.y_true_img), tf.zeros_like(self.y_true_img, dtype=tf.bool))
                self.accuracy = tf.reduce_sum(tf.cast(self.equality, tf.float32)) / tf.cast(tf.count_nonzero(self.y_true_img), tf.float32)

                if self.params['use_fv']:
                    with tf.variable_scope("lidar_fv_branch"):
                        self.cnn_fv_lidar = ResNetBuilder().build(branch=self.CONST.FV_BRANCH, img_height=370, img_width=1224, img_channels=32)
                        self.cnn_fv_lidar.build_model(self.train_inputs_fv_lidar, is_training=self.is_training)
                        fpn_fv_lidar = FPN(self.cnn_fv_lidar.res_groups, "fpn_fv_lidar")
                        fpn_fv_lidar[2] = conv(fpn_fv_lidar[2], 192, kernel=1, stride=1, scope='post_fv_conv', reuse=False)

                self.debug_layers = {}

                with tf.variable_scope("lidar_branch"):
                    self.cnn_lidar = ResNetBuilder().build(branch=self.CONST.BEV_BRANCH, img_height=512, img_width=448, img_channels=32)
                    self.cnn_lidar.build_model(self.train_inputs_lidar, is_training=self.is_training)
                    x_temp = self.cnn_lidar.train_logits
                    
                if self.params['fusion']:
                    with tf.variable_scope('fusion'):
                        if self.params['use_fv']:
                            att = AttentionFusionLayerFunc(last_features_layer_image, fpn_fv_lidar[2], self.train_fusion_fv_lidar, x_temp, 'attention_fusion_3')
                            self.debug_layers['attention_output'] = att
                            x_new = tf.concat([att, x_temp], axis=-1)
                            with tf.variable_scope('post_fusion_conv'):
                                x_new = conv(x_new, 256, kernel=1, stride=1, scope='attention_fusion_3_post_conv', reuse=False)
                                x_new = batch_norm(x_new, is_training=self.is_training)
                                x_new = relu(x_new)
                                self.debug_layers['attention_module_output'] = x_new
                        else:
                            att = AttentionFusionLayerFunc(last_features_layer_image, None, None, x_temp, 'attention_fusion_3')
                            self.debug_layers['attention_output'] = att
                            x_new = tf.concat([att, x_temp], axis=-1)
                            with tf.variable_scope('post_fusion_conv'):
                                x_new = conv(x_new, 256, kernel=1, stride=1, scope='attention_fusion_3_post_conv', reuse=False)
                                x_new = batch_norm(x_new, is_training=self.is_training)
                                x_new = relu(x_new)
                                self.debug_layers['attention_module_output'] = x_new


                with tf.variable_scope("lidar_branch"):
                    if not self.params['fusion']:
                        x_new = x_temp
                    x_temp = tf.cond(self.train_fusion_rgb, lambda: x_new, lambda: x_temp)

                    self.cnn_lidar.res_groups.append(x_temp)
                    fpn_lidar = FPN(self.cnn_lidar.res_groups[:3], "fpn_lidar", is_training=self.is_training)
                    # fpn_lidar = FPN(self.cnn_lidar.res_groups[:], "fpn_lidar", is_training=self.is_training)

                    self.debug_layers['fpn_lidar'] = fpn_lidar
                    
                    fpn_lidar[0] = maxpool2d(fpn_lidar[0], scope='maxpool_fpn0')
                    fpn_lidar[2] = upsample(fpn_lidar[2], size=(2, 2), scope='fpn_upsample_1', use_deconv=True, kernel_size=4)
                    # fpn_lidar[3] = upsample(fpn_lidar[3], size=(4, 4), scope='fpn_upsample_2', use_deconv=True)

                    fpn_lidar = tf.concat(fpn_lidar[:], 3)
                 
                    self.debug_layers['fpn_lidar_output'] = fpn_lidar

                    num_conv_blocks = 4
                    for i in range(num_conv_blocks):
                        fpn_lidar = conv(fpn_lidar, 128, kernel=3, stride=1, padding='SAME', use_bias=True, scope='conv_post_fpn_'+str(i))
                        fpn_lidar = batch_norm(fpn_lidar, scope='bn_post_fpn_' + str(i))
                        fpn_lidar = relu(fpn_lidar)
                        self.debug_layers['fpn_lidar_output_post_conv_'+str(i)] = fpn_lidar

                    
                    if self.params['focal_loss']:
                        final_output_1_7 = conv(fpn_lidar, 8, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_1')
                        final_output_2_7 = conv(fpn_lidar, 8, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_2')
                        final_output_1_8 = conv(fpn_lidar, 1, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_1_8', focal_init=self.params['focal_init'])
                        final_output_2_8 = conv(fpn_lidar, 1, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_2_8', focal_init=self.params['focal_init'])

                        final_output_1 = tf.concat([final_output_1_7, final_output_1_8], -1)
                        final_output_2 = tf.concat([final_output_2_7, final_output_2_8], -1)
                    else:
                        final_output_1 = conv(fpn_lidar, 9, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_1')
                        final_output_2 = conv(fpn_lidar, 9, kernel=1, stride=1, padding='SAME', use_bias=True, scope='conv_out_2')
                    

                    final_output_1 = tf.expand_dims(final_output_1, 3)
                    final_output_2 = tf.expand_dims(final_output_2, 3)

                    self.debug_layers['final_layer'] = tf.concat([final_output_1, final_output_2], 3)

                    self.final_output = tf.concat([final_output_1, final_output_2], 3)

                    self.anchors = tf.placeholder(tf.float32, [None, 128, 112, 2, 6])

                    self.use_nms = tf.placeholder(tf.bool, shape=[])
                    self.final_output = tf.cond(self.use_nms, lambda: nms(self.final_output, 0.5), lambda: self.final_output)

                    # self.final_output = adjust_predictions(self.final_output, self.anchors)

                with tf.variable_scope('Loss'):
                        cls_loss_instance = ClsLoss('classification_loss')
                        reg_loss_instance = RegLoss('regression_loss')
                        loss_calculator = LossCalculator()
                        loss_params = {'focal_loss': self.params['focal_loss'], 'weight': self.params['weight_loss'], 'mse': self.params['mse_loss']}
                        self.classification_loss, self.loc_reg_loss, self.dim_reg_loss, self.theta_reg_loss, self.dir_reg_loss = loss_calculator(
                                                            self.y_true,
                                                            self.final_output, 
                                                            cls_loss_instance, 
                                                            reg_loss_instance,
                                                            **loss_params)
                        self.regression_loss = 100 * self.loc_reg_loss + 50 * self.dim_reg_loss + 500 * self.theta_reg_loss + 10 * self.dir_reg_loss
                        self.model_loss = 0
                        if self.params['train_cls']:
                            self.model_loss += self.classification_loss
                        if self.params['train_reg']:
                            self.model_loss += self.regression_loss
                     
                self.global_step = tf.Variable(0, name='global_step', trainable=False)

                self.lidar_only_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        "lidar_branch")
                self.decay_rate = tf.train.exponential_decay(self.params['lr'], self.global_step, self.params['decay_steps'], 
                                                            self.params['decay_rate'], self.params['staircase'])  

                self.learning_rate_placeholder = tf.placeholder(tf.float32, [], name='learning_rate')
                self.opt_lidar = tf.train.AdamOptimizer(self.learning_rate_placeholder)
                self.train_op_lidar = self.opt_lidar.minimize(self.model_loss,\
                                                                            var_list=self.lidar_only_vars,\
                                                                            global_step=self.global_step)
              

                if self.params['fusion']:
                    fusion_only_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                            "fusion")  
                    # fusion_only_vars.extend(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                    #                         "image_branch/fpn_rgb"))
                    self.train_op_fusion = tf.train.AdamOptimizer(1e-3).minimize(self.model_loss,\
                                                                                var_list=fusion_only_vars,\
                                                                                global_step=self.global_step)
                    if self.params['use_fv']:
                        fv_only_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                "lidar_fv_branch") 
                        fv_only_vars.extend(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                "fv_fusion"))
                        fv_only_vars.extend(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                                "fusion/post_fusion_conv"))
                        self.train_op_fv = tf.train.AdamOptimizer(1e-3).minimize(self.model_loss,\
                                                                                    var_list=fv_only_vars,\
                                                                                    global_step=self.global_step)

                else:
                    self.train_op_fusion = None


                self.train_op = tf.train.AdamOptimizer(1e-3).minimize(self.model_loss, global_step=self.global_step)


                self.saver = tf.train.Saver(max_to_keep=2)

                self.best_saver = tf.train.Saver(max_to_keep=2)

                self.lr_summary = tf.summary.scalar('learning_rate', tf.squeeze(self.decay_rate))
                self.model_loss_batches_summary = tf.summary.scalar('model_loss_batches', self.model_loss)
                self.cls_loss_batches_summary = tf.summary.scalar('classification_loss_batches', self.classification_loss)
                # self.cls_loss_2_batches_summary = tf.summary.scalar('classification_loss_2_batches', self.classification_loss_2)
                self.reg_loss_batches_summary = tf.summary.scalar('regression_loss_batches', self.regression_loss)
                self.loc_reg_loss_batches_summary = tf.summary.scalar('loc_regression_loss_batches', self.loc_reg_loss)
                self.dim_reg_loss_batches_summary = tf.summary.scalar('dim_regression_loss_batches', self.dim_reg_loss)
                self.theta_reg_loss_batches_summary = tf.summary.scalar('theta_regression_loss_batches', self.theta_reg_loss)
                self.dir_reg_loss_batches_summary = tf.summary.scalar('dir_regression_loss_batches', self.dir_reg_loss)
                # self.near_reg_loss_batches_summary = tf.summary.scalar('nearby_regression_loss_batches', self.near_regression_loss)

                self.merged = tf.summary.merge([self.lr_summary, self.model_loss_batches_summary, \
                                            self.cls_loss_batches_summary, self.reg_loss_batches_summary,\
                                            self.loc_reg_loss_batches_summary, self.dim_reg_loss_batches_summary,\
                                            self.theta_reg_loss_batches_summary, self.dir_reg_loss_batches_summary])

                
                self.model_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.model_loss_summary = tf.summary.scalar('model_loss', self.model_loss_placeholder)
                self.cls_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.cls_loss_summary = tf.summary.scalar('classification_loss', self.cls_loss_placeholder)
                self.reg_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.reg_loss_summary = tf.summary.scalar('regression_loss', self.reg_loss_placeholder)

                self.theta_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.theta_loss_summary = tf.summary.scalar('theta_loss', self.theta_loss_placeholder)
                self.dir_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.dir_loss_summary = tf.summary.scalar('dir_loss', self.dir_loss_placeholder)
                self.loc_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.loc_loss_summary = tf.summary.scalar('loc_loss', self.loc_loss_placeholder)
                self.dim_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.dim_loss_summary = tf.summary.scalar('dim_loss', self.dim_loss_placeholder)

                self.lr_summary2 = tf.summary.scalar('lr_ph', self.learning_rate_placeholder)



                self.images_summary_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
                self.images_summary = tf.summary.image('images', self.images_summary_placeholder)

                self.images_summary_fusion_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
                self.images_summary_fusion = tf.summary.image('images_fusion', self.images_summary_fusion_placeholder)

                self.images_summary_segmentation_cars_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, 24, 78, 1])
                self.images_summary_segmentation_cars = tf.summary.image('images_segmentation_cars', self.images_summary_segmentation_cars_placeholder)
                self.images_summary_segmentation_road_placeholder = tf.placeholder(dtype=tf.float32, shape=[None, 24, 78, 1])
                self.images_summary_segmentation_road = tf.summary.image('images_segmentation_road', self.images_summary_segmentation_road_placeholder)

                self.accuracy_image_summary_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.accuracy_image_summary = tf.summary.scalar('accuracy_image', self.accuracy_image_summary_placeholder)
                self.model_loss_image_summary_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
                self.model_loss_image_summary = tf.summary.scalar('model_loss_image', self.model_loss_image_summary_placeholder)

                self.train_writer = tf.summary.FileWriter('./training_files/train', self.graph)
                self.validation_writer = tf.summary.FileWriter('./training_files/test')
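
A sketch of driving this TF1 graph for one lidar-branch training step (hedged: `net` stands for a built instance of the surrounding class, the zero arrays are placeholders for a real batch, and if params['use_fv'] is enabled the front-view placeholders must be fed as well):

# One training step against the graph built above (illustrative feed values).
import numpy as np
import tensorflow as tf

sess = tf.Session(graph=net.graph)
with net.graph.as_default():
    sess.run(tf.global_variables_initializer())
feed = {
    net.train_inputs_rgb: np.zeros((1, 370, 1224, 3), np.float32),
    net.train_inputs_lidar: np.zeros((1, 512, 448, 32), np.float32),
    net.y_true: np.zeros((1, 128, 112, 2, 9), np.float32),
    net.y_true_img: np.zeros((1, 24, 78, 2), np.float32),
    net.is_training: True,
    net.train_fusion_rgb: False,
    net.use_nms: False,
    net.learning_rate_placeholder: 1e-4,
}
_, loss = sess.run([net.train_op_lidar, net.model_loss], feed_dict=feed)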
Example No. 4
    def __build_model(self):

        self.debug_layers = {}

        if self.graph is None:
            self.graph = tf.Graph()

        with self.graph.as_default():
            self.is_training = tf.placeholder(tf.bool,
                                              shape=(),
                                              name='is_training')

            # self.cls_training = tf.placeholder(tf.bool, shape=(), name='cls_training')
            # self.reg_training = tf.placeholder(tf.bool, shape=(), name='reg_training')

            img_size_1 = 370
            img_size_2 = 1224
            c_dim = 3
            self.train_inputs_rgb = tf.placeholder(
                tf.float32, [None, img_size_1, img_size_2, c_dim],
                name='train_inputs_rgb')

            img_size_1 = 512
            img_size_2 = 448
            c_dim = 41
            self.train_inputs_lidar = tf.placeholder(
                tf.float32, [None, img_size_1, img_size_2, c_dim],
                name='train_inputs_lidar')

            self.y_true = tf.placeholder(tf.float32,
                                         shape=(None, 128, 112, 2,
                                                9))  # target

            self.y_true_img = tf.placeholder(tf.float32,
                                             shape=(None, 24, 78, 2))  # target
            self.train_fusion_rgb = tf.placeholder(tf.bool, shape=())

            with tf.variable_scope("image_branch"):
                self.cnn = ResNetBuilder().build(
                    branch=self.CONST.IMAGE_BRANCH,
                    img_height=370,
                    img_width=1224,
                    img_channels=3)
                self.cnn.build_model(self.train_inputs_rgb,
                                     is_training=self.is_training)

            with tf.variable_scope("lidar_branch"):
                self.cnn_lidar = ResNetBuilder().build(
                    branch=self.CONST.BEV_BRANCH,
                    img_height=512,
                    img_width=448,
                    img_channels=32)
                self.cnn_lidar.build_model(self.train_inputs_lidar,
                                           is_training=self.is_training)

            if self.params['fusion']:
                self.cnn_lidar.res_groups2 = []
                self.cnn.res_groups2 = []
                with tf.variable_scope('fusion'):
                    kernels_lidar = [9, 5, 5]
                    strides_lidar = [5, 3, 3]
                    kernels_rgb = [7, 5, 5]
                    strides_rgb = [4, 3, 3]
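                    # Per-scale attention fusion: each iteration feeds the
                    # i-th residual group of the RGB and lidar backbones to
                    # AttentionFusionLayerFunc3; the kernel/stride lists
                    # above downsample each branch so the two feature maps
                    # reach matching spatial resolutions.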
                    for i in range(3):

                        att_lidar, att_rgb = AttentionFusionLayerFunc3(
                            self.cnn.res_groups[i],
                            None,
                            None,
                            self.cnn_lidar.res_groups[i],
                            'attention_fusion_' + str(i),
                            kernel_lidar=kernels_lidar[i],
                            kernel_rgb=kernels_rgb[i],
                            stride_lidar=strides_lidar[i],
                            stride_rgb=strides_rgb[i])

                        with tf.variable_scope('cond'):
                            with tf.variable_scope('cond_img'):
                                # self.cnn.res_groups[i] = tf.cond(self.train_fusion_rgb, lambda: att_rgb, lambda: self.cnn.res_groups[i])
                                self.cnn.res_groups2.append(
                                    tf.cond(self.train_fusion_rgb,
                                            lambda: att_rgb,
                                            lambda: self.cnn.res_groups[i]))
                            with tf.variable_scope('cond_lidar'):
                                # self.cnn_lidar.res_groups[i] = tf.cond(self.train_fusion_rgb, lambda: att_lidar, lambda: self.cnn_lidar.res_groups[i])
                                self.cnn_lidar.res_groups2.append(
                                    tf.cond(
                                        self.train_fusion_rgb,
                                        lambda: att_lidar,
                                        lambda: self.cnn_lidar.res_groups[i]))

                            self.debug_layers['attention_output_rgb_' +
                                              str(i)] = att_rgb
                            self.debug_layers['attention_output_lidar_' +
                                              str(i)] = att_lidar
            else:
                self.cnn_lidar.res_groups2 = self.cnn_lidar.res_groups
                self.cnn.res_groups2 = self.cnn.res_groups

            with tf.variable_scope("image_branch"):

                if self.params['fusion']:
                    self.cnn.res_groups2.append(self.cnn.res_groups[3])

                with tf.variable_scope("image_head"):
                    with tf.variable_scope("fpn"):
                        self.fpn_images = FPN(self.cnn.res_groups2,
                                              "fpn_rgb",
                                              is_training=self.is_training)

                    last_features_layer_image = self.fpn_images[2]

                    for i in range(self.params['res_blocks_image']):
                        last_features_layer_image = resblock(
                            last_features_layer_image,
                            192,
                            scope='fpn_res_' + str(i),
                            is_training=self.is_training)

                    self.detection_layer = conv(last_features_layer_image,
                                                2,
                                                kernel=1,
                                                stride=1,
                                                padding='SAME',
                                                use_bias=True,
                                                scope='conv_out')

            with tf.variable_scope("lidar_branch"):
                with tf.variable_scope("fpn"):
                    # fpn_lidar = FPN(self.cnn_lidar.res_groups2[:3], "fpn_lidar", is_training=self.is_training)

                    # self.debug_layers['fpn_lidar'] = fpn_lidar

                    # fpn_lidar[0] = maxpool2d(fpn_lidar[0], scope='maxpool_fpn0')
                    # fpn_lidar[2] = upsample(fpn_lidar[2], size=(2, 2), scope='fpn_upsample_1', use_deconv=True, kernel_size=4)
                    # # fpn_lidar[3] = upsample(fpn_lidar[3], size=(4, 4), scope='fpn_upsample_2', use_deconv=True)

                    # # fpn_lidar[0] = maxpool2d(fpn_lidar[0], scope='maxpool_fpn0')
                    # fpn_lidar[1] = upsample(fpn_lidar[1], size=(2, 2), scope='fpn_upsample_1', use_deconv=True, kernel_size=4)
                    # fpn_lidar[2] = upsample(fpn_lidar[2], size=(4, 4), scope='fpn_upsample_2', use_deconv=True, kernel_size=4)

                    # fpn_lidar = tf.concat(fpn_lidar[:], 3)

                    # self.debug_layers['fpn_lidar_output'] = fpn_lidar

                    # for i in range(1):
                    #     temp = conv(fpn_lidar, 128, kernel=3, stride=1, padding='SAME', use_bias=True, scope='conv_post_fpn_'+str(i))
                    #     temp = batch_norm(temp, is_training=self.is_training, scope='bn_post_fpn_' + str(i))
                    #     temp = relu(temp)
                    #     # temp = dropout(temp, rate=0.1, scope='dropout_post_fpn_0', training=self.is_training)
                    #     fpn_lidar = temp

                    fpn_lidar1 = self.cnn_lidar.train_logits

                    fpn_lidar2 = self.cnn_lidar.train_logits

                    num_conv_blocks = 2
                    for i in range(0, num_conv_blocks):
                        temp = conv(fpn_lidar1,
                                    128,
                                    kernel=3,
                                    stride=1,
                                    padding='SAME',
                                    use_bias=True,
                                    scope='conv_post_fpn_1_' + str(i))
                        temp = batch_norm(temp,
                                          scope='bn_post_fpn_1_' + str(i))
                        temp = relu(temp)
                        # fpn_lidar = fpn_lidar + temp
                        fpn_lidar1 = temp
                        # fpn_lidar = dropout(fpn_lidar, rate=0.3, scope='fpn_lidar_dropout_'+str(i))
                        self.debug_layers['fpn_lidar_output_post_conv_1_' +
                                          str(i)] = fpn_lidar1

                    num_conv_blocks = 2
                    for i in range(0, num_conv_blocks):
                        temp = conv(fpn_lidar2,
                                    128,
                                    kernel=3,
                                    stride=1,
                                    padding='SAME',
                                    use_bias=True,
                                    scope='conv_post_fpn_2_' + str(i))
                        temp = batch_norm(temp,
                                          scope='bn_post_fpn_2_' + str(i))
                        temp = relu(temp)
                        # fpn_lidar = fpn_lidar + temp
                        fpn_lidar2 = temp
                        # fpn_lidar = dropout(fpn_lidar, rate=0.3, scope='fpn_lidar_dropout_'+str(i))
                        self.debug_layers['fpn_lidar_output_post_conv_2_' +
                                          str(i)] = fpn_lidar2

                    if self.params['focal_loss']:
                        final_output_1_7 = conv(fpn_lidar1,
                                                8,
                                                kernel=1,
                                                stride=1,
                                                padding='SAME',
                                                use_bias=True,
                                                scope='conv_out_1')
                        final_output_2_7 = conv(fpn_lidar1,
                                                8,
                                                kernel=1,
                                                stride=1,
                                                padding='SAME',
                                                use_bias=True,
                                                scope='conv_out_2')

                        final_output_1_8 = conv(
                            fpn_lidar2,
                            1,
                            kernel=1,
                            stride=1,
                            padding='SAME',
                            use_bias=True,
                            scope='conv_out_1_8',
                            focal_init=self.params['focal_init'])
                        final_output_2_8 = conv(
                            fpn_lidar2,
                            1,
                            kernel=1,
                            stride=1,
                            padding='SAME',
                            use_bias=True,
                            scope='conv_out_2_8',
                            focal_init=self.params['focal_init'])

                        final_output_1 = tf.concat(
                            [final_output_1_7, final_output_1_8], -1)
                        final_output_2 = tf.concat(
                            [final_output_2_7, final_output_2_8], -1)
                    else:
                        # NOTE: the original referenced an undefined
                        # `fpn_lidar` here; `fpn_lidar1` from the block
                        # above appears to be the intended feature map.
                        final_output_1 = conv(fpn_lidar1,
                                              9,
                                              kernel=1,
                                              stride=1,
                                              padding='SAME',
                                              use_bias=True,
                                              scope='conv_out_1')
                        final_output_2 = conv(fpn_lidar1,
                                              9,
                                              kernel=1,
                                              stride=1,
                                              padding='SAME',
                                              use_bias=True,
                                              scope='conv_out_2')

                final_output_1 = tf.expand_dims(final_output_1, 3)
                final_output_2 = tf.expand_dims(final_output_2, 3)

                self.debug_layers['final_layer'] = tf.concat(
                    [final_output_1, final_output_2], 3)

                self.final_output = tf.concat([final_output_1, final_output_2],
                                              3)

                # self.anchors = tf.placeholder(tf.float32, [None, 128, 112, 2, 6])

                # self.use_nms = tf.placeholder(tf.bool, shape=[])
                # self.final_output = tf.cond(self.use_nms, lambda: nms(self.final_output, 0.5), lambda: self.final_output)

                ############################
                #  under lidar_branch scope
                ############################
                with tf.variable_scope("loss_weights"):
                    self.loc_weight = tf.get_variable(
                        'loc_weight',
                        shape=(),
                        initializer=tf.constant_initializer(1),
                        dtype=tf.float32)
                    self.dim_weight = tf.get_variable(
                        'dim_weight',
                        shape=(),
                        initializer=tf.constant_initializer(1),
                        dtype=tf.float32)
                    self.theta_weight = tf.get_variable(
                        'theta_weight',
                        shape=(),
                        initializer=tf.constant_initializer(1),
                        dtype=tf.float32)
                    self.cls_weight = tf.get_variable(
                        'cls_weight',
                        shape=(),
                        initializer=tf.constant_initializer(1),
                        dtype=tf.float32)

            with tf.variable_scope('Loss'):
                cls_loss_instance = ClsLoss('classification_loss')
                reg_loss_instance = RegLoss('regression_loss')
                loss_calculator = LossCalculator()
                loss_params = {
                    'focal_loss': self.params['focal_loss'],
                    'weight': self.params['weight_loss'],
                    'mse': self.params['mse_loss']
                }
                self.classification_loss, self.loc_reg_loss, self.dim_reg_loss,\
                            self.theta_reg_loss, self.dir_reg_loss,\
                            self.precision, self.recall, self.iou, self.iou_loc, self.iou_dim, self.theta_accuracy,\
                            self.recall_pos, self.recall_neg, self.iou_loc_x, self.iou_loc_y, self.iou_loc_z = loss_calculator(
                                                    self.y_true,
                                                    self.final_output,
                                                    cls_loss_instance,
                                                    reg_loss_instance,
                                                    **loss_params)

                # fusion
                # self.regression_loss_fusion = 0.5 * ((1-self.iou_loc)*(1-self.iou)) * tf.exp(-self.loc_weight) * self.loc_reg_loss + self.loc_weight +\
                #              ((1-self.iou_dim)*(1-self.iou)) * tf.exp(-self.dim_weight) * self.dim_reg_loss + self.dim_weight +\
                #               0.1 * ((1-self.theta_accuracy)**2) * tf.exp(-self.theta_weight) * self.theta_reg_loss + self.theta_weight
                #             #   + 10 * self.dir_reg_loss
                # self.model_loss_fusion = 0
                # if self.params['train_cls']:
                #     self.model_loss_fusion += (1-self.precision)*(1-self.recall) * tf.exp(-self.cls_weight) * self.classification_loss + self.cls_weight
                # if self.params['train_reg']:
                #     self.model_loss_fusion += self.regression_loss_fusion

                ## F1 SCORE
                # loc_ratios = np.array([2.4375, 1., 9.375 ])
                # self.iou_loc_weights = self.iou_loc_x * loc_ratios[0]/np.sum(loc_ratios) + self.iou_loc_y * loc_ratios[1]/np.sum(loc_ratios) + self.iou_loc_z * loc_ratios[2]/np.sum(loc_ratios)
                # self.regression_loss = ((1-self.iou_loc_weights)*(1-self.iou)) + \
                #              ((1-self.iou_dim)*(1-self.iou)) +\
                #                100 * self.theta_reg_loss
                #             #    + 10*self.dir_reg_loss
                # self.model_loss = 0
                # if self.params['train_cls']:
                #     self.model_loss += (self.recall_pos) + self.recall_neg
                # if self.params['train_reg']:
                #     self.model_loss += self.regression_loss
                ## end f1 score

                # self.model_loss += tf.cond(self.cls_training, lambda: self.recall_pos + self.recall_neg, lambda: tf.constant(0, dtype=tf.float32))
                # self.model_loss += tf.cond(self.reg_training, lambda: self.regression_loss, lambda: tf.constant(0, dtype=tf.float32))

                # WORKING - BEV

                # self.regression_loss_bev = 0
                # if self.params['train_loc'] == 1:
                #     self.regression_loss_bev += 100 * self.loc_reg_loss
                # if self.params['train_dim'] == 1:
                #     self.regression_loss_bev += 5 * self.dim_reg_loss
                # if self.params['train_theta'] == 1:
                #     self.regression_loss_bev += 1000 * self.theta_reg_loss
                # self.model_loss_bev = 0
                # if self.params['train_cls']:
                #     self.model_loss_bev +=  0.5 * self.classification_loss
                # if self.params['train_reg']:
                #     self.model_loss_bev +=  1 * self.regression_loss_bev
                # if self.params['train_dir'] == 1:
                #     self.model_loss_bev += 0.1 * self.dir_reg_loss

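                # Active BEV loss: each term is re-weighted by current
                # performance (localization by (2 - iou_loc - iou),
                # dimensions by (2 - iou_dim - iou), classification by
                # (2 - recall - precision)), so the components the model
                # currently handles worst dominate the total loss.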
                self.regression_loss_bev = 0
                if self.params['train_loc'] == 1:
                    self.regression_loss_bev += 1000 * (
                        2 - self.iou_loc - self.iou) * self.loc_reg_loss
                if self.params['train_dim'] == 1:
                    self.regression_loss_bev += 100 * (
                        2 - self.iou_dim - self.iou) * self.dim_reg_loss
                if self.params['train_theta'] == 1:
                    self.regression_loss_bev += 1000 * self.theta_reg_loss
                self.model_loss_bev = 0
                if self.params['train_cls']:
                    self.model_loss_bev += 50 * (
                        2 - self.recall -
                        self.precision) * self.classification_loss
                if self.params['train_reg']:
                    self.model_loss_bev += 1 * self.regression_loss_bev
                if self.params['train_dir'] == 1:
                    self.model_loss_bev += 0.1 * self.dir_reg_loss

                # self.regression_loss_bev = 0
                # if self.params['train_loc'] == 1:
                #     self.regression_loss_bev += (2 - self.iou_loc - self.iou) * 30 * self.loc_reg_loss
                # if self.params['train_dim'] == 1:
                #     self.regression_loss_bev += (2 - self.iou_dim - self.iou) * 50 * self.dim_reg_loss
                # if self.params['train_theta'] == 1:
                #     self.regression_loss_bev += 100 * self.theta_reg_loss
                # if self.params['train_dir'] == 1:
                #     self.regression_loss_bev += 1 * self.dir_reg_loss
                # self.model_loss_bev = 0
                # if self.params['train_cls']:
                #     self.model_loss_bev += 1 * (2 - self.recall - self.precision) * 1 * self.classification_loss
                # if self.params['train_reg']:
                #     self.model_loss_bev +=  5 * self.regression_loss_bev

                # self.regression_loss = tf.cond(self.train_fusion_rgb, lambda: self.regression_loss_fusion, lambda: self.regression_loss_bev)
                # self.model_loss = tf.cond(self.train_fusion_rgb, lambda: self.model_loss_fusion, lambda: self.model_loss_bev)

                self.regression_loss = self.regression_loss_bev
                self.model_loss = self.model_loss_bev

                # for end to end

                # self.regression_loss = 20 * self.loc_reg_loss + 15 * self.dim_reg_loss + 10 * self.theta_reg_loss + 0.1 * self.dir_reg_loss
                # self.model_loss = 0
                # if self.params['train_cls']:
                #     self.model_loss += 0.3 * self.classification_loss
                # if self.params['train_reg']:
                #     self.model_loss += self.regression_loss

                # self.regression_loss =  self.loc_reg_loss *((1-self.iou_loc)*(1-self.iou)) +\
                #                         5 * 0.1 * self.dim_reg_loss * ((1-self.iou_dim)*(1-self.iou)) +\
                #                         0.1 * self.theta_reg_loss * ((1-self.theta_accuracy)**2)
                # self.model_loss = 0
                # if self.params['train_cls']:
                #     self.model_loss +=  2e-1 * 0.1 * self.classification_loss * (1-self.precision)*(1-self.recall)
                # if self.params['train_reg']:
                #     self.model_loss += self.regression_loss

            self.model_loss_img = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.y_true_img, logits=self.detection_layer))
            head_only_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch/image_head")
            self.opt_img = tf.train.AdamOptimizer(1e-3).minimize(
                self.model_loss_img, var_list=head_only_vars)
            self.img_only_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch")
            self.img_only_vars.extend(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  "fusion/cond/cond_img"))
            self.opt_img_all = tf.train.AdamOptimizer(1e-4).minimize(
                self.model_loss_img, var_list=self.img_only_vars)

            self.equality = tf.where(
                self.y_true_img >= 0.5,
                tf.equal(
                    tf.cast(
                        tf.sigmoid(self.detection_layer) >= 0.5, tf.float32),
                    self.y_true_img),
                tf.zeros_like(self.y_true_img, dtype=tf.bool))
            self.accuracy = tf.reduce_sum(tf.cast(
                self.equality, tf.float32)) / tf.cast(
                    tf.count_nonzero(self.y_true_img), tf.float32)

            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            self.lidar_only_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "lidar_branch")
            self.lidar_only_vars.extend(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  "fusion/cond/cond_lidar"))
            self.decay_rate = tf.train.exponential_decay(
                self.params['lr'], self.global_step,
                self.params['decay_steps'], self.params['decay_rate'],
                self.params['staircase'])

            self.learning_rate_placeholder = tf.placeholder(
                tf.float32, [], name='learning_rate')
            self.opt_lidar = tf.train.AdamOptimizer(
                self.learning_rate_placeholder)
            self.train_op_lidar = self.opt_lidar.minimize(self.model_loss,\
                                                                        var_list=self.lidar_only_vars,\
                                                                        global_step=self.global_step)

            if self.params['fusion']:
                self.fusion_only_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "fusion")
                self.fusion_only_vars.extend(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      "image_branch/image_head/fpn"))
                self.fusion_only_vars.extend(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      "lidar_branch/fpn"))
                self.train_op_fusion = tf.train.AdamOptimizer(1e-3).minimize(self.model_loss,\
                                                                            var_list=self.fusion_only_vars,\
                                                                            global_step=self.global_step)

            else:
                self.train_op_fusion = None

            self.train_op = tf.train.AdamOptimizer(1e-3).minimize(
                self.model_loss, global_step=self.global_step)

            self.saver = tf.train.Saver(max_to_keep=1)

            self.best_saver = tf.train.Saver(max_to_keep=1)

            self.lr_summary = tf.summary.scalar('learning_rate',
                                                tf.squeeze(self.decay_rate))
            self.model_loss_batches_summary = tf.summary.scalar(
                'model_loss_batches', self.model_loss)
            self.cls_loss_batches_summary = tf.summary.scalar(
                'classification_loss_batches', self.classification_loss)
            self.reg_loss_batches_summary = tf.summary.scalar(
                'regression_loss_batches', self.regression_loss)
            self.loc_reg_loss_batches_summary = tf.summary.scalar(
                'loc_regression_loss_batches', self.loc_reg_loss)
            self.dim_reg_loss_batches_summary = tf.summary.scalar(
                'dim_regression_loss_batches', self.dim_reg_loss)
            self.theta_reg_loss_batches_summary = tf.summary.scalar(
                'theta_regression_loss_batches', self.theta_reg_loss)
            self.dir_reg_loss_batches_summary = tf.summary.scalar(
                'dir_regression_loss_batches', self.dir_reg_loss)

            self.precision_summary = tf.summary.scalar('precision_batches',
                                                       self.precision)
            self.recall_summary = tf.summary.scalar('recall_batches',
                                                    self.recall)

            self.iou_summary = tf.summary.scalar('iou_batches', self.iou)
            self.iou_loc_summary = tf.summary.scalar('iou_loc_batches',
                                                     self.iou_loc)
            self.iou_dim_summary = tf.summary.scalar('iou_dim_batches',
                                                     self.iou_dim)
            self.theta_accuracy_summary = tf.summary.scalar(
                'theta_accuracy_batches', self.theta_accuracy)

            self.cls_weight_summary = tf.summary.scalar(
                'cls_weight_summary', self.cls_weight)
            self.loc_weight_summary = tf.summary.scalar(
                'loc_weight_summary', self.loc_weight)
            self.dim_weight_summary = tf.summary.scalar(
                'dim_weight_summary', self.dim_weight)
            self.theta_weight_summary = tf.summary.scalar(
                'theta_weight_summary', self.theta_weight)

            self.recall_pos_summary = tf.summary.scalar(
                'recall_pos_summary', self.recall_pos)
            self.recall_neg_summary = tf.summary.scalar(
                'recall_neg_summary', self.recall_neg)

            # self.iou_loc_x_summary = tf.summary.scalar('iou_loc_x_summary', self.iou_loc_x)
            # self.iou_loc_y_summary = tf.summary.scalar('iou_loc_y_summary', self.iou_loc_y)
            # self.iou_loc_z_summary = tf.summary.scalar('iou_loc_z_summary', self.iou_loc_z)
            # self.iou_loc_weights_summary = tf.summary.scalar('iou_loc_weights_summary', self.iou_loc_weights)

            self.merged = tf.summary.merge([
                self.lr_summary, self.model_loss_batches_summary,
                self.cls_loss_batches_summary, self.reg_loss_batches_summary,
                self.loc_reg_loss_batches_summary, self.dim_reg_loss_batches_summary,
                self.theta_reg_loss_batches_summary, self.dir_reg_loss_batches_summary,
                self.precision_summary, self.recall_summary,
                self.iou_summary, self.iou_loc_summary, self.iou_dim_summary,
                self.theta_accuracy_summary,
                self.cls_weight_summary, self.loc_weight_summary,
                self.dim_weight_summary, self.theta_weight_summary,
                self.recall_pos_summary, self.recall_neg_summary,
                # self.iou_loc_x_summary, self.iou_loc_y_summary,
                # self.iou_loc_z_summary, self.iou_loc_weights_summary,
            ])

            # self.merged = tf.summary.merge([self.lr_summary, self.model_loss_batches_summary, \
            #                             self.cls_loss_batches_summary, self.reg_loss_batches_summary,\
            #                             self.loc_reg_loss_batches_summary, self.dim_reg_loss_batches_summary,\
            #                             self.theta_reg_loss_batches_summary, self.dir_reg_loss_batches_summary])

            self.model_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                         shape=[])
            self.model_loss_summary = tf.summary.scalar(
                'model_loss', self.model_loss_placeholder)
            self.cls_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                       shape=[])
            self.cls_loss_summary = tf.summary.scalar(
                'classification_loss', self.cls_loss_placeholder)
            self.reg_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                       shape=[])
            self.reg_loss_summary = tf.summary.scalar(
                'regression_loss', self.reg_loss_placeholder)

            self.theta_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                         shape=[])
            self.theta_loss_summary = tf.summary.scalar(
                'theta_loss', self.theta_loss_placeholder)
            self.dir_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                       shape=[])
            self.dir_loss_summary = tf.summary.scalar(
                'dir_loss', self.dir_loss_placeholder)
            self.loc_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                       shape=[])
            self.loc_loss_summary = tf.summary.scalar(
                'loc_loss', self.loc_loss_placeholder)
            self.dim_loss_placeholder = tf.placeholder(dtype=tf.float32,
                                                       shape=[])
            self.dim_loss_summary = tf.summary.scalar(
                'dim_loss', self.dim_loss_placeholder)

            self.lr_summary2 = tf.summary.scalar(
                'lr_ph', self.learning_rate_placeholder)

            self.images_summary_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[None, None, None, 3])
            self.images_summary = tf.summary.image(
                'images', self.images_summary_placeholder)

            self.images_summary_fusion_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[None, None, None, 3])
            self.images_summary_fusion = tf.summary.image(
                'images_fusion', self.images_summary_fusion_placeholder)

            self.images_summary_segmentation_cars_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[None, 24, 78, 1])
            self.images_summary_segmentation_cars = tf.summary.image(
                'images_segmentation_cars',
                self.images_summary_segmentation_cars_placeholder)
            self.images_summary_segmentation_road_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[None, 24, 78, 1])
            self.images_summary_segmentation_road = tf.summary.image(
                'images_segmentation_road',
                self.images_summary_segmentation_road_placeholder)

            self.accuracy_image_summary_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[])
            self.accuracy_image_summary = tf.summary.scalar(
                'accuracy_image', self.accuracy_image_summary_placeholder)
            self.model_loss_image_summary_placeholder = tf.placeholder(
                dtype=tf.float32, shape=[])
            self.model_loss_image_summary = tf.summary.scalar(
                'model_loss_image', self.model_loss_image_summary_placeholder)

            self.train_writer = tf.summary.FileWriter('./training_files/train',
                                                      self.graph)
            self.validation_writer = tf.summary.FileWriter(
                './training_files/test')
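
The separate optimizers built above (opt_img for the image head, opt_img_all for the whole image branch, train_op_lidar, train_op_fusion, train_op) each minimize over a different variable collection, which suggests staged training. A sketch (hedged: `net`, `sess`, and `feed` are assumed to be set up as in the sketch after Example No. 3, minus the use_nms and anchors entries, which this graph does not define; step counts are illustrative):

# Staged-training sketch using the per-stage optimizers defined above.
for _ in range(1000):                 # 1) image head only
    sess.run(net.opt_img, feed_dict=feed)
for _ in range(10000):                # 2) lidar branch only
    sess.run(net.train_op_lidar, feed_dict=feed)
if net.train_op_fusion is not None:   # 3) fusion variables only
    # (for this stage one would presumably feed train_fusion_rgb=True)
    for _ in range(5000):
        sess.run(net.train_op_fusion, feed_dict=feed)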
Example No. 5
def test():
    # vis = visdom.Visdom()
    # print(model_path)
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    if not torch.cuda.is_available():
        fcn_model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        fcn_model.load_state_dict(torch.load(model_path))
    # print(fcn_model)
    # fcn_model=torch.load(model_path)
    fcn_model = fcn_model.to(device)
    fcn_model.eval()

    if os.path.exists(TEST_RESULT):
        shutil.rmtree(TEST_RESULT)

    os.mkdir(TEST_RESULT)

    if os.path.exists(submit_path):
        shutil.rmtree(submit_path)

    os.mkdir(submit_path)
    for index, (bag, bag1, bag_msk, not_care, name,
                shape) in enumerate(test_dataloader):
        with torch.no_grad():
            bag = bag.to(device)
            bag1 = bag1.to(device)
            bag_msk = bag_msk.to(device)
            output = fcn_model(bag)
            output_np = output.cpu().detach().numpy().copy()  # (N, 2, H, W)
            output_np = output_np[0, 0, :, :]

            output1 = fcn_model(bag1)
            output_np1 = output1.cpu().detach().numpy().copy()  # (N, 2, H, W)
            output_np1 = output_np1[0, 0, :, :]
            output_np1 = cv2.resize(output_np1,
                                    (output_np.shape[1], output_np.shape[0]))

            # max-fuse the two scale predictions
            output2 = np.zeros((2, output_np.shape[0], output_np.shape[1]))
            output2[0, :, :] = output_np
            output2[1, :, :] = output_np1
            output_np = np.max(output2, axis=0)

            # output_np = np.argmin(output_np, axis=1)
            # output_np = np.squeeze(output_np[0, ...])
            bag_msk_np = bag_msk.cpu().detach().numpy().copy()  # (N, 2, H, W)
            bag_msk_np = np.argmin(bag_msk_np, axis=1)
            bag_msk_np = np.squeeze(bag_msk_np[0, ...])

            ind = np.where(output_np > 0.5)
            output_np = np.zeros(output_np.shape, dtype=np.uint8)
            output_np[ind] = 255

            # shape comes out of the dataloader as tensors; cast to int for cv2
            output_np = cv2.resize(output_np, (int(shape[1]), int(shape[0])))
            ret, thresh = cv2.threshold(output_np, 230, 255,
                                        cv2.THRESH_BINARY_INV)
            # OpenCV 4.x returns (contours, hierarchy)
            contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                                   cv2.CHAIN_APPROX_SIMPLE)
            bboxes = []
            for i, c in enumerate(contours):
                if i == 0:
                    # skip the outer border contour produced by the inversion
                    continue
                # drop tiny regions before fitting a rectangle
                area = cv2.contourArea(c)
                if area < 20.0:
                    continue
                # fit the minimum-area rotated rectangle
                rect = cv2.minAreaRect(c)
                # get the corner coordinates of that rectangle
                bbox = cv2.boxPoints(rect)
                bbox = bbox.astype('int32')
                bboxes.append(bbox.reshape(-1))

            bag_msk_np = 255 * bag_msk_np
            bag_msk_np = np.array(bag_msk_np, dtype="uint8")
            for bbox in bboxes:
                # draw each detected box on the mask for inspection
                cv2.polylines(bag_msk_np, [bbox.reshape(4, 2)], 1, 120)
            seq = []
            if bboxes:  # the list is never None; test for emptiness instead
                seq.extend([
                    ','.join([str(int(b)) for b in box]) + '\n'
                    for box in bboxes
                ])
            with open(
                    os.path.join(submit_path,
                                 'res_' + os.path.basename(name[0]) + '.txt'),
                    'w') as f:
                f.writelines(seq)
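A self-contained sketch of the box-extraction step used above, run on a synthetic mask so the threshold/findContours/minAreaRect chain can be checked in isolation; the 230 threshold and 20.0 area cutoff are taken from the snippet, everything else is illustrative:

import cv2
import numpy as np

mask = np.zeros((100, 100), dtype=np.uint8)
mask[30:60, 20:80] = 255  # one synthetic "detected" region

ret, thresh = cv2.threshold(mask, 230, 255, cv2.THRESH_BINARY_INV)
contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE,
                                       cv2.CHAIN_APPROX_SIMPLE)
for i, c in enumerate(contours):
    if i == 0:
        continue  # outer border contour created by the inversion
    if cv2.contourArea(c) < 20.0:
        continue
    bbox = cv2.boxPoints(cv2.minAreaRect(c)).astype('int32')
    print(','.join(str(int(b)) for b in bbox.reshape(-1)))  # 8 corner coords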
Exemplo n.º 6
0
    def load_fpn(self, cfg):
        fpn = FPN(cfg)
        self.model_list.append(fpn)
Exemplo n.º 7
0
def train(epo_num=50, show_vgg_params=False):

    #vis = visdom.Visdom()
    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)
    # res_model = resnet50(True)
    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    if not torch.cuda.is_available():
        fcn_model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        fcn_model.load_state_dict(torch.load(model_path))
    # optimizer = optim.Adam(net.parameters(), lr=lr)
    fcn_model = fcn_model.to(device)

    criterion = nn.BCELoss().to(device)

    # criterion = nn.BCEWithLogitsLoss().to(device)
    optimizer = optim.Adam(fcn_model.parameters(), lr=1e-4)
    # optimizer = optim.SGD(fcn_model.parameters(), lr=1e-2, momentum=0.7)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, [1,6,11], gamma=0.1, last_epoch=-1)

    all_train_iter_loss = []
    all_test_iter_loss = []

    if os.path.exists(TRAIN_RESULT):
        shutil.rmtree(TRAIN_RESULT)

    os.mkdir(TRAIN_RESULT)

    # start timing
    prev_time = datetime.now()
    for epo in range(epo_num):

        train_loss = 0
        fcn_model.train()
        for index, (bag, bag_msk, nc) in enumerate(train_dataloader):
            # bag.shape is torch.Size([4, 3, 160, 160])
            # bag_msk.shape is torch.Size([4, 2, 160, 160])

            bag = bag.to(device)
            bag_msk = bag_msk.to(device)
            nc = nc.to(device)

            optimizer.zero_grad()
            output = fcn_model(bag)
            output = torch.sigmoid(
                output)  # output.shape is torch.Size([4, 2, 160, 160])
            loss = criterion(output * (1 - nc), bag_msk * (1 - nc))
            loss.backward()
            iter_loss = loss.item()
            all_train_iter_loss.append(iter_loss)
            train_loss += iter_loss
            optimizer.step()

            output_np = output.cpu().detach().numpy().copy()  # (N, 2, H, W)
            output_np = np.argmin(output_np, axis=1)
            bag_msk_np = bag_msk.cpu().detach().numpy().copy()  # (N, 2, H, W)
            bag_msk_np = np.argmin(bag_msk_np, axis=1)

            if np.mod(index, 50) == 0:
                print('epoch {}, {}/{}, train loss is {}'.format(
                    epo, index, len(train_dataloader), iter_loss))
                cv2.imwrite(os.path.join(TRAIN_RESULT, str(index) + "_train.jpg"),
                            255 * np.squeeze(output_np[0, ...]))

        test_loss = 0
        fcn_model.eval()
        num_test = 0
        with torch.no_grad():
            for index, (bag, bag_msk, nc) in enumerate(test_dataloader):

                bag = bag.to(device)
                bag_msk = bag_msk.to(device)
                nc = nc.to(device)

                output = fcn_model(bag)
                output = torch.sigmoid(
                    output)  # output.shape is torch.Size([4, 2, 160, 160])
                loss = criterion(output * (1 - nc), bag_msk * (1 - nc))
                iter_loss = loss.item()

                test_loss += iter_loss
                num_test = index + 1
                output_np = output.cpu().detach().numpy().copy()  # (N, 2, H, W)
                output_np = np.argmin(output_np, axis=1)
                bag_msk_np = bag_msk.cpu().detach().numpy().copy()  # (N, 2, H, W)
                bag_msk_np = np.argmin(bag_msk_np, axis=1)

                if np.mod(index, 10) == 0:
                    cv2.imwrite(os.path.join(TRAIN_RESULT, str(index) + "_test.jpg"),
                                255 * np.squeeze(output_np[0, ...]))
        all_test_iter_loss.append(test_loss / num_test)

        cur_time = datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        prev_time = cur_time

        print('epoch train loss = %f, epoch test loss = %f, %s' %
              (train_loss / len(train_dataloader),
               test_loss / len(test_dataloader), time_str))

        draw_loss_plot(all_train_iter_loss, all_test_iter_loss)

        torch.save(fcn_model.state_dict(),
                   'model_fpn/fcn_{0}.model'.format(epo))
        print('saving model_fpn/fcn_{0}.model'.format(epo))
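A minimal sketch of the (1 - nc) "not care" masking used in the loss above, on dummy tensors, assuming nc is 1 at pixels to be ignored and 0 elsewhere:

import torch
import torch.nn as nn

criterion = nn.BCELoss()
output = torch.sigmoid(torch.randn(4, 2, 160, 160))
target = torch.randint(0, 2, (4, 2, 160, 160)).float()
nc = torch.zeros(4, 2, 160, 160)
nc[:, :, :8, :] = 1  # pretend the top rows are "don't care"

# both sides become 0 at ignored pixels, so each such BCE term is
# -log(1 - 0) = 0; the mean still divides by all pixels
loss = criterion(output * (1 - nc), target * (1 - nc))
print(loss.item())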
Exemplo n.º 8
0
def test():
    #vis = visdom.Visdom()
    # print(model_path)
    os.environ["CUDA_VISIBLE_DEVICES"] = '2'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    if not torch.cuda.is_available():
        fcn_model.load_state_dict(torch.load(model_path, map_location='cpu'))
    else:
        fcn_model.load_state_dict(torch.load(model_path))
    # print(fcn_model)
    # fcn_model=torch.load(model_path)
    fcn_model = fcn_model.to(device)
    fcn_model.eval()
    miou = 0
    num = 0
    if os.path.exists(TEST_RESULT):
        shutil.rmtree(TEST_RESULT)

    os.mkdir(TEST_RESULT)

    for index, (bag, bag1, bag_msk, not_care, name,
                shape) in enumerate(test_dataloader):
        with torch.no_grad():
            bag = bag.to(device)
            bag1 = bag1.to(device)
            bag_msk = bag_msk.to(device)
            output = fcn_model(bag)
            output_np = output.cpu().detach().numpy().copy()  # (N, 2, H, W)
            output_np = output_np[0, 0, :, :]

            output1 = fcn_model(bag1)
            output_np1 = output1.cpu().detach().numpy().copy()  # (N, 2, H, W)
            output_np1 = output_np1[0, 0, :, :]
            output_np1 = cv2.resize(output_np1,
                                    (output_np.shape[1], output_np.shape[0]))

            # max-fuse the two scale predictions
            output2 = np.zeros((2, output_np.shape[0], output_np.shape[1]))
            output2[0, :, :] = output_np
            output2[1, :, :] = output_np1
            output_np = np.max(output2, axis=0)

            bag_msk_np = bag_msk.cpu().detach().numpy().copy()  # (N, 2, H, W)
            bag_msk_np = np.argmin(bag_msk_np, axis=1)
            bag_msk_np = np.squeeze(bag_msk_np[0, ...])

            ind = np.where(output_np > 0.5)
            output_np = np.zeros(output_np.shape)
            output_np[ind] = 1

            # write into the directory created above
            cv2.imwrite(os.path.join(TEST_RESULT, name[0] + "_test.jpg"),
                        255 * output_np)
            cv2.imwrite(os.path.join(TEST_RESULT, name[0] + "_gt.jpg"),
                        255 * bag_msk_np)

            not_care = not_care.cpu().detach().numpy().copy()
            not_care = np.squeeze(not_care[0, ...])
            # zero out "don't care" pixels before computing IoU
            ind = np.where(not_care == 1)
            output_np[ind] = 0

            inter = np.sum(np.multiply(output_np, bag_msk_np))
            union = np.sum(output_np) + np.sum(bag_msk_np) - inter
            if union == 0:
                continue
            miou += inter / union
            num += 1
    # average over the images that contributed a valid IoU
    if num > 0:
        miou = miou / num
    print("MIOU is {}".format(miou))