def __init__(self, config):
    """Build the BERT encoder, two FPN modules, dropout and the classifier.

    Args:
        config: configuration object read through attribute access. The
            attributes used here are:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'.
            config.fc_hidden: input width of the final linear layer.
            config.num_classes: int number of output classes, e.g. 2.
            config.dropout: dropout probability, a float between 0 and 1.
    """
    super().__init__()

    self.num_classes = config.num_classes

    # Load the pretrained encoder and mark every one of its weights trainable.
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for weight in self.bert.parameters():
        weight.requires_grad = True

    # Two pyramids: one over four 256-channel maps, one over the
    # 128/128/128/70-channel maps.
    self.fpn = FPN([256, 256, 256, 256], 4)
    # NOTE(review): the original comment here said the CNN feature map has
    # 228 dimensions in total; that does not obviously match the channel
    # list below -- confirm against the FPN implementation.
    self.fpn_seq = FPN([128, 128, 128, 70], 4)

    self.dropout = nn.Dropout(config.dropout)
    # Classification head: fc_hidden features -> num_classes logits.
    self.fc1 = nn.Linear(config.fc_hidden, config.num_classes)
def __init__(self, name_scope, is_test=False):
    """Create the stem layers, six ``Make_layer`` stages and the FPN.

    Args:
        name_scope: string prefix; every sub-layer created here is named
            by appending a suffix to it.
        is_test: forwarded unchanged to the batch-norm layer, to every
            ``Make_layer`` stage and to the FPN.
    """
    super(ResNet, self).__init__(name_scope)

    # Stem: 7x7 stride-2 convolution, batch norm with ReLU, 2x2 max pooling.
    self.conv1 = fluid.dygraph.Conv2D(
        name_scope + "_conv1",
        num_filters=64,
        filter_size=7,
        stride=2,
        padding=3,
        dilation=1,
    )
    self.bn1 = fluid.dygraph.BatchNorm(
        name_scope + "_bn1", 64, act="relu", is_test=is_test
    )
    # The scope suffix has no leading underscore; kept as-is so existing
    # parameter names do not change.
    self.maxPooling = fluid.dygraph.Pool2D(
        name_scope + "maxpooling", pool_size=2, pool_stride=2, pool_type='max'
    )

    # (attribute name, second positional Make_layer argument, layernums, stride)
    stage_table = (
        ("block1", 64, 3, 1),
        ("block2", 128, 4, 2),
        ("block3", 256, 4, 2),
        ("block4", 256, 6, 2),
        ("block5", 256, 6, 2),
        ("block6", 256, 4, 2),
    )
    for attr_name, width, depth, step in stage_table:
        stage = Make_layer(
            name_scope + "_" + attr_name,
            width,
            layernums=depth,
            stride=step,
            is_test=is_test,
        )
        setattr(self, attr_name, stage)

    # The output of the last layer carries an activation function.
    self.fpn = FPN(name_scope + "_FPN", is_test=is_test)
def __build_model(self):
    """Assemble the complete TF1 training graph on ``self.graph``.

    Creates, in this order: input/target placeholders, the RGB image branch
    with its 2-channel detection head and two image-only optimizers, the
    optional front-view LiDAR branch, the BEV LiDAR branch, the optional
    attention fusion, the post-FPN convolutions and the two output heads,
    the losses, the LiDAR / fusion / front-view / all-variable train ops,
    two savers, all summaries and the two summary file writers.

    Keys read from ``self.params``: 'use_fv', 'res_blocks_image', 'fusion',
    'focal_loss', 'focal_init', 'weight_loss', 'mse_loss', 'train_cls',
    'train_reg', 'lr', 'decay_steps', 'decay_rate', 'staircase'.

    ``self.train_op_fusion`` and ``self.train_op_fv`` are always defined;
    each is ``None`` when the corresponding feature is disabled.

    NOTE(review): the nesting of the ``with tf.variable_scope`` blocks was
    reconstructed from a whitespace-collapsed source. Confirm it against the
    variable names stored in existing checkpoints before merging.
    """
    if self.graph is None:
        self.graph = tf.Graph()

    use_fv = self.params['use_fv']
    fusion_enabled = self.params['fusion']

    with self.graph.as_default():
        self.is_training = tf.placeholder(tf.bool, shape=(), name='is_training')

        # ------------------------------------------------------------------
        # Inputs and targets
        # ------------------------------------------------------------------
        self.train_inputs_rgb = tf.placeholder(
            tf.float32, [None, 370, 1224, 3], name='train_inputs_rgb')
        if use_fv:
            # NOTE(review): this placeholder has 64 channels while the
            # front-view backbone below is built with img_channels=32 --
            # confirm which one is intended.
            self.train_inputs_fv_lidar = tf.placeholder(
                tf.float32, [None, 370, 1224, 64], name='train_inputs_fv_lidar')
        self.train_inputs_lidar = tf.placeholder(
            tf.float32, [None, 512, 448, 32], name='train_inputs_lidar')

        self.label_weights = tf.placeholder(tf.float32, shape=(None, 128, 112, 2, 1))
        self.y_true = tf.placeholder(tf.float32, shape=(None, 128, 112, 2, 9))  # target
        self.y_true_img = tf.placeholder(tf.float32, shape=(None, 24, 78, 2))  # target
        self.Tr_velo_to_cam = tf.placeholder(tf.float32, shape=(None, 4, 4))
        self.R0_rect = tf.placeholder(tf.float32, shape=(None, 4, 4))
        self.P3 = tf.placeholder(tf.float32, shape=(None, 3, 4))
        self.shift_h = tf.placeholder(tf.float32, shape=[None, 1])
        self.shift_w = tf.placeholder(tf.float32, shape=[None, 1])

        # Boolean switches fed at run time to select fused or plain features.
        self.train_fusion_rgb = tf.placeholder(tf.bool, shape=())
        if use_fv:
            self.train_fusion_fv_lidar = tf.placeholder(tf.bool, shape=())

        # ------------------------------------------------------------------
        # Image branch and its head
        # ------------------------------------------------------------------
        with tf.variable_scope("image_branch"):
            self.cnn = ResNetBuilder().build(
                branch=self.CONST.IMAGE_BRANCH,
                img_height=370, img_width=1224, img_channels=3)
            self.cnn.build_model(self.train_inputs_rgb, is_training=self.is_training)

            with tf.variable_scope("image_head"):
                fpn_images = FPN(self.cnn.res_groups, "fpn_rgb",
                                 is_training=self.is_training)
                last_features_layer_image = fpn_images[2]
                for i in range(self.params['res_blocks_image']):
                    last_features_layer_image = resblock(
                        last_features_layer_image, 192, scope='fpn_res_' + str(i))
                self.detection_layer = conv(
                    last_features_layer_image, 2, kernel=1, stride=1,
                    padding='SAME', use_bias=True, scope='conv_out')

        self.model_loss_img = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.y_true_img, logits=self.detection_layer))

        # Two optimizers for the image loss: head only, and whole image branch.
        head_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch/image_head")
        self.opt_img = tf.train.AdamOptimizer(1e-3).minimize(
            self.model_loss_img, var_list=head_only_vars)
        img_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch")
        self.opt_img_all = tf.train.AdamOptimizer(1e-4).minimize(
            self.model_loss_img, var_list=img_only_vars)

        # Accuracy restricted to positions whose label is >= 0.5: everything
        # else contributes False, and the sum of hits is divided by the
        # number of non-zero labels.
        self.equality = tf.where(
            self.y_true_img >= 0.5,
            tf.equal(
                tf.cast(tf.sigmoid(self.detection_layer) >= 0.5, tf.float32),
                self.y_true_img),
            tf.zeros_like(self.y_true_img, dtype=tf.bool))
        self.accuracy = (
            tf.reduce_sum(tf.cast(self.equality, tf.float32))
            / tf.cast(tf.count_nonzero(self.y_true_img), tf.float32))

        # ------------------------------------------------------------------
        # Optional front-view LiDAR branch
        # ------------------------------------------------------------------
        if use_fv:
            with tf.variable_scope("lidar_fv_branch"):
                self.cnn_fv_lidar = ResNetBuilder().build(
                    branch=self.CONST.FV_BRANCH,
                    img_height=370, img_width=1224, img_channels=32)
                self.cnn_fv_lidar.build_model(
                    self.train_inputs_fv_lidar, is_training=self.is_training)
                # NOTE(review): unlike the other FPN calls, is_training is
                # not passed here -- confirm the helper's default is right.
                fpn_fv_lidar = FPN(self.cnn_fv_lidar.res_groups, "fpn_fv_lidar")
                fpn_fv_lidar[2] = conv(
                    fpn_fv_lidar[2], 192, kernel=1, stride=1,
                    scope='post_fv_conv', reuse=False)

        self.debug_layers = {}

        # ------------------------------------------------------------------
        # BEV LiDAR backbone
        # ------------------------------------------------------------------
        with tf.variable_scope("lidar_branch"):
            self.cnn_lidar = ResNetBuilder().build(
                branch=self.CONST.BEV_BRANCH,
                img_height=512, img_width=448, img_channels=32)
            self.cnn_lidar.build_model(
                self.train_inputs_lidar, is_training=self.is_training)
            x_temp = self.cnn_lidar.train_logits

        # ------------------------------------------------------------------
        # Attention fusion (optional)
        # ------------------------------------------------------------------
        if fusion_enabled:
            with tf.variable_scope('fusion'):
                # The front-view inputs are the only difference between the
                # fused configurations; without them both slots are None.
                if use_fv:
                    fv_features = fpn_fv_lidar[2]
                    fv_switch = self.train_fusion_fv_lidar
                else:
                    fv_features = None
                    fv_switch = None

                att = AttentionFusionLayerFunc(
                    last_features_layer_image, fv_features, fv_switch,
                    x_temp, 'attention_fusion_3')
                self.debug_layers['attention_output'] = att

                x_new = tf.concat([att, x_temp], axis=-1)
                with tf.variable_scope('post_fusion_conv'):
                    x_new = conv(x_new, 256, kernel=1, stride=1,
                                 scope='atention_fusion_3_post_conv', reuse=False)
                    x_new = batch_norm(x_new, is_training=self.is_training)
                    x_new = relu(x_new)
                    self.debug_layers['attention_module_output'] = x_new

        # ------------------------------------------------------------------
        # LiDAR FPN, post-FPN convolutions and output heads
        # ------------------------------------------------------------------
        with tf.variable_scope("lidar_branch"):
            if not fusion_enabled:
                x_new = x_temp

            # Run-time choice between the fused and the plain LiDAR features.
            x_temp = tf.cond(self.train_fusion_rgb, lambda: x_new, lambda: x_temp)
            self.cnn_lidar.res_groups.append(x_temp)

            # NOTE(review): only the first three groups feed the FPN, so the
            # tensor appended above is used only if it lands at index < 3.
            fpn_lidar = FPN(self.cnn_lidar.res_groups[:3], "fpn_lidar",
                            is_training=self.is_training)
            # Stores the list object itself; the in-place edits below are
            # therefore visible through debug_layers['fpn_lidar'] as well.
            self.debug_layers['fpn_lidar'] = fpn_lidar

            # Pool level 0 and upsample level 2 before concatenating all
            # levels along axis 3.
            fpn_lidar[0] = maxpool2d(fpn_lidar[0], scope='maxpool_fpn0')
            fpn_lidar[2] = upsample(fpn_lidar[2], size=(2, 2), scope='fpn_upsample_1',
                                    use_deconv=True, kernel_size=4)
            fpn_lidar = tf.concat(fpn_lidar[:], 3)
            self.debug_layers['fpn_lidar_output'] = fpn_lidar

            num_conv_blocks = 4
            for i in range(num_conv_blocks):
                fpn_lidar = conv(fpn_lidar, 128, kernel=3, stride=1, padding='SAME',
                                 use_bias=True, scope='conv_post_fpn_' + str(i))
                # NOTE(review): is_training is not passed to batch_norm here
                # (it is in the fusion block) -- confirm this is intended.
                fpn_lidar = batch_norm(fpn_lidar, scope='bn_post_fpn_' + str(i))
                fpn_lidar = relu(fpn_lidar)
                self.debug_layers['fpn_lidar_output_post_conv_' + str(i)] = fpn_lidar

            if self.params['focal_loss']:
                # 8 channels plus a separate 1-channel output (created with
                # focal_init) per head, concatenated to 9 channels.
                final_output_1_7 = conv(fpn_lidar, 8, kernel=1, stride=1, padding='SAME',
                                        use_bias=True, scope='conv_out_1')
                final_output_2_7 = conv(fpn_lidar, 8, kernel=1, stride=1, padding='SAME',
                                        use_bias=True, scope='conv_out_2')
                final_output_1_8 = conv(fpn_lidar, 1, kernel=1, stride=1, padding='SAME',
                                        use_bias=True, scope='conv_out_1_8',
                                        focal_init=self.params['focal_init'])
                final_output_2_8 = conv(fpn_lidar, 1, kernel=1, stride=1, padding='SAME',
                                        use_bias=True, scope='conv_out_2_8',
                                        focal_init=self.params['focal_init'])
                final_output_1 = tf.concat([final_output_1_7, final_output_1_8], -1)
                final_output_2 = tf.concat([final_output_2_7, final_output_2_8], -1)
            else:
                final_output_1 = conv(fpn_lidar, 9, kernel=1, stride=1, padding='SAME',
                                      use_bias=True, scope='conv_out_1')
                final_output_2 = conv(fpn_lidar, 9, kernel=1, stride=1, padding='SAME',
                                      use_bias=True, scope='conv_out_2')

            # Insert a new axis 3 and stack the two heads along it.
            final_output_1 = tf.expand_dims(final_output_1, 3)
            final_output_2 = tf.expand_dims(final_output_2, 3)
            self.debug_layers['final_layer'] = tf.concat(
                [final_output_1, final_output_2], 3)
            self.final_output = tf.concat([final_output_1, final_output_2], 3)

            self.anchors = tf.placeholder(tf.float32, [None, 128, 112, 2, 6])
            self.use_nms = tf.placeholder(tf.bool, shape=[])
            self.final_output = tf.cond(
                self.use_nms,
                lambda: nms(self.final_output, 0.5),
                lambda: self.final_output)

        # ------------------------------------------------------------------
        # Losses
        # ------------------------------------------------------------------
        with tf.variable_scope('Loss'):
            cls_loss_instance = ClsLoss('classification_loss')
            reg_loss_instance = RegLoss('regression_loss')
            loss_calculator = LossCalculator()
            loss_params = {
                'focal_loss': self.params['focal_loss'],
                'weight': self.params['weight_loss'],
                'mse': self.params['mse_loss'],
            }
            (self.classification_loss,
             self.loc_reg_loss,
             self.dim_reg_loss,
             self.theta_reg_loss,
             self.dir_reg_loss) = loss_calculator(
                 self.y_true, self.final_output,
                 cls_loss_instance, reg_loss_instance, **loss_params)

        self.regression_loss = (100 * self.loc_reg_loss + 50 * self.dim_reg_loss
                                + 500 * self.theta_reg_loss + 10 * self.dir_reg_loss)

        # If neither flag is set the loss stays the Python int 0.
        # NOTE(review): minimize() below presumably cannot handle that case.
        self.model_loss = 0
        if self.params['train_cls']:
            self.model_loss += self.classification_loss
        if self.params['train_reg']:
            self.model_loss += self.regression_loss

        # ------------------------------------------------------------------
        # Optimizers
        # ------------------------------------------------------------------
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.lidar_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "lidar_branch")
        # The decayed rate is only written to the 'learning_rate' summary;
        # the LiDAR optimizer takes its rate from the placeholder below.
        self.decay_rate = tf.train.exponential_decay(
            self.params['lr'], self.global_step, self.params['decay_steps'],
            self.params['decay_rate'], self.params['staircase'])
        self.learning_rate_placeholder = tf.placeholder(
            tf.float32, [], name='learning_rate')
        self.opt_lidar = tf.train.AdamOptimizer(self.learning_rate_placeholder)
        self.train_op_lidar = self.opt_lidar.minimize(
            self.model_loss,
            var_list=self.lidar_only_vars,
            global_step=self.global_step)

        # Both optional train ops always exist so callers can test for None
        # instead of hitting AttributeError.
        self.train_op_fusion = None
        self.train_op_fv = None
        if fusion_enabled:
            fusion_only_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "fusion")
            self.train_op_fusion = tf.train.AdamOptimizer(1e-3).minimize(
                self.model_loss,
                var_list=fusion_only_vars,
                global_step=self.global_step)

            if use_fv:
                fv_only_vars = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "lidar_fv_branch")
                fv_only_vars.extend(tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "fv_fusion"))
                fv_only_vars.extend(tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, "fusion/post_fusion_conv"))
                self.train_op_fv = tf.train.AdamOptimizer(1e-3).minimize(
                    self.model_loss,
                    var_list=fv_only_vars,
                    global_step=self.global_step)

        # End-to-end op over every trainable variable.
        self.train_op = tf.train.AdamOptimizer(1e-3).minimize(
            self.model_loss, global_step=self.global_step)

        self.saver = tf.train.Saver(max_to_keep=2)
        self.best_saver = tf.train.Saver(max_to_keep=2)

        # ------------------------------------------------------------------
        # Per-batch summaries (computed from graph tensors)
        # ------------------------------------------------------------------
        self.lr_summary = tf.summary.scalar(
            'learning_rate', tf.squeeze(self.decay_rate))
        self.model_loss_batches_summary = tf.summary.scalar(
            'model_loss_batches', self.model_loss)
        self.cls_loss_batches_summary = tf.summary.scalar(
            'classification_loss_batches', self.classification_loss)
        self.reg_loss_batches_summary = tf.summary.scalar(
            'regression_loss_batches', self.regression_loss)
        self.loc_reg_loss_batches_summary = tf.summary.scalar(
            'loc_regression_loss_batches', self.loc_reg_loss)
        self.dim_reg_loss_batches_summary = tf.summary.scalar(
            'dim_regression_loss_batches', self.dim_reg_loss)
        self.theta_reg_loss_batches_summary = tf.summary.scalar(
            'theta_regression_loss_batches', self.theta_reg_loss)
        self.dir_reg_loss_batches_summary = tf.summary.scalar(
            'dir_regression_loss_batches', self.dir_reg_loss)

        self.merged = tf.summary.merge([
            self.lr_summary,
            self.model_loss_batches_summary,
            self.cls_loss_batches_summary,
            self.reg_loss_batches_summary,
            self.loc_reg_loss_batches_summary,
            self.dim_reg_loss_batches_summary,
            self.theta_reg_loss_batches_summary,
            self.dir_reg_loss_batches_summary,
        ])

        # ------------------------------------------------------------------
        # Summaries fed through placeholders
        # ------------------------------------------------------------------
        self.model_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.model_loss_summary = tf.summary.scalar(
            'model_loss', self.model_loss_placeholder)
        self.cls_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.cls_loss_summary = tf.summary.scalar(
            'classification_loss', self.cls_loss_placeholder)
        self.reg_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.reg_loss_summary = tf.summary.scalar(
            'regression_loss', self.reg_loss_placeholder)
        self.theta_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.theta_loss_summary = tf.summary.scalar(
            'theta_loss', self.theta_loss_placeholder)
        self.dir_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.dir_loss_summary = tf.summary.scalar(
            'dir_loss', self.dir_loss_placeholder)
        self.loc_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.loc_loss_summary = tf.summary.scalar(
            'loc_loss', self.loc_loss_placeholder)
        self.dim_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.dim_loss_summary = tf.summary.scalar(
            'dim_loss', self.dim_loss_placeholder)
        self.lr_summary2 = tf.summary.scalar('lr_ph', self.learning_rate_placeholder)

        self.images_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, None, None, 3])
        self.images_summary = tf.summary.image(
            'images', self.images_summary_placeholder)
        self.images_summary_fusion_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, None, None, 3])
        self.images_summary_fusion = tf.summary.image(
            'images_fusion', self.images_summary_fusion_placeholder)
        self.images_summary_segmentation_cars_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, 24, 78, 1])
        # The tag spelling is kept as-is so existing event files stay comparable.
        self.images_summary_segmentation_cars = tf.summary.image(
            'images_segmantation_cars',
            self.images_summary_segmentation_cars_placeholder)
        self.images_summary_segmentation_road_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, 24, 78, 1])
        self.images_summary_segmentation_road = tf.summary.image(
            'images_segmentation_road',
            self.images_summary_segmentation_road_placeholder)

        self.accuracy_image_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[])
        self.accuracy_image_summary = tf.summary.scalar(
            'accuracy_image', self.accuracy_image_summary_placeholder)
        self.model_loss_image_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[])
        self.model_loss_image_summary = tf.summary.scalar(
            'model_loss_image', self.model_loss_image_summary_placeholder)

        # Only the training writer receives the graph definition.
        self.train_writer = tf.summary.FileWriter('./training_files/train', self.graph)
        self.validation_writer = tf.summary.FileWriter('./training_files/test')
def __build_model(self):
    """Assemble the complete TF1 training graph on ``self.graph``.

    Creates, in this order: input/target placeholders, the image and BEV
    LiDAR backbones, the optional per-level attention fusion, the image FPN
    and detection head, two LiDAR convolution towers with the output heads,
    four scalar loss-weight variables, the losses and metrics, the image /
    LiDAR / fusion / all-variable train ops, two savers, all summaries and
    the two summary file writers.

    Keys read from ``self.params``: 'fusion', 'res_blocks_image',
    'focal_loss', 'focal_init', 'weight_loss', 'mse_loss', 'train_loc',
    'train_dim', 'train_theta', 'train_cls', 'train_reg', 'train_dir',
    'lr', 'decay_steps', 'decay_rate', 'staircase'.

    NOTE(review): the nesting of the ``with tf.variable_scope`` blocks was
    reconstructed from a whitespace-collapsed source. Confirm it against the
    variable names stored in existing checkpoints before merging.
    """
    self.debug_layers = {}
    if self.graph is None:
        self.graph = tf.Graph()

    fusion_enabled = self.params['fusion']

    with self.graph.as_default():
        self.is_training = tf.placeholder(tf.bool, shape=(), name='is_training')

        # ------------------------------------------------------------------
        # Inputs and targets
        # ------------------------------------------------------------------
        self.train_inputs_rgb = tf.placeholder(
            tf.float32, [None, 370, 1224, 3], name='train_inputs_rgb')
        # NOTE(review): this placeholder has 41 channels while the LiDAR
        # backbone below is built with img_channels=32 -- confirm which one
        # is intended.
        self.train_inputs_lidar = tf.placeholder(
            tf.float32, [None, 512, 448, 41], name='train_inputs_lidar')
        self.y_true = tf.placeholder(tf.float32, shape=(None, 128, 112, 2, 9))  # target
        self.y_true_img = tf.placeholder(tf.float32, shape=(None, 24, 78, 2))  # target
        # Boolean switch fed at run time to select fused or plain features.
        self.train_fusion_rgb = tf.placeholder(tf.bool, shape=())

        # ------------------------------------------------------------------
        # Backbones
        # ------------------------------------------------------------------
        with tf.variable_scope("image_branch"):
            self.cnn = ResNetBuilder().build(
                branch=self.CONST.IMAGE_BRANCH,
                img_height=370, img_width=1224, img_channels=3)
            self.cnn.build_model(self.train_inputs_rgb, is_training=self.is_training)

        with tf.variable_scope("lidar_branch"):
            self.cnn_lidar = ResNetBuilder().build(
                branch=self.CONST.BEV_BRANCH,
                img_height=512, img_width=448, img_channels=32)
            self.cnn_lidar.build_model(
                self.train_inputs_lidar, is_training=self.is_training)

        # ------------------------------------------------------------------
        # Per-level attention fusion (optional)
        # ------------------------------------------------------------------
        if fusion_enabled:
            # res_groups2 holds, per level, a run-time choice between the
            # attention output and the untouched backbone feature.
            self.cnn_lidar.res_groups2 = []
            self.cnn.res_groups2 = []
            with tf.variable_scope('fusion'):
                kernels_lidar = [9, 5, 5]
                strides_lidar = [5, 3, 3]
                kernels_rgb = [7, 5, 5]
                strides_rgb = [4, 3, 3]
                for i in range(3):
                    att_lidar, att_rgb = AttentionFusionLayerFunc3(
                        self.cnn.res_groups[i], None, None,
                        self.cnn_lidar.res_groups[i],
                        'attention_fusion_' + str(i),
                        kernel_lidar=kernels_lidar[i],
                        kernel_rgb=kernels_rgb[i],
                        stride_lidar=strides_lidar[i],
                        stride_rgb=strides_rgb[i])

                    with tf.variable_scope('cond'):
                        with tf.variable_scope('cond_img'):
                            self.cnn.res_groups2.append(
                                tf.cond(self.train_fusion_rgb,
                                        lambda: att_rgb,
                                        lambda: self.cnn.res_groups[i]))
                        with tf.variable_scope('cond_lidar'):
                            self.cnn_lidar.res_groups2.append(
                                tf.cond(self.train_fusion_rgb,
                                        lambda: att_lidar,
                                        lambda: self.cnn_lidar.res_groups[i]))

                    self.debug_layers['attention_output_rgb_' + str(i)] = att_rgb
                    self.debug_layers['attention_output_lidar_' + str(i)] = att_lidar
        else:
            # No fusion: res_groups2 is the same list object as res_groups.
            self.cnn_lidar.res_groups2 = self.cnn_lidar.res_groups
            self.cnn.res_groups2 = self.cnn.res_groups

        # ------------------------------------------------------------------
        # Image FPN and detection head
        # ------------------------------------------------------------------
        with tf.variable_scope("image_branch"):
            if fusion_enabled:
                # Only levels 0-2 were fused; pass level 3 through unchanged.
                self.cnn.res_groups2.append(self.cnn.res_groups[3])

            with tf.variable_scope("image_head"):
                with tf.variable_scope("fpn"):
                    self.fpn_images = FPN(self.cnn.res_groups2, "fpn_rgb",
                                          is_training=self.is_training)
                last_features_layer_image = self.fpn_images[2]
                for i in range(self.params['res_blocks_image']):
                    last_features_layer_image = resblock(
                        last_features_layer_image, 192,
                        scope='fpn_res_' + str(i),
                        is_training=self.is_training)
                self.detection_layer = conv(
                    last_features_layer_image, 2, kernel=1, stride=1,
                    padding='SAME', use_bias=True, scope='conv_out')

        # ------------------------------------------------------------------
        # LiDAR towers and output heads
        # ------------------------------------------------------------------
        with tf.variable_scope("lidar_branch"):
            with tf.variable_scope("fpn"):
                # Two parallel conv towers on the backbone logits: tower 1
                # feeds the 8-channel outputs, tower 2 the 1-channel outputs.
                fpn_lidar1 = self.cnn_lidar.train_logits
                fpn_lidar2 = self.cnn_lidar.train_logits

                # NOTE(review): is_training is not passed to batch_norm in
                # either tower -- confirm the helper's default is intended.
                num_conv_blocks = 2
                for i in range(0, num_conv_blocks):
                    temp = conv(fpn_lidar1, 128, kernel=3, stride=1, padding='SAME',
                                use_bias=True, scope='conv_post_fpn_1_' + str(i))
                    temp = batch_norm(temp, scope='bn_post_fpn_1_' + str(i))
                    temp = relu(temp)
                    fpn_lidar1 = temp
                    self.debug_layers['fpn_lidar_output_post_conv_1_' + str(i)] = fpn_lidar1

                num_conv_blocks = 2
                for i in range(0, num_conv_blocks):
                    temp = conv(fpn_lidar2, 128, kernel=3, stride=1, padding='SAME',
                                use_bias=True, scope='conv_post_fpn_2_' + str(i))
                    temp = batch_norm(temp, scope='bn_post_fpn_2_' + str(i))
                    temp = relu(temp)
                    fpn_lidar2 = temp
                    self.debug_layers['fpn_lidar_output_post_conv_2_' + str(i)] = fpn_lidar2

                if self.params['focal_loss']:
                    final_output_1_7 = conv(
                        fpn_lidar1, 8, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_1')
                    final_output_2_7 = conv(
                        fpn_lidar1, 8, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_2')
                    final_output_1_8 = conv(
                        fpn_lidar2, 1, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_1_8',
                        focal_init=self.params['focal_init'])
                    final_output_2_8 = conv(
                        fpn_lidar2, 1, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_2_8',
                        focal_init=self.params['focal_init'])
                    final_output_1 = tf.concat(
                        [final_output_1_7, final_output_1_8], -1)
                    final_output_2 = tf.concat(
                        [final_output_2_7, final_output_2_8], -1)
                else:
                    # This branch used to read ``fpn_lidar``, a name that is
                    # never assigned in this method, so it raised NameError.
                    # NOTE(review): both 9-channel heads now read tower 1,
                    # the input 'conv_out_1'/'conv_out_2' get in the focal
                    # branch; tower 2 is unused in this mode. Confirm this
                    # is the intended wiring.
                    final_output_1 = conv(
                        fpn_lidar1, 9, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_1')
                    final_output_2 = conv(
                        fpn_lidar1, 9, kernel=1, stride=1, padding='SAME',
                        use_bias=True, scope='conv_out_2')

                # Insert a new axis 3 and stack the two heads along it.
                final_output_1 = tf.expand_dims(final_output_1, 3)
                final_output_2 = tf.expand_dims(final_output_2, 3)
                self.debug_layers['final_layer'] = tf.concat(
                    [final_output_1, final_output_2], 3)
                self.final_output = tf.concat([final_output_1, final_output_2], 3)

            # Scalar loss weights. They live under lidar_branch, so they are
            # collected into lidar_only_vars below. The active loss does not
            # read them; they are only exported as summaries.
            with tf.variable_scope("loss_weights"):
                self.loc_weight = tf.get_variable(
                    'loc_weight', shape=(),
                    initializer=tf.constant_initializer(1), dtype=tf.float32)
                self.dim_weight = tf.get_variable(
                    'dim_weight', shape=(),
                    initializer=tf.constant_initializer(1), dtype=tf.float32)
                self.theta_weight = tf.get_variable(
                    'theta_weight', shape=(),
                    initializer=tf.constant_initializer(1), dtype=tf.float32)
                self.cls_weight = tf.get_variable(
                    'cls_weight', shape=(),
                    initializer=tf.constant_initializer(1), dtype=tf.float32)

        # ------------------------------------------------------------------
        # Losses and metrics
        # ------------------------------------------------------------------
        with tf.variable_scope('Loss'):
            cls_loss_instance = ClsLoss('classification_loss')
            reg_loss_instance = RegLoss('regression_loss')
            loss_calculator = LossCalculator()
            loss_params = {
                'focal_loss': self.params['focal_loss'],
                'weight': self.params['weight_loss'],
                'mse': self.params['mse_loss'],
            }
            (self.classification_loss,
             self.loc_reg_loss,
             self.dim_reg_loss,
             self.theta_reg_loss,
             self.dir_reg_loss,
             self.precision,
             self.recall,
             self.iou,
             self.iou_loc,
             self.iou_dim,
             self.theta_accuracy,
             self.recall_pos,
             self.recall_neg,
             self.iou_loc_x,
             self.iou_loc_y,
             self.iou_loc_z) = loss_calculator(
                 self.y_true, self.final_output,
                 cls_loss_instance, reg_loss_instance, **loss_params)

        # The location, dimension and classification terms are scaled by
        # (2 - metric_a - metric_b) using the metrics returned above.
        self.regression_loss_bev = 0
        if self.params['train_loc'] == 1:
            self.regression_loss_bev += 1000 * (
                2 - self.iou_loc - self.iou) * self.loc_reg_loss
        if self.params['train_dim'] == 1:
            self.regression_loss_bev += 100 * (
                2 - self.iou_dim - self.iou) * self.dim_reg_loss
        if self.params['train_theta'] == 1:
            self.regression_loss_bev += 1000 * self.theta_reg_loss

        self.model_loss_bev = 0
        if self.params['train_cls']:
            self.model_loss_bev += 50 * (
                2 - self.recall - self.precision) * self.classification_loss
        if self.params['train_reg']:
            self.model_loss_bev += 1 * self.regression_loss_bev
        # NOTE(review): treated as independent of 'train_reg'; the collapsed
        # source does not show whether this check was nested under it.
        if self.params['train_dir'] == 1:
            self.model_loss_bev += 0.1 * self.dir_reg_loss

        self.regression_loss = self.regression_loss_bev
        self.model_loss = self.model_loss_bev

        # ------------------------------------------------------------------
        # Image loss, optimizers and accuracy
        # ------------------------------------------------------------------
        self.model_loss_img = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels=self.y_true_img, logits=self.detection_layer))

        head_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch/image_head")
        self.opt_img = tf.train.AdamOptimizer(1e-3).minimize(
            self.model_loss_img, var_list=head_only_vars)

        self.img_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "image_branch")
        self.img_only_vars.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                              "fusion/cond/cond_img"))
        self.opt_img_all = tf.train.AdamOptimizer(1e-4).minimize(
            self.model_loss_img, var_list=self.img_only_vars)

        # Accuracy restricted to positions whose label is >= 0.5: everything
        # else contributes False, and the sum of hits is divided by the
        # number of non-zero labels.
        self.equality = tf.where(
            self.y_true_img >= 0.5,
            tf.equal(
                tf.cast(tf.sigmoid(self.detection_layer) >= 0.5, tf.float32),
                self.y_true_img),
            tf.zeros_like(self.y_true_img, dtype=tf.bool))
        self.accuracy = (
            tf.reduce_sum(tf.cast(self.equality, tf.float32))
            / tf.cast(tf.count_nonzero(self.y_true_img), tf.float32))

        # ------------------------------------------------------------------
        # LiDAR / fusion / end-to-end optimizers
        # ------------------------------------------------------------------
        self.global_step = tf.Variable(0, name='global_step', trainable=False)

        self.lidar_only_vars = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, "lidar_branch")
        self.lidar_only_vars.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                              "fusion/cond/cond_lidar"))

        # The decayed rate is only written to the 'learning_rate' summary;
        # the LiDAR optimizer takes its rate from the placeholder below.
        self.decay_rate = tf.train.exponential_decay(
            self.params['lr'], self.global_step, self.params['decay_steps'],
            self.params['decay_rate'], self.params['staircase'])
        self.learning_rate_placeholder = tf.placeholder(
            tf.float32, [], name='learning_rate')
        self.opt_lidar = tf.train.AdamOptimizer(self.learning_rate_placeholder)
        self.train_op_lidar = self.opt_lidar.minimize(
            self.model_loss,
            var_list=self.lidar_only_vars,
            global_step=self.global_step)

        if fusion_enabled:
            self.fusion_only_vars = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, "fusion")
            self.fusion_only_vars.extend(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  "image_branch/image_head/fpn"))
            self.fusion_only_vars.extend(
                tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  "lidar_branch/fpn"))
            self.train_op_fusion = tf.train.AdamOptimizer(1e-3).minimize(
                self.model_loss,
                var_list=self.fusion_only_vars,
                global_step=self.global_step)
        else:
            self.train_op_fusion = None

        # End-to-end op over every trainable variable.
        self.train_op = tf.train.AdamOptimizer(1e-3).minimize(
            self.model_loss, global_step=self.global_step)

        self.saver = tf.train.Saver(max_to_keep=1)
        self.best_saver = tf.train.Saver(max_to_keep=1)

        # ------------------------------------------------------------------
        # Per-batch summaries (computed from graph tensors)
        # ------------------------------------------------------------------
        self.lr_summary = tf.summary.scalar(
            'learning_rate', tf.squeeze(self.decay_rate))
        self.model_loss_batches_summary = tf.summary.scalar(
            'model_loss_batches', self.model_loss)
        self.cls_loss_batches_summary = tf.summary.scalar(
            'classification_loss_batches', self.classification_loss)
        self.reg_loss_batches_summary = tf.summary.scalar(
            'regression_loss_batches', self.regression_loss)
        self.loc_reg_loss_batches_summary = tf.summary.scalar(
            'loc_regression_loss_batches', self.loc_reg_loss)
        self.dim_reg_loss_batches_summary = tf.summary.scalar(
            'dim_regression_loss_batches', self.dim_reg_loss)
        self.theta_reg_loss_batches_summary = tf.summary.scalar(
            'theta_regression_loss_batches', self.theta_reg_loss)
        self.dir_reg_loss_batches_summary = tf.summary.scalar(
            'dir_regression_loss_batches', self.dir_reg_loss)
        self.precision_summary = tf.summary.scalar(
            'precision_batches', self.precision)
        self.recall_summary = tf.summary.scalar('recall_batches', self.recall)
        self.iou_summary = tf.summary.scalar('iou_batches', self.iou)
        self.iou_loc_summary = tf.summary.scalar('iou_loc_batches', self.iou_loc)
        self.iou_dim_summary = tf.summary.scalar('iou_dim_batches', self.iou_dim)
        self.theta_accuracy_summary = tf.summary.scalar(
            'theta_accuracy_batches', self.theta_accuracy)
        self.cls_weight_summary = tf.summary.scalar(
            'cls_weight_summary', self.cls_weight)
        self.loc_weight_summary = tf.summary.scalar(
            'loc_weight_summary', self.loc_weight)
        self.dim_weight_summary = tf.summary.scalar(
            'dim_weight_summary', self.dim_weight)
        self.theta_weight_summary = tf.summary.scalar(
            'theta_weight_summary', self.theta_weight)
        self.recall_pos_summary = tf.summary.scalar(
            'recall_pos_summary', self.recall_pos)
        self.recall_neg_summary = tf.summary.scalar(
            'recall_neg_summary', self.recall_neg)

        self.merged = tf.summary.merge([
            self.lr_summary,
            self.model_loss_batches_summary,
            self.cls_loss_batches_summary,
            self.reg_loss_batches_summary,
            self.loc_reg_loss_batches_summary,
            self.dim_reg_loss_batches_summary,
            self.theta_reg_loss_batches_summary,
            self.dir_reg_loss_batches_summary,
            self.precision_summary,
            self.recall_summary,
            self.iou_summary,
            self.iou_loc_summary,
            self.iou_dim_summary,
            self.theta_accuracy_summary,
            self.cls_weight_summary,
            self.loc_weight_summary,
            self.dim_weight_summary,
            self.theta_weight_summary,
            self.recall_pos_summary,
            self.recall_neg_summary,
        ])

        # ------------------------------------------------------------------
        # Summaries fed through placeholders
        # ------------------------------------------------------------------
        self.model_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.model_loss_summary = tf.summary.scalar(
            'model_loss', self.model_loss_placeholder)
        self.cls_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.cls_loss_summary = tf.summary.scalar(
            'classification_loss', self.cls_loss_placeholder)
        self.reg_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.reg_loss_summary = tf.summary.scalar(
            'regression_loss', self.reg_loss_placeholder)
        self.theta_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.theta_loss_summary = tf.summary.scalar(
            'theta_loss', self.theta_loss_placeholder)
        self.dir_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.dir_loss_summary = tf.summary.scalar(
            'dir_loss', self.dir_loss_placeholder)
        self.loc_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.loc_loss_summary = tf.summary.scalar(
            'loc_loss', self.loc_loss_placeholder)
        self.dim_loss_placeholder = tf.placeholder(dtype=tf.float32, shape=[])
        self.dim_loss_summary = tf.summary.scalar(
            'dim_loss', self.dim_loss_placeholder)
        self.lr_summary2 = tf.summary.scalar(
            'lr_ph', self.learning_rate_placeholder)

        self.images_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, None, None, 3])
        self.images_summary = tf.summary.image(
            'images', self.images_summary_placeholder)
        self.images_summary_fusion_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, None, None, 3])
        self.images_summary_fusion = tf.summary.image(
            'images_fusion', self.images_summary_fusion_placeholder)
        self.images_summary_segmentation_cars_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, 24, 78, 1])
        # The tag spelling is kept as-is so existing event files stay comparable.
        self.images_summary_segmentation_cars = tf.summary.image(
            'images_segmantation_cars',
            self.images_summary_segmentation_cars_placeholder)
        self.images_summary_segmentation_road_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[None, 24, 78, 1])
        self.images_summary_segmentation_road = tf.summary.image(
            'images_segmentation_road',
            self.images_summary_segmentation_road_placeholder)

        self.accuracy_image_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[])
        self.accuracy_image_summary = tf.summary.scalar(
            'accuracy_image', self.accuracy_image_summary_placeholder)
        self.model_loss_image_summary_placeholder = tf.placeholder(
            dtype=tf.float32, shape=[])
        self.model_loss_image_summary = tf.summary.scalar(
            'model_loss_image', self.model_loss_image_summary_placeholder)

        # Only the training writer receives the graph definition.
        self.train_writer = tf.summary.FileWriter('./training_files/train', self.graph)
        self.validation_writer = tf.summary.FileWriter('./training_files/test')
def test():
    """Run the trained FPN over ``test_dataloader`` and write box files.

    Weights are loaded from ``model_path``. The ``TEST_RESULT`` and
    ``submit_path`` directories are deleted and recreated, then for every
    batch a ``res_<basename>.txt`` file is written into ``submit_path``
    with one line per detected box: the eight comma-separated integer
    corner coordinates of its minimum-area rectangle.

    Only the first sample of each batch and channel 0 of the model output
    are evaluated.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '1'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    # Remap the checkpoint onto the CPU only when no GPU is available.
    map_location = None if torch.cuda.is_available() else 'cpu'
    fcn_model.load_state_dict(torch.load(model_path, map_location=map_location))
    fcn_model = fcn_model.to(device)
    fcn_model.eval()

    # Start every run from empty output directories.
    for out_dir in (TEST_RESULT, submit_path):
        if os.path.exists(out_dir):
            shutil.rmtree(out_dir)
        os.mkdir(out_dir)

    for bag, bag1, bag_msk, not_care, name, shape in test_dataloader:
        with torch.no_grad():
            output = fcn_model(bag.to(device))
            # bag1 is a second view of the same image (presumably a rescaled
            # copy -- confirm against the dataset class).
            output1 = fcn_model(bag1.to(device))

        # Score map of sample 0 / channel 0 for each input.
        score = output.detach().cpu().numpy()[0, 0, :, :]
        score1 = output1.detach().cpu().numpy()[0, 0, :, :]
        # cv2.resize takes (width, height); bring the second map to the
        # resolution of the first so they can be compared pixel by pixel.
        score1 = cv2.resize(score1, (score.shape[1], score.shape[0]))
        # Fuse both predictions with a pixel-wise maximum so that a response
        # in either input counts as a detection.
        fused = np.maximum(score, score1)

        # NOTE(review): the 0.5 threshold is applied to the raw model output;
        # confirm whether FPN already applies a sigmoid internally.
        binary = np.zeros(fused.shape, dtype=np.uint8)
        binary[fused > 0.5] = 255
        binary = cv2.resize(binary, (shape[1], shape[0]))

        # Invert the mask before contour extraction: detected pixels become
        # 0 and everything else 255.
        _, thresh = cv2.threshold(binary, 230, 255, cv2.THRESH_BINARY_INV)
        # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
        # (image, contours, hierarchy) in 3.x; the contour list is always the
        # second-to-last element.
        contours = cv2.findContours(thresh, cv2.RETR_TREE,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]

        bboxes = []
        for i, contour in enumerate(contours):
            # The first contour is always skipped (presumably the outer
            # background region of the inverted mask -- confirm).
            if i == 0:
                continue
            # Ignore tiny regions.
            if cv2.contourArea(contour) < 20.0:
                continue
            # Minimum-area (rotated) rectangle and its four corner points.
            rect = cv2.minAreaRect(contour)
            corners = cv2.boxPoints(rect).astype('int32')
            bboxes.append(corners.reshape(-1))

        # Ground-truth mask of sample 0 with the detected boxes drawn on it.
        # NOTE(review): this overlay is not written anywhere in this function.
        bag_msk_np = np.argmin(bag_msk.detach().cpu().numpy(), axis=1)
        bag_msk_np = np.squeeze(bag_msk_np[0, ...])
        bag_msk_np = np.array(255 * bag_msk_np, dtype="uint8")
        for bbox in bboxes:
            cv2.polylines(bag_msk_np, [bbox.reshape(4, 2)], 1, 120)

        lines = [','.join(str(int(b)) for b in box) + '\n' for box in bboxes]
        out_name = 'res_' + os.path.basename(name[0]) + '.txt'
        with open(os.path.join(submit_path, out_name), 'w') as f:
            f.writelines(lines)
def load_fpn(self, cfg):
    """Build an ``FPN`` from ``cfg`` and append it to ``self.model_list``."""
    model = FPN(cfg)
    self.model_list.append(model)
def train(epo_num=50, show_vgg_params=False):
    """Continue training the FPN from ``model_path`` and checkpoint each epoch.

    Every epoch runs one pass over ``train_dataloader`` (optimising a masked
    binary cross-entropy loss with Adam) followed by one evaluation pass over
    ``test_dataloader``. Sample predictions are written into ``TRAIN_RESULT``
    (recreated at start-up), the loss curves are redrawn with
    ``draw_loss_plot`` and the weights are saved to
    ``model_fpn/fcn_<epoch>.model``.

    Args:
        epo_num: Number of epochs to run.
        show_vgg_params: Unused; kept so existing callers keep working.

    Raises:
        ZeroDivisionError: If ``train_dataloader`` or ``test_dataloader``
            yields no batches (the mean loss cannot be computed).
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '3'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    # Remap the checkpoint onto the CPU only when no GPU is available.
    map_location = None if torch.cuda.is_available() else 'cpu'
    fcn_model.load_state_dict(torch.load(model_path, map_location=map_location))
    fcn_model = fcn_model.to(device)

    criterion = nn.BCELoss().to(device)
    optimizer = optim.Adam(fcn_model.parameters(), lr=1e-4)

    all_train_iter_loss = []  # one entry per training iteration
    all_test_iter_loss = []   # one entry per epoch (mean test loss)

    # Start every run from an empty snapshot directory.
    if os.path.exists(TRAIN_RESULT):
        shutil.rmtree(TRAIN_RESULT)
    os.mkdir(TRAIN_RESULT)
    # torch.save does not create missing directories.
    os.makedirs('model_fpn', exist_ok=True)

    prev_time = datetime.now()
    for epo in range(epo_num):
        # ---- training pass ----
        train_loss = 0
        fcn_model.train()
        for index, (bag, bag_msk, nc) in enumerate(train_dataloader):
            bag = bag.to(device)
            bag_msk = bag_msk.to(device)
            nc = nc.to(device)

            optimizer.zero_grad()
            output = torch.sigmoid(fcn_model(bag))
            # Multiplying by (1 - nc) zeroes prediction and target wherever
            # nc == 1 (presumably the "not care" mask -- confirm).
            loss = criterion(output * (1 - nc), bag_msk * (1 - nc))
            loss.backward()
            optimizer.step()

            iter_loss = loss.item()
            all_train_iter_loss.append(iter_loss)
            train_loss += iter_loss

            if index % 50 == 0:
                print('epoch {}, {}/{},train loss is {}'.format(
                    epo, index, len(train_dataloader), iter_loss))
                # Only copy the prediction to the host when a snapshot is
                # actually written; argmin over axis 1 picks the channel
                # with the lowest score per pixel.
                pred = np.argmin(output.detach().cpu().numpy(), axis=1)
                cv2.imwrite(TRAIN_RESULT + str(index) + "_train.jpg",
                            255 * np.squeeze(pred[0, ...]))

        # ---- evaluation pass ----
        test_loss = 0
        num_test = 0
        fcn_model.eval()
        with torch.no_grad():
            for index, (bag, bag_msk, nc) in enumerate(test_dataloader):
                bag = bag.to(device)
                bag_msk = bag_msk.to(device)
                nc = nc.to(device)

                output = torch.sigmoid(fcn_model(bag))
                loss = criterion(output * (1 - nc), bag_msk * (1 - nc))
                test_loss += loss.item()
                num_test = index + 1

                if index % 10 == 0:
                    pred = np.argmin(output.detach().cpu().numpy(), axis=1)
                    cv2.imwrite(TRAIN_RESULT + str(index) + "_test.jpg",
                                255 * np.squeeze(pred[0, ...]))

        all_test_iter_loss.append(test_loss / num_test)

        # ---- reporting ----
        cur_time = datetime.now()
        # total_seconds() keeps whole days; timedelta.seconds would drop them.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        prev_time = cur_time

        print('epoch train loss = %f, epoch test loss = %f, %s' %
              (train_loss / len(train_dataloader),
               test_loss / len(test_dataloader), time_str))
        draw_loss_plot(all_train_iter_loss, all_test_iter_loss)

        # Save the weights after every epoch and report the real path.
        checkpoint_path = 'model_fpn/fcn_{0}.model'.format(epo)
        torch.save(fcn_model.state_dict(), checkpoint_path)
        print('saving {}'.format(checkpoint_path))
def test():
    """Evaluate the trained FPN on ``test_dataloader`` and print the mean IoU.

    Weights are loaded from ``model_path`` and ``TEST_RESULT`` is deleted and
    recreated. For the first sample of every batch the binarised prediction
    and the ground-truth mask are written to ``test_result_fpn/`` and the
    intersection-over-union between them is computed after zeroing the
    prediction wherever ``not_care == 1``. Samples whose union is empty are
    excluded from the mean.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = '2'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    fcn_model = FPN([2, 4, 23, 3], 2, back_bone="resnet")
    # Remap the checkpoint onto the CPU only when no GPU is available.
    map_location = None if torch.cuda.is_available() else 'cpu'
    fcn_model.load_state_dict(torch.load(model_path, map_location=map_location))
    fcn_model = fcn_model.to(device)
    fcn_model.eval()

    iou_sum = 0
    num_scored = 0  # samples that actually contributed an IoU value

    # Start every run from an empty result directory.
    if os.path.exists(TEST_RESULT):
        shutil.rmtree(TEST_RESULT)
    os.mkdir(TEST_RESULT)

    for bag, bag1, bag_msk, not_care, name, shape in test_dataloader:
        with torch.no_grad():
            output = fcn_model(bag.to(device))
            # bag1 is a second view of the same image (presumably a rescaled
            # copy -- confirm against the dataset class).
            output1 = fcn_model(bag1.to(device))

        # Score map of sample 0 / channel 0 for each input.
        score = output.detach().cpu().numpy()[0, 0, :, :]
        score1 = output1.detach().cpu().numpy()[0, 0, :, :]
        # cv2.resize takes (width, height); bring the second map to the
        # resolution of the first so they can be compared pixel by pixel.
        score1 = cv2.resize(score1, (score.shape[1], score.shape[0]))
        # Fuse both predictions with a pixel-wise maximum so that a response
        # in either input counts as a detection.
        fused = np.maximum(score, score1)

        # Ground-truth label map of sample 0 (argmin over the channel axis).
        bag_msk_np = np.argmin(bag_msk.detach().cpu().numpy(), axis=1)
        bag_msk_np = np.squeeze(bag_msk_np[0, ...])

        # NOTE(review): the 0.5 threshold is applied to the raw model output;
        # confirm whether FPN already applies a sigmoid internally.
        output_np = np.zeros(fused.shape)
        output_np[fused > 0.5] = 1

        # The images are saved before the not-care masking below.
        cv2.imwrite("test_result_fpn/" + name[0] + "_test.jpg", 255 * output_np)
        cv2.imwrite("test_result_fpn/" + name[0] + "_gt.jpg", 255 * bag_msk_np)

        # Ignore predictions that fall inside not-care regions.
        not_care_np = np.squeeze(not_care.detach().cpu().numpy()[0, ...])
        output_np[np.where(not_care_np == 1)] = 0

        inter = np.sum(np.multiply(output_np, bag_msk_np))
        union = np.sum(output_np) + np.sum(bag_msk_np) - inter
        if union == 0:
            # IoU is undefined for an empty union; leave the sample out.
            continue
        iou_sum += inter / union
        num_scored += 1

    # Average over the samples that were scored, so skipped samples never
    # dilute the mean; report 0.0 when nothing could be scored.
    miou = iou_sum / num_scored if num_scored else 0.0
    print("MIOU is {}".format(miou))