def inference(self):
    """Run the FPN detector on the current batch and return its detections.

    When ``self.is_training`` is true the network is built under
    ``self.fpn_arg_scope()``, ``self.losses()`` is invoked, and image
    summaries showing the ground-truth boxes (and, if
    ``cfgs.ADD_BOX_IN_TENSORBOARD`` is set, the final detections) are
    registered.  Otherwise only the forward pass is built.

    :return: tuple ``(final_bbox, final_scores, final_category)`` taken from
        the three values produced by ``self.fpn``.
    """
    if not self.is_training:
        # Inference path: plain forward pass, no arg scope, no summaries.
        boxes, scores, categories = self.fpn(img_batch=self.images_batch,
                                             gtboxes_batch=self.gtboxes_batch)
        return boxes, scores, categories

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope(self.fpn_arg_scope()):
        boxes, scores, categories = self.fpn(img_batch=self.images_batch,
                                             gtboxes_batch=self.gtboxes_batch)
    self.losses()

    # ------ detection summaries ------
    # Each ground-truth row is flattened to 5 values; the last one is used
    # as the label and the leading ones as the box.
    gt_rows = tf.reshape(self.gtboxes_batch, [-1, 5])
    gt_overlay = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=self.images_batch,
        boxes=gt_rows[:, :-1],
        labels=gt_rows[:, -1])

    if cfgs.ADD_BOX_IN_TENSORBOARD:
        det_overlay = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=self.images_batch,
            boxes=boxes,
            labels=categories,
            scores=scores)
        tf.summary.image('Compare/final_detection', det_overlay)
    tf.summary.image('Compare/gtboxes', gt_overlay)

    return boxes, scores, categories
def train():
    """Build the multi-GPU training graph and run the training loop.

    One tower is built per entry in ``cfgs.GPU_GROUP``.  Each tower receives
    one image of the input batch together with its horizontal ground-truth
    boxes (reshaped to ``[-1, 5]``) and rotated ground-truth boxes (reshaped
    to ``[-1, 6]``).  Tower gradients are merged with ``sum_gradients``,
    optionally rescaled for bias / ``conv_new`` variables, and applied
    together with an exponential-moving-average update of the trainable
    variables.

    In the loop, summaries are written on steps divisible by
    ``cfgs.SMRY_ITER``; on the remaining steps divisible by
    ``cfgs.SHOW_TRAIN_INFO_INTE`` the losses are printed.  Checkpoints are
    written to ``cfgs.TRAINED_CKPT/cfgs.VERSION``.

    The process leaves through ``sys.exit(0)`` when the fetched
    ``total_losses`` value is NaN.  The input queue threads and the summary
    writer are always shut down, even when the loop raises.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Pick one of the configured short-side lengths at random.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        # data processing: one input record per tower
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([img, gtboxes_and_label_h, gtboxes_and_label_r,
                                num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        # Running per-loss averages across towers, used for printing/summaries.
        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # Variables are requested on the CPU device while
                        # the tower ops are requested on GPU i.
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[inputs_list[i][1], inputs_list[i][2], inputs_list[i][3]],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                # Crop the image to the (height, width) stored with it.
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(img_shape[0], tf.int32),
                                    target_width=tf.cast(img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gpu_id=i)

                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1)
                                tf.summary.image('Compare/gtboxes_h_gpu:%d' % i, gtboxes_in_img_h)
                                tf.summary.image('Compare/gtboxes_r_gpu:%d' % i, gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i,
                                                     detections_in_img)

                                loss_dict = outputs[-1]
                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / num_gpu

                                total_losses /= num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                # The regularization term is added once, on the last tower.
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        # Towers built after this point reuse the variables.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)
                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True

        num_steps = cfgs.MAX_ITERATION // num_gpu
        # Guard against a zero modulus when SAVE_WEIGHTS_INTE < num_gpu.
        save_interval = max(1, cfgs.SAVE_WEIGHTS_INTE // num_gpu)

        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            summary_writer = None
            try:
                summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
                tools.mkdir(summary_path)
                summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

                if restorer is not None:
                    restorer.restore(sess, restore_ckpt)
                    print('restore model')

                for step in range(num_steps):
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                  time.localtime(time.time()))
                    is_summary_step = step % cfgs.SMRY_ITER == 0
                    is_show_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                    if is_summary_step:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str, (global_stepnp - 1) * num_gpu)
                        summary_writer.flush()
                    elif is_show_step:
                        start = time.time()
                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])
                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d current_step:%d"""
                              % (training_time, (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" % ((end - start) / num_gpu))
                        loss_str = ''.join('%s:%.3f\n' % (k, total_loss_dict_[k])
                                           for k in total_loss_dict_.keys())
                        print(loss_str)

                        if np.isnan(total_loss_dict_['total_losses']):
                            # Exit status 0 is kept from the original behaviour.
                            print('total_losses is NaN, stop training')
                            sys.exit(0)
                    else:
                        _, global_stepnp = sess.run([train_op, global_step])

                    if (step > 0 and step % save_interval == 0) or (step >= num_steps - 1):
                        save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                        if not os.path.exists(save_dir):
                            # makedirs also creates a missing parent directory.
                            os.makedirs(save_dir)

                        save_ckpt = os.path.join(
                            save_dir,
                            '{}_'.format(cfgs.DATASET_NAME)
                            + str((global_stepnp - 1) * num_gpu) + 'model.ckpt')
                        saver.save(sess, save_ckpt)
                        print(' weights had been saved')
            finally:
                # Always stop the input threads, even on error or NaN exit.
                coord.request_stop()
                coord.join(threads)
                if summary_writer is not None:
                    summary_writer.close()
def train():
    """Build the single-device Faster R-CNN training graph and train it.

    Input comes from ``next_batch`` (queue based); ground-truth rows are
    reshaped to ``[-1, 5]``, with the last column used as the label when
    drawing.  The total loss is the sum of the RPN losses, the Fast R-CNN
    losses and the slim regularization losses.  The learning rate is
    piecewise constant with boundaries ``cfgs.DECAY_STEP[0]`` and
    ``cfgs.DECAY_STEP[1]``.

    In the loop, summaries are written on steps divisible by
    ``cfgs.SMRY_ITER``; on the remaining steps divisible by
    ``cfgs.SHOW_TRAIN_INFO_INTE`` the losses are printed.  Checkpoints are
    written to ``cfgs.TRAINED_CKPT/cfgs.VERSION``.  The input queue threads
    and the summary writer are always shut down, even when the loop raises.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ---- build loss ----
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ---- add summary ----
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---- compute gradients ----
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    # allow_soft_placement=True automatically moves ops that cannot be placed
    # on the GPU back to the CPU.
    config = tf.ConfigProto(allow_soft_placement=True)
    # Allocate GPU memory on demand instead of claiming a whole device.
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        summary_writer = None
        try:
            # Directory where the summary logs are written.
            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))
                is_summary_step = step % cfgs.SMRY_ITER == 0
                is_show_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                if is_summary_step:
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()
                elif is_show_step:
                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                        fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, img_name_batch, rpn_location_loss,
                             rpn_cls_loss, rpn_total_loss, fastrcnn_loc_loss,
                             fastrcnn_cls_loss, fastrcnn_total_loss, total_loss])
                    end = time.time()

                    print('{}: step:{}/{}iter'.format(training_time, global_stepnp,
                                                      cfgs.MAX_ITERATION))
                    print('>>> rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f}\n'
                          '>>> fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f}\n'
                          '>>> single_time:{:.4f}s | total_loss:{:.4f} '
                          .format(rpnLocLoss, rpnClsLoss, rpnTotalLoss,
                                  fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss,
                                  (end - start), totalLoss))
                    print(
                        '~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'
                    )
                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        # makedirs also creates a missing parent directory.
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            # Always stop the input threads, even if the loop raised.
            coord.request_stop()
            coord.join(threads)
            if summary_writer is not None:
                summary_writer.close()
def train():
    """Build the multi-GPU, multi-image-per-tower training graph and train.

    ``cfgs.NUM_GPU`` towers are built; tower ``i`` receives the slice
    ``[i * BATCH_SIZE, (i + 1) * BATCH_SIZE)`` of the input batch, with its
    ground truth reshaped to ``[BATCH_SIZE, -1, 5]``.  Tower gradients are
    merged with ``sum_gradients`` and applied together with an
    exponential-moving-average update of the trainable variables.

    The loop runs ``cfgs.MAX_ITERATION // (NUM_GPU * BATCH_SIZE)`` steps.
    Summaries are written on steps divisible by ``cfgs.SMRY_ITER``; on the
    remaining steps divisible by ``cfgs.SHOW_TRAIN_INFO_INTE`` the losses
    are printed.  Checkpoints go to ``cfgs.TRAINED_CKPT/cfgs.VERSION``, and
    one is always written on the last step.  The input queue threads and
    the summary writer are always shut down, even when the loop raises.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):

        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.NUM_GPU * cfgs.BATCH_SIZE)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                           is_training=True,
                                                           batch_size=cfgs.BATCH_SIZE)

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                           batch_size=cfgs.BATCH_SIZE * cfgs.NUM_GPU,
                           shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                           is_training=True)

        # data processing: one slice of the global batch per tower
        inputs_list = []
        for i in range(cfgs.NUM_GPU):
            lo = i * cfgs.BATCH_SIZE
            hi = (i + 1) * cfgs.BATCH_SIZE
            img = img_batch[lo:hi, :, :, :]
            if cfgs.NET_NAME in ['resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label = tf.cast(
                tf.reshape(gtboxes_and_label_batch[lo:hi, :, :], [cfgs.BATCH_SIZE, -1, 5]),
                tf.float32)
            num_objects = num_objects_batch[lo:hi]
            num_objects = tf.cast(tf.reshape(num_objects, [cfgs.BATCH_SIZE, -1, ]), tf.float32)

            img_h = img_h_batch[lo:hi]
            img_w = img_w_batch[lo:hi]

            inputs_list.append([img, gtboxes_and_label, num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        # Running per-loss averages across towers, used for printing/summaries.
        total_loss_dict = {
            'rpn_cls_loss': tf.constant(0., tf.float32),
            'rpn_bbox_loss': tf.constant(0., tf.float32),
            'rpn_ctr_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(cfgs.NUM_GPU):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # Variables are requested on the CPU device while
                        # the tower ops are requested on GPU i.
                        with slim.arg_scope([slim.model_variable, slim.variable],
                                            device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[inputs_list[i][1], inputs_list[i][2]],
                                    Tout=tf.float32)
                                gtboxes_and_label = tf.reshape(gtboxes_and_label,
                                                               [cfgs.BATCH_SIZE, -1, 5])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                # Crop to the largest height/width among the
                                # images of this tower's slice.
                                h_crop = tf.reduce_max(img_shape[0])
                                w_crop = tf.reduce_max(img_shape[1])
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(h_crop, tf.int32),
                                    target_width=tf.cast(w_crop, tf.int32))

                                outputs = faster_rcnn.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch=gtboxes_and_label)

                                # Only the first image of the slice is drawn.
                                gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
                                    img_batch=img[0, :, :, :],
                                    boxes=gtboxes_and_label[0, :, :-1],
                                    labels=gtboxes_and_label[0, :, -1])
                                gtboxes_in_img = tf.expand_dims(gtboxes_in_img, 0)
                                tf.summary.image('Compare/gtboxes_gpu:%d' % i, gtboxes_in_img)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                                        img_batch=img[0, :, :, :],
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2])
                                    detections_in_img = tf.expand_dims(detections_in_img, 0)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i,
                                                     detections_in_img)

                                loss_dict = outputs[-1]
                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[k] += loss_dict[k] / cfgs.NUM_GPU

                                total_losses = total_losses / cfgs.NUM_GPU
                                total_loss_dict['total_losses'] += total_losses

                                # The regularization term is added once, on the last tower.
                                if i == cfgs.NUM_GPU - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        # Towers built after this point reuse the variables.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k), total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)

        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = faster_rcnn.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        tfconfig = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True

        num_per_iter = cfgs.NUM_GPU * cfgs.BATCH_SIZE
        num_steps = cfgs.MAX_ITERATION // num_per_iter
        # Guard against a zero modulus when SAVE_WEIGHTS_INTE < num_per_iter.
        save_interval = max(1, cfgs.SAVE_WEIGHTS_INTE // num_per_iter)

        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)
            summary_writer = None
            try:
                summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
                tools.mkdir(summary_path)
                summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

                if restorer is not None:
                    restorer.restore(sess, restore_ckpt)
                    print('restore model')

                for step in range(num_steps):
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                  time.localtime(time.time()))
                    is_summary_step = step % cfgs.SMRY_ITER == 0
                    is_show_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                    if is_summary_step:
                        _, global_stepnp, summary_str = sess.run(
                            [train_op, global_step, summary_op])
                        summary_writer.add_summary(summary_str,
                                                   (global_stepnp - 1) * num_per_iter)
                        summary_writer.flush()
                    elif is_show_step:
                        tic = time.time()
                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])
                        toc = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d current_step:%d"""
                              % (training_time, (global_stepnp - 1) * num_per_iter,
                                 step * num_per_iter))
                        print("""per_cost_time:%.3fs""" % ((toc - tic) / num_per_iter))
                        loss_str = ''.join('%s:%.3f\n' % (k, total_loss_dict_[k])
                                           for k in total_loss_dict_.keys())
                        print(loss_str)
                    else:
                        _, global_stepnp = sess.run([train_op, global_step])

                    # The last-step test must use the loop's own step count.
                    # The previous bound, MAX_ITERATION // NUM_GPU - 1, is
                    # never reached when BATCH_SIZE > 1.
                    if (step > 0 and step % save_interval == 0) or (step >= num_steps - 1):
                        save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                        if not os.path.exists(save_dir):
                            # makedirs also creates a missing parent directory.
                            os.makedirs(save_dir)

                        save_ckpt = os.path.join(
                            save_dir,
                            'coco_' + str((global_stepnp - 1) * num_per_iter) + 'model.ckpt')
                        saver.save(sess, save_ckpt)
                        print(' weights had been saved')
            finally:
                # Always stop the input threads, even if the loop raised.
                coord.request_stop()
                coord.join(threads)
                if summary_writer is not None:
                    summary_writer.close()
def train():
    """Build the placeholder-fed Faster R-CNN training graph and train it.

    Images and ground-truth rows are fed through two placeholders (a uint8
    image of shape ``[None, None, 3]`` and int32 rows of shape ``[None, 5]``)
    and passed through ``preprocess_img``.  Data for each step comes from
    ``next_img(step=step)``.  The weight-decay term is fixed to ``0`` here,
    so the total loss is the sum of the RPN and Fast R-CNN losses.

    In the loop, summaries are written on steps divisible by
    ``cfgs.SMRY_ITER``; on the remaining steps divisible by
    ``cfgs.SHOW_TRAIN_INFO_INTE`` the losses are printed.  Checkpoints are
    written to ``cfgs.TRAINED_CKPT/cfgs.VERSION``.  The summary writer is
    closed when the loop ends or raises.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(base_network_name=cfgs.NET_NAME,
                                                       is_training=True)

    with tf.name_scope('get_batch'):
        img_plac = tf.placeholder(dtype=tf.uint8, shape=[None, None, 3])
        gtbox_plac = tf.placeholder(dtype=tf.int32, shape=[None, 5])

        img_batch, gtboxes_and_label = preprocess_img(img_plac, gtbox_plac)

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                         slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        final_bbox, final_scores, final_category, loss_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ---- build loss ----
    weight_decay_loss = 0  # tf.add_n(slim.losses.get_regularization_losses())
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    total_loss = rpn_total_loss + fastrcnn_total_loss + weight_decay_loss

    # ---- add summary ----
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
    tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ---- compute gradients ----
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(gradients,
                                                          cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    )

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
        tools.mkdir(summary_path)
        summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

        try:
            for step in range(cfgs.MAX_ITERATION):
                img_id, img, gt_info = next_img(step=step)
                feed = {img_plac: img, gtbox_plac: gt_info}
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))
                is_summary_step = step % cfgs.SMRY_ITER == 0
                is_show_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                if is_summary_step:
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op], feed_dict=feed)
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()
                elif is_show_step:
                    start = time.time()
                    _, global_stepnp, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                        fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run(
                            [train_op, global_step, rpn_location_loss, rpn_cls_loss,
                             rpn_total_loss, fastrcnn_loc_loss, fastrcnn_cls_loss,
                             fastrcnn_total_loss, total_loss],
                            feed_dict=feed)
                    end = time.time()
                    print(' {}: step{} image_name:{} |\t'
                          ' rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |'
                          ' fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |'
                          ' total_loss:{} |\t per_cost_time:{}s'
                          .format(training_time, global_stepnp, str(img_id),
                                  rpnLocLoss, rpnClsLoss, rpnTotalLoss,
                                  fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss,
                                  totalLoss, (end - start)))
                else:
                    _, global_stepnp = sess.run([train_op, global_step], feed_dict=feed)

                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or (step == cfgs.MAX_ITERATION - 1):
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        # makedirs also creates a missing parent directory.
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(save_dir,
                                             'voc_' + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            # Release the event file even if a step raised.
            summary_writer.close()
def tower_loss(scope):
    """Build one Faster R-CNN tower under name scope ``scope``.

    Creates the detection network, pulls a batch from ``next_batch``, builds
    the RPN / Fast R-CNN / weight-decay losses and registers the scalar and
    image summaries.

    :param scope: name passed to ``tf.name_scope`` for every op of the tower.
    :return: tuple ``(total_loss, faster_rcnn, img_name_batch,
        rpn_location_loss, rpn_cls_loss, rpn_total_loss, fastrcnn_loc_loss,
        fastrcnn_cls_loss, fastrcnn_total_loss)``.
    """
    with tf.name_scope(scope):
        detector = build_whole_network.DetectionNetwork(
            base_network_name=cfgs.NET_NAME,
            is_training=True)

        with tf.name_scope('get_batch'):
            batch = next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                               batch_size=cfgs.BATCH_SIZE,
                               shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                               is_training=True)
            names, images, gt_batch, _num_objects = batch
            # Every ground-truth row is flattened to 5 values; the last one
            # is used as the label when drawing below.
            gt_rows = tf.reshape(gt_batch, [-1, 5])

        layer_types = [
            slim.conv2d,
            slim.conv2d_in_plane,
            slim.conv2d_transpose,
            slim.separable_conv2d,
            slim.fully_connected,
        ]
        bias_reg = tf.no_regularizer
        weight_reg = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        # list as many types of layers as possible, even if they are not used now
        with slim.arg_scope(layer_types,
                            weights_regularizer=weight_reg,
                            biases_regularizer=bias_reg,
                            biases_initializer=tf.constant_initializer(0.0)):
            boxes, scores, categories, losses = detector.build_whole_detection_network(
                input_img_batch=images,
                gtboxes_batch=gt_rows)

        # ---- build loss ----
        decay = tf.add_n(slim.losses.get_regularization_losses())

        rpn_loc = losses['rpn_loc_loss']
        rpn_cls = losses['rpn_cls_loss']
        rpn_total = rpn_loc + rpn_cls

        head_cls = losses['fastrcnn_cls_loss']
        head_loc = losses['fastrcnn_loc_loss']
        head_total = head_cls + head_loc

        overall = rpn_total + head_total + decay

        # ---- add summary ----
        scalar_summaries = [
            ('RPN_LOSS/cls_loss', rpn_cls),
            ('RPN_LOSS/location_loss', rpn_loc),
            ('RPN_LOSS/rpn_total_loss', rpn_total),
            ('FAST_LOSS/fastrcnn_cls_loss', head_cls),
            ('FAST_LOSS/fastrcnn_location_loss', head_loc),
            ('FAST_LOSS/fastrcnn_total_loss', head_total),
            ('LOSS/total_loss', overall),
            ('LOSS/regular_weights', decay),
        ]
        for tag, value in scalar_summaries:
            tf.summary.scalar(tag, value)

        gt_overlay = show_box_in_tensor.draw_boxes_with_categories(
            img_batch=images,
            boxes=gt_rows[:, :-1],
            labels=gt_rows[:, -1])
        if cfgs.ADD_BOX_IN_TENSORBOARD:
            det_overlay = show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=images,
                boxes=boxes,
                labels=categories,
                scores=scores)
            tf.summary.image('Compare/final_detection', det_overlay)
        tf.summary.image('Compare/gtboxes', gt_overlay)

        return (overall, detector, names, rpn_loc, rpn_cls, rpn_total,
                head_loc, head_cls, head_total)
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                  gtboxes_h_batch):
    """
    Assemble the whole detection graph: base network, RPN, Fast-RCNN heads.

    Two parallel sets of predictions are produced from the same RoIs, the
    ``*_h`` and the ``*_r`` outputs (presumably horizontal and rotated
    boxes -- confirm against ``build_fastrcnn``).

    :param input_img_batch: image tensor passed to ``build_base_network``
    :param gtboxes_r_batch: ground truth, reshaped to [-1, 6] and cast to
                            float32 when training
    :param gtboxes_h_batch: ground truth, reshaped to [-1, 5] and cast to
                            float32 when training
    :return: when not training:
             (final_boxes_h, final_scores_h, final_category_h,
              final_boxes_r, final_scores_r, final_category_r);
             when training the same six values followed by ``loss_dict``
             as returned by ``self.build_loss``
    """

    if self.is_training:
        # ensure shape is [M, 5] and [M, 6]
        gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
        gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
        gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
        gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network
    feature_to_cropped = self.build_base_network(input_img_batch)

    # 2. build rpn
    # A shared 3x3 conv feeds two 1x1 convs: 2 class scores and 4 box
    # offsets per anchor. Both outputs are flattened to one row per anchor.
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY)):
        rpn_conv3x3 = slim.conv2d(feature_to_cropped, 512, [3, 3],
                                  trainable=self.is_training,
                                  weights_initializer=cfgs.INITIALIZER,
                                  activation_fn=tf.nn.relu,
                                  scope='rpn_conv/3x3')
        rpn_cls_score = slim.conv2d(rpn_conv3x3,
                                    self.num_anchors_per_location * 2,
                                    [1, 1], stride=1,
                                    trainable=self.is_training,
                                    weights_initializer=cfgs.INITIALIZER,
                                    activation_fn=None,
                                    scope='rpn_cls_score')
        rpn_box_pred = slim.conv2d(
            rpn_conv3x3, self.num_anchors_per_location * 4, [1, 1],
            stride=1, trainable=self.is_training,
            weights_initializer=cfgs.BBOX_INITIALIZER,
            activation_fn=None, scope='rpn_bbox_pred')
        rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4])
        rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2])
        rpn_cls_prob = slim.softmax(rpn_cls_score, scope='rpn_cls_prob')

    # 3. generate_anchors
    # Feature-map height/width are read from axes 1 and 2 of the base
    # feature tensor and passed on as float32.
    featuremap_height, featuremap_width = tf.shape(
        feature_to_cropped)[1], tf.shape(feature_to_cropped)[2]
    featuremap_height = tf.cast(featuremap_height, tf.float32)
    featuremap_width = tf.cast(featuremap_width, tf.float32)

    anchors = anchor_utils.make_anchors(
        base_anchor_size=cfgs.BASE_ANCHOR_SIZE_LIST[0],
        anchor_scales=cfgs.ANCHOR_SCALES,
        anchor_ratios=cfgs.ANCHOR_RATIOS,
        featuremap_height=featuremap_height,
        featuremap_width=featuremap_width,
        stride=cfgs.ANCHOR_STRIDE,
        name="make_anchors_forRPN")

    # 4. postprocess rpn proposals. such as: decode, clip, NMS
    with tf.variable_scope('postprocess_RPN'):
        rois, roi_scores = postprocess_rpn_proposals(
            rpn_bbox_pred=rpn_box_pred,
            rpn_cls_prob=rpn_cls_prob,
            img_shape=img_shape,
            anchors=anchors,
            is_training=self.is_training)
        # rois shape [-1, 4]

        # ------------------------- add img smry -------------------------
        if self.is_training:
            # NOTE(review): the helper is called with ``scores=`` and no
            # ``labels=`` -- confirm this matches its signature.
            rois_in_img = show_box_in_tensor.draw_boxes_with_categories(
                img_batch=input_img_batch, boxes=rois, scores=roi_scores)
            tf.summary.image('all_rpn_rois', rois_in_img)

            # second image: only the proposals whose score is >= 0.5
            score_gre_05 = tf.reshape(
                tf.where(tf.greater_equal(roi_scores, 0.5)), [-1])
            score_gre_05_rois = tf.gather(rois, score_gre_05)
            score_gre_05_score = tf.gather(roi_scores, score_gre_05)
            score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories(
                img_batch=input_img_batch, boxes=score_gre_05_rois,
                scores=score_gre_05_score)
            tf.summary.image('score_greater_05_rois', score_gre_05_in_img)
        # ----------------------------------------------------------------

    if self.is_training:
        # RPN targets are computed in Python (py_func) from the
        # ``gtboxes_h_batch`` ground truth and the anchors.
        with tf.variable_scope('sample_anchors_minibatch'):
            rpn_labels, rpn_bbox_targets = \
                tf.py_func(
                    anchor_target_layer,
                    [gtboxes_h_batch, img_shape, anchors],
                    [tf.float32, tf.float32])
            rpn_bbox_targets = tf.reshape(rpn_bbox_targets, [-1, 4])
            rpn_labels = tf.to_int32(rpn_labels, name="to_int32")
            rpn_labels = tf.reshape(rpn_labels, [-1])
            self.add_anchor_img_smry(input_img_batch, anchors, rpn_labels)

        # ----------------------------- add smry -----------------------------
        # RPN accuracy is measured only over anchors whose label is not -1.
        rpn_cls_category = tf.argmax(rpn_cls_prob, axis=1)
        kept_rpppn = tf.reshape(tf.where(tf.not_equal(rpn_labels, -1)), [-1])
        rpn_cls_category = tf.gather(rpn_cls_category, kept_rpppn)
        acc = tf.reduce_mean(
            tf.to_float(
                tf.equal(rpn_cls_category,
                         tf.to_int64(tf.gather(rpn_labels, kept_rpppn)))))
        tf.summary.scalar('ACC/rpn_accuracy', acc)

        # From here on ``rois`` is rebound to the sampled RoIs returned by
        # proposal_target_layer; the control dependency orders this sampling
        # after the RPN labels.
        with tf.control_dependencies([rpn_labels]):
            with tf.variable_scope('sample_RCNN_minibatch'):
                rois, labels, bbox_targets_h, bbox_targets_r = \
                    tf.py_func(proposal_target_layer,
                               [rois, gtboxes_h_batch, gtboxes_r_batch],
                               [tf.float32, tf.float32, tf.float32, tf.float32])
                rois = tf.reshape(rois, [-1, 4])
                labels = tf.to_int32(labels)
                labels = tf.reshape(labels, [-1])
                bbox_targets_h = tf.reshape(bbox_targets_h,
                                            [-1, 4 * (cfgs.CLASS_NUM + 1)])
                bbox_targets_r = tf.reshape(bbox_targets_r,
                                            [-1, 5 * (cfgs.CLASS_NUM + 1)])
                self.add_roi_batch_img_smry(input_img_batch, rois, labels)

    # ---------------------------------------------------------------------- #
    #                               Fast-RCNN                                #
    # ---------------------------------------------------------------------- #

    # 5. build Fast-RCNN
    bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
        feature_to_cropped=feature_to_cropped, rois=rois,
        img_shape=img_shape)
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]

    cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
    cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

    # ------------------------------- add smry -------------------------------
    if self.is_training:
        # both heads are scored against the same sampled ``labels``
        cls_category_h = tf.argmax(cls_prob_h, axis=1)
        fast_acc_h = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category_h, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

        cls_category_r = tf.argmax(cls_prob_r, axis=1)
        fast_acc_r = tf.reduce_mean(
            tf.to_float(tf.equal(cls_category_r, tf.to_int64(labels))))
        tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

    # 6. postprocess_fastrcnn
    if not self.is_training:
        final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
            rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h,
            img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
            rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r,
            img_shape=img_shape)
        return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r
    else:
        ''' when trian. We need build Loss '''
        # Training path: the loss is built first, then the same
        # post-processing as above so detections can be returned as well.
        loss_dict = self.build_loss(rpn_box_pred=rpn_box_pred,
                                    rpn_bbox_targets=rpn_bbox_targets,
                                    rpn_cls_score=rpn_cls_score,
                                    rpn_labels=rpn_labels,
                                    bbox_pred_h=bbox_pred_h,
                                    bbox_targets_h=bbox_targets_h,
                                    cls_score_h=cls_score_h,
                                    bbox_pred_r=bbox_pred_r,
                                    bbox_targets_r=bbox_targets_r,
                                    cls_score_r=cls_score_r,
                                    labels=labels)

        final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
            rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h,
            img_shape=img_shape)
        final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
            rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r,
            img_shape=img_shape)

        return final_boxes_h, final_scores_h, final_category_h, \
            final_boxes_r, final_scores_r, final_category_r, loss_dict
if __name__ == "__main__": print('number samples: {0}'.format(get_num_samples(tfrecord_dir))) # create local and global variables initializer group # image, filename, gtboxes_and_label, num_objects = reader_tfrecord(record_file=tfrecord_dir, # shortside_len=IMG_SHORT_SIDE_LEN, # is_training=True) filename_batch, image_batch, gtboxes_and_label_batch, num_objects_batch = dataset_tfrecord( record_file=tfrecord_dir, shortside_len=IMG_SHORT_SIDE_LEN, length_limitation=IMG_MAX_LENGTH, is_training=True) gtboxes_and_label_tensor = tf.reshape(gtboxes_and_label_batch, [-1, 5]) gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories( img_batch=image_batch, boxes=gtboxes_and_label_tensor[:, :-1], labels=gtboxes_and_label_tensor[:, -1]) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) with tf.Session() as sess: sess.run(init_op) # create Coordinator to manage the life period of multiple thread coord = tf.train.Coordinator() # Starts all queue runners collected in the graph to execute input queue operation # the step contain two operation:filename to filename queue and sample to sample queue threads = tf.train.start_queue_runners(coord=coord) try: if not coord.should_stop(): filename, image, gtboxes_and_label, gtbox_img = sess.run([ filename_batch, image_batch, gtboxes_and_label_batch, gtboxes_in_img
def train():
    """
    Build the RPN + Fast R-CNN training graph and run the training loop.

    The graph is created in a fresh ``tf.Graph``: input batch, shared
    backbone, RPN, Fast R-CNN head, losses, a momentum optimizer with a
    piecewise-constant learning rate, and the summaries. The loop runs
    ``cfgs.MAX_ITERATION`` steps, prints losses every 50 steps, writes
    summaries every 500 steps and saves checkpoints every 1000 steps after
    step 15000 and at the final step.

    The queue-runner threads are always stopped and joined, and the summary
    writer is always closed, even when a training step raises.

    :return: None
    """
    with tf.Graph().as_default():
        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True)

        with tf.name_scope('draw_gtboxes'):
            gtboxes_in_img = draw_box_with_color(
                img_batch,
                tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1],
                text=tf.shape(gtboxes_and_label_batch)[1])

        # ******************************************************************
        # *                           share net                            *
        # ******************************************************************
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # ******************************************************************
        # *                              rpn                               *
        # ******************************************************************
        # tf.squeeze(..., 0) drops the leading axis of the ground truth, so
        # this graph only works when that axis has size 1.
        rpn = build_rpn.RPN(
            net_name=cfgs.NET_NAME,
            inputs=img_batch,
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            is_training=True,
            share_head=cfgs.SHARE_HEAD,
            share_net=share_net,
            stride=cfgs.STRIDE,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            anchor_scales=cfgs.ANCHOR_SCALES,
            scale_factors=cfgs.SCALE_FACTORS,
            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
            level=cfgs.LEVEL,
            top_k_nms=cfgs.RPN_TOP_K_NMS,
            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
            # iou>=0.7 is positive box, iou< 0.3 is negative
            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
            remove_outside_anchors=False,  # whether remove anchors outside
            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

        rpn_location_loss, rpn_classification_loss = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # score > 0.5 is object
            rpn_object_boxes_indices = tf.reshape(
                tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes,
                                         rpn_object_boxes_indices)

            rpn_proposals_objcet_boxes_in_img = draw_box_with_color(
                img_batch, rpn_object_boxes,
                text=tf.shape(rpn_object_boxes)[0])
            rpn_proposals_boxes_in_img = draw_box_with_color(
                img_batch, rpn_proposals_boxes,
                text=tf.shape(rpn_proposals_boxes)[0])

        # ******************************************************************
        # *                           Fast RCNN                            *
        # ******************************************************************
        fast_rcnn = build_fast_rcnn.FastRCNN(
            img_batch=img_batch,
            feature_pyramid=rpn.feature_pyramid,
            rpn_proposals_boxes=rpn_proposals_boxes,
            rpn_proposals_scores=rpn_proposals_scores,
            img_shape=tf.shape(img_batch),
            roi_size=cfgs.ROI_SIZE,
            roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
            scale_factors=cfgs.SCALE_FACTORS,
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
            fast_rcnn_maximum_boxes_per_img=100,
            fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
            show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
            # show detections which score >= 0.6
            num_classes=cfgs.CLASS_NUM,
            fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
            fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
            # iou>0.5 is positive, iou<0.5 is negative
            fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
            use_dropout=False,
            weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
            is_training=True,
            level=cfgs.LEVEL)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(
                img_batch=img_batch,
                boxes=fast_rcnn_decode_boxes,
                labels=detection_category,
                scores=fast_rcnn_score)

        # train
        # ``added_loss`` is only reported; the optimized quantity is
        # ``total_loss`` as assembled by tf.losses.get_total_loss().
        added_loss = rpn_total_loss + fast_rcnn_total_loss
        total_loss = tf.losses.get_total_loss()

        global_step = tf.train.get_or_create_global_step()

        lr = tf.train.piecewise_constant(
            global_step,
            boundaries=[np.int64(20000), np.int64(40000)],
            values=[cfgs.LR, cfgs.LR / 10, cfgs.LR / 100])
        tf.summary.scalar('lr', lr)
        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

        train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)  # rpn_total_loss,

        # ******************************************************************
        # *                            Summary                             *
        # ******************************************************************
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/faster_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
        # rpn loss and image
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('loss/added_loss', added_loss)
        tf.summary.scalar('loss/total_loss', total_loss)

        tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
        tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_objcet_boxes_in_img)
        # learning_rate
        tf.summary.scalar('learning_rate', lr)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = restore_model.get_restorer(test=False)
        saver = tf.train.Saver(max_to_keep=10)

        config = tf.ConfigProto()
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)

            summary_writer = None
            try:
                summary_path = os.path.join(
                    cfgs.ROOT_PATH + 'output/{}'.format(cfgs.DATASET_NAME),
                    FLAGS.summary_path, cfgs.VERSION)
                mkdir(summary_path)
                summary_writer = tf.summary.FileWriter(summary_path,
                                                       graph=sess.graph)

                for step in range(cfgs.MAX_ITERATION):
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()

                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                        _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                        _fast_rcnn_total_loss, _added_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss,
                                  rpn_classification_loss, rpn_total_loss,
                                  fast_rcnn_location_loss,
                                  fast_rcnn_classification_loss,
                                  fast_rcnn_total_loss, added_loss, total_loss,
                                  train_op])

                    end = time.time()

                    if step % 50 == 0:
                        print(("{}: step{} image_name:{} "
                               "rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f} "
                               "fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f} "
                               "added_loss:{:.4f} | total_loss:{:.4f} | pre_cost_time:{:.4f}s")
                              .format(training_time, _global_step,
                                      str(_img_name_batch[0]),
                                      _rpn_location_loss,
                                      _rpn_classification_loss,
                                      _rpn_total_loss,
                                      _fast_rcnn_location_loss,
                                      _fast_rcnn_classification_loss,
                                      _fast_rcnn_total_loss,
                                      _added_loss, _total_loss,
                                      (end - start)))

                    if step % 500 == 0:
                        # evaluated with a separate run, i.e. on a fresh
                        # batch and without a training update
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, _global_step)
                        summary_writer.flush()

                    if (step > 15000 and step % 1000 == 0) or (step == cfgs.MAX_ITERATION - 1):
                        save_dir = os.path.join(
                            cfgs.ROOT_PATH + 'output/{}'.format(cfgs.DATASET_NAME),
                            FLAGS.trained_checkpoint, cfgs.VERSION)
                        mkdir(save_dir)

                        save_ckpt = os.path.join(
                            save_dir,
                            '{}_'.format(cfgs.DATASET_NAME) + str(_global_step) + 'model.ckpt')
                        saver.save(sess, save_ckpt)
                        print('Weights have been saved to {}.'.format(save_ckpt))

                print('Training done.')
            finally:
                # Always release the input threads and the event file, also
                # when a step above raised; otherwise the queue runners keep
                # running against a session that is about to close.
                if summary_writer is not None:
                    summary_writer.close()
                coord.request_stop()
                coord.join(threads)
def build_head_train_sample(self):
    """
    Sample the second-stage (head) training minibatch for each image.

    Every proposal is matched to the ground-truth box it overlaps most:
    iou >= cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD is object, anything lower
    is background. Positives and negatives are shuffled and sampled, then
    zero-padded up to cfgs.FAST_RCNN_MINIBATCH_SIZE rows. The per-image work
    is dispatched through boxes_utils.batch_slice.

    :return:
    minibatch_reference_proboxes: (batch_size, minibatch_size, 4) [y1, x1, y2, x2]
    minibatch_encode_gtboxes: (batch_size, minibatch_size, 4) [dy, dx, log(dh), log(dw)]
    object_mask: (batch_size, minibatch_size) 1 indicates object, 0 indicates not an object
    label: (batch_size, minibatch_size) class id of each sampled box
           (0 for background and for padding rows)
    """
    with tf.name_scope('build_head_train_sample'):

        def _shuffle_and_take(indices, count):
            # random subset of ``count`` entries of ``indices``
            shuffled = tf.random_shuffle(indices)
            return tf.slice(shuffled, begin=[0], size=[count])

        def batch_slice_build_sample(gtboxes_and_label, rpn_proposals_boxes):
            with tf.name_scope('select_pos_neg_samples'):
                gt_coords = tf.reshape(gtboxes_and_label[:, :-1], [-1, 4])
                gtboxes = tf.cast(gt_coords, tf.float32)
                gt_labels = tf.reshape(gtboxes_and_label[:, -1], [-1])
                gt_class_ids = tf.cast(gt_labels, tf.int32)

                # drop the all-zero rows of both inputs
                gtboxes, non_zeros = boxes_utils.trim_zeros_graph(
                    gtboxes, name="trim_gt_box")  # [M, 4]
                gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros)
                proposals, _ = boxes_utils.trim_zeros_graph(
                    rpn_proposals_boxes, name="trim_rpn_proposal_train")

                overlaps = iou.iou_calculate(proposals, gtboxes)  # [N, M]
                best_gt = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)  # [N, ]
                best_iou = tf.reduce_max(overlaps, axis=1)
                is_positive = tf.cast(
                    tf.greater_equal(best_iou,
                                     cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD),
                    tf.int32)

                matched_gtboxes = tf.gather(gtboxes, best_gt)  # [N, 4]
                gt_class_ids = tf.gather(gt_class_ids, best_gt)  # [N, ]
                object_mask = tf.cast(is_positive, tf.float32)  # [N, ]
                # background boxes get class id 0 so that they carry no
                # weight in the gradient
                gt_class_ids = gt_class_ids * is_positive

            with tf.name_scope('head_train_minibatch'):
                # choose the positive indices
                pos_idx = tf.reshape(
                    tf.where(tf.equal(object_mask, 1.)), [-1])
                num_pos = tf.minimum(
                    tf.shape(pos_idx)[0],
                    tf.cast(cfgs.FAST_RCNN_MINIBATCH_SIZE *
                            cfgs.FAST_RCNN_POSITIVE_RATE, tf.int32))
                pos_idx = _shuffle_and_take(pos_idx, num_pos)

                # choose the negative indices,
                # strictly keep the proportion of positive and negative at 1:3
                neg_idx = tf.reshape(
                    tf.where(tf.equal(object_mask, 0.)), [-1])
                total_for_ratio = tf.cast(
                    int(1. / cfgs.FAST_RCNN_POSITIVE_RATE) * num_pos,
                    tf.int32)
                num_neg = total_for_ratio - num_pos
                num_neg = tf.minimum(tf.shape(neg_idx)[0], num_neg)
                neg_idx = _shuffle_and_take(neg_idx, num_neg)

                chosen = tf.concat([pos_idx, neg_idx], axis=0)
                chosen_gtboxes = tf.gather(matched_gtboxes, chosen)
                chosen_proposals = tf.gather(proposals, chosen)

                # encode gtboxes
                encoded_gtboxes = encode_and_decode.encode_boxes(
                    unencode_boxes=chosen_gtboxes,
                    reference_boxes=chosen_proposals,
                    scale_factors=cfgs.BBOX_STD_DEV)
                object_mask = tf.gather(object_mask, chosen)
                gt_class_ids = tf.gather(gt_class_ids, chosen)

                # padding if necessary
                gap = tf.cast(
                    cfgs.FAST_RCNN_MINIBATCH_SIZE - (num_pos + num_neg),
                    dtype=tf.int32)
                zero_rows = tf.zeros((gap, 4))
                chosen_proposals = tf.concat(
                    [chosen_proposals, zero_rows], axis=0)
                encoded_gtboxes = tf.concat(
                    [encoded_gtboxes, zero_rows], axis=0)
                object_mask = tf.pad(object_mask, [(0, gap)])
                gt_class_ids = tf.pad(gt_class_ids, [(0, gap)])

            return chosen_proposals, encoded_gtboxes, object_mask, gt_class_ids

        sampled = boxes_utils.batch_slice(
            [self.gtboxes_and_label, self.rpn_proposals_boxes],
            batch_slice_build_sample,
            cfgs.BATCH_SIZE)
        minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids = sampled

        if cfgs.DEBUG:
            # visualise the first image only: ground truth and sampled proposals
            first_image = self.origin_image[0]
            gt_vision = draw_boxes_with_categories(
                first_image,
                self.gtboxes_and_label[0, :, :4],
                self.gtboxes_and_label[0, :, 4],
                cfgs.LABEL_TO_NAME)
            tf.summary.image("gt_vision", gt_vision)

            draw_bbox_train = draw_boxes_with_categories(
                self.origin_image[0],
                minibatch_reference_proboxes[0],
                gt_class_ids[0],
                cfgs.LABEL_TO_NAME)
            tf.summary.image("train_proposal", draw_bbox_train)

    return minibatch_reference_proboxes, minibatch_encode_gtboxes, object_mask, gt_class_ids
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch,
                                  gtboxes_h_batch):
    """
    Build the multi-level (C2 / C4) RPN + Fast-RCNN detection graph.

    An RPN head, an anchor set and a proposal set are created for every
    entry of ``self.level``; the per-level RoIs are then fed together to
    ``build_fastrcnn``, which yields the ``*_h`` and ``*_r`` predictions.

    :param input_img_batch: image tensor passed to ``build_base_network``
    :param gtboxes_r_batch: ground truth, reshaped to [-1, 6] and cast to
                            float32 when training
    :param gtboxes_h_batch: ground truth, reshaped to [-1, 5] and cast to
                            float32 when training
    :return: when not training:
             (final_boxes_h, final_scores_h, final_category_h,
              final_boxes_r, final_scores_r, final_category_r);
             when training the same six values followed by ``loss_dict``
             as returned by ``self.build_loss``
    """
    training = self.is_training

    if training:
        # ensure shape is [M, 5] and [M, 6]
        gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6])
        gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5])
        gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32)
        gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32)

    img_shape = tf.shape(input_img_batch)

    # 1. build base network; C2 is projected to 1024 channels by a 1x1 conv
    raw_c2, c4 = self.build_base_network(input_img_batch)
    c2 = slim.conv2d(raw_c2, num_outputs=1024, kernel_size=[1, 1],
                     stride=1, scope='build_C2_to_1024')
    self.feature_pyramid = {'C2': c2, 'C4': c4}

    # 2. build rpn, one head per pyramid level
    rpn_all_encode_boxes = {}
    rpn_all_boxes_scores = {}
    rpn_all_cls_score = {}
    with tf.variable_scope('build_rpn',
                           regularizer=slim.l2_regularizer(
                               cfgs.WEIGHT_DECAY)):
        for idx, level in enumerate(self.level):
            hidden = slim.conv2d(
                self.feature_pyramid[level], 512, [3, 3],
                trainable=training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=tf.nn.relu,
                scope='rpn_conv/3x3_{}'.format(level))
            cls_score = slim.conv2d(
                hidden, self.num_anchors_per_location[idx] * 2, [1, 1],
                stride=1, trainable=training,
                weights_initializer=cfgs.INITIALIZER,
                activation_fn=None,
                scope='rpn_cls_score_{}'.format(level))
            box_pred = slim.conv2d(
                hidden, self.num_anchors_per_location[idx] * 4, [1, 1],
                stride=1, trainable=training,
                weights_initializer=cfgs.BBOX_INITIALIZER,
                activation_fn=None,
                scope='rpn_bbox_pred_{}'.format(level))
            box_pred = tf.reshape(box_pred, [-1, 4])
            cls_score = tf.reshape(cls_score, [-1, 2])
            cls_prob = slim.softmax(
                cls_score, scope='rpn_cls_prob_{}'.format(level))

            rpn_all_cls_score[level] = cls_score   # raw scores
            rpn_all_boxes_scores[level] = cls_prob  # after softmax
            rpn_all_encode_boxes[level] = box_pred

    # 3. generate_anchors, one set per pyramid level
    anchors = {}
    per_level_settings = zip(self.level, self.base_anchor_size_list,
                             self.stride)
    for idx, (level, base_anchor_size, stride) in enumerate(per_level_settings):
        feature_map = self.feature_pyramid[level]
        fmap_height, fmap_width = tf.shape(feature_map)[1], tf.shape(feature_map)[2]
        fmap_height = tf.cast(fmap_height, tf.float32)
        fmap_width = tf.cast(fmap_width, tf.float32)
        level_anchors = anchor_utils.make_anchors(
            base_anchor_size=base_anchor_size,
            anchor_scales=self.anchor_scales[idx],
            anchor_ratios=self.anchor_ratios[idx],
            featuremap_height=fmap_height,
            featuremap_width=fmap_width,
            stride=stride,
            name="make_anchors_forRPN_{}".format(level))
        anchors[level] = tf.reshape(level_anchors, [-1, 4])

    # 4. postprocess rpn proposals. such as: decode, clip, NMS
    rois = {}
    with tf.variable_scope('postprocess_RPN'):
        for level in self.level:
            level_rois, level_scores = postprocess_rpn_proposals(
                rpn_bbox_pred=rpn_all_encode_boxes[level],
                rpn_cls_prob=rpn_all_boxes_scores[level],
                img_shape=img_shape,
                anchors=anchors[level],
                is_training=training)
            # rois shape [-1, 4]
            rois[level] = level_rois

            # ------------------------ add img smry ------------------------
            if training:
                all_rois_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=level_rois,
                    scores=level_scores)
                tf.summary.image('all_rpn_rois_{}'.format(level),
                                 all_rois_img)

                # second image: only the proposals whose score is >= 0.5
                confident = tf.reshape(
                    tf.where(tf.greater_equal(level_scores, 0.5)), [-1])
                confident_rois = tf.gather(level_rois, confident)
                confident_scores = tf.gather(level_scores, confident)
                confident_img = show_box_in_tensor.draw_boxes_with_categories(
                    img_batch=input_img_batch, boxes=confident_rois,
                    scores=confident_scores)
                tf.summary.image('score_greater_05_rois_{}'.format(level),
                                 confident_img)

    # training targets, per level for the RPN and concatenated for the heads
    rpn_labels = {}
    rpn_bbox_targets = {}
    labels_all = []
    bbox_targets_all_h = []
    bbox_targets_all_r = []
    if training:
        for level in self.level:
            with tf.variable_scope(
                    'sample_anchors_minibatch_{}'.format(level)):
                level_rpn_labels, level_rpn_targets = tf.py_func(
                    anchor_target_layer,
                    [gtboxes_h_batch, img_shape, anchors[level]],
                    [tf.float32, tf.float32])
                level_rpn_targets = tf.reshape(level_rpn_targets, [-1, 4])
                level_rpn_labels = tf.to_int32(
                    level_rpn_labels, name="to_int32_{}".format(level))
                level_rpn_labels = tf.reshape(level_rpn_labels, [-1])
                self.add_anchor_img_smry(input_img_batch, anchors[level],
                                         level_rpn_labels)

            # ------------------------ add to the dict ------------------------
            rpn_labels[level] = level_rpn_labels
            rpn_bbox_targets[level] = level_rpn_targets

            # ---------------------------- add smry ----------------------------
            # accuracy over the anchors whose label is not -1
            predicted = tf.argmax(rpn_all_boxes_scores[level], axis=1)
            kept = tf.reshape(
                tf.where(tf.not_equal(level_rpn_labels, -1)), [-1])
            predicted = tf.gather(predicted, kept)  # predictions
            expected = tf.to_int64(tf.gather(level_rpn_labels, kept))
            acc = tf.reduce_mean(tf.to_float(tf.equal(predicted, expected)))
            tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc)

            with tf.control_dependencies([rpn_labels[level]]):
                with tf.variable_scope(
                        'sample_RCNN_minibatch_{}'.format(level)):
                    sampled_rois, sampled_labels, targets_h, targets_r = \
                        tf.py_func(
                            proposal_target_layer,
                            [rois[level], gtboxes_h_batch, gtboxes_r_batch],
                            [tf.float32, tf.float32, tf.float32, tf.float32])
                    sampled_rois = tf.reshape(sampled_rois, [-1, 4])
                    sampled_labels = tf.to_int32(sampled_labels)
                    sampled_labels = tf.reshape(sampled_labels, [-1])
                    targets_h = tf.reshape(
                        targets_h, [-1, 4 * (cfgs.CLASS_NUM + 1)])
                    targets_r = tf.reshape(
                        targets_r, [-1, 5 * (cfgs.CLASS_NUM + 1)])
                    self.add_roi_batch_img_smry(input_img_batch,
                                                sampled_rois, sampled_labels)

                    # the sampled RoIs replace this level's proposals
                    rois[level] = sampled_rois
                    labels_all.append(sampled_labels)
                    bbox_targets_all_h.append(targets_h)
                    bbox_targets_all_r.append(targets_r)

        fast_labels = tf.concat(labels_all, axis=0)
        fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0)
        fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0)

    # ---------------------------------------------------------------------- #
    #                               Fast-RCNN                                #
    # ---------------------------------------------------------------------- #

    # 5. build Fast-RCNN
    # feature_to_cropped receives the feature maps of all levels
    bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn(
        feature_to_cropped=self.feature_pyramid,
        rois_all=rois,
        img_shape=img_shape)
    # bbox_pred shape: [-1, 4*(cls_num+1)].
    # cls_score shape: [-1, cls_num+1]

    # the "h" and "r" outputs are handled by separate, parallel steps
    cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h')
    cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r')

    # ------------------------------- add smry -------------------------------
    if training:
        predicted_h = tf.argmax(cls_prob_h, axis=1)
        fast_acc_h = tf.reduce_mean(
            tf.to_float(tf.equal(predicted_h, tf.to_int64(fast_labels))))
        tf.summary.scalar('ACC/fast_acc_h', fast_acc_h)

        predicted_r = tf.argmax(cls_prob_r, axis=1)
        fast_acc_r = tf.reduce_mean(
            tf.to_float(tf.equal(predicted_r, tf.to_int64(fast_labels))))
        tf.summary.scalar('ACC/fast_acc_r', fast_acc_r)

    # when training, the loss is built before the post-processing ops
    loss_dict = None
    if training:
        loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes,
                                    rpn_bbox_targets=rpn_bbox_targets,
                                    rpn_cls_score=rpn_all_cls_score,
                                    rpn_labels=rpn_labels,
                                    bbox_pred_h=bbox_pred_h,
                                    bbox_targets_h=fast_bbox_targets_h,
                                    cls_score_h=cls_score_h,
                                    bbox_pred_r=bbox_pred_r,
                                    bbox_targets_r=fast_bbox_targets_r,
                                    cls_score_r=cls_score_r,
                                    labels=fast_labels)

    # 6. postprocess_fastrcnn on the RoIs of all levels, stacked in level order
    rois = tf.concat([rois[level] for level in self.level], axis=0)
    final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h(
        rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h,
        img_shape=img_shape)
    final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r(
        rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r,
        img_shape=img_shape)

    if not training:
        return (final_boxes_h, final_scores_h, final_category_h,
                final_boxes_r, final_scores_r, final_category_r)
    return (final_boxes_h, final_scores_h, final_category_h,
            final_boxes_r, final_scores_r, final_category_r, loss_dict)
def train():
    """Build the RetinaNet training graph and run the training loop.

    Every setting (backbone name, dataset, batch size, learning-rate
    schedule, logging/summary/checkpoint intervals, output directories) is
    read from the ``cfgs`` module; the function takes no arguments and
    returns nothing.

    Side effects:
        * TensorBoard summaries are written to
          ``cfgs.SUMMARY_PATH/cfgs.VERSION``.
        * Checkpoints are written to ``cfgs.TRAINED_CKPT/cfgs.VERSION`` every
          ``cfgs.SAVE_WEIGHTS_INTE`` steps and after the last step.
        * Progress lines are printed to stdout.
    """
    retinanet = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        # One row per ground-truth box: every column but the last is passed
        # on as box geometry, the last column as the label (see the drawing
        # call below).
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])
        # The "v1d" ResNet variants additionally divide the image by
        # cfgs.PIXEL_STD.
        if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
            img_batch = img_batch / tf.constant([cfgs.PIXEL_STD])

    final_bbox, final_scores, final_category, loss_dict = \
        retinanet.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ------------------------------------------------------------ build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
    cls_loss = loss_dict['cls_loss']
    reg_loss = loss_dict['reg_loss']
    total_loss = cls_loss + reg_loss + weight_decay_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('RETINANET_LOSS/cls_loss', cls_loss)
    tf.summary.scalar('RETINANET_LOSS/reg_loss', reg_loss)
    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
        tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # Step-wise learning rate: LR, then LR/10, then LR/100 at the two
    # boundaries given by cfgs.DECAY_STEP.
    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ----------------------------------------------------- compute gradients
    gradients = retinanet.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = retinanet.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = retinanet.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        summary_writer = None

        # The finally clause guarantees that the input threads are stopped
        # and the event file is closed even when a training step raises.
        try:
            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            if not os.path.exists(summary_path):
                os.makedirs(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                is_summary_step = step % cfgs.SMRY_ITER == 0
                is_info_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                if is_summary_step:
                    # A summary step wins over an info step when both
                    # intervals hit on the same iteration.
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()
                elif is_info_step:
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()
                    _, global_stepnp, img_name, reg_loss_, cls_loss_, total_loss_ = \
                        sess.run([train_op, global_step, img_name_batch,
                                  reg_loss, cls_loss, total_loss])
                    end = time.time()
                    print(""" {}: step{} image_name:{} |\t reg_loss:{} |\t cls_loss:{} |\t total_loss:{} |per_cost_time:{}s"""
                          .format(training_time, global_stepnp, str(img_name[0]),
                                  reg_loss_, cls_loss_, total_loss_, (end - start)))
                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                is_last_step = step == cfgs.MAX_ITERATION - 1
                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or is_last_step:
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir,
                        '{}_'.format(cfgs.DATASET_NAME) + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            if summary_writer is not None:
                summary_writer.close()
            coord.request_stop()
            coord.join(threads)
def train():
    """Build the RPN + Fast R-CNN training graph and run the training loop.

    Configuration comes from ``cfgs`` (network and hyper-parameters) and
    ``FLAGS`` (output directories); the function takes no arguments and
    returns nothing.

    Side effects:
        * TensorBoard summaries go to ``FLAGS.summary_path/cfgs.VERSION``
          every 250 steps.
        * Checkpoints go to ``FLAGS.trained_checkpoint/cfgs.VERSION`` at step
          1000, every 2000 steps, and after the last step.
        * Every 2000 steps and after the last step ``valval.val`` is run and
          its result appended to ``../Loss/<NET_NAME>_<VERSION>/validation.xls``.
        * Progress lines are printed to stdout every 50 steps.
    """
    with tf.Graph().as_default():
        tf.set_random_seed(1234)

        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True,
                           is_val=False)

        with tf.name_scope('draw_gtboxes'):
            # Rows of 5 values per box; the last column is dropped before
            # drawing, and the box count is rendered as text.
            gtboxes_in_img = draw_box_with_color(
                img_batch,
                tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1],
                text=tf.shape(gtboxes_and_label_batch)[1])

        # ***************************************************************
        # *                          share net                          *
        # ***************************************************************
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # ***************************************************************
        # *                             rpn                             *
        # ***************************************************************
        rpn = build_rpn.RPN(
            net_name=cfgs.NET_NAME,
            inputs=img_batch,
            # squeeze(…, 0) drops the leading batch axis, which therefore
            # must have size 1.
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            is_training=True,
            share_head=cfgs.SHARE_HEAD,
            share_net=share_net,
            stride=cfgs.STRIDE,
            anchor_ratios=cfgs.ANCHOR_RATIOS,
            anchor_scales=cfgs.ANCHOR_SCALES,
            scale_factors=cfgs.SCALE_FACTORS,
            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
            level=cfgs.LEVEL,
            top_k_nms=cfgs.RPN_TOP_K_NMS,
            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
            # original author's note: iou >= 0.7 is a positive box,
            # iou < 0.3 is negative
            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
            remove_outside_anchors=False,  # whether remove anchors outside
            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

        # original author's note: rpn_score shape is [300, ]
        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()

        rpn_location_loss, rpn_classification_loss = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # Proposals scoring above 0.5 are treated as objects.
            # NOTE(review): the two images built here are not attached to any
            # summary in this function; they only add nodes to the graph.
            rpn_object_boxes_indices = tf.reshape(
                tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes,
                                         rpn_object_boxes_indices)

            rpn_proposals_objcet_boxes_in_img = draw_box_with_color(
                img_batch, rpn_object_boxes,
                text=tf.shape(rpn_object_boxes)[0])
            rpn_proposals_boxes_in_img = draw_box_with_color(
                img_batch, rpn_proposals_boxes,
                text=tf.shape(rpn_proposals_boxes)[0])

        # ***************************************************************
        # *                          Fast RCNN                          *
        # ***************************************************************
        fast_rcnn = build_fast_rcnn.FastRCNN(
            img_batch=img_batch,
            feature_pyramid=rpn.feature_pyramid,
            rpn_proposals_boxes=rpn_proposals_boxes,
            rpn_proposals_scores=rpn_proposals_scores,
            img_shape=tf.shape(img_batch),
            roi_size=cfgs.ROI_SIZE,
            roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
            scale_factors=cfgs.SCALE_FACTORS,
            gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
            fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
            fast_rcnn_maximum_boxes_per_img=100,
            fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
            # original author's note: show detections whose score >= 0.6
            show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
            num_classes=cfgs.CLASS_NUM,
            fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
            fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
            # original author's note: iou > 0.5 is positive, iou < 0.5 is negative
            fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
            use_dropout=False,
            weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
            is_training=True,
            level=cfgs.LEVEL)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = \
            fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(
                img_batch=img_batch,
                boxes=fast_rcnn_decode_boxes,
                labels=detection_category,
                scores=fast_rcnn_score)

        # train
        total_loss = slim.losses.get_total_loss()

        # Fetch (creating it if necessary) the global step tensor.
        global_step = slim.get_or_create_global_step()

        # Learning rate is halved every 5000 steps.
        lr = tf.train.exponential_decay(cfgs.LR,
                                        global_step,
                                        decay_steps=5000,
                                        decay_rate=1 / 2.,
                                        staircase=True)
        tf.summary.scalar('learning_rate', lr)

        optimizer = tf.train.AdamOptimizer(lr,
                                           beta1=cfgs.MOMENTUM,
                                           beta2=0.999,
                                           epsilon=1e-8,
                                           use_locking=False,
                                           name='Adam')

        # Create the op that computes and applies gradients and returns the loss.
        train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)

        # ***************************************************************
        # *                           Summary                           *
        # ***************************************************************
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/faster_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
        # rpn loss
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss',
                          fast_rcnn_classification_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('loss/total_loss', total_loss)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        restorer, restore_ckpt = restore_model.get_restorer()
        saver = tf.train.Saver(max_to_keep=16)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            summary_writer = None

            # The finally clause guarantees that the input threads are
            # stopped and the event file is closed even when a step raises.
            try:
                if restorer is not None:
                    restorer.restore(sess, restore_ckpt)
                    print('restore model')
                elif cfgs.NET_NAME == 'pvanet':
                    # Only pvanet falls back to loading weights through
                    # read_npy when there is nothing to restore.
                    read_npy.load_ckpt_weights(sess)

                summary_path = os.path.join(FLAGS.summary_path, cfgs.VERSION)
                mkdir(summary_path)
                summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

                metric_columns = ['Recall', 'Precision', 'mAP', 'F1_score']
                df = pd.DataFrame([], columns=metric_columns, index=[])

                for step in range(0, cfgs.MAX_ITERATION):
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

                    step_start = time.time()
                    _global_step, _img_name_batch, _rpn_location_loss, \
                        _rpn_classification_loss, _rpn_total_loss, \
                        _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                        _fast_rcnn_total_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss,
                                  rpn_classification_loss, rpn_total_loss,
                                  fast_rcnn_location_loss,
                                  fast_rcnn_classification_loss,
                                  fast_rcnn_total_loss, total_loss, train_op])
                    step_end = time.time()

                    if step % 50 == 0:
                        print(""" {}: step{} image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} | fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} | total_loss:{} |\t pre_cost_time:{}s"""
                              .format(training_time, _global_step,
                                      str(_img_name_batch[0]),
                                      _rpn_location_loss, _rpn_classification_loss,
                                      _rpn_total_loss, _fast_rcnn_location_loss,
                                      _fast_rcnn_classification_loss,
                                      _fast_rcnn_total_loss, _total_loss,
                                      (step_end - step_start)))

                    if step % 250 == 0:
                        # NOTE(review): this is a separate sess.run, so the
                        # summary is evaluated on a fresh forward pass rather
                        # than on the values of the training step above.
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, _global_step)
                        summary_writer.flush()

                    is_last_step = step == cfgs.MAX_ITERATION - 1
                    is_periodic_step = step > 0 and step % 2000 == 0

                    if is_periodic_step or step == 1000 or is_last_step:
                        save_dir = os.path.join(FLAGS.trained_checkpoint, cfgs.VERSION)
                        mkdir(save_dir)

                        save_ckpt = os.path.join(
                            save_dir, 'voc_' + str(_global_step) + 'model.ckpt')
                        saver.save(sess, save_ckpt)
                        print(' weights had been saved')

                    # Save validation-set metrics.
                    if is_periodic_step or is_last_step:
                        save_excel = os.path.abspath('../') + r'/Loss/' + \
                            cfgs.NET_NAME + r'_' + cfgs.VERSION
                        mkdir(save_excel)

                        new_index = np.append(df.index, [str(step)])
                        df2 = pd.DataFrame([valval.val(is_val=True)],
                                           columns=metric_columns)
                        # pd.concat replaces DataFrame.append, which was
                        # removed in pandas 2.0.
                        df = pd.concat([df, df2])
                        df.index = new_index
                        df.to_excel(save_excel + r'/validation.xls')
                        print('validation result had been saved')
            finally:
                if summary_writer is not None:
                    summary_writer.close()
                coord.request_stop()
                coord.join(threads)
def train():
    """Build the Faster R-CNN training graph and run the training loop.

    Every setting (backbone name, dataset, batch size, weight decay,
    learning-rate schedule, logging/summary/checkpoint intervals, output
    directories) is read from the ``cfgs`` module; the function takes no
    arguments and returns nothing.

    Side effects:
        * TensorBoard summaries are written to
          ``cfgs.SUMMARY_PATH/cfgs.VERSION``.
        * Checkpoints are written to ``cfgs.TRAINED_CKPT/cfgs.VERSION`` every
          ``cfgs.SAVE_WEIGHTS_INTE`` steps and after the last step.
        * Progress lines are printed to stdout.
    """
    # Step 1:
    # clw note: pass the backbone name (e.g. resnet_v1) and the is_training
    # flag to construct the base network object.
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    # clw note: tf.name_scope groups the ops created inside it under a common
    # name prefix, which keeps the graph's naming manageable.
    with tf.name_scope('get_batch'):
        # clw note: read one batch from the file/memory queues. The returned
        # tensors are the image names, the image data, the ground-truth box
        # coordinates with their labels, and the number of objects per image.
        # The original author notes that only batch_size=1 (one image per
        # step) is currently supported - TODO confirm against next_batch.
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        # clw note: the number of boxes is not known in advance, but each box
        # has 4 coordinates plus 1 label = 5 values, so -1 lets TensorFlow
        # infer the row count.
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # Step 2:
    # clw note: build the Faster R-CNN network.
    # slim.arg_scope(list_ops_or_scope, **kwargs) is a context manager
    # (decorated with @tf_contextlib.contextmanager) that installs **kwargs as
    # the default keyword arguments of every layer function in the list.
    # Inside the block the listed layers pick up these defaults unless a call
    # overrides them explicitly, e.g.
    #
    #   with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
    #                       stride=1, padding='VALID'):
    #       net = slim.conv2d(inputs, 32, [3, 3], stride=2, scope='Conv2d_1a_3x3')
    #       net = slim.conv2d(net, 32, [3, 3], scope='Conv2d_2a_3x3')
    #       net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='Conv2d_2b_3x3')
    #
    # This saves repeating the same arguments on every layer of a deep model.
    # Background on decorators: https://www.cnblogs.com/zzy-tf/p/9356883.html
    #
    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d,
                         slim.conv2d_in_plane,
                         slim.conv2d_transpose,
                         slim.separable_conv2d,
                         slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        # build_whole_detection_network assembles the whole architecture
        # (per the original author: backbone, RPN, pooling layer and the
        # following head). It returns the final boxes, scores and categories
        # plus a dict holding the individual losses.
        final_bbox, final_scores, final_category, loss_dict = \
            faster_rcnn.build_whole_detection_network(
                input_img_batch=img_batch,
                gtboxes_batch=gtboxes_and_label)

    # ------------------------------------------------------------ build loss
    # NOTE(review): a weights_regularizer is installed above, but the
    # regularization losses are not collected or added to total_loss here
    # (the original had that line commented out) - confirm this is intended.
    rpn_location_loss = loss_dict['rpn_loc_loss']
    rpn_cls_loss = loss_dict['rpn_cls_loss']
    rpn_total_loss = rpn_location_loss + rpn_cls_loss

    fastrcnn_cls_loss = loss_dict['fastrcnn_cls_loss']
    fastrcnn_loc_loss = loss_dict['fastrcnn_loc_loss']
    fastrcnn_total_loss = fastrcnn_cls_loss + fastrcnn_loc_loss

    # clw note: following the paper, the RPN (classification + regression)
    # error and the Fast R-CNN (classification + regression) error are summed
    # into one total loss for training.
    total_loss = rpn_total_loss + fastrcnn_total_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('RPN_LOSS/cls_loss', rpn_cls_loss)
    tf.summary.scalar('RPN_LOSS/location_loss', rpn_location_loss)
    tf.summary.scalar('RPN_LOSS/rpn_total_loss', rpn_total_loss)

    tf.summary.scalar('FAST_LOSS/fastrcnn_cls_loss', fastrcnn_cls_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_location_loss', fastrcnn_loc_loss)
    tf.summary.scalar('FAST_LOSS/fastrcnn_total_loss', fastrcnn_total_loss)

    tf.summary.scalar('LOSS/total_loss', total_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img = show_box_in_tensor.draw_boxes_with_categories_and_scores(
            img_batch=img_batch,
            boxes=final_bbox,
            labels=final_category,
            scores=final_scores)
        tf.summary.image('Compare/final_detection', detections_in_img)
        tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # Step-wise learning rate: LR, then LR/10, then LR/100 at the two
    # boundaries given by cfgs.DECAY_STEP.
    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)

    # clw note: choice of optimizer; others could be tried here, e.g.
    # tf.train.AdamOptimizer(1e-4).
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ----------------------------------------------------- compute gradients
    # clw note: minimize() is not used because it both computes the gradients
    # and applies them to the variables. To post-process the gradients:
    #   1. compute them (per the original author, get_gradients() wraps
    #      optimizer.compute_gradients(loss));
    #   2. transform them as needed;
    #   3. apply the result with apply_gradients().
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        # clw note: clipping by norm bounds the gradient norm, a common guard
        # against exploding gradients.
        with tf.name_scope('clip_gradients_YJR'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        summary_writer = None

        # The finally clause guarantees that the input threads are stopped
        # and the event file is closed even when a training step raises.
        try:
            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            if not os.path.exists(summary_path):
                os.makedirs(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                is_summary_step = step % cfgs.SMRY_ITER == 0
                is_info_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                if is_summary_step:
                    # A summary step wins over an info step when both
                    # intervals hit on the same iteration.
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()
                elif is_info_step:
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()
                    _, global_stepnp, img_name, rpnLocLoss, rpnClsLoss, rpnTotalLoss, \
                        fastrcnnLocLoss, fastrcnnClsLoss, fastrcnnTotalLoss, totalLoss = \
                        sess.run([train_op, global_step, img_name_batch,
                                  rpn_location_loss, rpn_cls_loss, rpn_total_loss,
                                  fastrcnn_loc_loss, fastrcnn_cls_loss,
                                  fastrcnn_total_loss, total_loss])
                    end = time.time()
                    print(""" {}: step{} image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} | fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} | total_loss:{} |\t per_cost_time:{}s"""
                          .format(training_time, global_stepnp, str(img_name[0]),
                                  rpnLocLoss, rpnClsLoss, rpnTotalLoss,
                                  fastrcnnLocLoss, fastrcnnClsLoss,
                                  fastrcnnTotalLoss, totalLoss, (end - start)))
                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                is_last_step = step == cfgs.MAX_ITERATION - 1
                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or is_last_step:
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            if summary_writer is not None:
                summary_writer.close()
            coord.request_stop()
            coord.join(threads)
def train():
    """Build the three-module (m1/m2/m3) detector graph and train it.

    Every setting (backbone name, dataset, batch size, weight decay,
    learning-rate schedule, logging/summary/checkpoint intervals, output
    directories) is read from the ``cfgs`` module; the function takes no
    arguments and returns nothing.

    Side effects:
        * TensorBoard summaries are written to
          ``cfgs.SUMMARY_PATH/cfgs.VERSION``.
        * Checkpoints are written to ``cfgs.TRAINED_CKPT/cfgs.VERSION`` every
          ``cfgs.SAVE_WEIGHTS_INTE`` steps and after the last step.
        * Progress lines are printed to stdout.
    """
    faster_rcnn = build_whole_network.DetectionNetwork(
        base_network_name=cfgs.NET_NAME, is_training=True)

    with tf.name_scope('get_batch'):
        img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
            next_batch(dataset_name=cfgs.DATASET_NAME,  # 'pascal', 'coco'
                       batch_size=cfgs.BATCH_SIZE,
                       shortside_len=cfgs.IMG_SHORT_SIDE_LEN,
                       is_training=True)
        # One row per ground-truth box: every column but the last is passed
        # on as box geometry, the last column as the label (see the drawing
        # call below).
        gtboxes_and_label = tf.reshape(gtboxes_and_label_batch, [-1, 5])

    biases_regularizer = tf.no_regularizer
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

    # list as many types of layers as possible, even if they are not used now
    with slim.arg_scope([slim.conv2d,
                         slim.conv2d_in_plane,
                         slim.conv2d_transpose,
                         slim.separable_conv2d,
                         slim.fully_connected],
                        weights_regularizer=weights_regularizer,
                        biases_regularizer=biases_regularizer,
                        biases_initializer=tf.constant_initializer(0.0)):
        result_dict, losses_dict = faster_rcnn.build_whole_detection_network(
            input_img_batch=img_batch,
            gtboxes_batch=gtboxes_and_label)

    # ------------------------------------------------------------ build loss
    weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())

    # Each of the three modules contributes a box loss and a class loss.
    bbox_loss_m1 = losses_dict['bbox_loss_m1']
    cls_loss_m1 = losses_dict['cls_loss_m1']
    total_loss_m1 = bbox_loss_m1 + cls_loss_m1

    bbox_loss_m2 = losses_dict['bbox_loss_m2']
    cls_loss_m2 = losses_dict['cls_loss_m2']
    total_loss_m2 = bbox_loss_m2 + cls_loss_m2

    bbox_loss_m3 = losses_dict['bbox_loss_m3']
    cls_loss_m3 = losses_dict['cls_loss_m3']
    total_loss_m3 = bbox_loss_m3 + cls_loss_m3

    total_loss = total_loss_m1 + total_loss_m2 + total_loss_m3 + weight_decay_loss

    # ----------------------------------------------------------- add summary
    tf.summary.scalar('SSH_M1_LOSS/cls_loss_m1', cls_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/bbox_loss_m1', bbox_loss_m1)
    tf.summary.scalar('SSH_M1_LOSS/total_loss_m1', total_loss_m1)

    tf.summary.scalar('SSH_M2_LOSS/cls_loss_m2', cls_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/bbox_loss_m2', bbox_loss_m2)
    tf.summary.scalar('SSH_M2_LOSS/total_loss_m2', total_loss_m2)

    tf.summary.scalar('SSH_M3_LOSS/cls_loss_m3', cls_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/bbox_loss_m3', bbox_loss_m3)
    tf.summary.scalar('SSH_M3_LOSS/total_loss_m3', total_loss_m3)

    tf.summary.scalar('LOSS/total_loss', total_loss)
    tf.summary.scalar('LOSS/regular_weights', weight_decay_loss)

    gtboxes_in_img = show_box_in_tensor.draw_boxes_with_categories(
        img_batch=img_batch,
        boxes=gtboxes_and_label[:, :-1],
        labels=gtboxes_and_label[:, -1])
    if cfgs.ADD_BOX_IN_TENSORBOARD:
        detections_in_img_m1 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=img_batch,
                boxes=result_dict['final_bbox_m1'],
                labels=result_dict['final_category_m1'],
                scores=result_dict['final_scores_m1'])
        tf.summary.image('Compare/final_detection_m1', detections_in_img_m1)

        detections_in_img_m2 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=img_batch,
                boxes=result_dict['final_bbox_m2'],
                labels=result_dict['final_category_m2'],
                scores=result_dict['final_scores_m2'])
        tf.summary.image('Compare/final_detection_m2', detections_in_img_m2)

        detections_in_img_m3 = \
            show_box_in_tensor.draw_boxes_with_categories_and_scores(
                img_batch=img_batch,
                boxes=result_dict['final_bbox_m3'],
                labels=result_dict['final_category_m3'],
                scores=result_dict['final_scores_m3'])
        tf.summary.image('Compare/final_detection_m3', detections_in_img_m3)

        tf.summary.image('Compare/gtboxes', gtboxes_in_img)

    # Step-wise learning rate: LR, then LR/10, then LR/100 at the two
    # boundaries given by cfgs.DECAY_STEP.
    global_step = slim.get_or_create_global_step()
    lr = tf.train.piecewise_constant(
        global_step,
        boundaries=[np.int64(cfgs.DECAY_STEP[0]), np.int64(cfgs.DECAY_STEP[1])],
        values=[cfgs.LR, cfgs.LR / 10., cfgs.LR / 100.])
    tf.summary.scalar('lr', lr)
    optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

    # ----------------------------------------------------- compute gradients
    gradients = faster_rcnn.get_gradients(optimizer, total_loss)

    # enlarge_gradients for bias
    if cfgs.MUTILPY_BIAS_GRADIENT:
        gradients = faster_rcnn.enlarge_gradients_for_bias(gradients)

    if cfgs.GRADIENT_CLIPPING_BY_NORM:
        with tf.name_scope('clip_gradients'):
            gradients = slim.learning.clip_gradient_norms(
                gradients, cfgs.GRADIENT_CLIPPING_BY_NORM)

    # train_op
    train_op = optimizer.apply_gradients(grads_and_vars=gradients,
                                         global_step=global_step)
    summary_op = tf.summary.merge_all()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    restorer, restore_ckpt = faster_rcnn.get_restorer()
    saver = tf.train.Saver(max_to_keep=30)

    # NOTE: gpu_options.allow_growth is deliberately left at its default
    # here; the original had that setting commented out.
    config = tf.ConfigProto()

    with tf.Session(config=config) as sess:
        sess.run(init_op)
        if restorer is not None:
            restorer.restore(sess, restore_ckpt)
            print('restore model')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        summary_writer = None

        # The finally clause guarantees that the input threads are stopped
        # and the event file is closed even when a training step raises.
        try:
            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            if not os.path.exists(summary_path):
                os.makedirs(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            for step in range(cfgs.MAX_ITERATION):
                is_summary_step = step % cfgs.SMRY_ITER == 0
                is_info_step = step % cfgs.SHOW_TRAIN_INFO_INTE == 0

                if is_summary_step:
                    # A summary step wins over an info step when both
                    # intervals hit on the same iteration.
                    _, global_stepnp, summary_str = sess.run(
                        [train_op, global_step, summary_op])
                    summary_writer.add_summary(summary_str, global_stepnp)
                    summary_writer.flush()
                elif is_info_step:
                    training_time = time.strftime(
                        '%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()
                    _, global_stepnp, img_name, totalLoss = sess.run(
                        [train_op, global_step, img_name_batch, total_loss])
                    end = time.time()
                    print(""" {}: step{} image_name:{} |\t total_loss:{} |\t per_cost_time:{}s"""
                          .format(training_time, global_stepnp, str(img_name[0]),
                                  totalLoss, (end - start)))
                else:
                    _, global_stepnp = sess.run([train_op, global_step])

                is_last_step = step == cfgs.MAX_ITERATION - 1
                if (step > 0 and step % cfgs.SAVE_WEIGHTS_INTE == 0) or is_last_step:
                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.makedirs(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, 'voc_' + str(global_stepnp) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print(' weights had been saved')
        finally:
            if summary_writer is not None:
                summary_writer.close()
            coord.request_stop()
            coord.join(threads)