def testNoGTBoundingBox(self):
    """A test where the image has no ground truth bounding boxes.

    A single random image is pushed through the model with a ground truth
    box count of zero. The test expects:
      * a location loss of exactly zero,
      * a strictly positive confidence loss,
      * a total loss (regularization losses excluded) equal to the sum of
        the location and confidence losses.
    """
    graph = tf.get_default_graph()
    with graph.as_default(), self.test_session() as sess:

        # No ground truth bounding boxes
        images = tf.random_uniform(
            [1, 299, 299, 3], minval=-1, maxval=1, dtype=tf.float32)
        batched_bboxes = tf.zeros([1, 5, 4])
        batched_num_bboxes = np.array([0])
        bbox_priors = tf.random_uniform(
            [NUM_BBOX_LOCATIONS, 4], minval=0, maxval=1, dtype=tf.float32)

        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 0.001,
            'variables_collections': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
            'is_training': True
        }

        # Set activation_fn and parameters for batch_norm.
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(0.00004),
                biases_regularizer=slim.l2_regularizer(0.00004)):

            # The third return value (the inception variables) is not needed
            # by this test.
            locs, confs, _ = model.build(
                inputs=images,
                num_bboxes_per_cell=5,
                reuse=False,
                scope=''
            )

            location_loss, confidence_loss = loss.add_loss(
                locations=locs,
                confidences=confs,
                batched_bboxes=batched_bboxes,
                batched_num_bboxes=batched_num_bboxes,
                bbox_priors=bbox_priors,
                location_loss_alpha=1.0
            )

        # Regularization terms are left out so that the total is comparable
        # to the plain sum of the two detection losses.
        total_loss = slim.losses.get_total_loss(
            add_regularization_losses=False)

        # tf.initialize_all_variables() is deprecated in favour of
        # tf.global_variables_initializer().
        sess.run(tf.global_variables_initializer())

        # Fetch all three values in a single run so that they come from the
        # same random inputs.
        location_value, confidence_value, total_value = sess.run(
            [location_loss, confidence_loss, total_loss])

        self.assertEqual(location_value, 0)
        self.assertGreater(confidence_value, 0)
        # Compare floats with a tolerance rather than with `==`.
        self.assertAllClose(location_value + confidence_value, total_value)
def testBuildLoss(self):
    """Build the model and add the loss.

    Only graph construction is exercised (all inputs are placeholders and
    nothing is run in the session). The test checks that both tensors
    returned by `loss.add_loss` are registered in the graph's
    `tf.GraphKeys.LOSSES` collection.
    """
    graph = tf.get_default_graph()
    with graph.as_default(), self.test_session():

        # Just placeholders
        images = tf.placeholder(tf.float32, [1, 299, 299, 3])
        batched_bboxes = tf.placeholder(tf.float32, [1, 5, 4])
        batched_num_bboxes = tf.placeholder(tf.int32, [1, 1])
        bbox_priors = tf.placeholder(tf.float32, [NUM_BBOX_LOCATIONS, 4])

        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 0.001,
            'variables_collections': [tf.GraphKeys.MOVING_AVERAGE_VARIABLES],
            'is_training': True
        }

        # Set activation_fn and parameters for batch_norm.
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(0.00004),
                biases_regularizer=slim.l2_regularizer(0.00004)):

            # The third return value (the inception variables) is not needed
            # by this test.
            locs, confs, _ = model.build(
                inputs=images,
                num_bboxes_per_cell=5,
                reuse=False,
                scope=''
            )

            location_loss, confidence_loss = loss.add_loss(
                locations=locs,
                confidences=confs,
                batched_bboxes=batched_bboxes,
                batched_num_bboxes=batched_num_bboxes,
                bbox_priors=bbox_priors,
                location_loss_alpha=1.0
            )

        # assertIn reports the missing member and the collection on failure,
        # unlike assertTrue(x in y).
        registered_losses = graph.get_collection(tf.GraphKeys.LOSSES)
        self.assertIn(location_loss, registered_losses)
        self.assertIn(confidence_loss, registered_losses)
def train(tfrecords, bbox_priors, logdir, cfg, pretrained_model_path=None,
          fine_tune=False, extract_feats=False, trainable_scopes=None,
          use_moving_averages=False, restore_moving_averages=False):
    """Build the detection training graph and run the slim training loop.

    Args:
      tfrecords (list) : tfrecord files holding the training examples
      bbox_priors (np.array) : prior bounding boxes, passed to the loss
      logdir (str) : directory handed to `slim.learning.train`; when
        `extract_feats` is set the feature cache is also written beneath it
      cfg (EasyDict) : configuration. NOTE: when `extract_feats` is set the
        random augmentation options of `cfg` are modified in place.
      pretrained_model_path (str) : path to a pretrained Inception Network
      fine_tune (bool) : build the fine-tunable model and train only its
        detection variables. Ignored when `extract_feats` is set, which
        always uses the fine-tunable model.
      extract_feats (bool) : extract the Inception features once, cache them
        under `logdir`, and feed the cached features to the model.
      trainable_scopes : forwarded to `filter_trainable_variables` to
        restrict the variables that are trained (exact format is defined by
        that helper).
      use_moving_averages (bool) : forwarded to `get_init_function`.
      restore_moving_averages (bool) : forwarded to `get_init_function`.
    """
    tf.logging.set_verbosity(tf.logging.DEBUG)

    if extract_feats:
        # Turn off the random augmentations before extracting features
        # (presumably so that the cached features correspond to
        # un-augmented images -- confirm against extract_features).
        cfg.DO_RANDOM_BBOX_SHIFT = 0
        cfg.DO_RANDOM_CROP = 0
        cfg.DO_RANDOM_FLIP_LEFT_RIGHT = False

        feature_cache_dir = os.path.join(logdir, "feature_cache")
        # Marker file: its existence means the cache has been created, and
        # its content is the "h w c" shape of the cached features.
        feature_cache_file = os.path.join(logdir, "feature_cache_created")
        if not os.path.exists(feature_cache_dir):
            os.makedirs(feature_cache_dir)
        if not os.path.exists(feature_cache_file):
            h, w, c = extract_features(
                tfrecords, pretrained_model_path,
                [INCEPTION_FEATURE_LAYER_NAME], cfg, feature_cache_dir)
            with open(feature_cache_file, 'w') as f:
                f.write("%d %d %d" % (h, w, c))

        # Every regular file in the cache directory is a feature tfrecord.
        candidate_paths = [os.path.join(feature_cache_dir, name)
                           for name in os.listdir(feature_cache_dir)]
        tfrecords_feat = [path for path in candidate_paths
                          if os.path.isfile(path)]

        # split() without an argument tolerates stray whitespace/newlines.
        with open(feature_cache_file) as f:
            feat_shape = [int(dim) for dim in f.read().split()]

    graph = tf.Graph()

    with graph.as_default():

        # Create a variable to count the number of train() calls.
        global_step = slim.get_or_create_global_step()

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (cfg.NUM_TRAIN_EXAMPLES / cfg.BATCH_SIZE)
        decay_steps = int(num_batches_per_epoch * cfg.NUM_EPOCHS_PER_DELAY)

        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(
            learning_rate=cfg.INITIAL_LEARNING_RATE,
            global_step=global_step,
            decay_steps=decay_steps,
            decay_rate=cfg.LEARNING_RATE_DECAY_FACTOR,
            staircase=cfg.LEARNING_RATE_STAIRCASE
        )

        # Create an optimizer that performs gradient descent.
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=lr,
            decay=cfg.RMSPROP_DECAY,
            momentum=cfg.RMSPROP_MOMENTUM,
            epsilon=cfg.RMSPROP_EPSILON
        )

        batched_images, batched_bboxes, batched_num_bboxes, image_ids, batched_filenames = inputs.input_nodes(
            tfrecords=tfrecords,
            max_num_bboxes=cfg.MAX_NUM_BBOXES,
            num_epochs=None,
            batch_size=cfg.BATCH_SIZE,
            num_threads=cfg.NUM_INPUT_THREADS,
            capacity=cfg.QUEUE_CAPACITY,
            min_after_dequeue=cfg.QUEUE_MIN,
            add_summaries=True,
            shuffle_batch=True,
            cfg=cfg
        )

        # Snapshot the summaries registered by the input pipeline. This has
        # to happen AFTER the input nodes are built: the graph is brand new,
        # so a snapshot taken earlier is always empty and the input
        # summaries would never be merged into summary_op.
        input_summaries = list(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # `detection_vars` stays None when the whole network is trained.
        detection_vars = None
        if extract_feats:
            # The boxes, counts and ids now come from the feature records.
            batched_features, batched_bboxes, batched_num_bboxes, image_ids = inputs.input_nodes_precomputed_features(
                tfrecords=tfrecords_feat,
                max_num_bboxes=cfg.MAX_NUM_BBOXES,
                num_epochs=None,
                batch_size=cfg.BATCH_SIZE,
                num_threads=cfg.NUM_INPUT_THREADS,
                capacity=cfg.QUEUE_CAPACITY,
                min_after_dequeue=cfg.QUEUE_MIN,
                shuffle_batch=True,
                cfg=cfg,
                feat_shape=feat_shape
            )
            locs, confs, inception_vars, detection_vars = build_finetunable_model(
                cfg, inputs=batched_images, feature_inputs=batched_features)
        elif fine_tune:
            locs, confs, inception_vars, detection_vars = build_finetunable_model(
                cfg, inputs=batched_images)
        else:
            locs, confs, inception_vars = build_fully_trainable_model(
                batched_images, cfg)

        if detection_vars is None:
            trainable_vars = tf.trainable_variables()
        else:
            # Train only the detection variables that are actually trainable.
            trainable_var_names = set(
                v.op.name for v in tf.trainable_variables())
            trainable_vars = [var for var_name, var in detection_vars.items()
                              if var_name in trainable_var_names]

        location_loss, confidence_loss = loss.add_loss(
            locations=locs,
            confidences=confs,
            batched_bboxes=batched_bboxes,
            batched_num_bboxes=batched_num_bboxes,
            bbox_priors=bbox_priors,
            location_loss_alpha=cfg.LOCATION_LOSS_ALPHA
        )

        total_loss = slim.losses.get_total_loss()

        # Track the moving averages of all trainable variables.
        # At test time we'll restore all variables with the average value
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated than need be but we
        # employ this for backward-compatibility with our previous models.
        ema = tf.train.ExponentialMovingAverage(
            decay=cfg.MOVING_AVERAGE_DECAY,
            num_updates=global_step
        )
        # Averaging all model variables makes it easier to restore for eval
        # and detect purposes (whether you use the fine_tune flag or not).
        variables_to_average = slim.get_model_variables()
        maintain_averages_op = ema.apply(variables_to_average)
        # Registered as an update op so it is picked up when the train op is
        # created below.
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, maintain_averages_op)

        trainable_vars = filter_trainable_variables(
            trainable_vars, trainable_scopes)

        train_op = slim.learning.create_train_op(
            total_loss, optimizer, variables_to_train=trainable_vars)

        # Summary operations
        summary_op = tf.summary.merge([
            tf.summary.scalar('total_loss', total_loss),
            tf.summary.scalar('location_loss', location_loss),
            tf.summary.scalar('confidence_loss', confidence_loss),
            tf.summary.scalar('learning_rate', lr)
        ] + input_summaries)

        sess_config = tf.ConfigProto(
            log_device_placement=False,
            allow_soft_placement=True,
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=cfg.SESSION_CONFIG.PER_PROCESS_GPU_MEMORY_FRACTION
            )
        )

        # Save all variables
        saver = tf.train.Saver(
            max_to_keep=cfg.MAX_TO_KEEP,
            keep_checkpoint_every_n_hours=cfg.KEEP_CHECKPOINT_EVERY_N_HOURS
        )

        # Run training.
        slim.learning.train(
            train_op, logdir,
            init_fn=get_init_function(
                logdir, pretrained_model_path, fine_tune, inception_vars,
                use_moving_averages, restore_moving_averages, ema),
            number_of_steps=cfg.NUM_TRAIN_ITERATIONS,
            save_summaries_secs=cfg.SAVE_SUMMARY_SECS,
            save_interval_secs=cfg.SAVE_INTERVAL_SECS,
            saver=saver,
            session_config=sess_config,
            summary_op=summary_op,
            log_every_n_steps=cfg.LOG_EVERY_N_STEPS
        )