def main(_):
    """Build the evaluation graph and hand it to slim's evaluation loop.

    Creates ``FLAGS.eval_log_dir`` when it does not exist, builds the dataset,
    model and input pipeline with ``is_training=False`` and evaluates the
    checkpoints found under ``FLAGS.train_dir``.

    Args:
      _: Unused positional argument.
    """
    eval_dir = FLAGS.eval_log_dir
    if not tf.gfile.Exists(eval_dir):
        tf.gfile.MakeDirs(eval_dir)

    dataset = common_flags.create_dataset(FLAGS.dataset_name,
                                          FLAGS.dataset_split_name)
    model = common_flags.create_model(num_classes=FLAGS.num_classes)
    data = data_provider.get_data(
        dataset,
        FLAGS.model_name,
        FLAGS.batch_size,
        is_training=False,
        height=FLAGS.height,
        width=FLAGS.width)

    # The second value returned next to the logits is not used here.
    logits, _ = model.create_model(
        data.images, num_classes=FLAGS.num_classes, is_training=False)
    eval_ops = model.create_summary(data, logits, is_training=False)
    slim.get_or_create_global_step()

    # Let the process grow its GPU memory on demand.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True

    loop_kwargs = dict(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.train_dir,
        logdir=eval_dir,
        eval_op=eval_ops,
        num_evals=FLAGS.num_evals,
        eval_interval_secs=FLAGS.eval_interval_secs,
        max_number_of_evaluations=FLAGS.number_of_steps,
        session_config=session_config)
    slim.evaluation.evaluation_loop(**loop_kwargs)
def _create_image_encoder(preprocess_fn, factory_fn, image_shape,
                          batch_size=32, session=None, checkpoint_path=None,
                          loss_mode="cosine"):
    """Build a feature-extraction graph and return a callable that runs it.

    Args:
      preprocess_fn: Callable applied to every image as
        ``preprocess_fn(image, is_training=False)``.
      factory_fn: Callable building the network; called with the preprocessed
        batch plus ``l2_normalize`` and ``reuse=None``. Its first return value
        is used as the feature tensor.
      image_shape: Tuple with the shape of one image (it is concatenated to
        ``(None,)``, so it has to be a tuple).
      batch_size: Batch size handed to ``_run_in_batches``.
      session: Session to run in; a new ``tf.Session`` is created when None.
      checkpoint_path: Optional checkpoint to restore variables from.
      loss_mode: Features are L2-normalised only when this equals "cosine".

    Returns:
      A function ``encoder(data_x)`` returning a float32 array of shape
      ``(len(data_x), feature_dim)``.
    """
    batched_shape = (None, ) + image_shape
    image_var = tf.placeholder(tf.uint8, batched_shape)
    float_images = tf.cast(image_var, tf.float32)

    def _preprocess(image):
        return preprocess_fn(image, is_training=False)

    preprocessed_image_var = tf.map_fn(_preprocess, float_images)

    feature_var, _ = factory_fn(
        preprocessed_image_var,
        l2_normalize=(loss_mode == "cosine"),
        reuse=None)
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session = tf.Session()

    if checkpoint_path is not None:
        slim.get_or_create_global_step()
        restore_op, restore_feed = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(restore_op, feed_dict=restore_feed)

    def encoder(data_x):
        def _extract(feed):
            return session.run(feature_var, feed_dict=feed)

        features = np.zeros((len(data_x), feature_dim), np.float32)
        _run_in_batches(_extract, {image_var: data_x}, features, batch_size)
        return features

    return encoder
def main(_):
    """Evaluate the model on ``FLAGS.split_name`` with slim's evaluation loop.

    Creates ``FLAGS.eval_log_dir`` when missing, builds the model without
    augmentation, attaches loss and summaries, and evaluates the checkpoints
    found under ``FLAGS.train_log_dir``. The session is configured with zero
    GPU devices.

    Args:
      _: Unused positional argument.
    """
    log_dir = FLAGS.eval_log_dir
    if not tf.gfile.Exists(log_dir):
        tf.gfile.MakeDirs(log_dir)

    dataset = common_flags.create_dataset(split_name=FLAGS.split_name)
    model = common_flags.create_model(
        dataset.num_char_classes,
        dataset.max_sequence_length,
        dataset.num_of_views,
        dataset.null_code)
    data = data_provider.get_data(
        dataset,
        FLAGS.batch_size,
        augment=False,
        central_crop_size=common_flags.get_crop_size())

    endpoints = model.create_base(data.images, labels_one_hot=None)
    # The return value of create_loss is not needed here.
    model.create_loss(data, endpoints)
    eval_ops = model.create_summaries(
        data, endpoints, dataset.charset, is_training=False)
    slim.get_or_create_global_step()

    cpu_only_config = tf.ConfigProto(device_count={"GPU": 0})
    loop_kwargs = dict(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.train_log_dir,
        logdir=log_dir,
        eval_op=eval_ops,
        num_evals=FLAGS.num_batches,
        eval_interval_secs=FLAGS.eval_interval_secs,
        max_number_of_evaluations=FLAGS.number_of_steps,
        session_config=cpu_only_config)
    slim.evaluation.evaluation_loop(**loop_kwargs)
def setUp(self):
    """Create the test data, the expected accuracy and the model tensors."""
    super(EvaluationTest, self).setUp()

    num_classes, batch_size = 8, 16
    raw_inputs, raw_labels = GenerateTestData(num_classes, batch_size)

    self._expected_accuracy = GroundTruthAccuracy(
        raw_inputs, raw_labels, batch_size)
    self._global_step = slim.get_or_create_global_step()

    self._inputs = tf.constant(raw_inputs, dtype=tf.float32)
    self._labels = tf.constant(raw_labels, dtype=tf.int64)

    model_outputs = TestModel(self._inputs)
    self._predictions, self._scale = model_outputs
def get_train_op(self, loss):
    """Creates a training op.

    The learning rate follows a staircase exponential decay driven by the
    global step; the optimizer is selected by `self._config.learning.optimizer`.

    Args:
      loss: A float32 `Tensor` representing the total training loss.

    Returns:
      train_op: A slim.learning.create_train_op train_op.

    Raises:
      ValueError: If specified optimizer isn't supported.
    """
    # The variables to train are defined in the subclass.
    assert self.variables_to_train

    learning_config = self._config.learning
    base_rate = float(learning_config.learning_rate)

    # Learning rate schedule.
    step = slim.get_or_create_global_step()
    learning_rate = tf.train.exponential_decay(
        base_rate,
        step,
        learning_config.decay_steps,
        learning_config.decay_factor,
        staircase=True)

    # Optimizer selection.
    opt_type = learning_config.optimizer
    if opt_type == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate)
    elif opt_type == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    elif opt_type == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate, momentum=0.9, epsilon=1.0, decay=0.9)
    else:
        raise ValueError('Unsupported optimizer %s' % opt_type)

    if self._config.use_tpu:
        optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

    # Ops registered in UPDATE_OPS are passed along with the train op.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    return slim.learning.create_train_op(
        loss,
        optimizer=optimizer,
        variables_to_train=self.variables_to_train,
        update_ops=pending_updates)
def train(self, num_episodes):
    """Run `num_episodes` training episodes inside a supervised session.

    A `tf.train.Supervisor` writing to `self.logdir` manages the session and
    saves the model every 20 seconds. After each episode the reward is stored
    in the 100-slot ring buffer `self.train_rewards` and progress is printed.
    A final checkpoint is written once all episodes are done.

    Args:
      num_episodes: Number of episodes to run.
    """
    global_step = slim.get_or_create_global_step()
    sv = tf.train.Supervisor(logdir=self.logdir,
                             save_model_secs=20,
                             global_step=global_step)
    with sv.managed_session('') as sess:
        for _ in range(num_episodes):
            ep_num_steps, ep_reward, ep_loss = self.runEpisode(
                sess, is_training=True)
            self.train_rewards[self.current_episode % 100] = ep_reward
            self.current_episode += 1

            # Single-argument print calls produce the same text under both
            # the Python 2 print statement and the Python 3 print function.
            print("Episode {} has finshed in {} steps".format(
                self.current_episode, ep_num_steps))
            print(" Reward: {:10} Loss: {:.6f} Epsilon: {:.3f}".format(
                ep_reward, ep_loss, self.epsilon))
            running_average = (sum(self.train_rewards) /
                               min(100, self.current_episode))
            print("Running average reward for the last 100 episodes: "
                  "{}".format(running_average))

        # NOTE(review): the final save is placed after the episode loop;
        # confirm against the intended layout.
        # Save under the supervisor's checkpoint prefix (inside the log
        # directory) instead of using the directory itself as the prefix, so
        # the final checkpoint sits next to the periodically saved ones.
        sv.saver.save(sess, sv.save_path, global_step=sv.global_step)
def warm_start_training(self, sess):
    """Restores variables into `sess` to resume or warm-start training.

    The restore is performed immediately; nothing is returned.

    Resolution order:
      1. If `self.train_dir` already contains a checkpoint, all global
         variables (including the global step) are restored from it.
      2. Otherwise, if `self.checkpoint_path` is set, the model variables whose
         op name does not start with one of the comma-separated prefixes in
         `self.checkpoint_exclude_scopes` are restored from it. When
         `self.checkpoint_path` is a directory, its latest checkpoint is used
         and the global step is restored as well.
      3. Otherwise nothing is restored.

    Args:
      sess: The session in which the restore ops are run.

    Returns:
      None.

    Raises:
      ValueError: If `self.checkpoint_path` is a directory that contains no
        checkpoint.
    """
    ckpt = tf.train.latest_checkpoint(self.train_dir)
    if ckpt:
        # The global step is restored as well.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        saver.restore(sess, ckpt)
        print('restore from the checkpoint{0}'.format(ckpt))
        return

    if self.checkpoint_path is None:
        return None

    exclusions = ()
    if self.checkpoint_exclude_scopes:
        exclusions = tuple(
            scope.strip()
            for scope in self.checkpoint_exclude_scopes.split(','))

    all_variables = slim.get_model_variables()
    if tf.gfile.IsDirectory(self.checkpoint_path):
        all_variables.append(slim.get_or_create_global_step())
        checkpoint_path = tf.train.latest_checkpoint(self.checkpoint_path)
        if checkpoint_path is None:
            # Fail with a clear message instead of passing None on to the
            # checkpoint reader.
            raise ValueError(
                'No checkpoint found in directory %s' % self.checkpoint_path)
    else:
        checkpoint_path = self.checkpoint_path

    # str.startswith accepts a tuple of prefixes; an empty tuple matches
    # nothing, so every variable is kept when there are no exclusions.
    variables_to_restore = [
        var for var in all_variables
        if not var.op.name.startswith(exclusions)
    ]

    slim.assign_from_checkpoint_fn(checkpoint_path,
                                   variables_to_restore)(sess)
    return
def _get_init_fn(self):
    """Builds the function that warm-starts training from a checkpoint.

    Returns None when `self.checkpoint_path` is unset or when `self.train_dir`
    already contains a checkpoint (the warm-start checkpoint is then ignored).

    Returns:
      The callable produced by `slim.assign_from_checkpoint_fn`, or None.
    """
    if self.checkpoint_path is None:
        return None

    # A checkpoint in train_dir takes precedence over --checkpoint_path.
    if tf.train.latest_checkpoint(self.train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % self.train_dir)
        return None

    excluded_prefixes = ()
    if self.checkpoint_exclude_scopes:
        excluded_prefixes = tuple(
            scope.strip()
            for scope in self.checkpoint_exclude_scopes.split(','))

    candidates = slim.get_model_variables()
    if self.fine_tune_fe:
        candidates.append(slim.get_or_create_global_step())

    # An empty prefix tuple matches nothing, so every candidate is kept.
    variables_to_restore = [
        var for var in candidates
        if not var.op.name.startswith(excluded_prefixes)
    ]

    source = self.checkpoint_path
    if tf.gfile.IsDirectory(source):
        checkpoint_path = tf.train.latest_checkpoint(source)
    else:
        checkpoint_path = source

    tf.logging.info('Fine-tuning from %s' % checkpoint_path)
    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=self.ignore_missing_vars)
def create_summary_op(self, sess, logdir):
    '''
    Sets up summary writing and returns a function that records one summary.

    Args:
        sess: Tensorflow session used to evaluate the summaries
        logdir: Directory for saving summaries (created when missing)
    Returns:
        A function summary_writer(states, actions, targets, reward, length,
        epsilon) that evaluates all merged summaries and writes them at the
        current global step
    '''
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    writer = tf.summary.FileWriter(logdir, sess.graph)

    def _scalar_input(name):
        return tf.placeholder(name=name, shape=(), dtype=tf.float32)

    # Per-episode statistics are fed in through scalar placeholders.
    episode_reward = _scalar_input('episode_reward')
    episode_length = _scalar_input('episode_length')
    episode_epsilon = _scalar_input('epsilon')

    tf.summary.scalar('reward', episode_reward)
    tf.summary.scalar('episode_length', episode_length)
    tf.summary.scalar('epsilon', episode_epsilon)

    merged = tf.summary.merge_all()
    global_step = slim.get_or_create_global_step()

    def summary_writer(states, actions, targets, reward, length, epsilon):
        feeds = (
            (self.states, states),
            (self.actions, actions),
            (self.targets, targets),
            (episode_reward, reward),
            (episode_length, length),
            (episode_epsilon, epsilon),
        )
        # Every value is squeezed before being fed.
        feed_dict = {tensor: np.squeeze(value) for tensor, value in feeds}
        summary, step = sess.run([merged, global_step], feed_dict=feed_dict)
        writer.add_summary(summary, step)

    return summary_writer
def main(_):
    """Parse the command line, build the eval graph and run the eval loop.

    Supported options: ``-i <input>``, ``-o <output>`` and ``-h`` (print usage
    and exit). The output defaults to ``caption.txt`` in the current working
    directory.

    ``split_name``, ``batch_size``, ``master``, ``train_log_dir``,
    ``eval_log_dir``, ``num_batches``, ``eval_interval_secs`` and
    ``number_of_steps`` are not defined in this function; they are expected to
    exist at module level.

    Args:
      _: Unused positional argument.
    """
    # Join with the path separator; plain concatenation glued the file name
    # onto the last directory component.
    tmp_capStore = os.path.join(os.path.abspath('.'), 'caption.txt')

    opts, args = getopt.getopt(sys.argv[1:], "hi:o:")
    # NOTE(review): input_file and output_file are parsed but not used further
    # in this function — confirm whether they should be wired in.
    input_file = ""
    output_file = tmp_capStore
    for op, value in opts:
        if op == "-i":
            input_file = value
        elif op == "-o":
            output_file = value
        elif op == "-h":
            usage()
            sys.exit()

    dataset = utils.create_dataset(split_name=split_name)
    model = utils.create_model(dataset.num_char_classes,
                               dataset.max_sequence_length,
                               dataset.num_of_views,
                               dataset.null_code)
    data = data_provider.get_data(dataset,
                                  batch_size,
                                  augment=False,
                                  central_crop_size=utils.get_crop_size())
    endpoints = model.create_base(data.images, labels_one_hot=None)
    model.create_loss(data, endpoints)
    eval_ops = model.create_summaries(data,
                                      endpoints,
                                      dataset.charset,
                                      is_training=False)
    slim.get_or_create_global_step()

    # The session is configured with zero GPU devices.
    session_config = tf.ConfigProto(device_count={"GPU": 0})
    slim.evaluation.evaluation_loop(
        master=master,
        checkpoint_dir=train_log_dir,
        logdir=eval_log_dir,
        eval_op=eval_ops,
        num_evals=num_batches,
        eval_interval_secs=eval_interval_secs,
        max_number_of_evaluations=number_of_steps,
        session_config=session_config)
def _create_image_encoder(preprocess_fn, factory_fn, image_shape,
                          batch_size=32, session=None, checkpoint_path=None,
                          loss_mode="cosine"):
    """Construct the encoder graph and wrap it in a batch-wise callable.

    Args:
      preprocess_fn: Per-image preprocessing, invoked with
        ``is_training=False``.
      factory_fn: Network constructor; receives the preprocessed images and
        the ``l2_normalize`` / ``reuse`` keyword arguments. Only its first
        return value (the features) is used.
      image_shape: Shape tuple of a single image.
      batch_size: Number of images processed per ``_run_in_batches`` step.
      session: Optional existing session. When None, a session with GPU
        memory growth enabled is created.
      checkpoint_path: Optional checkpoint from which variables are restored.
      loss_mode: "cosine" turns on L2 normalisation of the features.

    Returns:
      ``encoder(data_x)``, which returns a ``(len(data_x), feature_dim)``
      float32 array.
    """
    image_var = tf.placeholder(tf.uint8, (None, ) + image_shape)
    images_as_float = tf.cast(image_var, tf.float32)
    preprocessed_image_var = tf.map_fn(
        lambda image: preprocess_fn(image, is_training=False),
        images_as_float)

    use_l2_norm = loss_mode == "cosine"
    network_outputs = factory_fn(
        preprocessed_image_var, l2_normalize=use_l2_norm, reuse=None)
    feature_var, _ = network_outputs
    feature_dim = feature_var.get_shape().as_list()[-1]

    if session is None:
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session = tf.Session(config=session_config)

    if checkpoint_path is not None:
        slim.get_or_create_global_step()
        assign_op, assign_feed = slim.assign_from_checkpoint(
            checkpoint_path, slim.get_variables_to_restore())
        session.run(assign_op, feed_dict=assign_feed)

    def encoder(data_x):
        result = np.zeros((len(data_x), feature_dim), np.float32)
        run_batch = lambda feed: session.run(feature_var, feed_dict=feed)
        _run_in_batches(run_batch, {image_var: data_x}, result, batch_size)
        return result

    return encoder
def build_training_net(self, features, timestamps, mmsis):
    """Build the net in training mode and return its optimizer and trainer.

    Args:
      features: Passed through to `self._build_net`.
      timestamps: Passed to `self._build_net` and to the objective's trainer.
      mmsis: Passed to `self._build_net` and to the objective's trainer.

    Returns:
      A `TrainNetInfo` holding an Adam optimizer (exponentially decayed
      learning rate) and a one-element list with the trainer of the fishing
      localisation objective.
    """
    self._build_net(features, timestamps, mmsis, True)

    localisation_trainer = self.fishing_localisation_objective.build_trainer(
        timestamps, mmsis)

    step = slim.get_or_create_global_step()
    decayed_rate = tf.train.exponential_decay(
        self.initial_learning_rate,
        step,
        self.decay_examples,
        self.learning_decay_rate)
    adam = tf.train.AdamOptimizer(learning_rate=decayed_rate)

    return TrainNetInfo(adam, [localisation_trainer])
def build_training_net(self, features, timestamps, mmsis):
    """Build the model in training mode and collect one trainer per objective.

    Args:
      features: Passed through to `self._build_model`.
      timestamps: Passed to `self._build_model` and to every trainer.
      mmsis: Passed to `self._build_model` and to every trainer.

    Returns:
      A `TrainNetInfo` holding an Adam optimizer whose learning rate decays
      exponentially with the global step, and the list of trainers (one per
      entry of `self.training_objectives`, in order).
    """
    self._build_model(features, timestamps, mmsis, is_training=True)

    # Iterate the objectives directly rather than indexing via range(len()).
    trainers = [
        objective.build_trainer(timestamps, mmsis)
        for objective in self.training_objectives
    ]

    learning_rate = tf.train.exponential_decay(
        self.initial_learning_rate,
        slim.get_or_create_global_step(),
        self.decay_examples,
        self.learning_decay_rate)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    return TrainNetInfo(optimizer, trainers)
def cnn_model(learning_rate):
    """Build a fresh graph holding a small MNIST CNN and its Adam train op.

    Args:
      learning_rate: Learning rate given to `slim.optimize_loss`.

    Returns:
      A tuple `(handles, train_op, graph)` where `handles` is a dict with the
      keys 'x', 'y', 'accuracy', 'summary' and 'mnist'.
    """
    # NOTE(review): scope nesting below was reconstructed; confirm that the
    # layers up to the logits belong inside both arg_scopes.
    with tf.Graph().as_default() as graph:
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

        x = tf.placeholder(tf.float32, shape=[None, 784])
        y = tf.placeholder(tf.float32, shape=[None, 10])
        images = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', images, 10)

        layer_defaults = slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            normalizer_fn=slim.batch_norm,
            activation_fn=tf.nn.relu)
        pool_defaults = slim.arg_scope([slim.max_pool2d], padding='SAME')
        with layer_defaults:
            with pool_defaults:
                net = slim.conv2d(images, 32, [5, 5])
                # Histogram of the first model variable under the 'Conv' scope.
                first_conv_vars = tf.get_collection(
                    tf.GraphKeys.MODEL_VARIABLES, 'Conv')
                tf.summary.histogram('conv_weights', first_conv_vars[0])
                net = slim.max_pool2d(net, [2, 2])
                net = slim.conv2d(net, 64, [5, 5])
                net = slim.max_pool2d(net, [2, 2])
                net = slim.flatten(net)
                net = slim.fully_connected(net, 1024)
                logits = slim.fully_connected(net, 10, activation_fn=None)
                # Created only so the graph contains an op named 'output'.
                tf.nn.softmax(logits, name='output')

        with tf.name_scope('loss'):
            loss = slim.losses.softmax_cross_entropy(logits, y)
            tf.summary.scalar('loss', loss)

        train_op = slim.optimize_loss(
            loss,
            slim.get_or_create_global_step(),
            learning_rate=learning_rate,
            optimizer='Adam')

        with tf.name_scope('accuracy'):
            is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

        summary = tf.summary.merge_all()

    handles = {
        'x': x,
        'y': y,
        'accuracy': accuracy,
        'summary': summary,
        'mnist': mnist
    }
    return handles, train_op, graph
def main(_):
    """Build the CMAP network and train it through `slim.learning.train`.

    The training loop uses `DAGGER_train_step` as its step function; the
    global step is advanced by a separate assign-add op that is handed to the
    step function rather than by the optimizer.

    Args:
      _: Unused positional argument.
    """

    def _readout(target):
        # Min-max rescale over the first two axes.
        upper = tf.reduce_max(target, [0, 1], keep_dims=True)
        lower = tf.reduce_min(target, [0, 1], keep_dims=True)
        return (target - lower) / (upper - lower)

    tf.reset_default_graph()

    env = environment.get_game_environment(
        FLAGS.maps,
        multiproc=FLAGS.multiproc,
        random_goal=FLAGS.random_goal,
        random_spawn=FLAGS.random_spawn)
    exp = expert.Expert()
    net = CMAP()

    estimate_images = []
    for estimate in net.intermediate_tensors['estimate_map_list']:
        estimate_images.append(_readout(estimate[0, -1, :, :, 0]))

    value_images = []
    for value in tf.unstack(net.intermediate_tensors['value_map'], axis=1):
        value_images.append(_readout(value[0, :, :, 0]))

    step_history = tf.placeholder(tf.string, name='step_history')
    step_history_op = tf.summary.text(
        'game/step_history', step_history, collections=['game'])

    global_step = slim.get_or_create_global_step()
    update_global_step_op = tf.assign_add(global_step, 1)

    optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    gradients = optimizer.compute_gradients(net.output_tensors['loss'])
    gradient_names = [variable.name for _, variable in gradients]
    gradient_summary_op = [
        tf.reduce_mean(tf.abs(gradient)) for gradient, _ in gradients
    ]
    train_op = optimizer.apply_gradients(gradients)

    # Checkpoints and summaries are written more often in debug mode.
    period_secs = 60 if FLAGS.debug else 300

    step_kwargs = dict(
        env=env,
        exp=exp,
        net=net,
        update_global_step_op=update_global_step_op,
        step_history=step_history,
        step_history_op=step_history_op,
        gradient_names=gradient_names,
        gradient_summary_op=gradient_summary_op,
        estimate_maps=estimate_images,
        value_maps=value_images)

    slim.learning.train(
        train_op=train_op,
        logdir=FLAGS.logdir,
        global_step=global_step,
        train_step_fn=DAGGER_train_step,
        train_step_kwargs=step_kwargs,
        number_of_steps=FLAGS.num_games,
        save_interval_secs=period_secs,
        save_summaries_secs=period_secs)
def main(_):
    """Rebuild the model graph and re-save the latest checkpoint.

    Restores the latest checkpoint from `FLAGS.input_dir` and saves it again
    under `FLAGS.output_dir + '/model'` using a saver created with
    `write_version=2`.

    Args:
      _: Unused positional argument.
    """
    if not tf.gfile.Exists(FLAGS.output_dir):
        tf.gfile.MakeDirs(FLAGS.output_dir)

    assert FLAGS.model is not None
    assert FLAGS.model_type in ('vanilla', 'act', 'act_early_stopping', 'sact')
    assert FLAGS.dataset in ('imagenet', 'cifar')

    batch_size = 1
    if FLAGS.dataset == 'imagenet':
        height, width, num_classes = 224, 224, 1001
    elif FLAGS.dataset == 'cifar':
        height, width, num_classes = 32, 32, 10

    # Random inputs: only the graph structure matters for re-saving.
    images = tf.random_uniform((batch_size, height, width, 3))
    model = utils.split_and_int(FLAGS.model)

    # Define the model.
    if FLAGS.dataset == 'imagenet':
        arg_scope = imagenet_model.resnet_arg_scope(is_training=False)
        with slim.arg_scope(arg_scope):
            logits, end_points = imagenet_model.get_network(
                images, model, num_classes, model_type=FLAGS.model_type)
    elif FLAGS.dataset == 'cifar':
        arg_scope = cifar_model.resnet_arg_scope(is_training=False)
        with slim.arg_scope(arg_scope):
            logits, end_points = cifar_model.resnet(
                images,
                model=model,
                num_classes=num_classes,
                model_type=FLAGS.model_type)

    tf_global_step = slim.get_or_create_global_step()

    checkpoint_path = tf.train.latest_checkpoint(FLAGS.input_dir)
    assert checkpoint_path is not None

    saver = tf.train.Saver(write_version=2)
    output_prefix = FLAGS.output_dir + '/model'
    with tf.Session() as sess:
        saver.restore(sess, checkpoint_path)
        saver.save(sess, output_prefix, global_step=tf_global_step)
def build_training_net(self, features, timestamps, mmsis):
    """Build the model for training and return its optimizer and trainers.

    The learning-rate decay is driven by `global_step * self.batch_size`
    instead of the raw global step.

    Args:
      features: Passed to `self.build_model`.
      timestamps: Passed to every objective's `build_trainer`.
      mmsis: Passed to every objective's `build_trainer`.

    Returns:
      A `TrainNetInfo` with a momentum optimizer and one trainer per entry of
      `self.objectives`.
    """
    self.build_model(tf.constant(True), features)

    trainers = [
        objective.build_trainer(timestamps, mmsis)
        for objective in self.objectives
    ]

    examples_seen = slim.get_or_create_global_step() * self.batch_size
    decayed_rate = tf.train.exponential_decay(
        self.initial_learning_rate,
        examples_seen,
        self.decay_examples,
        self.learning_decay_rate)
    momentum_optimizer = tf.train.MomentumOptimizer(decayed_rate,
                                                    self.momentum)

    return TrainNetInfo(momentum_optimizer, trainers)
def __init__(self,
             model_func,
             num_labels,
             input_shape,
             test_in=None,
             optimizer='adam',
             emb_size=128,
             dropout_keep_prob=1,
             augmentation_function=None):
    """Initialize SemisupModel class.

    Stores the configuration and creates an evaluation graph for the provided
    model_func.

    Args:
      model_func: Model function. It should receive a tensor of images as
        the first argument, along with the 'is_training' flag.
      num_labels: Number of target classes.
      input_shape: List, containing input images shape in form
        [height, width, channel_num].
      test_in: None or a tensor holding test images. If None, a placeholder
        will be created.
      optimizer: Stored as `self.optimizer`.
      emb_size: Stored as `self.emb_size`.
      dropout_keep_prob: Stored as `self.dropout_keep_prob`.
      augmentation_function: Stored as `self.augmentation_function`.
    """
    self.num_labels = num_labels

    # Global step and the moving average that is tied to it.
    self.step = slim.get_or_create_global_step()
    self.ema = tf.train.ExponentialMovingAverage(0.99, self.step)

    # Plain configuration attributes.
    self.emb_size = emb_size
    self.test_batch_size = 100
    self.model_func = model_func
    self.augmentation_function = augmentation_function
    self.optimizer = optimizer
    self.dropout_keep_prob = dropout_keep_prob

    # Fall back to a placeholder when the caller supplies no test tensor.
    if test_in is None:
        test_in = tf.placeholder(np.float32, [None] + input_shape, 'test_in')
    self.test_in = test_in

    self.test_emb = self.image_to_embedding(self.test_in, is_training=False)
    self.test_logit = self.embedding_to_logit(self.test_emb,
                                              is_training=False)
def _init_model(self):
    '''
    Build the training graph: inputs, losses, variable groups and train op.
    :return: None
    '''
    self.global_step = slim.get_or_create_global_step()

    # Input placeholders for images and integer labels.
    image_shape = [None, self.input_size, self.input_size, self.input_channel]
    self.batch_data = tf.placeholder(dtype=tf.float32,
                                     shape=image_shape,
                                     name='input_images')
    self.batch_label = tf.placeholder(dtype=tf.int64,
                                      shape=[None],
                                      name='input_labels')

    # The image batch is split into two equal halves along the batch axis.
    self.input_data, self.gt_input_data = tf.split(self.batch_data, 2, axis=0)

    # One-hot labels.
    self.labels = slim.one_hot_encoding(self.batch_label, self.class_nums)

    # Predictions and losses.
    self._predict_drgan_multipie()
    self._loss_gan_multipie()
    self._loss_compute()
    self.summary_train = tf.summary.merge_all()

    # Group the trainable variables by the name fragment they contain.
    trainable = tf.trainable_variables()

    def _with_fragment(fragment):
        return [var for var in trainable if fragment in var.name]

    self.varsg_decoder = _with_fragment('decoding')
    self.varsg_merge = _with_fragment('merging')
    self.varsd = _with_fragment('discriminator')
    self.fc_add = _with_fragment('recognition_resnet_fc')
    self.inception = _with_fragment('Inception-ResNet-v1')
    self.vard_fr = _with_fragment('resnet_yd')

    self.varfr_all = self.vard_fr + self.fc_add
    # Same list object as self.inception.
    self.inception_vars = self.inception
    self.init_vars = (self.varsg_merge + self.varsg_decoder + self.vard_fr +
                      self.varsd)
    self.varsg = self.varsg_merge + self.varsg_decoder + self.varfr_all

    self._get_train_op(self.global_step)
def __init__(self, learning_rate, num_actions):
    """Build a two-layer softmax policy network with a return-weighted loss.

    Args:
      learning_rate: Learning rate for the Adam optimizer.
      num_actions: Number of units of the softmax output layer.
    """
    # Inputs. Note that `(None)` is plain None, i.e. an unconstrained shape.
    self.states = tf.placeholder(name='states',
                                 shape=(None, 4),
                                 dtype=tf.float32)
    self.returns = tf.placeholder(name='returns',
                                  shape=(None),
                                  dtype=tf.float32)
    self.actions = tf.placeholder(name='chosen_action',
                                  shape=(None),
                                  dtype=tf.int32)

    with tf.variable_scope('policy'):
        self.fc = slim.fully_connected(self.states, 16)
        # Output layer: one probability per action.
        self.output = slim.fully_connected(self.fc,
                                           num_actions,
                                           activation_fn=tf.nn.softmax)

    # Pick the probability of the action taken in every row by indexing the
    # flattened output: flat_index = row * number_of_outputs + action.
    batch_size = tf.shape(self.states)[0]
    row_index = tf.range(batch_size)
    outputs_per_row = tf.shape(self.output)[1]
    action_ids = row_index * outputs_per_row + self.actions
    flat_probabilities = tf.reshape(self.output, [-1])
    chosen_actions = tf.gather(flat_probabilities, action_ids)

    # Log-probability of the chosen actions, weighted by the returns.
    eligibility = tf.log(chosen_actions)
    self.loss = -tf.reduce_mean(self.returns * eligibility)

    # Minimising the negated objective is gradient ascent on the likelihood
    # of the chosen actions. Only variables under 'policy' are updated.
    optimizer = tf.train.AdamOptimizer(learning_rate)
    policy_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'policy')
    grads_and_vars = optimizer.compute_gradients(self.loss, policy_vars)
    self.global_step = slim.get_or_create_global_step()
    self.train_op = optimizer.apply_gradients(grads_and_vars,
                                              self.global_step)

    # Summaries.
    tf.summary.histogram('last_hidden', self.fc)
    tf.summary.histogram('action_probs', self.output)
    tf.summary.scalar('policy_loss', self.loss)
def main(_):
    """Train FlowNet for a FLAGS.max_steps."""
    with tf.Graph().as_default():
        imgs_0, imgs_1, flows = flownet_tools.get_data(FLAGS.datadir)

        # Image summary of the inputs as loaded.
        flownet.image_summary(imgs_0, imgs_1, "A_input", flows)

        # Augmentation.
        imgs_0, imgs_1, flows = apply_augmentation(imgs_0, imgs_1, flows)

        # Model.
        calc_flows = architectures.flownet_s(imgs_0, imgs_1, flows)

        # Image summary of the computed flows.
        flownet.image_summary(None, None, "E_result", calc_flows)

        global_step = slim.get_or_create_global_step()
        train_op = flownet.create_train_op(global_step)

        # Let the process grow its GPU memory on demand.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        checkpoint_saver = tf_saver.Saver(
            max_to_keep=FLAGS.max_checkpoints,
            keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours)

        train_kwargs = dict(
            logdir=FLAGS.logdir + '/train',
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            summary_op=tf.summary.merge_all(),
            log_every_n_steps=FLAGS.log_every_n_steps,
            trace_every_n_steps=FLAGS.trace_every_n_steps,
            session_config=session_config,
            saver=checkpoint_saver,
            number_of_steps=FLAGS.max_steps)
        slim.learning.train(train_op, **train_kwargs)
def run(self):
    """Evaluate one checkpoint on `self.split_name` and report the time spent.

    `self.checkpoint_path` may be a checkpoint file or a directory; for a
    directory its latest checkpoint is evaluated.
    """
    self.parse_param()
    tf.logging.set_verbosity(tf.logging.INFO)

    net = TLModel()
    slim.get_or_create_global_step()

    # Only the first and third values returned by get_input are used.
    batch = self.get_input(self.split_name,
                           is_training=False,
                           batch_size=self.batch_size)
    net.input, _, net.labels, _ = batch
    net.build_eval_graph()

    # Enough batches to cover every sample once.
    num_batches = math.ceil(self.dataset.num_samples / float(self.batch_size))

    checkpoint_file = self.checkpoint_path
    if tf.gfile.IsDirectory(self.checkpoint_path):
        checkpoint_file = tf.train.latest_checkpoint(self.checkpoint_path)
    tf.logging.info('Evaluating checkpoint_path={}, split={}'.format(
        checkpoint_file, self.split_name))

    logdir = './logs/evals/' + self.split_name

    started_at = time.time()
    slim.evaluation.evaluate_once(
        master='',
        checkpoint_path=checkpoint_file,
        logdir=logdir,
        num_evals=num_batches,
        eval_op=net.names_to_updates,
        variables_to_restore=slim.get_variables_to_restore())
    elapsed = time.time() - started_at

    # Report the time spent.
    print('Time spent : %.3f seconds.' % elapsed)
    print('Time spent per BATCH: %.3f seconds.' % (elapsed / num_batches))
def __init__(self, model_func, num_labels, input_shape, test_in=None,
             test_label=None):
    """Initialize SemisupModel class.

    Creates an evaluation graph for the provided model_func, including a mean
    softmax cross-entropy test loss.

    Args:
      model_func: Model function. It should receive a tensor of images as
        the first argument, along with the 'is_training' flag.
      num_labels: Number of target classes.
      input_shape: List, containing input images shape in form
        [height, width, channel_num].
      test_in: None or a tensor holding test images. If None, a placeholder
        will be created.
      test_label: None or a tensor holding integer test labels. If None, an
        int32 placeholder will be created.
    """
    self.num_labels = num_labels
    self.step = slim.get_or_create_global_step()
    self.ema = tf.train.ExponentialMovingAverage(0.99, self.step)

    self.test_batch_size = 100
    self.model_func = model_func

    # `test_label` used to be read from an undefined name whenever `test_in`
    # was supplied; both inputs now fall back to a placeholder independently.
    if test_in is None:
        test_in = tf.placeholder(np.float32, [None] + input_shape, 'test_in')
    if test_label is None:
        test_label = tf.placeholder(np.int32, [None], 'test_label')
    self.test_in = test_in
    self.test_label = test_label

    self.test_emb = self.image_to_embedding(self.test_in, is_training=False)
    self.test_logit = self.embedding_to_logit(self.test_emb,
                                              is_training=False)

    # One-hot depth equals the size of the last logit dimension.
    one_hot_labels = tf.one_hot(self.test_label,
                                self.test_logit.get_shape()[-1])
    self.test_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels,
                                                logits=self.test_logit))
def summary_writer_op(sess, logdir, policy_net, value_net):
    '''
    Merges all summaries and returns a function that writes one summary.

    Args:
        sess: Session used to evaluate the merged summaries
        logdir: Directory for the summary files (created when missing)
        policy_net: Object exposing the `states`, `actions` and `returns`
            placeholders
        value_net: Object exposing the `states` and `targets` placeholders
    Returns:
        A function summary_writer(states, actions, returns, targets, reward,
        length) that writes the merged summary at the current global step
    '''
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    writer = tf.summary.FileWriter(logdir, sess.graph)

    # Scalar placeholders carrying the per-episode statistics.
    scalar_spec = dict(shape=(), dtype=tf.float32)
    episode_reward = tf.placeholder(name='episode_reward', **scalar_spec)
    episode_length = tf.placeholder(name='episode_length', **scalar_spec)

    tf.summary.scalar('reward', episode_reward)
    tf.summary.scalar('episode_length', episode_length)

    merged = tf.summary.merge_all()
    global_step = slim.get_or_create_global_step()

    def summary_writer(states, actions, returns, targets, reward, length):
        feed_dict = {}
        feed_dict[policy_net.states] = states
        feed_dict[policy_net.actions] = actions
        feed_dict[policy_net.returns] = returns
        feed_dict[value_net.states] = states
        feed_dict[value_net.targets] = targets
        feed_dict[episode_reward] = reward
        feed_dict[episode_length] = length

        summary, step = sess.run([merged, global_step], feed_dict=feed_dict)
        writer.add_summary(summary, step)

    return summary_writer
def get_restorer(self):
    """Build the saver used to restore weights and pick the checkpoint path.

    If a checkpoint exists under `cfgs.TRAINED_CKPT/cfgs.VERSION`, training
    resumes from it: with `cfgs.RESTORE_FROM_RPN` set, only the model
    variables whose name does not start with 'FastRCNN_Head' (plus the global
    step) are restored, otherwise a default saver is used.

    If no such checkpoint exists, the model variables whose name starts with
    `self.base_network_name` are restored from
    `cfgs.PRETRAINED_CKPT/<base_network_name>.ckpt`.

    :return: tuple `(restorer, checkpoint_path)`
    """
    checkpoint_path = tf.train.latest_checkpoint(
        os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION))

    if checkpoint_path is not None:
        if cfgs.RESTORE_FROM_RPN:
            print('___restore from rpn___')
            model_variables = slim.get_model_variables()
            restore_variables = [
                var for var in model_variables
                if not var.name.startswith('FastRCNN_Head')
            ] + [slim.get_or_create_global_step()]
            for var in restore_variables:
                print(var.name)
            restorer = tf.train.Saver(restore_variables)
        else:
            restorer = tf.train.Saver()
        print("model restore from {0}".format(checkpoint_path))
    else:
        # Map checkpoint names (the op name) to the backbone variables.
        restore_variables = {
            var.op.name: var
            for var in slim.get_model_variables()
            if var.name.startswith(self.base_network_name)
        }
        for key, item in restore_variables.items():
            print("var_in_graph: ", item.name)
            print("var_in_ckpt: ", key)
        restorer = tf.compat.v1.train.Saver(restore_variables)
        checkpoint_path = os.path.join(cfgs.PRETRAINED_CKPT,
                                       self.base_network_name + '.ckpt')
        print("model restore from {0}".format(checkpoint_path))
        print("restore from pretrained_weighs in IMAGE_NET")

    return restorer, checkpoint_path
def qLearning(self):
    """Build the Q-learning loss and its Adam training op.

    Creates the placeholder `self.q_s_prime_a_prime` (length
    `self.config.BATCH_SIZE`) for max_a' Q(s', a').

    Returns:
      A tuple `(loss, train_op)`; `loss` is the mean squared TD error.
    """
    # Q(s, a) from the training network.
    with tf.name_scope("compute_q_s_a"):
        q_s_a = self.computeTrainingNetwork()

    # max_a' Q(s', a'), supplied by the caller.
    self.q_s_prime_a_prime = tf.placeholder(
        tf.float32, shape=[self.config.BATCH_SIZE])

    # TD error: r + (1 - terminal) * gamma * max_a' Q(s', a') - Q(s, a).
    not_terminal = 1. - self.is_terminal
    bootstrap = not_terminal * self.config.GAMMA * self.q_s_prime_a_prime
    target = self.r + bootstrap
    td_error = target - q_s_a
    loss = tf.reduce_mean(tf.square(td_error))

    # Adam update that also advances the global step.
    step = slim.get_or_create_global_step()
    optimizer = tf.train.AdamOptimizer(self.config.ALPHA0)
    train_op = optimizer.minimize(loss, global_step=step)

    return loss, train_op
def start_training(self):
    """Build the SSD training graph and run `slim.learning.train` on it."""
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Batched training data; the second returned value is not used here.
    training_batch = self.g_prepare.get_voc_2007_2012_train_data()
    (image, _, glabels, gbboxes, gdifficulties, gclasses, gt_localizations,
     gscores) = training_batch

    # Model outputs.
    predictions, pred_localisations, logits, end_points = (
        self.g_ssd.get_model(image))

    # Training loss.
    total_loss = ssdmodel.get_losses(logits, pred_localisations, gclasses,
                                     gt_localizations, gscores)

    global_step = slim.get_or_create_global_step()
    variables_to_train = self._get_variables_to_train()
    print(variables_to_train)

    learning_rate = self._configure_learning_rate(
        self.g_prepare.dataset.num_samples, global_step)
    optimizer = self._configure_optimizer(learning_rate)

    # Train op with the gradient norms clipped to 4.
    train_op = slim.learning.create_train_op(
        total_loss,
        optimizer,
        variables_to_train=variables_to_train,
        clip_gradient_norm=4)

    self._add_summaries(end_points, total_loss)
    tf.summary.scalar('learning_rate', learning_rate)
    self._setup_debugging(predictions, gt_localizations, glabels, gbboxes,
                          gdifficulties)

    # Let the process grow its GPU memory on demand.
    session_config = tf.ConfigProto(
        log_device_placement=False,
        gpu_options=tf.GPUOptions(allow_growth=True))

    train_kwargs = dict(
        train_step_fn=self._train_step,
        saver=tf_saver.Saver(max_to_keep=500),
        init_fn=self._get_init_fn(),
        number_of_steps=self.max_number_of_steps,
        log_every_n_steps=self.log_every_n_steps,
        save_summaries_secs=self.save_summaries_secs,
        save_interval_secs=self.save_interval_secs,
        session_config=session_config)
    slim.learning.train(train_op, self.train_dir, **train_kwargs)
def get_restorer():
    """Build the saver used to restore weights and pick the checkpoint path.

    If a checkpoint exists under `FLAGS.trained_checkpoint/cfgs.VERSION`,
    training resumes from it: with `RESTORE_FROM_RPN` set, only the model
    variables whose name does not start with 'Fast_Rcnn' (plus the global
    step) are restored, otherwise a default saver is used.

    If no such checkpoint exists and `cfgs.NET_NAME` is 'pvanet', nothing is
    restored. For any other network the variables whose name starts with
    `cfgs.NET_NAME`, except those under '<NET_NAME>/logits', are restored from
    `FLAGS.pretrained_model_path`.

    Returns:
      A tuple `(restorer, checkpoint_path)`; both are None when the model is
      initialised from scratch.
    """
    checkpoint_path = tf.train.latest_checkpoint(
        os.path.join(FLAGS.trained_checkpoint, cfgs.VERSION))

    if checkpoint_path is not None:
        if RESTORE_FROM_RPN:
            print('___restore from rpn___')
            model_variables = slim.get_model_variables()
            restore_variables = [
                var for var in model_variables
                if not var.name.startswith('Fast_Rcnn')
            ] + [slim.get_or_create_global_step()]
            for var in restore_variables:
                print(var.name)
            restorer = tf.train.Saver(restore_variables)
        else:
            restorer = tf.train.Saver()
        print("model restore from :", checkpoint_path)
        return restorer, checkpoint_path

    if cfgs.NET_NAME == 'pvanet':
        print("model initialization")
        return None, None

    checkpoint_path = FLAGS.pretrained_model_path
    print("model restore from pretrained mode, path is :", checkpoint_path)

    logits_prefix = '{}/logits'.format(cfgs.NET_NAME)
    restore_variables = [
        var for var in slim.get_model_variables()
        if (var.name.startswith(cfgs.NET_NAME)
            and not var.name.startswith(logits_prefix))
    ]
    for var in restore_variables:
        print(var.name)
    restorer = tf.train.Saver(restore_variables)

    return restorer, checkpoint_path
def train_momentum_cross_entropy():
    """Train AlexNetXL with Nesterov momentum and softmax cross-entropy.

    The learning rate follows an inverse-time decay of
    `FLAGS.initial_learning_rate` driven by the global step. Summaries and
    checkpoints are written to `FLAGS.trainlog_dir` every 30 seconds, and
    training stops after `FLAGS.max_steps` steps.
    """
    with tf.Graph().as_default():
        global_step = slim.get_or_create_global_step()
        learning_rate = tf.train.inverse_time_decay(
            learning_rate=FLAGS.initial_learning_rate,
            global_step=global_step,
            decay_steps=9000,
            decay_rate=FLAGS.learning_rate_decay)

        images, labels = utils.load_batch(shards=TRAIN_SHARDS,
                                          batch_size=FLAGS.batch_size,
                                          train=True)
        labels = tf.one_hot(labels, depth=200)

        # Define model
        predictions = model.AlexNetXL(images)

        # Define loss function
        loss = tf.losses.softmax_cross_entropy(labels, predictions)
        tf.summary.scalar('loss', loss)

        # Define optimizer. It has to receive the decayed learning-rate
        # tensor; passing the constant initial rate left the schedule above
        # unused.
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=learning_rate,
            momentum=FLAGS.momentum,
            use_nesterov=True)

        # Create training op
        train_op = slim.learning.create_train_op(loss, optimizer)

        # Run training
        slim.learning.train(train_op,
                            FLAGS.trainlog_dir,
                            number_of_steps=FLAGS.max_steps,
                            save_summaries_secs=30,
                            save_interval_secs=30)
def createTrainingOp(self):
    """Create the input placeholders and return the Adam training op.

    Side effects: sets ``self.s`` (float32, ``[BATCH_SIZE, INPUT_DIM]``),
    ``self.a`` (int32, ``[BATCH_SIZE]``) and ``self.r`` (float32,
    ``[BATCH_SIZE]``) placeholders.

    The loss is the mean sparse softmax cross-entropy between the model
    logits and ``self.a``; every non-None gradient is multiplied by
    ``self.r`` before being applied.

    Returns:
        The op that applies the scaled gradients and increments the
        global step.
    """
    batch_size = self.config.BATCH_SIZE
    self.s = tf.placeholder(tf.float32,
                            shape=[batch_size, self.config.INPUT_DIM])
    self.a = tf.placeholder(tf.int32, shape=[batch_size])
    self.r = tf.placeholder(tf.float32, shape=[batch_size])

    with slim.arg_scope(self.model.arg_scope(reuse=False)):
        logits = self.model.compute(self.s)

    # Keyword arguments are required by TF >= 1.0 (positional calls raise
    # ValueError) and are also accepted by older releases.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.a)
    loss = tf.reduce_mean(per_example_loss)

    optimizer = tf.train.AdamOptimizer(self.config.ALPHA0)
    gradients = optimizer.compute_gradients(loss)

    # NOTE(review): self.r has shape [BATCH_SIZE] while each grad has its
    # variable's shape, so this product relies on broadcasting -- confirm
    # this is intended (it is a plain scalar scale when BATCH_SIZE == 1).
    scaled_gradients = [
        (grad * self.r, var) if grad is not None else (grad, var)
        for grad, var in gradients
    ]

    global_step = slim.get_or_create_global_step()
    return optimizer.apply_gradients(scaled_gradients,
                                     global_step=global_step)
def print_n_steps(variable, text, n_steps, summarize=10, data=None,
                  name_or_scope=None):
    """Return ``data`` unchanged, printing ``variable`` every ``n_steps`` steps.

    Args:
        variable: tensor whose value is printed.
        text: message passed to ``tf.Print`` ahead of the value.
        n_steps: printing happens when ``global_step % n_steps == 0``.
        summarize: forwarded to ``tf.Print``.
        data: tensor to pass through; defaults to ``variable``.
        name_or_scope: optional scope; ``'print_n_steps'`` is used as the
            default scope name when this is None.

    Returns:
        The output of a ``tf.cond`` that yields ``data`` either through a
        ``tf.Print`` op or directly.
    """
    # The keyword is `default_name`; the former `default=` is not accepted by
    # tf.variable_scope and raised TypeError.
    with tf.variable_scope(name_or_scope, default_name='print_n_steps',
                           values=[variable]):
        global_step = slim.get_or_create_global_step()
        if data is None:
            data = variable

        def _print():
            return tf.Print(data, [variable], text, summarize=summarize)

        def _not_print():
            return data

        with utils.device_cpu():
            should_print = tf.equal(tf.mod(global_step, n_steps), 0)
            return tf.cond(should_print, _print, _not_print)
def _init_model(self):
    """Build the training graph: inputs, losses, summaries and the train op.

    Sets on ``self``: ``global_step``, the ``batch_data`` / ``batch_label``
    placeholders, the two halves of the image batch (``input_data`` and
    ``gt_input_data``), the first half of the labels (``input_label``) and
    its one-hot form (``labels``), ``summary_train``, and the variable
    groups ``varsg``, ``varsd``, ``fc_add``, ``vard_fr`` and ``init_vars``.
    """
    self.global_step = slim.get_or_create_global_step()

    # Input placeholders: images and integer class labels.
    image_shape = [None, self.input_size, self.input_size, self.input_channel]
    self.batch_data = tf.placeholder(dtype=tf.float32,
                                     shape=image_shape,
                                     name='input_images')
    self.batch_label = tf.placeholder(dtype=tf.int64,
                                      shape=[None],
                                      name='input_labels')

    # The batch is split in two along axis 0; only the first half of the
    # labels is kept.
    # NOTE(review): the second image half is presumably the ground-truth
    # target for the first -- confirm against the data feeder.
    image_halves = tf.split(self.batch_data, 2, axis=0)
    self.input_data, self.gt_input_data = image_halves
    label_halves = tf.split(self.batch_label, 2, axis=0)
    self.input_label = label_halves[0]

    # One-hot labels.
    self.labels = slim.one_hot_encoding(self.input_label, self.class_nums)

    # Build predictions and losses, then merge every summary defined so far.
    self._predict_drgan_multipie()
    self._loss_gan_multipie()
    self._loss_compute()
    self.summary_train = tf.summary.merge_all()

    # Group trainable variables by a substring of their name.
    trainable = tf.trainable_variables()

    def _with_tag(tag):
        return [candidate for candidate in trainable if tag in candidate.name]

    generator_vars = _with_tag('generator')
    self.varsd = _with_tag('discriminator')
    self.fc_add = _with_tag('recognation_fc')
    self.vard_fr = _with_tag('resnet_yd')

    # Only the 'resnet_yd' variables are listed in init_vars.
    self.init_vars = self.vard_fr
    # varsg holds the generator, 'resnet_yd' and 'recognation_fc' variables.
    self.varsg = generator_vars + self.vard_fr + self.fc_add

    self._get_train_op(self.global_step)
def setup_training(loss_op, initial_learning_rate, steps_per_decay,
                   learning_rate_decay, momentum, max_steps,
                   sync=False, adjust_lr_sync=True,
                   num_workers=1, replica_id=0, vars_to_optimize=None,
                   clip_gradient_norm=0, typ=None, momentum2=0.999,
                   adam_eps=1e-8):
    """Create the learning-rate schedule, optimizer and train op.

    Args:
        loss_op: loss tensor to minimize.
        initial_learning_rate: starting rate for the staircase exponential
            decay.
        steps_per_decay: number of steps between decays.
        learning_rate_decay: multiplicative decay factor.
        momentum: momentum for ``'sgd'``; ``beta1`` for ``'adam'``.
        max_steps: step count at which ``should_stop_op`` becomes true.
        sync: wrap the optimizer in ``tf.train.SyncReplicasOptimizer``.
        adjust_lr_sync: when ``sync`` is set, multiply the learning rate by
            ``num_workers`` and divide ``max_steps`` / ``steps_per_decay``
            by it.
        num_workers: number of replicas.
        replica_id: id passed to the sync optimizer.
        vars_to_optimize: variables to train (None lets slim choose).
        clip_gradient_norm: forwarded to ``slim.learning.create_train_op``.
        typ: optimizer type, ``'sgd'`` or ``'adam'``.
        momentum2: ``beta2`` for Adam.
        adam_eps: ``epsilon`` for Adam.

    Returns:
        ``(lr_op, global_step_op, train_op, should_stop_op, optimizer,
        sync_optimizer)``; ``sync_optimizer`` is None unless ``sync``.

    Raises:
        ValueError: if ``typ`` is neither ``'sgd'`` nor ``'adam'``.
    """
    # Fail fast, before any graph ops are created. An unknown `typ`
    # (including the default None) used to leave `optimizer` unbound.
    if typ not in ('sgd', 'adam'):
        raise ValueError(
            "typ must be 'sgd' or 'adam', got {!r}".format(typ))

    if sync and adjust_lr_sync:
        initial_learning_rate = initial_learning_rate * num_workers
        # Builtin int replaces the np.int alias removed in NumPy 1.24.
        max_steps = int(max_steps / num_workers)
        steps_per_decay = int(steps_per_decay / num_workers)

    global_step_op = slim.get_or_create_global_step()
    lr_op = tf.train.exponential_decay(initial_learning_rate,
                                       global_step_op,
                                       steps_per_decay,
                                       learning_rate_decay,
                                       staircase=True)

    if typ == 'sgd':
        optimizer = tf.train.MomentumOptimizer(lr_op, momentum)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=lr_op,
                                           beta1=momentum,
                                           beta2=momentum2,
                                           epsilon=adam_eps)

    if sync:
        sync_optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            replica_id=replica_id,
            total_num_replicas=num_workers)
        training_optimizer = sync_optimizer
    else:
        sync_optimizer = None
        training_optimizer = optimizer

    train_op = slim.learning.create_train_op(
        loss_op,
        training_optimizer,
        variables_to_train=vars_to_optimize,
        clip_gradient_norm=clip_gradient_norm)

    should_stop_op = tf.greater_equal(global_step_op, max_steps)
    return (lr_op, global_step_op, train_op, should_stop_op, optimizer,
            sync_optimizer)
def model_fn(features, labels, mode, params=None, config=None):
    """Build the ``EstimatorSpec`` for the requested mode.

    * TRAIN: builds a ``TransformerModule``, computes the loss on
      ``features`` and creates a train op with ``slim.optimize_loss``.
    * EVAL: builds the module and computes the loss only.
    * PREDICT: not supported.

    ``labels`` and ``config`` are accepted but not used here.

    Raises:
        NotImplementedError: when ``mode`` is ``ModeKeys.PREDICT``.
    """
    if mode == ModeKeys.PREDICT:
        raise NotImplementedError

    training_op = None
    model_loss = None

    if mode == ModeKeys.TRAIN:
        network = TransformerModule(params=self.model_params)
        global_step = slim.get_or_create_global_step()
        model_loss = network(features)
        training_op = slim.optimize_loss(
            loss=model_loss,
            global_step=global_step,
            learning_rate=self.training_params["learning_rate"],
            clip_gradients=self.training_params["clip_gradients"],
            optimizer=params["optimizer"],
            summaries=slim.OPTIMIZER_SUMMARIES)
    elif mode == ModeKeys.EVAL:
        network = TransformerModule(params=self.model_params)
        model_loss = network(features)

    # Metrics and predictions are never populated by this function.
    return EstimatorSpec(train_op=training_op,
                         loss=model_loss,
                         eval_metric_ops=None,
                         predictions=None,
                         mode=mode)
def setup_training(loss_op, initial_learning_rate, steps_per_decay,
                   learning_rate_decay, momentum, max_steps,
                   sync=False, adjust_lr_sync=True,
                   num_workers=1, replica_id=0, vars_to_optimize=None,
                   clip_gradient_norm=0, typ=None, momentum2=0.999,
                   adam_eps=1e-8):
    """Create the learning-rate schedule, optimizer and train op.

    Args:
        loss_op: loss tensor to minimize.
        initial_learning_rate: starting rate for the staircase exponential
            decay.
        steps_per_decay: number of steps between decays.
        learning_rate_decay: multiplicative decay factor.
        momentum: momentum for ``'sgd'``; ``beta1`` for ``'adam'``.
        max_steps: step count at which ``should_stop_op`` becomes true.
        sync: wrap the optimizer in ``tf.train.SyncReplicasOptimizer``.
        adjust_lr_sync: when ``sync`` is set, multiply the learning rate by
            ``num_workers`` and divide ``max_steps`` / ``steps_per_decay``
            by it.
        num_workers: number of replicas.
        replica_id: id passed to the sync optimizer.
        vars_to_optimize: variables to train (None lets slim choose).
        clip_gradient_norm: forwarded to ``slim.learning.create_train_op``.
        typ: optimizer type, ``'sgd'`` or ``'adam'``.
        momentum2: ``beta2`` for Adam.
        adam_eps: ``epsilon`` for Adam.

    Returns:
        ``(lr_op, global_step_op, train_op, should_stop_op, optimizer,
        sync_optimizer)``; ``sync_optimizer`` is None unless ``sync``.

    Raises:
        ValueError: if ``typ`` is neither ``'sgd'`` nor ``'adam'``.
    """
    # Fail fast, before any graph ops are created. An unknown `typ`
    # (including the default None) used to leave `optimizer` unbound.
    if typ not in ('sgd', 'adam'):
        raise ValueError(
            "typ must be 'sgd' or 'adam', got {!r}".format(typ))

    if sync and adjust_lr_sync:
        initial_learning_rate = initial_learning_rate * num_workers
        # Builtin int replaces the np.int alias removed in NumPy 1.24.
        max_steps = int(max_steps / num_workers)
        steps_per_decay = int(steps_per_decay / num_workers)

    global_step_op = slim.get_or_create_global_step()
    lr_op = tf.train.exponential_decay(initial_learning_rate,
                                       global_step_op,
                                       steps_per_decay,
                                       learning_rate_decay,
                                       staircase=True)

    if typ == 'sgd':
        optimizer = tf.train.MomentumOptimizer(lr_op, momentum)
    else:
        optimizer = tf.train.AdamOptimizer(learning_rate=lr_op,
                                           beta1=momentum,
                                           beta2=momentum2,
                                           epsilon=adam_eps)

    if sync:
        sync_optimizer = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=num_workers,
            replica_id=replica_id,
            total_num_replicas=num_workers)
        training_optimizer = sync_optimizer
    else:
        sync_optimizer = None
        training_optimizer = optimizer

    train_op = slim.learning.create_train_op(
        loss_op,
        training_optimizer,
        variables_to_train=vars_to_optimize,
        clip_gradient_norm=clip_gradient_norm)

    should_stop_op = tf.greater_equal(global_step_op, max_steps)
    return (lr_op, global_step_op, train_op, should_stop_op, optimizer,
            sync_optimizer)
def main(self):
    """Build the multi-GPU training graph and hand it to ``self.log_printer``.

    One tower is built per entry in the comma-separated ``cfgs.GPU_GROUP``.
    Each tower builds the R3Det network on its own input, adds image
    summaries, and computes (optionally norm-clipped) gradients that are
    collected in ``tower_grads``.
    """
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a whitespace-collapsed source -- confirm the placement of
    # reuse_variables()/compute_gradients() against the original file.
    with tf.Graph().as_default() as graph, tf.device('/cpu:0'):
        # One tower per comma-separated entry in cfgs.GPU_GROUP.
        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = self.warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        tf.summary.scalar('lr', lr)
        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        r3det = build_whole_network.DetectionNetworkR3Det(cfgs=self.cfgs, is_training=True)

        with tf.name_scope('get_batch'):
            if cfgs.IMAGE_PYRAMID:
                # Pick one of the configured short-side lengths at random.
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]
            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                self.reader.next_batch(dataset_name=cfgs.DATASET_NAME,
                                       batch_size=cfgs.BATCH_SIZE * num_gpu,
                                       shortside_len=shortside_len,
                                       is_training=True)

        # data processing
        # Element i of the batch becomes the input of tower i.
        # NOTE(review): only the first num_gpu batch elements are consumed,
        # which presumably assumes cfgs.BATCH_SIZE == 1 -- confirm.
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in ['resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d']:
                img = img / tf.constant([cfgs.PIXEL_STD])

            # Rotated boxes: reshaped to 6 columns per row.
            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            # Horizontal boxes: reshaped to 5 columns per row.
            gtboxes_and_label_h = get_horizen_minAreaRectangle(gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [-1, ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects, img_h, img_w])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(cfgs.WEIGHT_DECAY)

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        # slim variables are placed on the CPU.
                        with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane,
                                                 slim.conv2d_transpose, slim.separable_conv2d,
                                                 slim.fully_connected],
                                                weights_regularizer=weights_regularizer,
                                                biases_regularizer=biases_regularizer,
                                                biases_initializer=tf.constant_initializer(0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(self.get_gtboxes_and_label,
                                                                                      inp=[inputs_list[i][1],
                                                                                           inputs_list[i][2],
                                                                                           inputs_list[i][3]],
                                                                                      Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

                                img = inputs_list[i][0]
                                # The last two entries are (img_h, img_w);
                                # the image is cropped to that size from the
                                # top-left corner.
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(image=img,
                                                                    offset_height=0,
                                                                    offset_width=0,
                                                                    target_height=tf.cast(img_shape[0], tf.int32),
                                                                    target_width=tf.cast(img_shape[1], tf.int32))

                                outputs = r3det.build_whole_detection_network(input_img_batch=img,
                                                                              gtboxes_batch_h=gtboxes_and_label_h,
                                                                              gtboxes_batch_r=gtboxes_and_label_r,
                                                                              gpu_id=i)

                                # Ground-truth visualisations; the last
                                # column of each box tensor is the label.
                                gtboxes_in_img_h = self.drawer.draw_boxes_with_categories(img_batch=img,
                                                                                          boxes=gtboxes_and_label_h[:, :-1],
                                                                                          labels=gtboxes_and_label_h[:, -1],
                                                                                          method=0)
                                gtboxes_in_img_r = self.drawer.draw_boxes_with_categories(img_batch=img,
                                                                                          boxes=gtboxes_and_label_r[:, :-1],
                                                                                          labels=gtboxes_and_label_r[:, -1],
                                                                                          method=1)
                                tf.summary.image('Compare/gtboxes_h_gpu:%d' % i, gtboxes_in_img_h)
                                tf.summary.image('Compare/gtboxes_r_gpu:%d' % i, gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = self.drawer.draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1)
                                    tf.summary.image('Compare/final_detection_gpu:%d' % i, detections_in_img)

                                # The last network output is the loss dict.
                                loss_dict = outputs[-1]
                                total_loss_dict, total_losses = self.loss_dict(loss_dict, num_gpu)

                                # Regularization losses are added once, on
                                # the last tower only.
                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(regularization_losses)

                        # Enable reuse so later towers share the variables
                        # created by the first one.
                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)
        # total_loss_dict passed here is the one from the last tower.
        self.log_printer(r3det, optimizer, global_step, tower_grads, total_loss_dict, num_gpu, graph)
def get_restorer():
    """Build a Saver and pick the checkpoint path to restore from.

    If a checkpoint exists under ``cfgs.TRAINED_CKPT/cfgs.VERSION`` it is
    used: with ``RESTORE_FROM_RPN`` truthy only model variables whose name
    does not start with ``'Fast_Rcnn'`` (plus the global step) are restored,
    otherwise a default ``tf.train.Saver()`` is used.

    If no checkpoint exists, ``cfgs.PRETRAINED_CKPT`` is used and only model
    variables whose name starts with ``cfgs.NET_NAME`` but not with
    ``'<NET_NAME>/logits'`` are restored.

    Returns:
        A ``(restorer, checkpoint_path)`` tuple.
    """
    checkpoint_path = tf.train.latest_checkpoint(
        os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION))

    if checkpoint_path is not None:
        if RESTORE_FROM_RPN:
            print('___restore from rpn___')
            restore_variables = [
                var for var in slim.get_model_variables()
                if not var.name.startswith('Fast_Rcnn')
            ] + [slim.get_or_create_global_step()]
            for var in restore_variables:
                print(var.name)
            restorer = tf.train.Saver(restore_variables)
        else:
            restorer = tf.train.Saver()
        print("model restore from :", checkpoint_path)
        return restorer, checkpoint_path

    checkpoint_path = cfgs.PRETRAINED_CKPT
    print("model restore from pretrained mode, path is :", checkpoint_path)

    logits_prefix = '{}/logits'.format(cfgs.NET_NAME)
    restore_variables = [
        var for var in slim.get_model_variables()
        if (var.name.startswith(cfgs.NET_NAME)
            and not var.name.startswith(logits_prefix))
    ]
    for var in restore_variables:
        print(var.name)
    return tf.train.Saver(restore_variables), checkpoint_path
def train():
    """Build the RPN + Fast R-CNN training graph and run the training loop.

    The graph reads batches with ``next_batch``, builds the shared network,
    the RPN and the Fast R-CNN head, registers loss/image summaries, and
    trains either the RPN loss alone (``cfgs.RPN_TRAIN``) or the slim total
    loss. The loop runs ``cfgs.MAX_ITERATION`` steps, logs every 10 steps,
    writes summaries every 50 steps, and saves a checkpoint every 10000
    steps and on the final step.
    """
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm scope boundaries against the
    # original file.
    with tf.Graph().as_default():
        with tf.name_scope('get_batch'):
            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE,
                           shortside_len=cfgs.SHORT_SIDE_LEN,
                           is_training=True)
            # Axis 0 is squeezed away before conversion; the result is
            # reshaped to 6 columns per row.
            gtboxes_and_label = tf.py_func(back_forward_convert,
                                           inp=[tf.squeeze(gtboxes_and_label_batch, 0)],
                                           Tout=tf.float32)
            gtboxes_and_label = tf.reshape(gtboxes_and_label, [-1, 6])

        with tf.name_scope('draw_gtboxes'):
            # The last column is dropped before drawing.
            gtboxes_in_img = draw_box_with_color(img_batch,
                                                 tf.reshape(gtboxes_and_label, [-1, 6])[:, :-1],
                                                 text=tf.shape(gtboxes_and_label_batch)[1])

        # ------------------------------ shared CNN ------------------------------
        _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                          inputs=img_batch,
                                          num_classes=None,
                                          is_training=True,
                                          output_stride=None,
                                          global_pool=False,
                                          spatial_squeeze=False)

        # --------------- rpn (here: Feature Pyramid Networks) ---------------
        rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                            inputs=img_batch,
                            gtboxes_and_label=gtboxes_and_label,
                            is_training=True,
                            share_head=cfgs.SHARED_HEADS,
                            share_net=share_net,
                            anchor_ratios=cfgs.ANCHOR_RATIOS,
                            anchor_scales=cfgs.ANCHOR_SCALES,
                            anchor_angles=cfgs.ANCHOR_ANGLES,
                            scale_factors=cfgs.SCALE_FACTORS,  # this parameter will affect the performance
                            base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                            level=cfgs.LEVEL,
                            anchor_stride=cfgs.ANCHOR_STRIDE,
                            top_k_nms=cfgs.RPN_TOP_K_NMS,
                            kernel_size=cfgs.KERNEL_SIZE,
                            use_angles_condition=False,
                            anchor_angle_threshold=cfgs.RPN_ANCHOR_ANGLES_THRESHOLD,
                            nms_angle_threshold=cfgs.RPN_NMS_ANGLES_THRESHOLD,
                            rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                            max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                            rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                            rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                            rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                            rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                            remove_outside_anchors=cfgs.IS_FILTER_OUTSIDE_BOXES,  # whether remove anchors outside
                            rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                            scope='')

        rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()
        rpn_location_loss, rpn_classification_loss, rpn_predict_boxes, rpn_predict_scores = rpn.rpn_losses()
        rpn_total_loss = rpn_classification_loss + rpn_location_loss

        with tf.name_scope('draw_proposals'):
            # Proposals scoring above cfgs.FINAL_SCORE_THRESHOLD are drawn
            # as objects.
            rpn_object_boxes_indices = tf.reshape(
                tf.where(tf.greater(rpn_proposals_scores, cfgs.FINAL_SCORE_THRESHOLD)), [-1])
            rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices)
            rpn_object_soxres = tf.gather(rpn_proposals_scores, rpn_object_boxes_indices)
            rpn_proposals_objcet_boxes_in_img = draw_boxes_with_scores(img_batch,
                                                                       rpn_object_boxes,
                                                                       scores=rpn_object_soxres)
            rpn_proposals_boxes_in_img = draw_box_with_color(img_batch,
                                                             rpn_proposals_boxes,
                                                             text=tf.shape(rpn_proposals_boxes)[0])

        # ------------------------------ Fast RCNN ------------------------------
        fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=img_batch,
                                             feature_pyramid=rpn.feature_pyramid,
                                             rpn_proposals_boxes=rpn_proposals_boxes,
                                             rpn_proposals_scores=rpn_proposals_scores,
                                             stop_gradient_for_proposals=False,
                                             img_shape=tf.shape(img_batch),
                                             roi_size=cfgs.ROI_SIZE,
                                             roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                                             scale_factors=cfgs.SCALE_FACTORS,
                                             gtboxes_and_label=gtboxes_and_label,
                                             fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                                             top_k_nms=cfgs.FAST_RCNN_TOP_K_NMS,
                                             nms_angle_threshold=cfgs.FAST_RCNN_NMS_ANGLES_THRESHOLD,
                                             use_angle_condition=False,
                                             level=cfgs.LEVEL,
                                             fast_rcnn_maximum_boxes_per_img=100,
                                             fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                                             show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
                                             num_classes=cfgs.CLASS_NUM,
                                             fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                                             fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                                             fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                                             boxes_angle_threshold=cfgs.FAST_RCNN_BOXES_ANGLES_THRESHOLD,
                                             use_dropout=cfgs.USE_DROPOUT,
                                             weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                                             is_training=True)

        fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
            fast_rcnn.fast_rcnn_predict()
        fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
        fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

        with tf.name_scope('draw_boxes_with_categories'):
            fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(img_batch=img_batch,
                                                                         boxes=fast_rcnn_decode_boxes,
                                                                         labels=detection_category,
                                                                         scores=fast_rcnn_score)

        # ------------------------------ train op ------------------------------
        total_loss = slim.losses.get_total_loss()
        global_step = slim.get_or_create_global_step()

        # Learning rate drops by 10x at steps 70000 and 120000.
        lr = tf.train.piecewise_constant(global_step,
                                         boundaries=[np.int64(70000), np.int64(120000)],
                                         values=[cfgs.LR, cfgs.LR/10, cfgs.LR/100])
        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

        if cfgs.RPN_TRAIN:
            train_op = slim.learning.create_train_op(rpn_total_loss, optimizer, global_step)
        else:
            train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)

        # ------------------------------ Summary ------------------------------
        # ground truth and predict
        tf.summary.image('img/gtboxes', gtboxes_in_img)
        tf.summary.image('img/fast_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
        # rpn loss and image
        tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
        tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
        tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

        tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)
        tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

        tf.summary.scalar('loss/total_loss', total_loss)

        tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
        tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_objcet_boxes_in_img)
        # learning_rate
        tf.summary.scalar('learning_rate', lr)

        summary_op = tf.summary.merge_all()
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        )

        restorer, restore_ckpt = restore_model.get_restorer()
        saver = tf.train.Saver(max_to_keep=10)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        # Same runtime text as the original log message, split for width.
        log_template = (" {}: step{} image_name:{} |\t rpn_loc_loss:{} |\t rpn_cla_loss:{} |\t rpn_total_loss:{} |"
                        " fast_rcnn_loc_loss:{} |\t fast_rcnn_cla_loss:{} |\t fast_rcnn_total_loss:{} |"
                        " total_loss:{} |\t per_cost_time:{}s")

        with tf.Session(config=config) as sess:
            sess.run(init_op)
            if restorer is not None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

            try:
                for step in range(cfgs.MAX_ITERATION):
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()

                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                        _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                        _fast_rcnn_total_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                                  rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                                  fast_rcnn_total_loss, total_loss, train_op])

                    end = time.time()

                    if step % 10 == 0:
                        print(log_template.format(training_time, _global_step, str(_img_name_batch[0]),
                                                  _rpn_location_loss, _rpn_classification_loss,
                                                  _rpn_total_loss, _fast_rcnn_location_loss,
                                                  _fast_rcnn_classification_loss, _fast_rcnn_total_loss,
                                                  _total_loss, (end - start)))

                    # Summary every 50 steps, except on multiples of 10000,
                    # which the checkpoint branch below handles.
                    if (step % 50 == 0) and (step % 10000 != 0):
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, _global_step)
                        summary_writer.flush()

                    # Summary + checkpoint every 10000 steps and on the last
                    # step.
                    if (step > 0 and step % 10000 == 0) or (step == cfgs.MAX_ITERATION - 1):
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, _global_step)
                        summary_writer.flush()

                        save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                        if not os.path.exists(save_dir):
                            # makedirs also creates a missing parent
                            # directory, which os.mkdir would fail on.
                            os.makedirs(save_dir)

                        save_ckpt = os.path.join(save_dir, 'voc_'+str(_global_step)+'model.ckpt')
                        saver.save(sess, save_ckpt)
                        print(' weights had been saved')
            finally:
                # Ask the queue-runner threads to stop even if the loop
                # raised.
                coord.request_stop()

            coord.join(threads)