def evaluate():
    """Evaluate model on Dataset for a number of steps."""
    print("in model evaluation")
    dataset = dataset_module.MyDataset(subset=FLAGS.subset)
    assert dataset.data_files()
    FLAGS.num_examples = dataset.num_examples_per_epoch() / FLAGS.subsample_factor

    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        tensors_in, tensors_out = batching.inputs(dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits_all = model.inference(tensors_in, num_classes, for_training=False)
        model.loss(logits_all, tensors_out, batch_size=FLAGS.batch_size)
        loss_op = slim.losses.get_losses()

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, graph_def=graph_def)

        while True:
            _eval_once(saver, summary_writer, logits_all, tensors_out, loss_op,
                       summary_op, tensors_in)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
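
# NOTE: _eval_once is called above but not defined in this file. The helper
# below is a hypothetical minimal sketch, modeled on the standard
# Inception-style eval loop, NOT the project's actual implementation. It
# restores the latest checkpoint, starts queue runners, and averages the
# loss over the evaluation set. FLAGS.checkpoint_dir and the body are
# assumptions; only the argument list is taken from the call site above.
def _eval_once(saver, summary_writer, logits_all, tensors_out, loss_op,
               summary_op, tensors_in):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)  # assumed flag
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found in %s' % FLAGS.checkpoint_dir)
            return
        saver.restore(sess, ckpt.model_checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            num_iter = int(FLAGS.num_examples // FLAGS.batch_size)
            total_loss, step = 0.0, 0
            while step < num_iter and not coord.should_stop():
                # loss_op is the list returned by slim.losses.get_losses().
                total_loss += sum(sess.run(loss_op))
                step += 1
            print('%s: mean loss = %.4f' %
                  (datetime.now(), total_loss / max(step, 1)))
            summary_writer.add_summary(sess.run(summary_op), global_step=step)
        finally:
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)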
def evaluate():
    """Evaluate model on Dataset for a number of steps."""
    dataset = dataset_module.MyDataset(subset=FLAGS.subset)
    assert dataset.data_files()
    FLAGS.num_examples = dataset.num_examples_per_epoch() / FLAGS.subsample_factor

    output_dir = os.path.dirname(FLAGS.stat_output_path)
    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        tensors_in, tensors_out = batching.inputs(dataset)

        config = tf.ConfigProto(intra_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Start the queue runners.
            coord = tf.train.Coordinator()
            try:
                threads = []
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess, coord=coord, daemon=True, start=True))

                eval_method = globals()[FLAGS.eval_method]
                eval_method(tensors_out, sess, coord, tensors_in)
            except Exception as e:  # pylint: disable=broad-except
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
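
# NOTE: _tensor_list_splits, used by train() below to shard the network
# inputs and outputs across GPU towers, is not defined in this file. The
# helper below is a hypothetical sketch of the assumed behavior (split each
# tensor along the batch dimension into num_splits equal pieces and return
# one list of tensors per tower); the real implementation may differ.
def _tensor_list_splits(tensor_list, num_splits):
    # splits[j] holds the j-th shard of every tensor in tensor_list.
    splits = [[] for _ in range(num_splits)]
    for tensor in tensor_list:
        # Pre-1.0 tf.split argument order: (split_dim, num_split, value).
        pieces = tf.split(0, num_splits, tensor)
        for j, piece in enumerate(pieces):
            splits[j].append(piece)
    return splits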
def train():
    """Train on dataset for a number of steps."""
    dataset = dataset_module.MyDataset(subset=FLAGS.subset)
    #assert dataset.data_files()

    # Use gpu:0 instead of cpu:0 to avoid the RNN GPU variable-uninitialized problem.
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # Calculate the learning rate schedule.
        num_batches_per_epoch = (dataset.num_examples_per_epoch() / FLAGS.batch_size)
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step - FLAGS.training_step_offset,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)

        # Create an optimizer that performs gradient descent.
        if FLAGS.optimizer == "rmsprop":
            opt = tf.train.RMSPropOptimizer(lr,
                                            decay=RMSPROP_DECAY,
                                            momentum=FLAGS.momentum,
                                            epsilon=RMSPROP_EPSILON)
        elif FLAGS.optimizer == "sgd":
            opt = tf.train.MomentumOptimizer(lr, FLAGS.momentum, use_nesterov=False)
        elif FLAGS.optimizer == "adadelta":
            opt = tf.train.AdadeltaOptimizer()
        elif FLAGS.optimizer == "adam":
            opt = tf.train.AdamOptimizer()
        else:
            print("optimizer invalid: %s" % FLAGS.optimizer)
            return

        # Get images and labels for ImageNet and split the batch across GPUs.
        assert FLAGS.batch_size % FLAGS.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')
        split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus)

        # Override the number of preprocessing threads to account for the increased
        # number of GPU towers.
        #num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus
        # Choose not to override, to keep finer control of how many threads to use.
        num_preprocess_threads = FLAGS.num_preprocess_threads
        net_inputs, net_outputs = batching.distorted_inputs(
            dataset, num_preprocess_threads=num_preprocess_threads)
        input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES))
        init_op = tf.initialize_all_variables()  # Initialize all variables.

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        if FLAGS.background_class:
            num_classes = dataset.num_classes() + 1
        else:
            num_classes = dataset.num_classes()

        # Split the batch of images and labels for towers.
        # TODO: this might become invalid if we are doing detection
        input_splits = _tensor_list_splits(net_inputs, FLAGS.num_gpus)
        output_splits = _tensor_list_splits(net_outputs, FLAGS.num_gpus)

        # Calculate the gradients for each model tower.
        tower_grads = []
        for i in xrange(FLAGS.num_gpus):
            with tf.device('/gpu:%s' % i):
                with tf.name_scope('%s_%d' % (model.TOWER_NAME, i)) as scope:
                    # Pinning all variables on the CPU showed no improvement, so it is
                    # disabled; the `if True:` only preserves the original indentation.
                    if True:
                        # Force all Variables to reside on the CPU.
                        #with slim.arg_scope([slim.variable], device='/cpu:0'):
                        # Do not use the next line; it would place all operations on the CPU.
                        #with tf.device('/cpu:0'):

                        # Calculate the loss for one tower of the CNN model. This
                        # function constructs the entire CNN model but shares the
                        # variables across all towers.
                        loss = _tower_loss(input_splits[i], output_splits[i],
                                           num_classes, scope)

                        if i == 0:
                            # Set different learning rates for different variables.
                            if hasattr(model, 'learning_rate_multipliers'):
                                # This function returns a dictionary of [varname] = multiplier;
                                # a learning rate multiplier of one is set by default.
                                multiplier = model.learning_rate_multipliers()
                                # Compute the vars that need gradients.
                                grad_var_list = []
                                for t in tf.trainable_variables():
                                    v = t.op.name
                                    if (v in multiplier) and (abs(multiplier[v]) < 1e-6):
                                        pass
                                    else:
                                        grad_var_list.append(t)
                                print("-" * 40 + "\n gradient will be computed for vars:")
                                for x in grad_var_list:
                                    print(x.op.name)
                            else:
                                multiplier = None
                                grad_var_list = None

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Retain the summaries from the final tower.
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

                        # Retain the Batch Normalization update operations only from the
                        # final tower. Ideally, we should grab the updates from all towers,
                        # but these stats accumulate extremely fast so we can ignore the
                        # stats from the other towers without significant detriment.
                        batchnorm_updates = tf.get_collection(
                            ops.GraphKeys.UPDATE_OPS, scope)
                        #batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
                        #                                      scope)

                        # Calculate the gradients for the batch of data on this CNN tower.
                        grads = opt.compute_gradients(loss, var_list=grad_var_list)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        if FLAGS.EWC == "stat":
            grads, grads2 = _average_gradients(tower_grads, True)
            # Merge grads2 into dicts keyed by variable name.
            out = {}
            vard = {}
            for g2, v in grads2:
                out[v.op.name] = g2
                vard[v.op.name] = v
            grads2 = out
        else:
            grads = _average_gradients(tower_grads)

        # Add summaries for the input processing and global_step.
        summaries.extend(input_summaries)

        # Add a summary to track the learning rate.
        summaries.append(tf.scalar_summary('learning_rate', lr))

        # Add histograms for gradients.
        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.histogram_summary(var.op.name + '/gradients', grad))

        if multiplier:
            print("-" * 40 + "\nusing learning rate multipliers")
            grads_out = []
            for g, v in grads:
                v_name = v.op.name
                if v_name in multiplier:
                    g_out = tf.mul(multiplier[v_name], g)
                    print(v_name, " * ", multiplier[v_name])
                else:
                    g_out = g
                    print(v_name, " * 1.00")
                grads_out.append((g_out, v))
            grads = grads_out

        # Gradient clipping.
        if FLAGS.clip_gradient_threshold > 0:
            print("-" * 40 + "\n Gradient Clipping On")
            t_list = [x[0] for x in grads]
            t_list, gnorm = tf.clip_by_global_norm(t_list,
                                                   FLAGS.clip_gradient_threshold,
                                                   name='gradient_clipping')
            grads = [(t_list[i], grads[i][1]) for i in range(len(t_list))]

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

        # Add histograms for trainable variables.
        for var in tf.trainable_variables():
            summaries.append(tf.histogram_summary(var.op.name, var))

        # Track the moving averages of all trainable variables.
        # Note that we maintain a "double-average" of the BatchNormalization
        # global statistics. This is more complicated than need be, but we employ
        # it for backward compatibility with our previous models.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY, global_step)
        # Another possibility is to use tf.slim.get_variables().
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.merge_summary(summaries)

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement,
            intra_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(init)  # Training starts here.

        # TODO: loading from a checkpoint with a different number of towers is not supported yet.
        if FLAGS.pretrained_model_checkpoint_path:
            assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path)
            #variables_to_restore = tf.get_collection(slim.variables.VARIABLES_TO_RESTORE)
            variables_to_restore = slim.get_variables_to_restore()

            # Only restore variables that are present in the checkpoint.
            existing_vars = util.tensors_in_checkpoint_file(
                FLAGS.pretrained_model_checkpoint_path)
            restore_new = []
            ignore_vars = []
            for x in variables_to_restore:
                if x.op.name in existing_vars:
                    restore_new.append(x)
                else:
                    ignore_vars.append(x.op.name)
            if len(ignore_vars) > 0:
                print("-" * 40 +
                      "\nWarning: some variables do not exist in the checkpoint; ignoring them:")
                for x in ignore_vars:
                    print(x)
            variables_to_restore = restore_new

            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.train.SummaryWriter(
            FLAGS.train_dir,
            graph_def=sess.graph.as_graph_def(add_shapes=True))

        start_time = time.time()
        duration_compute = 0
        grads2_accu = None
        grads2_count = 0
        step_start = int(sess.run(global_step))
        try:
            for step in xrange(step_start, FLAGS.max_steps):
                # Call a function in the model definition to do some extra work.
                if hasattr(model, 'update_each_step'):
                    model.update_each_step(sess, step)

                if FLAGS.EWC == "stat":
                    # Run in stat mode: accumulate the gradient statistics for EWC.
                    grads2_v = sess.run(grads2)
                    if grads2_count == 0:
                        grads2_accu = grads2_v
                    else:
                        for key in grads2_v.keys():
                            grads2_accu[key] += grads2_v[key]
                    grads2_count += 1

                    if step == (FLAGS.max_steps - 1):
                        # Save the Fisher information matrix.
                        for key in grads2_accu.keys():
                            grads2_accu[key] /= grads2_count
                        fname = os.path.join(FLAGS.train_dir, "EWC_stat.pkl")
                        pickle.dump(grads2_accu, open(fname, "wb"))
                        # Save the MAP file.
                        vard_v = sess.run(vard)
                        fname = os.path.join(FLAGS.train_dir, "EWC_map.pkl")
                        pickle.dump(vard_v, open(fname, "wb"))

                    if (step + 1) % FLAGS.display_loss == 0:
                        print("processed ", step - step_start, " examples")
                    continue

                has_run_meta = False
                if FLAGS.profile:
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                    start_time_compute = time.time()
                    _, loss_value = sess.run([train_op, loss],
                                             options=run_options,
                                             run_metadata=run_metadata)
                    duration_compute = duration_compute + time.time() - start_time_compute

                    # Create the Timeline object and write it to a JSON file.
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open(os.path.join(FLAGS.train_dir, 'timeline.json'), 'w') as f:
                        f.write(ctf)
                    print("generated a timeline profile for one session")
                else:
                    start_time_compute = time.time()
                    if (step + 1) % (FLAGS.display_summary * 10) == 0:
                        has_run_meta = True
                        # Profile at a longer interval.
                        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        _, loss_value, summary_str = \
                            sess.run([train_op, loss, summary_op],
                                     options=run_options,
                                     run_metadata=run_metadata)
                        summary_writer.add_run_metadata(run_metadata, 'step%d' % step)
                        summary_writer.add_summary(summary_str, step)
                        print('Adding run metadata for', step)

                        # Create the Timeline object and write it to a JSON file.
                        tl = timeline.Timeline(run_metadata.step_stats)
                        ctf = tl.generate_chrome_trace_format()
                        with open(os.path.join(FLAGS.train_dir, 'timeline.json'), 'w') as f:
                            f.write(ctf)
                        print("generated a timeline profile for one session")
                    else:
                        _, loss_value = sess.run([train_op, loss])
                    duration_compute = duration_compute + time.time() - start_time_compute

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if (step + 1) % FLAGS.display_loss == 0:
                    duration = (time.time() - start_time) / FLAGS.display_loss
                    duration_compute = duration_compute / FLAGS.display_loss
                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch; compute %.1f examples/sec)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, duration,
                                        FLAGS.batch_size / duration_compute))
                    duration_compute = 0
                    start_time = time.time()

                if (step + 1) % FLAGS.display_summary == 0 and not has_run_meta:
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % FLAGS.checkpoint_interval == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)
        except KeyboardInterrupt:
            print("Ctrl-C pressed. Saving model before exit.")
            checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=global_step)
            sys.exit()
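
# NOTE: _average_gradients is called in train() above but defined elsewhere.
# The sketch below follows the standard TensorFlow multi-GPU tower pattern
# (average each variable's gradient across towers). The optional second
# return value of (averaged squared gradient, variable) pairs matches how
# the EWC "stat" branch consumes grads2, but the exact statistic used by
# the project is an assumption, not taken from this file.
def _average_gradients(tower_grads, also_return_squares=False):
    average_grads = []
    average_squares = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue
        # Pre-1.0 tf.concat argument order: (concat_dim, values).
        stacked = tf.concat(0, grads)
        grad = tf.reduce_mean(stacked, 0)
        # Variables are shared across towers, so take the first tower's variable.
        v = grad_and_vars[0][1]
        average_grads.append((grad, v))
        if also_return_squares:
            average_squares.append((tf.reduce_mean(tf.square(stacked), 0), v))
    if also_return_squares:
        return average_grads, average_squares
    return average_grads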
images, labels = preprocessor.minibatch(dataset, subset='validation')
graph = load_graph(model_file)
input_tensor = graph.get_tensor_by_name(input_layer + ":0")
output_tensor = graph.get_tensor_by_name(output_layer + ":0")

rewrite_options = rewriter_config_pb2.RewriterConfig(
    layout_optimizer=rewriter_config_pb2.RewriterConfig.ON)
config = tf.ConfigProto()
config.inter_op_parallelism_threads = num_inter_threads
config.intra_op_parallelism_threads = num_intra_threads
config.graph_options.rewrite_options.remapping = (
    rewriter_config_pb2.RewriterConfig.OFF)

total_accuracy1, total_accuracy5 = (0.0, 0.0)
num_processed_images = 0
num_remaining_images = dataset.num_examples_per_epoch(subset='validation') \
    - num_processed_images
top1 = 0

with tf.Session(config=config) as sess:
    sess_graph = tf.Session(graph=graph, config=config)
    while num_remaining_images >= batch_size:
        # Read and preprocess data.
        # import pdb
        # pdb.set_trace()
        np_images, np_labels = sess.run([images[0], labels[0]])
        np_labels -= 1  # shift labels down by one
        # print(np_labels.shape)
        num_processed_images += batch_size
        num_remaining_images -= batch_size
        start_time = time.time()
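
# NOTE: load_graph is used above to obtain tensors by name but is not defined
# in this snippet. The helper below is a hypothetical minimal sketch of the
# usual frozen-GraphDef loader, assuming model_file points to a serialized
# GraphDef (.pb); the project's actual implementation may differ.
def load_graph(model_file):
    graph = tf.Graph()
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(model_file, "rb") as f:
        graph_def.ParseFromString(f.read())
    with graph.as_default():
        # Import with an empty name scope so "input:0"-style names resolve
        # without an "import/" prefix.
        tf.import_graph_def(graph_def, name='')
    return graph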