def train_face_net(img_data, lab_data, nclass, image_size, nrof_preprocess_threads, face_class, alfa, max_epoch, batch_size, epoch_num, learning_rate=0.0001): read_train_dir = './botid/1010/' seave_train_dir = './botid/1010/' batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') labels = ops.convert_to_tensor(lab_data, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_num, 'index_dequeue') prelogits = face_id_net(image_batch, face_class, phase_train_placeholder) prelogits_center_loss, _ = center_loss(prelogits, label_batch, alfa, nclass) logits = slim.fully_connected( prelogits, nclass, activation_fn=None, weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), scope='Logits', reuse=False) eps = 1e-4 prelogits_norm = tf.reduce_mean( tf.norm(tf.abs(prelogits) + eps, ord=1, axis=1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * 0.0005) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * 0.00) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) print('看看你的真面目', [cross_entropy_mean], regularization_losses, prelogits_center_loss) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses + [prelogits_center_loss], name='total_loss') train_op = trian_optimizer(total_loss, learning_rate=learning_rate) gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) saver = tf.train.Saver() ckpt = tf.train.get_checkpoint_state(read_train_dir) y_step = 0 if ckpt and ckpt.model_checkpoint_path: global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] saver.restore(sess, ckpt.model_checkpoint_path) print('原有训练次数', global_step) y_step = int(float(global_step)) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): for epoch in range(1, max_epoch + 1): # print('你说这是什么事:img_data',type(img_data),len(img_data),img_data) # print('你说这是什么事:lab_data',type(lab_data), len(lab_data), lab_data) index_epoch = sess.run(index_dequeue_op) # print(index_epoch) label_epoch = np.array(lab_data)[index_epoch] image_epoch = np.array(img_data)[index_epoch] # Enqueue one epoch of image paths and labels labels_array = np.expand_dims(np.array(label_epoch), 1) # print('这里的图片是什么', image_epoch.shape, image_epoch) # print('到这里标签变成什么了', labels_array.shape, labels_array) image_paths_array = np.expand_dims(np.array(image_epoch), 1) control_value = 14 control_array = np.ones_like(labels_array) * control_value sess.run( enqueue_op, { image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array }) batch_number = 0 while batch_number < epoch_num: tensor_list = [ total_loss, train_op, regularization_losses, prelogits, cross_entropy_mean, prelogits_norm, accuracy, prelogits_center_loss ] feed_dict = { phase_train_placeholder: True, batch_size_placeholder: batch_size } loss_, _, reg_losses_, prelogits_, cross_entropy_mean_, prelogits_norm_, accuracy_, center_loss_ = sess.run( tensor_list, feed_dict=feed_dict) print( 'Epoch: [%d][%d/%d]\t总loss %2.3f\t交叉熵平均 %2.3f\t正则化loss %2.3f\t准确率 %2.3f\t中心loss %2.3f' % (epoch, batch_number + 1, epoch_num, loss_, cross_entropy_mean_, np.sum(reg_losses_), accuracy_, center_loss_)) batch_number += 1 gd = sess.graph.as_graph_def() # fix batch norm nodes for node in gd.node: # print(node.name) if node.op == 'RefSwitch': node.op = 'Switch' for index in range(len(node.input)): if 'moving_' in node.input[index]: node.input[index] = node.input[index] + '/read' elif node.op == 'AssignSub': node.op = 'Sub' if 'use_locking' in node.attr: del node.attr['use_locking'] constant_graph = graph_util.convert_variables_to_constants( sess, sess.graph_def, ['output/output']) with tf.gfile.FastGFile(seave_train_dir + 'face72.pb', mode='wb') as f: f.write(constant_graph.SerializeToString()) checkpoint_path = os.path.join(seave_train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=epoch + y_step) return True
def _read_keyed_batch_examples_helper(file_pattern, batch_size, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, num_threads=1, read_batch_size=1, filter_fn=None, parse_fn=None, setup_shared_queue=False, name=None, seed=None): """Adds operations to read, queue, batch `Example` protos. Args: file_pattern: List of files or patterns of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int or scalar `Tensor` specifying the batch size to use. reader: A function or class that returns an object with `read` method, (filename tensor) -> (example tensor). randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If `None`, cycles through the dataset forever. NOTE - If specified, creates a variable that must be initialized, so call `tf.global_variables_initializer()` and run the op in a session. queue_capacity: Capacity for input queue. num_threads: The number of threads enqueuing examples. read_batch_size: An int or scalar `Tensor` specifying the number of records to read at once filter_fn: Filtering function, takes both keys as well `Example` Tensors and returns a boolean mask of the same shape as the input Tensors to be applied for filtering. If `None`, no filtering is done. parse_fn: Parsing function, takes `Example` Tensor returns parsed representation. If `None`, no parsing is done. setup_shared_queue: Whether to set up a shared queue for file names. name: Name of resulting op. seed: An integer (optional). Seed used if randomize_input == True. Returns: Returns tuple of: - `Tensor` of string keys. - String `Tensor` of batched `Example` proto. Raises: ValueError: for invalid inputs. """ # Retrieve files to read. file_names = _get_file_names(file_pattern, randomize_input) # Check input parameters are given and reasonable. if (not queue_capacity) or (queue_capacity <= 0): raise ValueError('Invalid queue_capacity %s.' % queue_capacity) if (batch_size is None) or ( (not isinstance(batch_size, ops.Tensor)) and (batch_size <= 0 or batch_size > queue_capacity)): raise ValueError('Invalid batch_size %s, with queue_capacity %s.' % (batch_size, queue_capacity)) if (read_batch_size is None) or ( (not isinstance(read_batch_size, ops.Tensor)) and (read_batch_size <= 0)): raise ValueError('Invalid read_batch_size %s.' % read_batch_size) if (not num_threads) or (num_threads <= 0): raise ValueError('Invalid num_threads %s.' % num_threads) if (num_epochs is not None) and (num_epochs <= 0): raise ValueError('Invalid num_epochs %s.' % num_epochs) with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope: with ops.name_scope('file_name_queue') as file_name_queue_scope: if setup_shared_queue: file_name_queue = data_flow_ops.FIFOQueue( capacity=1, dtypes=[dtypes.string], shapes=[[]]) enqueue_op = file_name_queue.enqueue( input_pipeline_ops.seek_next(file_names, shuffle=randomize_input, num_epochs=num_epochs, seed=seed)) queue_runner.add_queue_runner( queue_runner.QueueRunner(file_name_queue, [enqueue_op])) else: file_name_queue = input_ops.string_input_producer( constant_op.constant(file_names, name='input'), shuffle=randomize_input, num_epochs=num_epochs, name=file_name_queue_scope, seed=seed) example_list = _get_examples(file_name_queue, reader, num_threads, read_batch_size, filter_fn, parse_fn) enqueue_many = read_batch_size > 1 if num_epochs is None: allow_smaller_final_batch = False else: allow_smaller_final_batch = True # Setup batching queue given list of read example tensors. if randomize_input: if isinstance(batch_size, ops.Tensor): min_after_dequeue = int(queue_capacity * 0.4) else: min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size) queued_examples_with_keys = input_ops.shuffle_batch_join( example_list, batch_size, capacity=queue_capacity, min_after_dequeue=min_after_dequeue, enqueue_many=enqueue_many, name=scope, allow_smaller_final_batch=allow_smaller_final_batch, seed=seed) else: queued_examples_with_keys = input_ops.batch_join( example_list, batch_size, capacity=queue_capacity, enqueue_many=enqueue_many, name=scope, allow_smaller_final_batch=allow_smaller_final_batch) if parse_fn and isinstance(queued_examples_with_keys, dict): queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME) return queued_keys, queued_examples_with_keys return queued_examples_with_keys
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) train_set = facenet.get_dataset(args.data_dir) if args.filter_filename: train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The dataset should not be empty' # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many(args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(1,), (1,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder], name='enqueue_op') nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_png(file_contents) if args.random_rotate: image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8) if args.random_crop: image = tf.random_crop(image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) # pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Total number of classes: %d' % nrof_classes) print('Total number of examples: %d' % len(image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Add center loss if args.center_loss_factor > 0.0: prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) # Calculate the total losses regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer) sess.close() return model_dir
def testContainer(self): """Set containers outside & inside of cond_v2. Make sure the containers are set correctly for both variable creation (tested by variables.Variable) and for stateful ops (tested by FIFOQueue) """ self.skipTest("b/113048653") with ops.Graph().as_default() as g: with self.session(graph=g): v0 = variables.Variable([0]) q0 = data_flow_ops.FIFOQueue(1, dtypes.float32) def container(node): return node.op.get_attr("container") self.assertEqual(compat.as_bytes(""), container(v0)) self.assertEqual(compat.as_bytes(""), container(q0.queue_ref)) def true_fn(): # When this branch is created in cond below, # the container should begin with 'l1' v1 = variables.Variable([1]) q1 = data_flow_ops.FIFOQueue(1, dtypes.float32) with ops.container("l2t"): v2 = variables.Variable([2]) q2 = data_flow_ops.FIFOQueue(1, dtypes.float32) v3 = variables.Variable([1]) q3 = data_flow_ops.FIFOQueue(1, dtypes.float32) self.assertEqual(compat.as_bytes("l1"), container(v1)) self.assertEqual(compat.as_bytes("l1"), container(q1.queue_ref)) self.assertEqual(compat.as_bytes("l2t"), container(v2)) self.assertEqual(compat.as_bytes("l2t"), container(q2.queue_ref)) self.assertEqual(compat.as_bytes("l1"), container(v3)) self.assertEqual(compat.as_bytes("l1"), container(q3.queue_ref)) return constant_op.constant(2.0) def false_fn(): # When this branch is created in cond below, # the container should begin with 'l1' v1 = variables.Variable([1]) q1 = data_flow_ops.FIFOQueue(1, dtypes.float32) with ops.container("l2f"): v2 = variables.Variable([2]) q2 = data_flow_ops.FIFOQueue(1, dtypes.float32) v3 = variables.Variable([1]) q3 = data_flow_ops.FIFOQueue(1, dtypes.float32) self.assertEqual(compat.as_bytes("l1"), container(v1)) self.assertEqual(compat.as_bytes("l1"), container(q1.queue_ref)) self.assertEqual(compat.as_bytes("l2f"), container(v2)) self.assertEqual(compat.as_bytes("l2f"), container(q2.queue_ref)) self.assertEqual(compat.as_bytes("l1"), container(v3)) self.assertEqual(compat.as_bytes("l1"), container(q3.queue_ref)) return constant_op.constant(6.0) with ops.container("l1"): cnd_true = cond_v2.cond_v2(constant_op.constant(True), true_fn, false_fn) self.assertEquals(cnd_true.eval(), 2) cnd_false = cond_v2.cond_v2(constant_op.constant(False), true_fn, false_fn) self.assertEquals(cnd_false.eval(), 6) v4 = variables.Variable([3]) q4 = data_flow_ops.FIFOQueue(1, dtypes.float32) v5 = variables.Variable([4]) q5 = data_flow_ops.FIFOQueue(1, dtypes.float32) self.assertEqual(compat.as_bytes("l1"), container(v4)) self.assertEqual(compat.as_bytes("l1"), container(q4.queue_ref)) self.assertEqual(compat.as_bytes(""), container(v5)) self.assertEqual(compat.as_bytes(""), container(q5.queue_ref))
def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Apply gradients to variables. This contains most of the synchronization implementation and also wraps the apply_gradients() from the real optimizer. Args: grads_and_vars: List of (gradient, variable) pairs as returned by compute_gradients(). global_step: Optional Variable to increment by one after the variables have been updated. name: Optional name for the returned operation. Default to the name passed to the Optimizer constructor. Returns: train_op: The op to dequeue a token so the replicas can exit this batch and start the next one. This is executed by each replica. Raises: ValueError: If the grads_and_vars is empty. ValueError: If global step is not provided, the staleness cannot be checked. """ if not grads_and_vars: raise ValueError("Must supply at least one variable") if global_step is None: raise ValueError("Global step is required to check staleness") self._global_step = global_step train_ops = [] aggregated_grad = [] var_list = [] # local_anchor op will be placed on this worker task by default. local_anchor = control_flow_ops.no_op() # Colocating local_step variable prevents it being placed on the PS. distribution_strategy = distribution_strategy_context.get_strategy() with distribution_strategy.extended.colocate_vars_with(local_anchor): self._local_step = variable_scope.variable( initial_value=0, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=global_step.dtype.base_dtype, name="sync_rep_local_step") self.local_step_init_op = state_ops.assign(self._local_step, global_step) chief_init_ops = [self.local_step_init_op] self.ready_for_local_init_op = variables.report_uninitialized_variables( variables.global_variables()) with ops.name_scope(None, self._name): for grad, var in grads_and_vars: var_list.append(var) with ops.device(var.device): # Dense gradients. if grad is None: aggregated_grad.append(None) # pass-through. continue elif isinstance(grad, ops.Tensor): grad_accum = data_flow_ops.ConditionalAccumulator( grad.dtype, shape=var.get_shape(), shared_name=var.name + "/grad_accum") train_ops.append( grad_accum.apply_grad(grad, local_step=self._local_step)) aggregated_grad.append( grad_accum.take_grad(self._replicas_to_aggregate)) else: if not isinstance(grad, indexed_slices.IndexedSlices): raise ValueError("Unknown grad type!") grad_accum = data_flow_ops.SparseConditionalAccumulator( grad.dtype, shape=(), shared_name=var.name + "/grad_accum") train_ops.append( grad_accum.apply_indexed_slices_grad( grad, local_step=self._local_step)) aggregated_grad.append( grad_accum.take_indexed_slices_grad( self._replicas_to_aggregate)) self._accumulator_list.append((grad_accum, var.device)) aggregated_grads_and_vars = zip(aggregated_grad, var_list) # sync_op will be assigned to the same device as the global step. with ops.device(global_step.device), ops.name_scope(""): update_op = self._opt.apply_gradients( aggregated_grads_and_vars, global_step) # Create token queue. with ops.device(global_step.device), ops.name_scope(""): sync_token_queue = (data_flow_ops.FIFOQueue( -1, global_step.dtype.base_dtype, shapes=(), name="sync_token_q", shared_name="sync_token_q")) self._sync_token_queue = sync_token_queue with ops.device(global_step.device), ops.name_scope(""): # Replicas have to wait until they can get a token from the token queue. with ops.control_dependencies(train_ops): token = sync_token_queue.dequeue() train_op = state_ops.assign(self._local_step, token) with ops.control_dependencies([update_op]): # Sync_op needs to insert tokens to the token queue at the end of the # step so the replicas can fetch them to start the next step. tokens = array_ops.fill([self._tokens_per_step], global_step) sync_op = sync_token_queue.enqueue_many((tokens, )) if self._variable_averages is not None: with ops.control_dependencies([sync_op ]), ops.name_scope(""): sync_op = self._variable_averages.apply( self._variables_to_average) self._chief_queue_runner = queue_runner.QueueRunner( sync_token_queue, [sync_op]) for accum, dev in self._accumulator_list: with ops.device(dev): chief_init_ops.append( accum.set_global_step(global_step, name="SetGlobalStep")) self.chief_init_op = control_flow_ops.group(*(chief_init_ops)) self._gradients_applied = True return train_op
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set = facenet.get_dataset(args.data_dir) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_jpeg(file_contents, channels=3) image = tf.image.resize_images( image, [args.image_size + 22, args.image_size + 22]) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_images( image, [args.image_size, args.image_size]) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') labels_batch = tf.identity(labels_batch, 'label_batch') # Build the inference graph if args.optimizer == 'MOMW' or args.optimizer == 'ADAMW' or args.optimizer == 'ADABOUND': print("\nNot using L2 regularization\n") prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=0.0) else: prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) if args.fraction != 2.0: print("\n Using fractional distance metric\n") #embeddings = lk_normalize(prelogits, axis=1, fraction=args.fraction, name='embeddings') embeddings = tf.identity(prelogits, name='embeddings') else: embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha, args.fraction) # USING CYCLICAL LEARNING RATE if args.max_lr != 0: print("\nUsing cyclic learning rate\n") learning_rate = clr.cyclic_learning_rate(global_step, learning_rate_placeholder, max_lr=args.max_lr, step_size=args.cycle_size, mode=args.cycle_policy) else: print("\nUsing exponentially decaying learning rate\n") learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), weight_decay=args.weight_decay) # Create a saver saver = tf.train.Saver(tf.global_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size, anchor, positive, negative, triplet_loss, learning_rate, regularization_losses) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size, args.fraction) return model_dir
def main(pretrained_model: str, logs_base_dir: str = '~/logs/facenet', models_base_dir: str = '~/models/facenet', gpu_memory_fraction: float = 1.0, data_dir: str = '~/datasets/casia/casia_maxpy_mtcnnalign_182_160', model_def: str = 'models.inception_resnet_v1', max_nrof_epochs: int = 500, batch_size: int = 100, image_size: int = 160, epoch_size: int = 1000, embedding_size: int = 128, random_crop: bool = False, random_flip: bool = False, random_rotate: bool = False, use_fixed_image_standardization: bool = False, keep_probability: float = 1.0, weight_decay: float = 0.0, center_loss_factor: float = 0.0, center_loss_alfa: float = 0.95, prelogits_norm_loss_factor: float = 0.0, prelogits_norm_p: float = 1.0, prelogits_hist_max: float = 10.0, optimizer: str = 'ADAGRAD', learning_rate: float = 0.1, learning_rate_decay_epochs: int = 100, learning_rate_decay_factor: float = 1.0, moving_average_decay: float = 0.9999, seed: int = 666, nrof_preprocess_threads: int = 4, log_histograms: bool = False, learning_rate_schedule_file: str = 'data/learning_rate_schedule.txt', filter_filename: str = '', filter_percentile: float = 100.0, filter_min_nrof_images_per_class: int = 0, validate_every_n_epochs: int = 5, validation_set_split_ratio: float = 0.0, min_nrof_val_images_per_class: int = 0, lfw_pairs: str = 'data/pairs.txt', lfw_dir: str = '', lfw_batch_size: int = 100, lfw_nrof_folds: int = 10, lfw_distance_metric: int = 0, lfw_use_flipped_images: bool = False, lfw_subtract_mean: bool = False): """Train with softmax Arguments: pretrained_model {str} -- Load a pretrained model before training starts. Keyword Arguments: logs_base_dir {str} -- Directory where to write event logs. (default: {'~/logs/facenet'}) models_base_dir {str} -- Directory where to write trained models and checkpoints. (default: {'~/models/facenet'}) gpu_memory_fraction {float} -- Upper bound on the amount of GPU memory that will be used by the process. (default: {1.0}) data_dir {str} -- Path to the data directory containing aligned face patches. (default: {'~/datasets/casia/casia_maxpy_mtcnnalign_182_160'}) model_def {str} -- Model definition. Points to a module containing the definition of the inference graph. (default: {'models.inception_resnet_v1'}) max_nrof_epochs {int} -- Number of epochs to run. (default: {500}) batch_size {int} -- Number of images to process in a batch. (default: {100}) image_size {int} -- Image size (height, width) in pixels. (default: {160}) epoch_size {int} -- Number of batches per epoch. (default: {1000}) embedding_size {int} -- Dimensionality of the embedding. (default: {128}) random_crop {bool} -- Performs random cropping of training images. If false, the center image_size pixels from the training images are used. If the size of the images in the data directory is equal to image_size no cropping is performed (default: {False}) random_flip {bool} -- Performs random horizontal flipping of training images. (default: {False}) random_rotate {bool} -- Performs random rotations of training images. (default: {False}) use_fixed_image_standardization {bool} -- Performs fixed standardization of images. (default: {False}) keep_probability {float} -- Keep probability of dropout for the fully connected layer(s). (default: {1.0}) weight_decay {float} -- L2 weight regularization. (default: {0.0}) center_loss_factor {float} -- Center loss factor. (default: {0.0}) center_loss_alfa {float} -- Center update rate for center loss. (default: {0.95}) prelogits_norm_loss_factor {float} -- Loss based on the norm of the activations in the prelogits layer. (default: {0.0}) prelogits_norm_p {float} -- Norm to use for prelogits norm loss. (default: {1.0}) prelogits_hist_max {float} -- The max value for the prelogits histogram. (default: {10.0}) optimizer {str} -- The optimization algorithm to use (default: {'ADAGRAD'}) learning_rate {float} -- Initial learning rate. If set to a negative value a learning rate schedule can be specified in the file "learning_rate_schedule.txt" (default: {0.1}) learning_rate_decay_epochs {int} -- Number of epochs between learning rate decay. (default: {100}) learning_rate_decay_factor {float} -- Learning rate decay factor. (default: {1.0}) moving_average_decay {float} -- Exponential decay for tracking of training parameters. (default: {0.9999}) seed {int} -- Random seed. (default: {666}) nrof_preprocess_threads {int} -- Number of preprocessing (data loading and augmentation) threads. (default: {4}) log_histograms {bool} -- Enables logging of weight/bias histograms in tensorboard. (default: {False}) learning_rate_schedule_file {str} -- File containing the learning rate schedule that is used when learning_rate is set to to -1. (default: {'data/learning_rate_schedule.txt'}) filter_filename {str} -- File containing image data used for dataset filtering (default: {''}) filter_percentile {float} -- Keep only the percentile images closed to its class center (default: {100.0}) filter_min_nrof_images_per_class {int} -- Keep only the classes with this number of examples or more (default: {0}) validate_every_n_epochs {int} -- Number of epoch between validation (default: {5}) validation_set_split_ratio {float} -- The ratio of the total dataset to use for validation (default: {0.0}) min_nrof_val_images_per_class {int} -- Classes with fewer images will be removed from the validation set (default: {0}) lfw_pairs {str} -- The file containing the pairs to use for validation. (default: {'data/pairs.txt'}) lfw_dir {str} -- Path to the data directory containing aligned face patches. (default: {''}) lfw_batch_size {int} -- Number of images to process in a batch in the LFW test set. (default: {100}) lfw_nrof_folds {int} -- Number of folds to use for cross validation. Mainly used for testing. (default: {10}) lfw_distance_metric {int} -- Type of distance metric to use. 0: Euclidian, 1:Cosine similarity distance. (default: {0}) lfw_use_flipped_images {bool} -- Concatenates embeddings for the image and its horizontally flipped counterpart. (default: {False}) lfw_subtract_mean {bool} -- Subtract feature mean before calculating distance. (default: {False}) Returns: [type] -- [description] """ network = importlib.import_module(model_def) image_size = (image_size, image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(logs_base_dir), subdir) os.makedirs(log_dir, exist_ok=True) model_dir = os.path.join(os.path.expanduser(models_base_dir), subdir) os.makedirs(model_dir, exist_ok=True) stat_file_name = os.path.join(log_dir, 'stat.h5') # Write arguments to a text file # facenet.write_arguments_to_file( # args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory # src_path, _ = os.path.split(os.path.realpath(__file__)) # facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=seed) random.seed(seed) dataset = facenet.get_dataset(data_dir) if filter_filename: dataset = filter_dataset(dataset, os.path.expanduser(filter_filename), filter_percentile, filter_min_nrof_images_per_class) if validation_set_split_ratio > 0.0: train_set, val_set = facenet.split_dataset( dataset, validation_set_split_ratio, min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] image_list, label_list = facenet.get_image_paths_and_labels(train_set) val_image_list, val_label_list = facenet.get_image_paths_and_labels( val_set) assert len(image_list) > 0, 'The training set should not be empty' nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if pretrained_model: pretrained_model = os.path.expanduser(pretrained_model) print('Pre-trained model: %s' % pretrained_model) if lfw_dir: print('LFW directory: %s' % lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(lfw_pairs)) # Get the paths for the corresponding images lfw_paths, lfw_labels = lfw.get_paths(os.path.expanduser(lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(seed) global_step = tf.Variable(0, trainable=False) # Create a queue that produces indices into the image_list and # label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many(batch_size * epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') nrof_preprocess_threads = 4 input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Number of classes in training set: %d' % nrof_classes) print('Number of examples in training set: %d' % len(image_list)) print('Number of classes in validation set: %d' % len(val_set)) print('Number of examples in validation set: %d' % len(val_image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference(image_batch, keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=embedding_size, weight_decay=weight_decay) logits = slim.fully_connected( prelogits, len(train_set), activation_fn=None, weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Norm for the prelogits eps = 1e-4 prelogits_norm = tf.reduce_mean( tf.norm(tf.abs(prelogits) + eps, ord=prelogits_norm_p, axis=1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * prelogits_norm_loss_factor) # Add center loss prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, center_loss_alfa, nrof_classes) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * center_loss_factor) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, learning_rate_decay_epochs * epoch_size, learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and # updates the model parameters train_op = facenet.train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, tf.global_variables(), log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') nrof_steps = max_nrof_epochs * epoch_size # Validate every validate_every_n_epochs as well as in the last # epoch nrof_val_samples = int( math.ceil(max_nrof_epochs / validate_every_n_epochs)) stat = { 'loss': np.zeros((nrof_steps, ), np.float32), 'center_loss': np.zeros((nrof_steps, ), np.float32), 'reg_loss': np.zeros((nrof_steps, ), np.float32), 'xent_loss': np.zeros((nrof_steps, ), np.float32), 'prelogits_norm': np.zeros((nrof_steps, ), np.float32), 'accuracy': np.zeros((nrof_steps, ), np.float32), 'val_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_xent_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_accuracy': np.zeros((nrof_val_samples, ), np.float32), 'lfw_accuracy': np.zeros((max_nrof_epochs, ), np.float32), 'lfw_valrate': np.zeros((max_nrof_epochs, ), np.float32), 'learning_rate': np.zeros((max_nrof_epochs, ), np.float32), 'time_train': np.zeros((max_nrof_epochs, ), np.float32), 'time_validate': np.zeros((max_nrof_epochs, ), np.float32), 'time_evaluate': np.zeros((max_nrof_epochs, ), np.float32), 'prelogits_hist': np.zeros((max_nrof_epochs, 1000), np.float32), } for epoch in range(1, max_nrof_epochs + 1): step = sess.run(global_step, feed_dict=None) # Train for one epoch t = time.time() cont = train( args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, learning_rate_schedule_file, stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, random_rotate, random_crop, random_flip, prelogits_norm, prelogits_hist_max, use_fixed_image_standardization) stat['time_train'][epoch - 1] = time.time() - t if not cont: break t = time.time() if len(val_image_list) > 0 and ( (epoch - 1) % validate_every_n_epochs == validate_every_n_epochs - 1 or epoch == max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, validate_every_n_epochs, use_fixed_image_standardization) stat['time_validate'][epoch - 1] = time.time() - t # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) # Evaluate on LFW t = time.time() if lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, lfw_paths, lfw_labels, lfw_batch_size, lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, lfw_distance_metric, lfw_subtract_mean, lfw_use_flipped_images, use_fixed_image_standardization) stat['time_evaluate'][epoch - 1] = time.time() - t print('Saving statistics') with h5py.File(stat_file_name, 'w') as f: for key, value in stat.iteritems(): f.create_dataset(key, data=value) return model_dir
def main(): model_def = 'inception_resnet_v1' network = importlib.import_module(model_def) gpu_memory_fraction = 0.96 # Learning params FC_SIZE = 1 learning_rate = 0.0001 batch_size = 8 epoch_size = 2 keep_probability = 1.0 weight_decay = 0.0 # Read the file containing the pairs used for testing pairs_train, labels_train, labels_sigle_train = read_pairs_path_label_test( image_directory, train_filename) pairs_validation, labels_validation, labels_sigle_validation = read_pairs_path_label_test( image_directory, validation_filename) with tf.Graph().as_default(): image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 2), name='image_path') # batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 2), name='labels') # # read image path input_queue = data_flow_ops.FIFOQueue(capacity=300000, dtypes=[tf.string, tf.int32], shapes=[(2, ), (2, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 3 images_labels = [] for _ in range(nrof_preprocess_threads): filenames, labels = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) # pylint: disable=no-member image.set_shape((224, 224, 3)) images.append(tf.image.per_image_standardization(image)) images_labels.append([images, labels]) image_batch, labels_batch = tf.train.batch_join( images_labels, batch_size=batch_size_placeholder, shapes=[(224, 224, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') labels_batch = tf.identity(labels_batch, 'label_batch') # Build the graph prelogits, _ = network.inference(image_batch, keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=1, weight_decay=weight_decay) predictions = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # # model_ = Model(inputs=resnet.input, outputs=predictions) preds_left, preds_right = tf.unstack( tf.reshape(predictions, [-1, 2, 1]), 2, 1) labels_1, _ = tf.unstack(tf.reshape(labels_batch, [-1, 2, 1]), 2, 1) difference = tf.sigmoid(tf.subtract(preds_left, preds_right)) loss = log_loss_(labels_1, difference) # # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(loss) train_op = _train_op(loss, 'MOM', learning_rate, tf.global_variables()) # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): # Training and validation loop max_nrof_epochs = 30 epoch = 0 while epoch < max_nrof_epochs: batch_number = 0 nrof_pairs = len(pairs_train) while batch_number < epoch_size: nrof_batches = int(np.ceil(nrof_pairs * 2 / batch_size)) pair_paths = list(itertools.chain(*pairs_train)) labels_array = np.reshape(labels_train, (-1, 2)) pair_paths_array = np.reshape( np.expand_dims(np.array(pair_paths), 1), (-1, 2)) sess.run(enqueue_op, feed_dict={ image_paths_placeholder: pair_paths_array, labels_placeholder: labels_array }) nrof_examples = len(pair_paths) train_time = 0 batch_err = 0 batch_preds = 0 i = 0 while i < nrof_batches: start_time = time.time() batch_size = min(nrof_examples - i * batch_size, batch_size) # preds = sess.run(predictions, feed_dict={batch_size_placeholder:batch_size}) # err, _ = sess.run([loss, train_op], feed_dict={batch_size_placeholder:batch_size, phase_train_placeholder: True}) err, _, preds, lab = sess.run( [loss, train_op, predictions, labels_batch], feed_dict={ batch_size_placeholder: batch_size, phase_train_placeholder: True }) duration = time.time() - start_time i += 1 train_time += duration batch_err += err batch_preds += preds print('Epoch: [%d][%d/%d]\tTime %.3f\tLoss %2.3f\t' % (epoch, batch_number + 1, epoch_size, train_time, batch_err / i)) batch_number += 1 # ### validate ########## nro_size = pairs_validation.shape[0] labels_array = np.reshape(labels_validation, (-1, 2)) image_paths_array = np.reshape( np.expand_dims(np.array(pairs_validation), 1), (-1, 2)) sess.run(enqueue_op, feed_dict={ image_paths_placeholder: image_paths_array, labels_placeholder: labels_array }) # for i in xrange(nrof_batches): # batch_size = min(nrof_images - i * batch_size, batch_size) # emb, lab = sess.run([embeddings, labels_batch], feed_dict={batch_size_placeholder: batch_size}) feed_dict = { batch_size_placeholder: nro_size, phase_train_placeholder: False } err, _, preds, lab = sess.run( [loss, train_op, predictions, labels_batch], feed_dict=feed_dict) y = np.reshape(lab, (lab.shape[0], 1)) accuracy = compute_accuracy(preds, y) print('epoch %d Accuracy validate set %0.2f' % (epoch, accuracy)) epoch += 1
def main(args): #Importing network based on model.def path network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) #Create a model file directory if not os.path.isdir(model_dir): os.makedirs(model_dir) palmPrintsNet.write_arguments_to_file( args, os.path.join(log_dir, 'arguments.txt')) src_path, _ = os.path.split(os.path.realpath(__file__)) palmPrintsNet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) train_set = palmPrintsNet.get_dataset(args.data_dir) #Total class's number nrof_classes = len(train_set) print('model_path: %s' % model_dir) print('log_path: %s' % log_dir) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) image_list, label_list = palmPrintsNet.get_image_paths_and_labels( train_set) assert len(image_list) > 0, 'The dataset should not be empty' labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder], name='enqueue_op') nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=1) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(160, 160, 1), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('the number of class: %d' % nrof_classes) print('the number of image: %d' % len(image_list)) #perlogits is a 128-v feature vector prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected( prelogits, len(train_set), activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') #center loss if args.center_loss_factor > 0.0: prelogits_center_loss, _ = palmPrintsNet.center_loss( prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection( tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) #total loss regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') #train train_op = palmPrintsNet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) return model_dir
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set_ID = facenet.get_dataset(args.data_dir_ID) train_set_camera = facenet.get_dataset(args.data_dir_camera) logger.info('Model directory: %s' % model_dir) logger.info('Log directory: %s' % log_dir) if args.pretrained_model: logger.info('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) if args.lfw_dir: logger.info('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) # associative, fengchen assoc = Associative(network, args) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder_ID = tf.placeholder(tf.string, shape=(None, 3), name='image_paths_ID') image_paths_placeholder_camera = tf.placeholder( tf.string, shape=(None, 3), name='image_paths_camera') image_paths_placeholder_valid = tf.placeholder( tf.string, shape=(None, 3), name='image_paths_valid') labels_placeholder_ID = tf.placeholder(tf.int64, shape=(None, 3), name='labels_ID') labels_placeholder_camera = tf.placeholder(tf.int64, shape=(None, 3), name='labels_camera') labels_placeholder_valid = tf.placeholder(tf.int64, shape=(None, 3), name='labels_valid') input_queue_ID = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) input_queue_camera = data_flow_ops.FIFOQueue( capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) input_queue_valid = data_flow_ops.FIFOQueue( capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) enqueue_op_ID = input_queue_ID.enqueue_many( [image_paths_placeholder_ID, labels_placeholder_ID]) enqueue_op_camera = input_queue_camera.enqueue_many( [image_paths_placeholder_camera, labels_placeholder_camera]) enqueue_op_valid = input_queue_valid.enqueue_many( [image_paths_placeholder_valid, labels_placeholder_valid]) nrof_preprocess_threads = 4 images_and_labels_ID = [] images_and_labels_camera = [] images_and_labels_valid = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue_ID.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels_ID.append([images, label]) for _ in range(nrof_preprocess_threads): filenames, label = input_queue_camera.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) # pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels_camera.append([images, label]) for _ in range(nrof_preprocess_threads): filenames, label = input_queue_valid.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) # pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels_valid.append([images, label]) image_batch_ID, labels_batch_ID = tf.train.batch_join( images_and_labels_ID, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch_ID = tf.identity(image_batch_ID, 'image_batch_ID') image_batch_ID = tf.identity(image_batch_ID, 'input_ID') labels_batch_ID = tf.identity(labels_batch_ID, 'label_batch_ID') image_batch_camera, labels_batch_camera = tf.train.batch_join( images_and_labels_camera, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch_camera = tf.identity(image_batch_camera, 'image_batch_camera') image_batch_camera = tf.identity(image_batch_camera, 'input_camera') labels_batch_camera = tf.identity(labels_batch_camera, 'label_batch_camera') image_batch_valid, labels_batch_valid = tf.train.batch_join( images_and_labels_valid, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch_valid = tf.identity(image_batch_valid, 'image_batch_valid') labels_batch_valid = tf.identity(labels_batch_valid, 'label_batch_valid') # Build the inference graph prelogits_ID, _, _, _, _ = network.inference( image_batch_ID, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) prelogits_camera, _, _, _, _ = network.inference( image_batch_camera, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) prelogits_valid, _, _, _, _ = network.inference( image_batch_valid, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings_ID = tf.nn.l2_normalize(prelogits_ID, 1, 1e-10, name='embeddings_ID') embeddings_camera = tf.nn.l2_normalize(prelogits_camera, 1, 1e-10, name='embeddings_camera') embeddings_valid = tf.nn.l2_normalize(prelogits_valid, 1, 1e-10, name='embeddings_valid') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor_ID, positive_ID, negative_ID = tf.unstack( tf.reshape(embeddings_ID, [-1, 3, args.embedding_size]), 3, 1) triplet_loss_ID = facenet.triplet_loss(anchor_ID, positive_ID, negative_ID, args.alpha) anchor_camera, positive_camera, negative_camera = tf.unstack( tf.reshape(embeddings_camera, [-1, 3, args.embedding_size]), 3, 1) triplet_loss_camera = facenet.triplet_loss(anchor_camera, positive_camera, negative_camera, args.alpha) images_mmd_ID, images_mmd_camera, _, _ = assoc.get_image_and_label() feature_map3_ID, _, _, _, _ = network.inference( images_mmd_ID, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) feature_map3_camera, _, _, _, _ = network.inference( images_mmd_camera, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) feature_map3_ID = tf.nn.l2_normalize(feature_map3_ID, 1, 1e-10, name='feature_map3_ID') feature_map3_camera = tf.nn.l2_normalize(feature_map3_camera, 1, 1e-10, name='feature_map3_camera') loss_feature_map3 = losses.mmd_loss(feature_map3_ID, feature_map3_camera) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # # associative, fengchen # associative_loss = assoc.loss() * 10 # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) triplet_loss = tf.add_n([triplet_loss_ID] + [triplet_loss_camera] + regularization_losses, name='triplet_loss') # associative, fengchen loss_total = tf.add_n([triplet_loss_ID] + [triplet_loss_camera] + [loss_feature_map3] + regularization_losses, name='loss_total') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(loss_total, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) train_op_triplet = facenet.train(triplet_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if args.pretrained_model: logger.info('Restoring pretrained model: %s' % args.pretrained_model) saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) saver.restore(sess, os.path.expanduser(args.pretrained_model)) saver = tf.train.Saver(tf.global_variables(), max_to_keep=3) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_set_ID, train_set_camera, epoch, image_paths_placeholder_ID, image_paths_placeholder_camera, labels_placeholder_ID, labels_placeholder_camera, labels_batch_ID, labels_batch_camera, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op_ID, enqueue_op_camera, global_step, embeddings_ID, embeddings_camera, triplet_loss, loss_total, triplet_loss_ID, triplet_loss_camera, loss_feature_map3, regularization_losses, train_op, train_op_triplet, summary_writer, args.learning_rate_schedule_file, args.embedding_size) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: evaluate(sess, lfw_paths, embeddings_valid, labels_batch_valid, image_paths_placeholder_valid, labels_placeholder_valid, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op_valid, actual_issame, args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size) return model_dir
def main(data_dir, log_dir, model_dir, batch_size_train, epoch_size_train): """ main process for tripless lot train :return: """ batch_size_train = int(batch_size_train) epoch_size_train = int(epoch_size_train) network = importlib.import_module(constants.MODEL_DEF_DEFAULT) subdir = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S') logdir = log_dir print(logdir) if not os.path.isdir(logdir): os.makedirs(logdir) #model_dir = os.path.join(os.path.expanduser(constants.MODELS_BASE_DIR_DEFAULT), subdir) if not os.path.isdir(model_dir): os.makedirs(model_dir) np.random.seed(seed=constants.SEED_DEFAULT) # Load traing set print('Data dir : {0}'.format(data_dir)) train_set = face_net.load_dataset(data_dir) print('Model dir : {0}'.format(model_dir)) print('Log dir : {0}'.format(logdir)) # TODO load pre-trained model # Load LFW if constants.LFW_DIR_DEFAULT: print('LFW dir : {0}'.format(constants.LFW_DIR_DEFAULT)) # Read the file which contain the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(constants.LFW_PAIRS_DEFAULT)) lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(constants.LFW_DIR_DEFAULT), pairs) with tf.Graph().as_default(): tf.set_random_seed(constants.SEED_DEFAULT) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='labels') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3,), (3,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder, labels_placeholder]) nrof_preprocess_thread = constants.LFW_NROF_PREPROCESS_THREAD_DEFAULT images_and_labels = [] for i in range(nrof_preprocess_thread): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_content = tf.read_file(filename) image = tf.to_float(tf.image.decode_image(file_content, channels=3)) if constants.RANDOM_CROP_DEFAULT: image = tf.random_crop(image, [constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3]) else: image = tf.image.resize_image_with_crop_or_pad(image, constants.IMAGE_SIZE, constants.IMAGE_SIZE) if constants.RANDOM_FLIP_DEFAULT: image = tf.image.random_flip_left_right(image) # Pylink : disable=no-member image.set_shape((constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3)) images.append(image) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(constants.IMAGE_SIZE, constants.IMAGE_SIZE, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_thread * batch_size_train, allow_smaller_final_batch=True) # TODO something strange image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') labels_batch = tf.identity(labels_batch, 'label_batch') # Buld the inference graph prelogits, _ = network.inference(image_batch, constants.KEEP_PROBABILITY_DEFAULT, phase_train=phase_train_placeholder, bottleneck_layer_size=constants.EMBEDDING_SIZE_DEFAULT, weight_decay=constants.WEIGHT_DECAY_DEFAULT) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1, 3, constants.EMBEDDING_SIZE_DEFAULT]), 3, 1) triplet_loss = face_net.triplet_lot(anchor, positive, negative, constants.ALPHA_DEFAULT) learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, constants.LEARNING_RATE_DECAY_EPOCHS_DEFAULT * epoch_size_train, constants.LEARNING_RATE_DECAY_EPOCHS_FACTOR, staircase=True) tf.summary.scalar('learing_rate', learning_rate) # Calculate the total losses regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_loss, name='total_loss') # Build a Graph that train the model with one batch of example and update the model parameter train_op = face_net.train(total_loss, global_step, constants.OPTIMIZER_DEFAULT, learning_rate, constants.MOVING_AVERAGE_DECAY_DEFAULT, tf.global_variables(), log_dir) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of summaries summary_op = tf.summary.merge_all() # Start running operations on the Graph gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=constants.GPU_MEMORY_FRACTION_DEFAULT) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.summary.FileWriter(logdir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if constants.PRETRAINED_MODEL_DEFAULT: print('Restoring pretrained model: {0}'.format(constants.PRETRAINED_MODEL_DEFAULT)) saver.restore(sess, os.path.expanduser(constants.PRETRAINED_MODEL_DEFAULT)) # Training and validation loop epoch = 0 while epoch < constants.MAX_NROF_EPOCHS_DEFAULT: step = sess.run(global_step, feed_dict=None) epoch = step / epoch_size_train print('epoch for train {0}'.format(epoch)) # Training for each one epoch train(sess, batch_size_train, epoch_size_train, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, constants.LEARNING_RATE_SCHEDULE_FILE_DEFAULT, constants.EMBEDDING_SIZE_DEFAULT, anchor, positive, negative, triplet_loss) # Save variables and the metagraph if it does not exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LW if constants.LFW_DIR_DEFAULT: evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, batch_size_train, constants.LFW_NROF_FOLDS_DEFAULT, logdir, step, summary_writer, constants.EMBEDDING_SIZE_DEFAULT) print('End main process.') return model_dir
def apply_gradients(self, grads_and_vars, global_step=None, name=None): """Apply gradients to variables. This contains most of the synchronization implementation and also wraps the apply_gradients() from the real optimizer. Args: grads_and_vars: List of (gradient, variable) pairs as returned by compute_gradients(). global_step: Optional Variable to increment by one after the variables have been updated. name: Optional name for the returned operation. Default to the name passed to the Optimizer constructor. Returns: train_op: The op to dequeue a token so the replicas can exit this batch and start the next one. This is executed by each replica. Raises: ValueError: If the grads_and_vars is empty. ValueError: If global step is not provided, the staleness cannot be checked. """ if not grads_and_vars: raise ValueError("Must supply at least one variable") if global_step is None: raise ValueError("Global step is required to check staleness") self._global_step = global_step train_ops = [] aggregated_grad = [] inputs = [] var_list = [] for x in grads_and_vars: inputs.extend(list(x)) with ops.device(global_step.device): self._local_steps = variables.Variable( array_ops.zeros( [self._total_num_replicas], dtype=global_step.dtype), trainable=False, name="local_steps") # Check staleness. Note that this has to be ref(), otherwise identity will # be accessed and it will be old values. local_step = array_ops.slice(self._local_steps.ref(), array_ops.reshape(self._replica_id, (1,)), [1], name="get_local_step") local_step = array_ops.reshape(local_step, ()) is_stale = math_ops.less(local_step, global_step) with ops.op_scope(inputs, None, self._name): for grad, var in grads_and_vars: var_list.append(var) with ops.device(var.device): if isinstance(grad, ops.Tensor): gradient_queue = (data_flow_ops.FIFOQueue(self._tokens_per_step * 2, grad.dtype, shapes=var.get_shape(), shared_name=var.name)) self._one_element_queue_list.append((gradient_queue, var.device)) train_ops.append(gradient_queue.enqueue([grad])) # Aggregate all gradients gradients = gradient_queue.dequeue_many( self._replicas_to_aggregate) aggregated_grad.append(math_ops.reduce_sum(gradients, [0])) elif grad is None: aggregated_grad.append(None) # pass-through. else: if not isinstance(grad, ops.IndexedSlices): raise ValueError("Unknown grad type!") aggregated_grad.append(self._aggregate_sparse_grad(grad, var, train_ops)) aggregated_grads_and_vars = zip(aggregated_grad, var_list) # sync_op will be assigned to the same device as the global step. with ops.device(global_step.device), ops.name_scope(""): update_op = self._opt.apply_gradients(aggregated_grads_and_vars, global_step) # Create token queue. with ops.device(global_step.device), ops.name_scope(""): sync_token_queue = ( data_flow_ops.FIFOQueue(-1, global_step.dtype.base_dtype, shapes=(), shared_name="sync_token_q")) self._sync_token_queue = sync_token_queue # dummy_queue is passed to the queue runner. Don't use the real queues # because the queue runner doesn't automatically reopen it once it # closed queues in PS devices. dummy_queue = ( data_flow_ops.FIFOQueue(1, types_pb2.DT_INT32, shapes=(), shared_name="dummy_queue")) # Clear all the gradients queues in case there are stale gradients. clear_queue_ops = [] with ops.control_dependencies([update_op]): for queue, dev in self._one_element_queue_list: with ops.device(dev): stale_grads = queue.dequeue_many(queue.size()) clear_queue_ops.append(stale_grads) for queue, dev in self._sparse_grad_queues_and_devs: with ops.device(dev): _, stale_indices = queue.dequeue_many(queue.size()) clear_queue_ops.append(stale_indices) with ops.device(global_step.device): self._clean_up_op = control_flow_ops.abort( error_msg="From sync_replicas") # According to the staleness, select between the enqueue op (real_grad) # or no-op (no_op_grad). Effectively dropping all the stale gradients. no_op_grad = lambda: [control_flow_ops.no_op(name="no_grad_enqueue")] real_grad = lambda: [control_flow_ops.group(*train_ops)] final_train_ops = control_flow_ops.cond(is_stale, no_op_grad, real_grad) with ops.device(global_step.device), ops.name_scope(""): # Replicas have to wait until they can get a token from the token queue. with ops.control_dependencies([final_train_ops]): token = sync_token_queue.dequeue() train_op = state_ops.scatter_update(self._local_steps, self._replica_id, token) with ops.control_dependencies(clear_queue_ops): # Sync_op needs to insert tokens to the token queue at the end of the # step so the replicas can fetch them to start the next step. # Note that ref() is used to avoid reading from the identity with old # the step. tokens = array_ops.fill([self._tokens_per_step], global_step.ref()) sync_op = sync_token_queue.enqueue_many((tokens,)) if self._variable_averages is not None: with ops.control_dependencies([sync_op]), ops.name_scope(""): sync_op = self._variable_averages.apply( self._variables_to_average) self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue, [sync_op]) self._gradients_applied = True return train_op
def simple_fn(x): del x queue = data_flow_ops.FIFOQueue(10, dtypes.int32, ()) # Blocking return queue.dequeue_many(5)
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir(model_dir): os.makedirs(model_dir) np.random.seed(seed=args.seed) random.seed(args.seed) train_set = facenet.get_dataset(args.data_dir) if args.filter_filename: train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The dataset should not be empty' labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=256000, dtypes=[tf.string, tf.int64], shapes=[(1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder], name='enqueue_op') nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.cast( tf.image.decode_image(file_contents, channels=3), tf.float32) # if args.random_crop: # image = tf.random_crop(image, [args.image_size, args.image_size, 3]) # #image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) # else: # image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #image = tf.image.random_brightness(image,max_delta=30) #image = tf.image.random_contrast(image,lower=0.8,upper=1.2) #image = tf.image.random_saturation(image,lower=0.8,upper=1.2) image.set_shape((112, 96, 3)) images.append(tf.subtract(image, 127.5) * 0.0078125) images_and_labels.append([images, label]) image_batch, label_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(112, 96, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Total number of classes: %d' % nrof_classes) print('Total number of examples: %d' % len(image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') AM_logits = AM_logits_compute(embeddings, label_batch, args, nrof_classes) #AM_logits = Arc_logits(embeddings, label_batch, args, nrof_classes) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=AM_logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') #print('test',tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) for weights in slim.get_variables_by_name('kernel'): kernel_regularization = tf.contrib.layers.l2_regularizer( args.weight_decay)(weights) print(weights) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, kernel_regularization) regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) if args.weight_decay == 0: total_loss = tf.add_n([cross_entropy_mean], name='total_loss') else: total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') tf.add_to_collection('losses', total_loss) #define two saver in case under 'finetuning on different dataset' situation saver_load = tf.train.Saver(tf.trainable_variables(), max_to_keep=1) saver_save = tf.train.Saver(tf.trainable_variables(), max_to_keep=1) #train_op = facenet.train(total_loss, global_step, args.optimizer, # learning_rate, args.moving_average_decay, tf.trainable_variables(), args.log_histograms) #train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss,global_step = global_step,var_list=tf.trainable_variables()) train_op = tf.train.MomentumOptimizer( learning_rate, momentum=0.9).minimize(total_loss, global_step=global_step, var_list=tf.trainable_variables()) summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver_load.restore(sess, pretrained_model) print('Running training') epoch = 0 best_accuracy = 0.0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size train(args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file) print('validation running...') if args.lfw_dir: #best_accuracy = evaluate_double(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, embeddings, # label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer,best_accuracy, saver_save,model_dir,subdir,image_batch,args) best_accuracy = evaluate( sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, best_accuracy, saver_save, model_dir, subdir) return model_dir
def apply_gradients(self, grads_and_vars, worker_id, global_step=None, name=None, collect_cdfs=False): """Apply gradients to variables. This contains most of the synchronization implementation and also wraps the apply_gradients() from the real optimizer. Args: grads_and_vars: List of (gradient, variable) pairs as returned by compute_gradients(). global_step: Optional Variable to increment by one after the variables have been updated. name: Optional name for the returned operation. Default to the name passed to the Optimizer constructor. Returns: train_op: The op to dequeue a token so the replicas can exit this batch and start the next one. This is executed by each replica. Raises: ValueError: If the grads_and_vars is empty. ValueError: If global step is not provided, the staleness cannot be checked. """ if not grads_and_vars: raise ValueError("Must supply at least one variable") if global_step is None: raise ValueError("Global step is required to check staleness") self._global_step = global_step train_ops = [] aggregated_grad = [] var_list = [] # worker_id_list_printer = logging_ops.Print(global_step, # [a for a in self._worker_idx_list] + [worker_id] + [global_step], # message="Worker ID list status") # train_ops.append(worker_id_list_printer) self._local_step = variables.Variable( initial_value=0, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=global_step.dtype.base_dtype, name="sync_rep_local_step") self.local_step_init_op = state_ops.assign(self._local_step, global_step._ref()) chief_init_ops = [self.local_step_init_op] self.ready_for_local_init_op = variables.report_uninitialized_variables( variables.all_variables()) # The wait op waits for the current worker to dequeue a token from its respective token queue self._wait_op = self._sync_token_queues[worker_id].dequeue() # Replicas have to wait until they can get a token from the token queue # BEFORE begining to compute gradients. with ops.device(global_step.device): queue_size = self._sync_token_queues[worker_id].size() update_local_step_op = state_ops.assign(self._local_step, global_step._ref()) # Gradient accum creation with ops.name_scope(None, self._name): worker_idx_list = [] worker_counter = 0 for grad, var in grads_and_vars: var_list.append(var) tf.logging.info("Grad " + str(grad) + " assigned to " + str(var.device)) with ops.device(var.device): if grad is None: continue elif isinstance(grad, ops.Tensor): grad_accum = data_flow_ops.ConditionalAccumulator( grad.dtype, shape=var.get_shape(), shared_name=var.name + "/grad_accum") else: if not isinstance(grad, ops.IndexedSlices): raise ValueError("Unknown grad type!") grad_accum = data_flow_ops.SparseConditionalAccumulator( grad.dtype, shape=(), shared_name=var.name + "/grad_accum") self._accumulator_list.append((grad_accum, var)) with ops.device(global_step.device): worker_idx_list.append(worker_id) worker_counter += 1 worker_id_list_printer = logging_ops.Print(global_step, [a for a in worker_idx_list] + [worker_id] + [global_step], message="Worker ID list status") train_ops.append(worker_id_list_printer) counter_printer = logging_ops.Print(global_step, [worker_counter], message="Test for the counter") train_ops.append(counter_printer) """# Phase 1 gradient computation with ops.control_dependencies([update_local_step_op]): for index, (grad, var) in enumerate(grads_and_vars): with ops.device(var.device): if grad is None: continue elif isinstance(grad, ops.Tensor): grad_accum = self._accumulator_list[index][0] train_ops.append(grad_accum.apply_grad(grad, local_step=self._local_step._ref())) else: if not isinstance(grad, ops.IndexedSlices): raise ValueError("Unknown grad type!") grad_accum = self._accumulator_list[index][0] train_ops.append(grad_accum.apply_indexed_slices_grad( grad, local_step=self._local_step._ref()))""" # Phase 1 gradient computation with ops.control_dependencies([update_local_step_op]): for index, (grad, var) in enumerate(grads_and_vars): print_start_op = logging_ops.Print(global_step, [global_step], message="Starting to apply grads for variable %d" % index) train_ops.append(print_start_op) with ops.device(var.device): if grad is None: continue elif isinstance(grad, ops.Tensor): grad_accum = self._accumulator_list[index][0] with ops.control_dependencies([print_start_op]): with tf.device("job:worker/task:%d" % worker_id): apply_grad_op = grad_accum.apply_grad(grad, local_step=self._local_step._ref()) with ops.control_dependencies([apply_grad_op]): accum_sizes_printer = logging_ops.Print(global_step, [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step], message="Accum aggregated status") ret = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison), lambda: tf.constant(1), lambda: tf.constant(0)) notification_printer = logging_ops.Print(global_step, [ret], message="should stop notification") train_ops.append(notification_printer) '''else: notification_printer = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification") train_ops.append(notification_printer)''' train_ops.append(accum_sizes_printer) # worker_id_list_printer = logging_ops.Print(global_step, # [len(self._worker_list)] + [worker_id] + [global_step], # message="Worker ID list status") # train_ops.append(worker_id_list_printer) finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index) train_ops.append(finished_print_op) else: if not isinstance(grad, ops.IndexedSlices): raise ValueError("Unknown grad type!") grad_accum = self._accumulator_list[index][0] with ops.control_dependencies([print_start_op]): with tf.device("job:worker/task:%d" % worker_id): apply_grad_op = grad_accum.apply_indexed_slices_grad( grad, local_step=self._local_step._ref()) with ops.control_dependencies([apply_grad_op]): accum_sizes_printer_parse = logging_ops.Print(global_step, [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step], message="Accum aggregated status") ret_sparse = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison), lambda: tf.constant(1), lambda: tf.constant(0)) notification_printer_sparse = logging_ops.Print(global_step, [ret_sparse], message="should stop notification") train_ops.append(notification_printer_sparse) #else: # notification_printer_sparse = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification") # train_ops.append(notification_printer_sparse) train_ops.append(accum_sizes_printer_parse) # worker_id_list_printer_sparse = logging_ops.Print(global_step, # [len(self._worker_list)] + [worker_id] + [global_step], # message="Worker ID list status") # train_ops.append(worker_id_list_printer_sparse) finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index) train_ops.append(finished_print_op) with ops.control_dependencies([apply_grad_op]): accum_sizes_printer = logging_ops.Print(global_step, [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step], message="Accum aggregated status on ps") train_ops.append(accum_sizes_printer) # Phase 2 gradient applying for index, (grad, var) in enumerate(grads_and_vars): with ops.device(var.device): grad_accum = self._accumulator_list[index][0] if grad is None: aggregated_grad.append(None) elif isinstance(grad, ops.Tensor): if collect_cdfs: aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas)) else: aggregated_grad.append(grad_accum.take_grad(1)) else: if collect_cdfs: aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas)) else: aggregated_grad.append(grad_accum.take_indexed_slices_grad(1)) aggregated_grads_and_vars = zip(aggregated_grad, var_list) # Some debug operations self.print_sizes = logging_ops.Print(global_step, [self._sync_token_queues[i].size() for i in range(self._total_num_replicas)], message="queue sizes") self.print_accum_sizes = logging_ops.Print(self._local_step, [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id], message="Accum sizes") self.print_local_step = logging_ops.Print(self._local_step, [self._local_step._ref(), global_step._ref()], message="local vs global step") # sync_op will be assigned to the same device as the global step. with ops.device(global_step.device), ops.name_scope(""): with ops.control_dependencies([self.print_accum_sizes]): update_op = self._opt.apply_gradients(aggregated_grads_and_vars, global_step) self._update_op = update_op with ops.control_dependencies([update_op]): sync_op = [] for cur_worker_id in range(self._total_num_replicas): sync_op.append(self._sync_token_queues[cur_worker_id].enqueue(global_step)) sync_op = control_flow_ops.group(*(sync_op)) # dummy_queue is passed to the queue runner. Don't use the real queues # because the queue runner doesn't automatically reopen it once it # closed queues in PS devices. dummy_queue = ( data_flow_ops.FIFOQueue(1, types_pb2.DT_INT32, shapes=(), shared_name="dummy_queue")) self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue, [sync_op]) with ops.device(global_step.device), ops.name_scope(""): with ops.control_dependencies(train_ops): # Worker finished applying gradients. Add token to phase1_finished_queue train_op = logging_ops.Print(self._local_step._ref(), [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step], message="Finished worker updates", name="FinishedWorkerUpdatesPrint") for accum, var in self._accumulator_list: with ops.device(var.device): chief_init_ops.append( accum.set_global_step( global_step, name="SetGlobalStep")) self.chief_init_op = control_flow_ops.group(*(chief_init_ops)) self._gradients_applied = True return train_op
def main(args): """basic info""" log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), "logs") if not os.path.isdir(log_dir): os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.logs_base_dir), "models") if not os.path.isdir(model_dir): os.makedirs(model_dir) argumentfile = os.path.join(log_dir, 'arguments.txt') utils.write_arguments_to_file(args, argumentfile) src_path, _ = os.path.split(os.path.realpath(__file__)) # utils.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) """trainset""" # np.random.seed(seed=args.seed) train_set = utils.get_dataset(args.data_path, classnum, argumentfile) """开启图""" with tf.Graph().as_default(): # tf.set_random_seed(args.seed) """占位符""" batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 1), name='labels') """通过队列读取批量数据""" input_queue = data_flow_ops.FIFOQueue(capacity=1000, dtypes=[tf.string, tf.int64], shapes=[(1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 17 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() # 路径、标签出队 images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image( file_contents, channels=1) # uint8 with shape [h, w, num_channels] image = tf.image.crop_to_bounding_box(image, 0, 0, 850, 2100) image = tf.image.resize_images( image, (args.image_size_h, args.image_size_w), method=1) if args.transform: if random.random() < 1: height, width, d = image.shape mask = utils.geterasebox(height, width) image = tf.multiply(image, mask) image = tf.cast(image, tf.float32) images.append(image) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size_h, args.image_size_w, 1), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=False) tf.summary.image("trainimage", image_batch, max_outputs=16) # 阶梯下降学习率 with tf.name_scope("lr"): global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.piecewise_constant(global_step, boundaries=boundaries, values=learning_rates) """运行网络 输出特征向量""" model = DentNet_ATT.DentNet_ATT(image_batch, classnum, True) embeddings = model.dropout2 Cosin_logits = cosineface_losses(embedding=embeddings, labels=labels_batch, out_num=classnum) # loss with tf.name_scope("loss"): cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels_batch, logits=Cosin_logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') L2_loss = tf.add_n( [tf.nn.l2_loss(var) for var in tf.trainable_variables()]) * 0.001 #0.001需要对比设置 total_loss = tf.add_n([cross_entropy_mean + L2_loss * weightdecay], name='total_loss') # optimizer with tf.name_scope('optimizer'): if args.opt == 'ADAGRAD': optimizer = tf.train.AdagradOptimizer(learning_rate) elif args.opt == 'ADADELTA': optimizer = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) elif args.opt == 'ADAM': optimizer = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) elif args.opt == 'RMSPROP': optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) elif args.opt == 'MOM': optimizer = tf.train.MomentumOptimizer(learning_rate, 0.95, use_nesterov=True) else: raise ValueError('Invalid optimization algorithm') train_op = optimizer.minimize(total_loss, global_step=global_step) # accuracy 计算准确度 with tf.name_scope("total_accuracy"): prob = tf.nn.softmax(Cosin_logits) one_hot_label = tf.one_hot(labels_batch, classnum) correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(one_hot_label, 1)) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) # Tensorboard tf.summary.scalar('loss', total_loss) tf.summary.scalar('accuracy', accuracy) tf.summary.scalar('lr', learning_rate) saver = tf.train.Saver(max_to_keep=200) summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(log_dir) """开启session""" with tf.Session() as sess: sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) if args.param_num: count_params() summary_writer.add_graph(sess.graph) # 把图的数据写入 # Training epoch = 0 while epoch < args.max_nrof_epochs: batch_number = 0 # 一代内的步数 while batch_number < args.epoch_size: """选择训练样本""" thestep = sess.run(global_step) image_paths, num_per_class, batch_truelabel = sample_people( train_set, args.people_per_batch, args.images_per_person, log_dir, thestep) nrof_examples = args.people_per_batch * args.images_per_person image_paths_array = np.reshape( np.expand_dims(np.array(image_paths), 1), (-1, 1)) sess.run( enqueue_op, { image_paths_placeholder: image_paths_array, labels_placeholder: batch_truelabel }) nrof_batches = int(np.ceil(nrof_examples / args.batch_size)) """计算特征向量""" for i in range(nrof_batches): batch_size = min(nrof_examples - i * args.batch_size, args.batch_size) emb, lab, err, _, ministep, acc, s, lrr = sess.run( [ embeddings, labels_batch, total_loss, train_op, global_step, accuracy, summary_op, learning_rate ], feed_dict={ batch_size_placeholder: batch_size, phase_train_placeholder: True }) if (ministep + 1) % 100 == 0: timenow = str(time.strftime('%Y-%m-%d %H:%M:%S')) print(timenow, "step:", ministep + 1, "lr:", lrr, "Loss:", err, "TrainAcc:", acc) summary_writer.add_summary(s, ministep) # save model checkpoint_name = os.path.join( model_dir, 'models-step' + str(ministep + 1) + '.ckpt') if (ministep + 1) % 1000 == 0: print("Saving checkpoint of model:", checkpoint_name) saver.save(sess, checkpoint_name) batch_number += 1 epoch += 1 return
def parallel_read(data_sources, reader_class, num_epochs=None, num_readers=4, reader_kwargs=None, shuffle=True, dtypes=None, capacity=256, min_after_dequeue=128, seed=None, scope=None): """Reads multiple records in parallel from data_sources using n readers. It uses a ParallelReader to read from multiple files in parallel using multiple readers created using `reader_class` with `reader_kwargs'. If shuffle is True the common_queue would be a RandomShuffleQueue otherwise it would be a FIFOQueue. Usage: data_sources = ['path_to/train*'] key, value = parallel_read(data_sources, tf.CSVReader, num_readers=4) Args: data_sources: a list/tuple of files or the location of the data, i.e. /path/to/train@128, /path/to/train* or /tmp/.../train* reader_class: one of the io_ops.ReaderBase subclasses ex: TFRecordReader num_epochs: The number of times each data source is read. If left as None, the data will be cycled through indefinitely. num_readers: a integer, number of Readers to create. reader_kwargs: an optional dict, of kwargs for the reader. shuffle: boolean, wether should shuffle the files and the records by using RandomShuffleQueue as common_queue. dtypes: A list of types. The length of dtypes must equal the number of elements in each record. If it is None it will default to [tf.string, tf.string] for (key, value). capacity: integer, capacity of the common_queue. min_after_dequeue: integer, minimum number of records in the common_queue after dequeue. Needed for a good shuffle. seed: A seed for RandomShuffleQueue. scope: Optional name scope for the ops. Returns: key, value: a tuple of keys and values from the data_source. """ data_files = get_data_files(data_sources) with ops.name_scope(scope, 'parallel_read'): filename_queue = tf_input.string_input_producer( data_files, num_epochs=num_epochs, shuffle=shuffle, seed=seed, name='filenames') dtypes = dtypes or [tf_dtypes.string, tf_dtypes.string] if shuffle: common_queue = data_flow_ops.RandomShuffleQueue( capacity=capacity, min_after_dequeue=min_after_dequeue, dtypes=dtypes, seed=seed, name='common_queue') else: common_queue = data_flow_ops.FIFOQueue( capacity=capacity, dtypes=dtypes, name='common_queue') summary.scalar('fraction_of_%d_full' % capacity, math_ops.to_float(common_queue.size()) * (1. / capacity)) return ParallelReader( reader_class, common_queue, num_readers=num_readers, reader_kwargs=reader_kwargs).read(filename_queue)
def main(args): network = importlib.import_module(args.model_def) image_size = (args.image_size, args.image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) #日志保存地址 if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory # os.path.realpath(__file__)代表返回当前模块真实路径 # os.path.split(os.path.realpath(__file__))代表返回路径的目录和文件名(元组形式) src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) # dataset是列表,列表元素为每一个类的ImageClass对象,定义见facenet.py文件 dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) # 划分训练集和测试集,train_set和val_set的形式和dataset一样 if args.validation_set_split_ratio > 0.0: train_set, val_set = facenet.split_dataset( dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] nrof_classes = len(train_set) #类目数(即有多少人) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing # pairs也是列表,子元素也是列表,每个子列表包含pairs.txt的每一行 pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images #lfw_paths为两两比较的列表,列表的元素为有2个元素的元祖,actual_issame为列表,表示每个元素是否为同一个人 lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) # global_step对应的是全局批的个数,根据这个参数可以更新学习率 global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The training set should not be empty' val_image_list, val_label_list = facenet.get_image_paths_and_labels( val_set) # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) # range_size的大小和样本个数一样 range_size = array_ops.shape(labels)[0] #QueueRunner:保存的是队列中的入列操作,保存在一个list当中,其中每个enqueue运行在一个线程当中 #range_input_producer:返回的是一个队列,队列中有打乱的整数,范围是从0到range size #range_size大小和总的样本个数一样 #并将一个QueueRunner添加到当前图的QUEUE_RUNNER集合中 index_queue = tf.train.range_input_producer( range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) #capacity代表队列容量 # 返回的是一个出列操作,每次出列一个epoch需要用到的样本个数 index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') # 上面的队列是一个样本索引的出队队列,只用来出列 # 用来每次出列一个epoch中用到的样本 # 这里是第二个队列,这个队列用来入列,每个元素的大小为shape=[(1,),(1,),(1,)] nrof_preprocess_threads = 4 # 这里的shape代表每一个元素的维度 input_queue = data_flow_ops.FIFOQueue( capacity=600000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) # 这时一个入列的操作,这个操作将在session run的时候用到 # 每次入列的是image_paths_placeholder, labels_placeholder对 # 注意,这里只有2个队列,一个用来出列打乱的元素序号 # 一个根据对应的需要读取指定的文件 enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) # image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Total number of classes: %d' % nrof_classes) print('Total number of examples: %d' % len(image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # arcface_logits = arcface_logits_compute(embeddings, label_batch, args, nrof_classes) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) with tf.variable_scope('Logits'): arcface_logits = arcface_logits_compute(embeddings, label_batch, args, nrof_classes) cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=arcface_logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) correct_prediction = tf.cast( tf.equal(tf.argmax(arcface_logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # for weights in slim.get_variables_by_name('embedding_weights'): # weight_regularization = tf.contrib.layers.l2_regularizer(args.weight_decay)(weights) # tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weight_regularization) regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) if args.weight_decay == 0: total_loss = tf.add_n([cross_entropy_mean], name='total_loss') else: total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') #define two saver in case under 'finetuning on different dataset' situation saver_save = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) facenet.load_model(pretrained_model) print('Running arcface training') best_accuracy = 0.0 for epoch in range(1, args.max_nrof_epochs + 1): step = sess.run(global_step, feed_dict=None) cont = train( args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, global_step, total_loss, accuracy, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, args.random_rotate, args.random_crop, args.random_flip, args.use_fixed_image_standardization, control_placeholder) if not cont: break print('validation running...') if args.lfw_dir: best_accuracy = evaluate( sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, best_accuracy, saver_save, model_dir, subdir, args.lfw_subtract_mean, args.lfw_use_flipped_images, args.use_fixed_image_standardization, args.lfw_distance_metric) return model_dir
def _custom_shuffle_batch(tensors, batch_size, capacity, min_after_dequeue, keep_input, num_threads=1, seed=None, enqueue_many=False, shapes=None, allow_smaller_final_batch=False, shared_name=None, name=None, shuffle=False): """Helper function for `shuffle_batch` and `maybe_shuffle_batch`.""" if context.executing_eagerly(): raise ValueError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. Please use tf.data to ingest data into your model" " instead.") tensor_list = tf_input._as_tensor_list(tensors) with ops.name_scope(name, "shuffle_batch", list(tensor_list) + [keep_input]) as name: if capacity <= min_after_dequeue: raise ValueError( "capacity %d must be bigger than min_after_dequeue %d." % (capacity, min_after_dequeue)) tensor_list = tf_input._validate(tensor_list) keep_input = tf_input._validate_keep_input(keep_input, enqueue_many) tensor_list, sparse_info = tf_input._store_sparse_tensors( tensor_list, enqueue_many, keep_input) types = tf_input._dtypes([tensor_list]) shapes = tf_input._shapes([tensor_list], shapes, enqueue_many) ########################################################################################### if shuffle: queue = data_flow_ops.RandomShuffleQueue( capacity=capacity, min_after_dequeue=min_after_dequeue, seed=seed, dtypes=types, shapes=shapes, shared_name=shared_name) else: # Remove shuffle property queue = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=types, shapes=shapes, shared_name=shared_name) ########################################################################################### tf_input._enqueue(queue, tensor_list, num_threads, enqueue_many, keep_input) full = (math_ops.to_float( math_ops.maximum(0, queue.size() - min_after_dequeue)) * (1. / (capacity - min_after_dequeue))) summary_name = ("fraction_over_%d_of_%d_full" % (min_after_dequeue, capacity - min_after_dequeue)) summary.scalar(summary_name, full) if allow_smaller_final_batch: dequeued = queue.dequeue_up_to(batch_size, name=name) else: dequeued = queue.dequeue_many(batch_size, name=name) dequeued = tf_input._restore_sparse_tensors(dequeued, sparse_info) return tf_input._as_original_type(tensors, dequeued)
def main(args): subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) train_set = facenet.get_dataset(args.data_dir) logging.info('Model directory: %s' % model_dir) logging.info('Log directory: %s' % log_dir) logging.info('Train set size:%d' % len(train_set)) # Build graph logging.info('building graph...') with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) ae = autoencoder() optimizer = tf.train.AdamOptimizer(args.learning_rate).minimize( ae['cost']) sess = tf.Session() sess.run(tf.global_variables_initializer()) datasets = get_dataset(args.data_dir) batch_size = args.batch_size n_epochs = args.epoch_size batch_xs = [] batch_num = 0 # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=100) image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string], shapes=[(1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([image_paths_placeholder]) nrof_preprocess_threads = 4 images = [] for _ in range(nrof_preprocess_threads): filenames = input_queue.dequeue() images_ = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_jpeg(file_contents) image.set_shape((args.image_size, args.image_size, 3)) images_.append(tf.image.per_image_standardization(image)) images.append([images_]) image_batch = tf.train.batch_join( images, batch_size=args.batch_size, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') sess.run(enqueue_op, {image_paths_placeholder: datasets}) logging.info('training ...') logging.info('dataset len:%d' % len(datasets)) for epoch_i in range(n_epochs): for idx in range(len(datasets)): filename = datasets[idx] file_contents = tf.read_file(filename) image = tf.image.decode_jpeg(file_contents) # image = tf.subtract(image, 0.5) # image = tf.multiply(image, 2.0) image = tf.cast(image, tf.float32) batch_xs.append(sess.run(image)) if (idx + 1) % batch_size == 0: train = np.array([img for img in batch_xs]) if batch_num % 10 == 0: _ae, _ = sess.run([ae, optimizer], feed_dict={ae['x']: train}) x_ = _ae['x'] z_ = _ae['z'] y_ = _ae['y'] logging.info('type x_ : %s' % type(z_)) logging.info('%s, %s, %s' % (x_.shape, z_.shape, y_.shape)) cost_ = _ae['cost'] logging.info('epoch:%d, loss:%s' % (epoch_i, str(cost_))) else: sess.run(optimizer, feed_dict={ae['x']: train}) batch_num += 1 batch_xs[:] = [] logging.info('batch num:%d' % batch_num) # if batch_num % 100 == 0: # pass # save model logging.info('Saving variables') start_time = time.time() checkpoint_path = os.path.join(model_dir, 'model-%s.ckpt' % subdir) saver.save(sess, checkpoint_path, global_step=batch_num, write_meta_graph=False) save_time_variables = time.time() - start_time logging.info('Variables saved in %.2f seconds' % save_time_variables) metagraph_filename = os.path.join(model_dir, 'model-%s.meta' % subdir) save_time_metagraph = 0 if not os.path.exists(metagraph_filename): logging.info('Saving metagraph') start_time = time.time() saver.export_meta_graph(metagraph_filename) save_time_metagraph = time.time() - start_time logging.info('Metagraph saved in %.2f seconds' % save_time_metagraph)
def main(args): with tf.Graph().as_default(): with tf.Session() as sess: # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs) image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') nrof_preprocess_threads = 4 image_size = (args.image_size, args.image_size) eval_input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) eval_enqueue_op = eval_input_queue.enqueue_many( [ image_paths_placeholder, labels_placeholder, control_placeholder ], name='eval_enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( eval_input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) # Load the model input_map = { 'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder } facenet.load_model(args.model, input_map=input_map) # Get output tensor embeddings = tf.get_default_graph().get_tensor_by_name( "embeddings:0") # coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) evaluate(sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, args.distance_metric, args.subtract_mean, args.use_flipped_images, args.use_fixed_image_standardization)
def input_producer(input_tensor, element_shape=None, num_epochs=None, shuffle=True, seed=None, capacity=32, shared_name=None, summary_name=None, name=None, cancel_op=None): """Output the rows of `input_tensor` to a queue for an input pipeline. Note: if `num_epochs` is not `None`, this function creates local counter `epochs`. Use `local_variable_initializer()` to initialize local variables. Args: input_tensor: A tensor with the rows to produce. Must be at least one-dimensional. Must either have a fully-defined shape, or `element_shape` must be defined. element_shape: (Optional.) A `TensorShape` representing the shape of a row of `input_tensor`, if it cannot be inferred. num_epochs: (Optional.) An integer. If specified `input_producer` produces each row of `input_tensor` `num_epochs` times before generating an `OutOfRange` error. If not specified, `input_producer` can cycle through the rows of `input_tensor` an unlimited number of times. shuffle: (Optional.) A boolean. If true, the rows are randomly shuffled within each epoch. seed: (Optional.) An integer. The seed to use if `shuffle` is true. capacity: (Optional.) The capacity of the queue to be used for buffering the input. shared_name: (Optional.) If set, this queue will be shared under the given name across multiple sessions. summary_name: (Optional.) If set, a scalar summary for the current queue size will be generated, using this name as part of the tag. name: (Optional.) A name for queue. cancel_op: (Optional.) Cancel op for the queue Returns: A queue with the output rows. A `QueueRunner` for the queue is added to the current `QUEUE_RUNNER` collection of the current graph. Raises: ValueError: If the shape of the input cannot be inferred from the arguments. """ with ops.name_scope(name, "input_producer", [input_tensor]): input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor") element_shape = input_tensor.get_shape()[1:].merge_with(element_shape) if not element_shape.is_fully_defined(): raise ValueError( "Either `input_tensor` must have a fully defined shape " "or `element_shape` must be specified") if shuffle: input_tensor = random_ops.random_shuffle(input_tensor, seed=seed) input_tensor = limit_epochs(input_tensor, num_epochs) q = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=[input_tensor.dtype.base_dtype], shapes=[element_shape], shared_name=shared_name, name=name) enq = q.enqueue_many([input_tensor]) queue_runner.add_queue_runner( queue_runner.QueueRunner(q, [enq], cancel_op=cancel_op)) if summary_name is not None: summary.scalar( "queue/%s/%s" % (q.name, summary_name), math_ops.cast(q.size(), dtypes.float32) * (1. / capacity)) return q
def main(args): network = importlib.import_module(args.model_def) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 3), name='image_paths') labels_placeholder = tf.placeholder(tf.int64, shape=(None, 3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.string, tf.int64], shapes=[(3, ), (3, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder]) nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): file_contents = tf.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.random_crop( image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_image_with_crop_or_pad( image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3)) images.append(tf.image.per_image_standardization(image)) images_and_labels.append([images, label]) image_batch, labels_batch = tf.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') labels_batch = tf.identity(labels_batch, 'label_batch') # Build the inference graph prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack( tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables()) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_set, epoch, image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size, anchor, positive, negative, triplet_loss) # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) # Evaluate on LFW if args.lfw_dir: evaluate(sess, lfw_paths, embeddings, labels_batch, image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, actual_issame, args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size) return model_dir
def read_keyed_batch_features(file_pattern, batch_size, features, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, reader_num_threads=1, feature_queue_capacity=100, num_queue_runners=2, parser_num_threads=None, name=None): """Adds operations to read, queue, batch and parse `Example` protos. Given file pattern (or list of files), will setup a queue for file names, read `Example` proto using provided `reader`, use batch queue to create batches of examples of size `batch_size` and parse example given `features` specification. All queue runners are added to the queue runners collection, and may be started via `start_queue_runners`. All ops are added to the default graph. Args: file_pattern: List of files or pattern of file paths containing `Example` records. See `tf.gfile.Glob` for pattern rules. batch_size: An int or scalar `Tensor` specifying the batch size to use. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. reader: A function or class that returns an object with `read` method, (filename tensor) -> (example tensor). randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. NOTE - If specified, creates a variable that must be initialized, so call tf.initialize_local_variables() as shown in the tests. queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. feature_queue_capacity: Capacity of the parsed features queue. num_queue_runners: Number of queue runners to start for the feature queue, Adding multiple queue runners for the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than parsing. parser_num_threads: (Deprecated) The number of threads to parse examples. name: Name of resulting op. Returns: A dict of `Tensor` or `SparseTensor` objects for each in `features`. If `keep_keys` is `True`, returns tuple of string `Tensor` and above dict. Raises: ValueError: for invalid inputs. """ if parser_num_threads: # TODO(sibyl-Aix6ihai): Remove on Sept 3 2016. logging.warning( 'parser_num_threads is deprecated, it will be removed on' 'Sept 3 2016') with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: keys, examples = read_keyed_batch_examples( file_pattern, batch_size, reader, randomize_input=randomize_input, num_epochs=num_epochs, queue_capacity=queue_capacity, num_threads=reader_num_threads, read_batch_size=batch_size, name=scope) # Parse the example. feature_map = parsing_ops.parse_example(examples, features) # Lets also add preprocessed tensors into the queue types for each item of # the queue. tensors_to_enqueue = [] # Each entry contains the key, and a boolean which indicates whether the # tensor was a sparse tensor. tensors_mapping = [] # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse # tensors into a queue. This could be taken care in somewhere else so others # can reuse it. Also, QueueBase maybe extended to handle sparse tensors # directly. for key in sorted(feature_map.keys()): tensor = feature_map[key] if isinstance(tensor, ops.SparseTensor): tensors_mapping.append((key, True)) tensors_to_enqueue.extend( [tensor.indices, tensor.values, tensor.shape]) else: tensors_mapping.append((key, False)) tensors_to_enqueue.append(tensor) tensors_to_enqueue.append(keys) queue_dtypes = [x.dtype for x in tensors_to_enqueue] input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity, queue_dtypes) # Add a summary op to debug if our feature queue is full or not. logging_ops.scalar_summary( 'queue/parsed_features/%s/fraction_of_%d_full' % (input_queue.name, feature_queue_capacity), math_ops.cast(input_queue.size(), dtypes.float32) * (1. / feature_queue_capacity)) # Add multiple queue runners so that the queue is always full. Adding more # than two queue-runners may hog the cpu on the worker to fill up the queue. for _ in range(num_queue_runners): queue_runner.add_queue_runner( queue_runner.QueueRunner( input_queue, [input_queue.enqueue(tensors_to_enqueue)])) dequeued_tensors = input_queue.dequeue() # Reset shapes on dequeued tensors. for i in range(len(tensors_to_enqueue)): dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape()) # Recreate feature mapping according to the original dictionary. dequeued_feature_map = {} index = 0 for key, is_sparse_tensor in tensors_mapping: if is_sparse_tensor: # Three tensors are (indices, values, shape). dequeued_feature_map[key] = ops.SparseTensor( dequeued_tensors[index], dequeued_tensors[index + 1], dequeued_tensors[index + 2]) index += 3 else: dequeued_feature_map[key] = dequeued_tensors[index] index += 1 dequeued_keys = dequeued_tensors[-1] return dequeued_keys, dequeued_feature_map
def main(args): network = importlib.import_module(args.model_def) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) train_set = facenet.get_dataset(args.data_dir) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) if args.pretrained_model: print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.compat.v1.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Placeholder for the learning rate learning_rate_placeholder = tf.compat.v1.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.compat.v1.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.compat.v1.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.compat.v1.placeholder(tf.string, shape=(None, 3), name='image_paths') image_placeholder = tf.compat.v1.placeholder(tf.int8, shape=(None, 3, args.image_size, args.image_size, 3), name='image_in') labels_placeholder = tf.compat.v1.placeholder(tf.int64, shape=(None, 3), name='labels') input_queue = data_flow_ops.FIFOQueue(capacity=100000, dtypes=[tf.int8, tf.int64], shapes=[(3, args.image_size, args.image_size, 3), (3,)], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many([ image_placeholder, # image_paths_placeholder, labels_placeholder ]) nrof_preprocess_threads = 4 images_and_labels = [] for _ in range(nrof_preprocess_threads): filenames, label = input_queue.dequeue() images = [] for filename in tf.unstack(filenames): """file_contents = tf.io.read_file(filename) image = tf.image.decode_image(file_contents, channels=3) if args.random_crop: image = tf.image.random_crop(image, [args.image_size, args.image_size, 3]) else: image = tf.image.resize_with_crop_or_pad(image, args.image_size, args.image_size) if args.random_flip: image = tf.image.random_flip_left_right(image) #pylint: disable=no-member image.set_shape((args.image_size, args.image_size, 3))""" images.append(tf.image.per_image_standardization(filename)) images_and_labels.append([ images, label ]) image_batch, labels_batch = tf.compat.v1.train.batch_join( images_and_labels, batch_size=batch_size_placeholder, shapes=[(args.image_size, args.image_size, 3), ()], enqueue_many=True, capacity=4 * nrof_preprocess_threads * args.batch_size, allow_smaller_final_batch=True) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') labels_batch = tf.identity(labels_batch, 'label_batch') # Build the inference graph prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Split embeddings into anchor, positive and negative and calculate triplet loss anchor, positive, negative = tf.unstack(tf.reshape(embeddings, [-1, 3, args.embedding_size]), 3, 1) triplet_loss = facenet.triplet_loss(anchor, positive, negative, args.alpha) learning_rate = tf.compat.v1.train.exponential_decay(learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.compat.v1.summary.scalar('learning_rate', learning_rate) # Calculate the total losses regularization_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([triplet_loss] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op, opt = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.compat.v1.global_variables()) # Create a saver saver = tf.compat.v1.train.Saver(tf.compat.v1.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.compat.v1.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)) # Initialize variables sess.run(tf.compat.v1.global_variables_initializer(), feed_dict={phase_train_placeholder: True}) sess.run(tf.compat.v1.local_variables_initializer(), feed_dict={phase_train_placeholder: True}) summary_writer = tf.compat.v1.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.compat.v1.train.start_queue_runners(coord=coord, sess=sess) train_dataset_path = os.path.join(args.data_dir, 'train') test_dataset_path = os.path.join(args.data_dir, 'test') train_dataset = AWEDataset(train_dataset_path) test_dataset = AWEDataset(test_dataset_path) with sess.as_default(): if args.pretrained_model: print('Restoring pretrained model: %s' % args.pretrained_model) saver.restore(sess, os.path.expanduser(args.pretrained_model)) # Training and validation loop epoch = 0 while epoch < args.max_nrof_epochs: step = sess.run(global_step, feed_dict=None) epoch = step // args.epoch_size # Train for one epoch train(args, sess, train_dataset, epoch, image_placeholder, # image_paths_placeholder, labels_placeholder, labels_batch, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, input_queue, global_step, embeddings, total_loss, train_op, summary_op, summary_writer, args.learning_rate_schedule_file, args.embedding_size, anchor, positive, negative, triplet_loss) # Save variables and the metagraph if it doesn't exist already if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) imgs = test_dataset.images img = [] c_same = 0 c_diff = 0 for i in range(len(imgs)): im2 = imgs[i] if len(im2) > 1: random.shuffle(im2) img.append([ im2[0]['src'], im2[1]['src'], True ]) c_same += 1 while c_diff < (c_same * 5) or (c_diff + c_same) % 3 != 0: a = random.randint(0, len(imgs) - 1) b = random.randint(0, len(imgs) - 1) if a != b: img.append([ imgs[a][-1]['src'], imgs[b][-1]['src'], False ]) c_diff += 1 random.shuffle(img) is_same = [x[2] for x in img] img = [y for x in img for y in x[0:-1]] evaluate(sess, img, embeddings, labels_batch, image_placeholder, # image_paths_placeholder, labels_placeholder, batch_size_placeholder, learning_rate_placeholder, phase_train_placeholder, enqueue_op, is_same, args.batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, args.embedding_size, args) return model_dir
def _enqueue_data(data, capacity, shuffle=False, min_after_dequeue=None, num_threads=1, seed=None, name="enqueue_input", enqueue_size=1, num_epochs=None, pad_value=None): """Creates a queue filled from a numpy array or pandas `DataFrame`. Returns a queue filled with the rows of the given (`OrderedDict` of) array or `DataFrame`. In the case of a pandas `DataFrame`, the first enqueued `Tensor` corresponds to the index of the `DataFrame`. For (`OrderedDict` of) numpy arrays, the first enqueued `Tensor` contains the row number. Args: data: a numpy `ndarray`, `OrderedDict` of numpy arrays, or a generator yielding `dict`s of numpy arrays or pandas `DataFrame` that will be read into the queue. capacity: the capacity of the queue. shuffle: whether or not to shuffle the rows of the array. min_after_dequeue: minimum number of elements that can remain in the queue after a dequeue operation. Only used when `shuffle` is true. If not set, defaults to `capacity` / 4. num_threads: number of threads used for reading and enqueueing. seed: used to seed shuffling and reader starting points. name: a scope name identifying the data. enqueue_size: the number of rows to enqueue per step. num_epochs: limit enqueuing to a specified number of epochs, if provided. pad_value: default value for dynamic padding of data samples, if provided. Returns: A queue filled with the rows of the given (`OrderedDict` of) array or `DataFrame`. Raises: TypeError: `data` is not a Pandas `DataFrame`, an `OrderedDict` of numpy arrays, a numpy `ndarray`, or a generator producing these. NotImplementedError: padding and shuffling data at the same time. NotImplementedError: padding usage with non generator data type. """ with ops.name_scope(name): if isinstance(data, np.ndarray): types = [dtypes.int64, dtypes.as_dtype(data.dtype)] queue_shapes = [(), data.shape[1:]] get_feed_fn = _ArrayFeedFn elif isinstance(data, collections.OrderedDict): types = [dtypes.int64 ] + [dtypes.as_dtype(col.dtype) for col in data.values()] queue_shapes = [()] + [col.shape[1:] for col in data.values()] get_feed_fn = _OrderedDictNumpyFeedFn elif isinstance(data, tp.FunctionType): x_first_el = six.next(data()) x_first_keys = sorted(x_first_el.keys()) x_first_values = [x_first_el[key] for key in x_first_keys] types = [dtypes.as_dtype(col.dtype) for col in x_first_values] queue_shapes = [col.shape for col in x_first_values] get_feed_fn = _GeneratorFeedFn elif HAS_PANDAS and isinstance(data, pd.DataFrame): types = [ dtypes.as_dtype(dt) for dt in [data.index.dtype] + list(data.dtypes) ] queue_shapes = [() for _ in types] get_feed_fn = _PandasFeedFn else: raise TypeError( "data must be either a numpy array or pandas DataFrame if pandas is " "installed; got {}".format(type(data).__name__)) pad_data = pad_value is not None if pad_data and get_feed_fn is not _GeneratorFeedFn: raise NotImplementedError( "padding is only available with generator usage") if shuffle and pad_data: raise NotImplementedError( "padding and shuffling data at the same time is not implemented" ) # TODO(jamieas): TensorBoard warnings for all warnings below once available. if num_threads > 1 and num_epochs is not None: logging.warning( "enqueue_data was called with num_epochs and num_threads > 1. " "num_epochs is applied per thread, so this will produce more " "epochs than you probably intend. " "If you want to limit epochs, use one thread.") if shuffle and num_threads > 1 and num_epochs is not None: logging.warning( "enqueue_data was called with shuffle=True, num_threads > 1, and " "num_epochs. This will create multiple threads, all reading the " "array/dataframe in order adding to the same shuffling queue; the " "results will likely not be sufficiently shuffled.") if not shuffle and num_threads > 1: logging.warning( "enqueue_data was called with shuffle=False and num_threads > 1. " "This will create multiple threads, all reading the " "array/dataframe in order. If you want examples read in order, use" " one thread; if you want multiple threads, enable shuffling.") if shuffle: min_after_dequeue = int( capacity / 4 if min_after_dequeue is None else min_after_dequeue) queue = data_flow_ops.RandomShuffleQueue(capacity, min_after_dequeue, dtypes=types, shapes=queue_shapes, seed=seed) elif pad_data: min_after_dequeue = 0 # just for the summary text queue_shapes = list( map( lambda x: tuple(list(x[:-1]) + [None]) if len(x) > 0 else x, queue_shapes)) queue = data_flow_ops.PaddingFIFOQueue(capacity, dtypes=types, shapes=queue_shapes) else: min_after_dequeue = 0 # just for the summary text queue = data_flow_ops.FIFOQueue(capacity, dtypes=types, shapes=queue_shapes) enqueue_ops = [] feed_fns = [] for i in range(num_threads): # Note the placeholders have no shapes, so they will accept any # enqueue_size. enqueue_many below will break them up. placeholders = [array_ops.placeholder(t) for t in types] enqueue_ops.append(queue.enqueue_many(placeholders)) seed_i = None if seed is None else (i + 1) * seed if not pad_data: feed_fns.append( get_feed_fn(placeholders, data, enqueue_size, random_start=shuffle, seed=seed_i, num_epochs=num_epochs)) else: feed_fns.append( get_feed_fn(placeholders, data, enqueue_size, random_start=shuffle, seed=seed_i, num_epochs=num_epochs, pad_value=pad_value)) runner = fqr._FeedingQueueRunner( # pylint: disable=protected-access queue=queue, enqueue_ops=enqueue_ops, feed_fns=feed_fns) queue_runner.add_queue_runner(runner) full = (math_ops.cast( math_ops.maximum(0, queue.size() - min_after_dequeue), dtypes.float32) * (1. / (capacity - min_after_dequeue))) # Note that name contains a '/' at the end so we intentionally do not place # a '/' after %s below. summary_name = ( "queue/%sfraction_over_%d_of_%d_full" % (queue.name, min_after_dequeue, capacity - min_after_dequeue)) summary.scalar(summary_name, full) return queue
def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, num_enqueue_threads=2, name=None): """Speeds up parsing by using queues to do it asynchronously. This function adds the tensors in `parsed_features` to a queue, which allows the parsing (or any other expensive op before this) to be asynchronous wrt the rest of the training graph. This greatly improves read latency and speeds up training since the data will already be parsed and ready when each step of training needs it. All queue runners are added to the queue runners collection, and may be started via `start_queue_runners`. All ops are added to the default graph. Args: parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects. keys: `Tensor` of string keys. feature_queue_capacity: Capacity of the parsed features queue. num_enqueue_threads: Number of threads to enqueue the parsed example queue. Using multiple threads to enqueue the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than parsing. name: Name of resulting op. Returns: Returns tuple of: - `Tensor` corresponding to `keys` if provided, otherwise `None`. - A dict of string key to `Tensor` or `SparseTensor` objects corresponding to `parsed_features`. Raises: ValueError: for invalid inputs. """ args = list(parsed_features.values()) if keys is not None: args += [keys] with ops.name_scope(name, 'queue_parsed_features', args): # Lets also add preprocessed tensors into the queue types for each item of # the queue. tensors_to_enqueue = [] # Each entry contains the key, and a boolean which indicates whether the # tensor was a sparse tensor. tensors_mapping = [] # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse # tensors into a queue. This could be taken care in somewhere else so others # can reuse it. Also, QueueBase maybe extended to handle sparse tensors # directly. for key in sorted(parsed_features.keys()): tensor = parsed_features[key] if isinstance(tensor, sparse_tensor.SparseTensor): tensors_mapping.append((key, True)) tensors_to_enqueue.extend( [tensor.indices, tensor.values, tensor.dense_shape]) else: tensors_mapping.append((key, False)) tensors_to_enqueue.append(tensor) if keys is not None: tensors_to_enqueue.append(keys) queue_dtypes = [x.dtype for x in tensors_to_enqueue] input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity, queue_dtypes) # Add a summary op to debug if our feature queue is full or not. summary.scalar( 'queue/parsed_features/%s/fraction_of_%d_full' % (input_queue.name, feature_queue_capacity), math_ops.cast(input_queue.size(), dtypes.float32) * (1. / feature_queue_capacity)) # Use a single QueueRunner with multiple threads to enqueue so the queue is # always full. The threads are coordinated so the last batch will not be # lost. enqueue_ops = [ input_queue.enqueue(tensors_to_enqueue) for _ in range(num_enqueue_threads) ] queue_runner.add_queue_runner( queue_runner.QueueRunner( input_queue, enqueue_ops, queue_closed_exception_types=(errors.OutOfRangeError, errors.CancelledError))) dequeued_tensors = input_queue.dequeue() if not isinstance(dequeued_tensors, list): # input_queue.dequeue() returns a single tensor instead of a list of # tensors if there is only one tensor to dequeue, which breaks the # assumption of a list below. dequeued_tensors = [dequeued_tensors] # Reset shapes on dequeued tensors. for i in range(len(tensors_to_enqueue)): dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape()) # Recreate feature mapping according to the original dictionary. dequeued_parsed_features = {} index = 0 for key, is_sparse_tensor in tensors_mapping: if is_sparse_tensor: # Three tensors are (indices, values, shape). dequeued_parsed_features[key] = sparse_tensor.SparseTensor( dequeued_tensors[index], dequeued_tensors[index + 1], dequeued_tensors[index + 2]) index += 3 else: dequeued_parsed_features[key] = dequeued_tensors[index] index += 1 dequeued_keys = None if keys is not None: dequeued_keys = dequeued_tensors[-1] return dequeued_keys, dequeued_parsed_features
def __init__(self, opt, global_step, total_num_replicas=None, variable_averages=None, variables_to_average=None, use_locking=False, name="sync_replicas"): """Construct a sync_replicas optimizer. Args: opt: The actual optimizer that will be used to compute and apply the gradients. Must be one of the Optimizer classes. replicas_to_aggregate: number of replicas to aggregate for each variable update. total_num_replicas: Total number of tasks/workers/replicas, could be different from replicas_to_aggregate. If total_num_replicas > replicas_to_aggregate: it is backup_replicas + replicas_to_aggregate. If total_num_replicas < replicas_to_aggregate: Replicas compute multiple batches per update to variables. variable_averages: Optional `ExponentialMovingAverage` object, used to maintain moving averages for the variables passed in `variables_to_average`. variables_to_average: a list of variables that need to be averaged. Only needed if variable_averages is passed in. use_locking: If True use locks for update operation. name: string. Optional name of the returned operation. """ if total_num_replicas is None: total_num_replicas = replicas_to_aggregate super(TimeoutReplicasOptimizer, self).__init__(use_locking, name) logging.info( "TimeoutReplicas: total_num_replicas=%s", total_num_replicas) self._n_updates = 0 self._opt = opt self._gradients_applied = False self._variable_averages = variable_averages self._variables_to_average = variables_to_average self._total_num_replicas = total_num_replicas self._global_step = None # The synchronization op will be executed in a queue runner which should # only be executed by one of the replicas (usually the chief). self._chief_queue_runner = None # Remember which accumulator is on which device to set the initial step in # the accumulator to be global step. This list contains list of the # following format: (accumulator, device). self._accumulator_list = [] self._constant_for_comparison = 1 # For timeout, we have one token queue per worker. This makes it so that # a worker can not take the work of another worker if it finishes early. self._sync_token_queues = [0] * self._total_num_replicas for worker in range(self._total_num_replicas): with ops.device(global_step.device): self._sync_token_queues[worker] = data_flow_ops.FIFOQueue(-1, global_step.dtype.base_dtype, shapes=(), shared_name="sync_token_q_%d" % worker)
def main(args): network = importlib.import_module(args.model_def) image_size = (args.image_size, args.image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) stat_file_name = os.path.join(log_dir, 'stat.h5') # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) if args.validation_set_split_ratio > 0.0: train_set, val_set = facenet.split_dataset( dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The training set should not be empty' val_image_list, val_label_list = facenet.get_image_paths_and_labels( val_set) # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') nrof_preprocess_threads = 4 input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Number of classes in training set: %d' % nrof_classes) print('Number of examples in training set: %d' % len(image_list)) print('Number of classes in validation set: %d' % len(val_set)) print('Number of examples in validation set: %d' % len(val_image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected( prelogits, len(train_set), activation_fn=None, weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Norm for the prelogits eps = 1e-4 prelogits_norm = tf.reduce_mean( tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p, axis=1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * args.prelogits_norm_loss_factor) # Add center loss prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') nrof_steps = args.max_nrof_epochs * args.epoch_size # Validate every validate_every_n_epochs as well as in the last epoch nrof_val_samples = int( math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs)) stat = { 'loss': np.zeros((nrof_steps, ), np.float32), 'center_loss': np.zeros((nrof_steps, ), np.float32), 'reg_loss': np.zeros((nrof_steps, ), np.float32), 'xent_loss': np.zeros((nrof_steps, ), np.float32), 'prelogits_norm': np.zeros((nrof_steps, ), np.float32), 'accuracy': np.zeros((nrof_steps, ), np.float32), 'val_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_xent_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_accuracy': np.zeros((nrof_val_samples, ), np.float32), 'lfw_accuracy': np.zeros((args.max_nrof_epochs, ), np.float32), 'lfw_valrate': np.zeros((args.max_nrof_epochs, ), np.float32), 'learning_rate': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_train': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_validate': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_evaluate': np.zeros((args.max_nrof_epochs, ), np.float32), 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), } for epoch in range(1, args.max_nrof_epochs + 1): step = sess.run(global_step, feed_dict=None) # Train for one epoch t = time.time() cont = train( args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max, args.use_fixed_image_standardization) stat['time_train'][epoch - 1] = time.time() - t if not cont: break t = time.time() if len(val_image_list) > 0 and ( (epoch - 1) % args.validate_every_n_epochs == args.validate_every_n_epochs - 1 or epoch == args.max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, args.validate_every_n_epochs, args.use_fixed_image_standardization) stat['time_validate'][epoch - 1] = time.time() - t # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) # Evaluate on LFW t = time.time() if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images, args.use_fixed_image_standardization) stat['time_evaluate'][epoch - 1] = time.time() - t print('Saving statistics') with h5py.File(stat_file_name, 'w') as f: for key, value in stat.items(): f.create_dataset(key, data=value) return model_dir
def testFIFOSharedQueue(self): shared_queue = data_flow_ops.FIFOQueue( capacity=256, dtypes=[dtypes_lib.string, dtypes_lib.string]) self._verify_all_data_sources_read(shared_queue)