def _add_saving_op():
    """
    Build the tensorflow operation used to save or restore the network.

    Only the tensors needed to rebuild the model are kept:
     - weight and bias of the input layer and of the output layer
     - weight and bias of the LSTM (named ``kernel`` and ``bias`` respectively)
     - the current ``global_step`` and ``learning_rate``

    :return: a tensorflow tf.train.Saver operation restricted to those variables
    :raises ValueError: when no global variable matches, i.e. the RNN is not built yet
    """
    # Substrings of the variable names that have to be stored in a checkpoint.
    wanted_markers = (
        '/input_w:0',
        '/input_b:0',
        '/output_w:0',
        '/output_b:0',
        'global_step:0',
        'learning_rate:0',
        '/kernel:0',
        '/bias:0',
    )

    for variable in tf.global_variables():
        logging.debug("TF variable : %s - %s", variable.name, variable)

    save_list = [
        variable for variable in tf.global_variables()
        if any(marker in variable.name for marker in wanted_markers)
    ]
    if not save_list:
        raise ValueError("Trying to define the saving operation before the RNN is built")

    return tf.train.Saver(save_list)
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.

    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs. The list passed
                        in is left untouched.
    @param clear_devices Remove the device directives from the graph for
                         better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants

    graph = session.graph
    with graph.as_default():
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))

        # Build a fresh list: the previous `output_names += ...` extended the
        # caller's list in place, so it grew on every call.
        output_names = list(output_names or []) + variable_names

        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""

        frozen_graph = convert_variables_to_constants(
            session, input_graph_def, output_names, freeze_var_names)
        return frozen_graph
def get_model_params(variable_prefix, split_lstm_matrices=True):
    """Collect the evaluated global variables into a dict keyed by variable name.

    Variables named ``Variable`` and ``Variable_1`` (under ``variable_prefix``
    when one is given) are skipped. Every ``/`` in a variable name is replaced
    by ``-`` in the returned keys.

    Args:
        variable_prefix: when truthy, only variables whose op name starts with
            this prefix are collected; otherwise all global variables are.
        split_lstm_matrices: when true, every entry whose name contains
            ``LSTMCell`` is split into four entries (``LSTMCell-i``, ``-j``,
            ``-f``, ``-o``) and the original entry is removed; ``AttnV``
            entries are reshaped to a single column and ``AttnW`` entries are
            squeezed.

    Returns:
        dict mapping the renamed variable names to their evaluated values.

    Note:
        Calls ``.eval()``, so a default session has to be active. An
        ``LSTMCell`` entry that is neither a ``Matrix`` nor a ``Bias`` logs an
        error and terminates the process via ``exit(1)``.
    """
    if variable_prefix:
        exclude = [variable_prefix + "/Variable", variable_prefix + "/Variable_1"]
        tmp = {
            v.op.name: v.eval()
            for v in tf.global_variables()
            if (v.op.name.startswith(variable_prefix) and v.op.name not in exclude)
        }
    else:
        exclude = ["Variable", "Variable_1"]
        tmp = {
            v.op.name: v.eval()
            for v in tf.global_variables()
            if v.op.name not in exclude
        }

    # Rename keys
    params = {name.replace("/", "-"): param for name, param in tmp.items()}

    if split_lstm_matrices:
        # Iterate over a snapshot of the keys: the loop inserts and deletes
        # entries, which raises "dictionary changed size during iteration" when
        # looping over the live key view on Python 3. The snapshot also keeps
        # the freshly added "LSTMCell-*" entries from being split again.
        for name in list(params):
            if "LSTMCell" in name:
                # i = input_gate, j = new_input, f = forget_gate, o = output_gate
                if "Matrix" in name:
                    i, j, f, o = array_ops.split(1, 4, params[name])
                elif "Bias" in name:
                    i, j, f, o = array_ops.split(0, 4, params[name])
                else:
                    logging.error("Unknown tensor type..")
                    exit(1)
                name_i = name.replace("LSTMCell", "LSTMCell-i")
                name_j = name.replace("LSTMCell", "LSTMCell-j")
                name_f = name.replace("LSTMCell", "LSTMCell-f")
                name_o = name.replace("LSTMCell", "LSTMCell-o")
                params[name_i] = i.eval()
                params[name_j] = j.eval()
                params[name_f] = f.eval()
                params[name_o] = o.eval()
                del params[name]
            elif "AttnV" in name:
                params[name] = array_ops.reshape(
                    params[name], [params[name].shape[0], 1]).eval()
            elif "AttnW" in name:
                # remove dims of size 1
                params[name] = tf.squeeze(params[name]).eval()

    return params
def testBatchNorm(self, module):
    """Builds `module` with batch norm enabled and checks the variables it creates."""
    net = module(
        output_channels=self.output_channels,
        kernel_shapes=self.kernel_shapes,
        strides=self.strides,
        paddings=self.paddings,
        use_batch_norm=True)
    self.assertTrue(net.use_batch_norm)

    images = tf.placeholder(tf.float32, shape=(1, 100, 100, 3))

    # The module must accept Tensorflow boolean tensors as flags...
    is_training = tf.placeholder(tf.bool)
    test_local_stats = tf.placeholder(tf.bool)
    net(images, is_training=is_training, test_local_stats=test_local_stats)

    # ...as well as plain Python booleans.
    net(images, is_training=False, test_local_stats=False)

    expected_variable_count = len(self.output_channels) * 3 - 1
    self.assertEqual(len(net.get_variables()), expected_variable_count)

    # The moving statistics variables have to exist among the global variables.
    global_names = [var.name for var in tf.global_variables()]
    self.assertTrue(any("moving_variance" in name for name in global_names))
    self.assertTrue(any("moving_mean" in name for name in global_names))
def add_saver(self):
    """Adds a Saver (V1 checkpoint format) covering every global variable."""
    all_variables = list(tf.global_variables())
    variable_listing = '\n\t'.join([variable.name for variable in all_variables])
    logging.info('Generating op to save variables:\n\t%s', variable_listing)
    self.saver = tf.train.Saver(
        var_list=all_variables,
        write_version=saver_pb2.SaverDef.V1)
def load_vggish_slim_checkpoint(session, checkpoint_path):
    """Loads a pre-trained VGGish-compatible checkpoint.

    This function can be used as an initialization function (referred to as
    init_fn in TensorFlow documentation) which is called in a Session after
    initializing all variables. When used as an init_fn, this will load
    a pre-trained checkpoint that is compatible with the VGGish model
    definition. Only variables defined by VGGish will be loaded.

    Args:
      session: an active TensorFlow session.
      checkpoint_path: path to a file containing a checkpoint that is
        compatible with the VGGish model definition.
    """
    # Get the names of all VGGish variables that exist in the checkpoint
    # (i.e., all inference-mode VGGish variables) by building the model in a
    # throwaway graph. A set keeps the membership test below constant-time
    # instead of scanning a list once per variable.
    with tf.Graph().as_default():
        define_vggish_slim(training=False)
        vggish_var_names = {v.name for v in tf.global_variables()}

    # Get the list of all currently existing variables that match
    # the names we just computed.
    vggish_vars = [v for v in tf.global_variables() if v.name in vggish_var_names]

    # Use a Saver to restore just the variables selected above.
    saver = tf.train.Saver(vggish_vars, name='vggish_load_pretrained')
    saver.restore(session, checkpoint_path)
def add_saver(self):
    """Adds a Saver (V1 checkpoint format) for the variables without 'quantized' in their name."""
    kept_variables = [
        variable for variable in tf.global_variables()
        if 'quantized' not in variable.name
    ]
    variable_listing = '\n\t'.join([variable.name for variable in kept_variables])
    logging.info('Saving non-quantized variables:\n\t%s', variable_listing)
    self.saver = tf.train.Saver(
        var_list=kept_variables,
        write_version=saver_pb2.SaverDef.V1)
def train(hparams, event_dir=None, model_dir=None,
          restore_agent=True, epoch=0):
    """Train.

    Args:
      hparams: hyper-parameters; the fields read here are `environment_spec`,
        `world_model_dir`, `epochs_num`, `eval_every_epochs` and
        `save_models_every_epochs`.
      event_dir: directory for summaries; no summaries are written when falsy.
      model_dir: directory for agent checkpoints; nothing is saved or restored
        when falsy.
      restore_agent: whether to restore the agent from `model_dir` first.
      epoch: index of the outer epoch, used to decide whether this call has
        already been trained.
    """
    with tf.name_scope("rl_train"):
        train_summary_op, _, initialization = define_train(hparams, event_dir)

        # The writer and the saver are configured independently. Previously a
        # single `else` reset both, which left one of them unbound (or cleared
        # the writer) whenever only one of the two directories was given.
        summary_writer = None
        if event_dir:
            summary_writer = tf.summary.FileWriter(
                event_dir, graph=tf.get_default_graph(), flush_secs=60)
        model_saver = None
        if model_dir:
            model_saver = tf.train.Saver(
                tf.global_variables(".*network_parameters.*"))

        # TODO(piotrmilos): This should be refactored, possibly with
        # handlers for each type of env
        if hparams.environment_spec.simulated_env:
            env_model_loader = tf.train.Saver(
                tf.global_variables("next_frame*"))
        else:
            env_model_loader = None

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            initialization(sess)
            if env_model_loader:
                trainer_lib.restore_checkpoint(
                    hparams.world_model_dir, env_model_loader, sess,
                    must_restore=True)
            start_step = 0
            if model_saver and restore_agent:
                start_step = trainer_lib.restore_checkpoint(
                    model_dir, model_saver, sess)

            # Fail-friendly, don't train if already trained for this epoch
            if start_step >= (hparams.epochs_num * (epoch + 1)):
                tf.logging.info("Skipping PPO training for epoch %d as train steps "
                                "(%d) already reached", epoch, start_step)
                return

            for epoch_index in range(hparams.epochs_num):
                summary = sess.run(train_summary_op)
                if summary_writer:
                    summary_writer.add_summary(summary, epoch_index)

                if (hparams.eval_every_epochs and
                        epoch_index % hparams.eval_every_epochs == 0):
                    if summary_writer and summary:
                        summary_writer.add_summary(summary, epoch_index)
                    else:
                        tf.logging.info("Eval summary not saved")

                if (model_saver and hparams.save_models_every_epochs and
                        (epoch_index % hparams.save_models_every_epochs == 0 or
                         (epoch_index + 1) == hparams.epochs_num)):
                    ckpt_path = os.path.join(
                        model_dir,
                        "model.ckpt-{}".format(epoch_index + 1 + start_step))
                    model_saver.save(sess, ckpt_path)
def _create_initializers(self):
    """Rebuilds the saver, init ops and summaries when the variable count changed."""
    if self._var_count == len(tf.global_variables()):
        # Same number of global variables as last time: nothing to rebuild.
        return

    save_dir = None
    if self._save_path:
        save_dir = os.path.dirname(self._save_path)
    if save_dir and not tf.gfile.IsDirectory(save_dir):
        tf.gfile.MakeDirs(save_dir)

    self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
    self._init = tf.global_variables_initializer()
    self._local_init = tf.local_variables_initializer()
    self._check_inited = tf.assert_variables_initialized()
    # Remember the count so the next call can detect newly added variables.
    self._var_count = len(tf.global_variables())

    if self._summary_writer:
        self._summaries = tf.summary.merge_all()
        self._summary_writer.add_graph(tf.get_default_graph())
def testBatchNormScale(self):
    """Every BatchNorm moving_mean must have a sibling gamma when scaling is on."""
    height, width = 299, 299
    num_classes = 1000
    inputs = tf.placeholder(tf.float32, (1, height, width, 3))

    scope = inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)
    with tf.contrib.slim.arg_scope(scope):
        inception.inception_resnet_v2(inputs, num_classes, is_training=False)

    gamma_names = {
        v.op.name for v in tf.global_variables('.*/BatchNorm/gamma:0$')
    }
    self.assertGreater(len(gamma_names), 0)

    suffix_length = len('moving_mean')
    for v in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
        # Swap the trailing 'moving_mean' for 'gamma' to get the sibling name.
        expected_gamma = v.op.name[:-suffix_length] + 'gamma'
        self.assertIn(expected_gamma, gamma_names)
def get_train_op(self,
                 loss,
                 learning_rate,
                 optimizer=None,
                 clip_norm=None,
                 learnable_scopes=None,
                 optimizer_scope_name=None):
    """ Build the training operation for the given loss

    Args:
        loss: loss, tf tensor or scalar
        learning_rate: scalar or placeholder
        optimizer: optimizer class, called as ``optimizer(learning_rate)``;
            tf.train.AdamOptimizer when None
        clip_norm: clip each gradient's norm to this value (no clipping when None)
        learnable_scopes: variables whose name contains one of these strings
            are trained (None for all global variables)
        optimizer_scope_name: variable scope the op is built in,
            'Optimizer' when None

    Returns:
        train_op
    """
    scope_name = 'Optimizer' if optimizer_scope_name is None else optimizer_scope_name

    def _clip(grad):
        # Variables unrelated to the loss have a None gradient; keep it None.
        if grad is not None:
            return tf.clip_by_norm(grad, clip_norm)

    with tf.variable_scope(scope_name):
        if learnable_scopes is None:
            variables_to_train = tf.global_variables()
        else:
            variables_to_train = [
                var
                for scope in learnable_scopes
                for var in tf.global_variables()
                if scope in var.name
            ]

        if optimizer is None:
            optimizer = tf.train.AdamOptimizer

        # For batch norm it is necessary to update running averages
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            opt = optimizer(learning_rate)
            grads_and_vars = opt.compute_gradients(loss, var_list=variables_to_train)
            if clip_norm is not None:
                grads_and_vars = [(_clip(grad), var) for grad, var in grads_and_vars]
            train_op = opt.apply_gradients(grads_and_vars)
    return train_op
def assign_weight(self): ''' Encapsulate unit-class pruning and multi-class pruning print("PRUNE FOR CLASS", self.target_class_id) ''' print("assign weights......") maskDict = [] if (self.multiPruning == True and len(self.target_class_id) > 1): maskDict = self.mask_class_multi_by_value() else: maskDict = self.mask_unit_by_value(self.target_class_id[0]) for tmpLayer in maskDict: if (tmpLayer["name"][0] == "C"): # if the layer is convolutional layer with self.graph.as_default(): layerNum = tmpLayer["name"].strip("Conv") name = "Conv" + layerNum + "/composite_function/kernel:0" for var in tf.global_variables(): if var.name == name: tmpWeights = self.sess.run(var) tmpMask = np.array(tmpLayer["shape"]) tmpWeights[:,:,:, tmpMask == 0] = 0 assign = tf.assign(var, tmpWeights) self.sess.run(assign) # print(self.sess.run(self.graph.get_tensor_by_name(name))==0) if (tmpLayer["name"][0] == "F"): # if the layer is fully connected with self.graph.as_default(): layerNum = tmpLayer["name"].strip("FC") name_W = "FC" + layerNum + "/W:0" name_bias = "FC" + layerNum + "/bias:0" for var in tf.global_variables(): if var.name == name_W: tmpWeights = self.sess.run(var) tmpMask = np.array(tmpLayer["shape"]) tmpWeights[:, tmpMask == 0] = 0 assign = tf.assign(var, tmpWeights) self.sess.run(assign) # print(self.sess.run(self.graph.get_tensor_by_name(name_W))==0) if var.name == name_bias: tmpBias = self.sess.run(var) tmpMask = np.array(tmpLayer["shape"]) tmpBias[tmpMask == 0] = 0 assign = tf.assign(var, tmpBias) self.sess.run(assign) # print(self.sess.run(self.graph.get_tensor_by_name(name_bias))==0) print("assign finished!") '''
def optimize(loss, learning_rate, hparams, use_tpu=False):
    """Minimize loss.

    Logs (and optionally summarizes) the variables, wraps the configured
    optimizer and returns the training op built by
    `tf.contrib.layers.optimize_loss`.
    """
    loss = tf.identity(
        weight_decay_and_noise(loss, hparams, learning_rate), name="total_loss")
    verbose = hparams.summarize_vars

    # Trainable variables first, then everything that is global but not trainable.
    log_variable_sizes(verbose=verbose)
    non_trainable_variables = list(
        set(tf.global_variables()) - set(tf.trainable_variables()))
    log_variable_sizes(
        non_trainable_variables, tag="Non-trainable variables", verbose=verbose)
    if verbose:
        summarize_variables()
        # Summarize non-trainable variables as well
        summarize_variables(non_trainable_variables, tag="Non-trainable variables")

    diet_vars = [
        var for var in tf.global_variables() if var.dtype == dtypes.float16_ref
    ]
    log_variable_sizes(diet_vars, "Diet Variables", verbose=verbose)

    opt = ConditionalOptimizer(hparams.optimizer, learning_rate, hparams, use_tpu)
    if use_tpu:
        opt = tf.contrib.tpu.CrossShardOptimizer(opt)

    opt_summaries = []
    if common_layers.should_generate_summaries():
        tf.summary.scalar("learning_rate", learning_rate)
        opt_summaries.append("loss")
        if hparams.summarize_grads:
            tf.logging.info("Summarizing gradients")
            opt_summaries.extend(
                ["gradients", "gradient_norm", "global_gradient_norm"])

    if hparams.clip_grad_norm:
        tf.logging.info("Clipping gradients, norm: %0.5f", hparams.clip_grad_norm)
    if hparams.grad_noise_scale:
        tf.logging.info("Adding noise to gradients, noise scale: %0.5f",
                        hparams.grad_noise_scale)

    return tf.contrib.layers.optimize_loss(
        name="training",
        loss=loss,
        global_step=tf.train.get_or_create_global_step(),
        learning_rate=learning_rate,
        clip_gradients=hparams.clip_grad_norm or None,
        gradient_noise_scale=hparams.grad_noise_scale or None,
        optimizer=opt,
        summaries=opt_summaries,
        colocate_gradients_with_ops=True)
def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None,
              global_step=None, printable=False):
    """Save parameters into ckpt file.

    Parameters
    ------------
    sess : Session. Must not be None.
    mode_name : string, name of the model, default is ``model.ckpt``.
    save_dir : string, path / file directory to the ckpt, default is ``checkpoint``.
    var_list : list of variables, if not given (None or empty), save all global variables.
    global_step : int or None, step number.
    printable : bool, if True, print all params info.

    Examples
    ---------
    - see ``tl.files.load_ckpt()``.
    """
    assert sess is not None
    ckpt_file = os.path.join(save_dir, mode_name)

    # None, [] and () all mean "everything". The default used to be a shared
    # mutable list that was compared with `== []`, so an empty tuple slipped
    # through and reached the Saver.
    if not var_list:
        var_list = tf.global_variables()

    print("[*] save %s n_params: %d" % (ckpt_file, len(var_list)))

    if printable:
        for idx, v in enumerate(var_list):
            print("  param {:3}: {:15}   {}".format(idx, v.name, str(v.get_shape())))

    saver = tf.train.Saver(var_list)
    saver.save(sess, ckpt_file, global_step=global_step)
def get_global_variable_by_name(name):
    """Returns the first global variable whose name equals `name`.

    name : the full name of the global variable

    Raises IndexError when no global variable has that name.
    """
    matching = [variable for variable in tf.global_variables() if variable.name == name]
    return matching[0]
def testNotInLocalVariables(self):
    """A model variable is global and in MODEL_VARIABLES, but never local."""
    with self.test_session():
        with tf.variable_scope('A'):
            a = tf.contrib.framework.model_variable('a', [5])
            model_variables = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES)
            self.assertIn(a, tf.global_variables())
            self.assertIn(a, model_variables)
            self.assertNotIn(a, tf.local_variables())
def train_speech_to_text_network():
    """Train the speech-to-text network with CTC loss for 16 epochs.

    The learning rate decays by a factor of 0.97 per epoch and a checkpoint
    named 'speech.module' is written every fifth epoch.
    """
    global pointer

    logit = speech_to_text_network()

    # CTC loss: non-zero label positions become the sparse target (shifted by one).
    nonzero_positions = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    target = tf.SparseTensor(
        indices=nonzero_positions,
        values=tf.gather_nd(Y, nonzero_positions) - 1,
        shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(logit, target, sequence_len, time_major=False)

    # optimizer
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    trainable = list(tf.trainable_variables())
    gradient = optimizer.compute_gradients(loss, var_list=trainable)
    optimizer_op = optimizer.apply_gradients(gradient)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        for epoch in range(16):
            sess.run(tf.assign(lr, 0.001 * (0.97 ** epoch)))

            # Rewind the module-level batch cursor at the start of each epoch.
            pointer = 0
            for batch in range(n_batch):
                batches_wavs, batches_labels = get_next_batches(batch_size)
                feed = {X: batches_wavs, Y: batches_labels}
                train_loss, _ = sess.run([loss, optimizer_op], feed_dict=feed)
                print(epoch, batch, train_loss)

            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)
def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10,
          do_resume=False):
    """Run the training loop.

    Args:
        data: when truthy, replaces ``self.data``.
        steps: number of training steps; -1 means 9999999.
        dropout: value fed to ``keep_prob`` while training; a falsy value
            means 1.0 (keep all).
        display_step: print loss and accuracy every this many steps.
        test_step: call ``self.test`` every this many steps.
        batch_size: batch size requested from ``self.next_batch``.
        do_resume: restore the latest checkpoint from ``checkpoint_dir``
            before training, when one exists.

    Returns:
        None. Stops early when a displayed loss is NaN.
    """
    if data:
        self.data = data
    steps = 9999999 if steps == -1 else steps
    session = self.session
    tf.add_check_numerics_ops()

    # Each try/except below falls back to the older TensorFlow name when the
    # newer attribute does not exist. Only AttributeError is caught so that
    # genuine failures are not hidden behind the fallback.
    try:
        self.summaries = tf.summary.merge_all()
    except AttributeError:
        self.summaries = tf.merge_all_summaries()
    try:
        self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
    except AttributeError:
        self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)

    if not dropout:
        dropout = 1.  # keep all
    x = self.x
    y = self.y
    keep_prob = self.keep_prob

    try:
        saver = tf.train.Saver(tf.global_variables())
    except AttributeError:
        saver = tf.train.Saver(tf.all_variables())
    snapshot = self.name + str(get_last_tensorboard_run_nr())
    checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

    # Initialize first and restore afterwards. Running the initializer after
    # saver.restore() reset every variable and silently discarded the
    # checkpoint that had just been loaded.
    try:
        session.run([tf.global_variables_initializer()])
    except AttributeError:
        session.run([tf.initialize_all_variables()])
    if do_resume and checkpoint:
        print("LOADING " + checkpoint + " !!!")
        saver.restore(session, checkpoint)

    step = 0  # show first
    while step < steps:
        batch_xs, batch_ys = self.next_batch(batch_size, session)

        # Fit training using batch data
        feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
        loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)

        if step % display_step == 0:
            seconds = int(time.time()) - start
            # Calculate batch accuracy, loss (dropout disabled, inference phase)
            feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
            acc, _summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
            # The summary is fetched but not written: only test summaries are
            # recorded, for a smoother curve.
            print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(
                step, loss, acc, seconds), end=' ')
            if str(loss) == "nan":
                return print("\nLoss gradiant explosion, exiting!!!")  # restore!

        if step % test_step == 0:
            self.test(step)
        if step % save_step == 0 and step > 0:
            print("SAVING snapshot %s" % snapshot)
            saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)

        step += 1

    print("\nOptimization Finished!")
    self.test(step, number=10000)  # final test
def _match_vars(self, func):
    """Call `func(reader, name, var)` for every graph variable found in the checkpoint.

    Variables present on only one side (graph or checkpoint) are reported
    through a MismatchLogger, except those `is_training_name` accepts.
    """
    reader, chkpt_vars = SaverRestore._read_checkpoint_vars(self.path)
    used_names = set()

    # Pass 1: graph variables, matched against the checkpoint content.
    missing_in_chkpt = MismatchLogger('graph', 'checkpoint')
    for var in tf.global_variables():
        name = get_savename_from_varname(var.name, varname_prefix=self.prefix)
        if name in self.ignore and reader.has_tensor(name):
            logger.info("Variable {} in the graph will not be loaded from the checkpoint!".format(name))
        elif reader.has_tensor(name):
            func(reader, name, var)
            used_names.add(name)
        else:
            op_name = var.op.name
            if not is_training_name(op_name):
                missing_in_chkpt.add(op_name)
    missing_in_chkpt.log()

    # Pass 2: checkpoint entries that no graph variable consumed.
    missing_in_graph = MismatchLogger('checkpoint', 'graph')
    if len(used_names) < len(chkpt_vars):
        for name in sorted(chkpt_vars - used_names):
            if not is_training_name(name):
                missing_in_graph.add(name)
    missing_in_graph.log()
def initialize(self, sess):
    """Initialize all variables, then load and fix the pretrained weights.

    Returns:
        (rate, last_snapshot_iter, stepsizes, np_paths, ss_paths): the
        configured learning rate, 0, the configured step sizes as a list, and
        two empty lists.
    """
    # Fresh train directly from ImageNet weights
    print('Loading initial model weights from {:s}'.format(self.pretrained_model))
    all_variables = tf.global_variables()

    # Initialize all variables first
    sess.run(tf.variables_initializer(all_variables, name='init'))

    # Restore whatever the checkpoint provides, ignoring the variables to fix
    checkpoint_vars = self.get_variables_in_checkpoint_file(self.pretrained_model)
    to_restore = self.net.get_variables_to_restore(all_variables, checkpoint_vars)
    tf.train.Saver(to_restore).restore(sess, self.pretrained_model)
    print('Loaded.')

    # Need to fix the variables before loading, so that the RGB weights are
    # changed to BGR. For VGG16 it also changes the convolutional weights
    # fc6 and fc7 to fully connected weights.
    self.net.fix_variables(sess, self.pretrained_model)
    print('Fixed.')

    rate = cfg.TRAIN.LEARNING_RATE
    stepsizes = list(cfg.TRAIN.STEPSIZE)
    last_snapshot_iter = 0
    # Initial file lists are empty
    np_paths, ss_paths = [], []
    return rate, last_snapshot_iter, stepsizes, np_paths, ss_paths
def getLoadVars(self):
    """Return the global variables to load, always leaving out the Adam ones.

    When `self.resLoad` is set, only variables whose name contains
    "class_weight", "class_bias" or "conv1" are kept.
    """
    without_adam = [var for var in tf.global_variables() if "Adam" not in var.name]
    if not self.resLoad:
        return without_adam

    wanted_tags = ("class_weight", "class_bias", "conv1")
    return [
        var for var in without_adam
        if any(tag in var.name for tag in wanted_tags)
    ]
def testTrainWithSummary(self):
    """One training step with summaries on writes a 'loss' summary event."""
    with tf.Graph().as_default():
        images = tf.placeholder(tf.float32, image_shape(None), name='images')
        labels = tf.placeholder(tf.float32, [None, 1000], name='labels')

        tf.train.get_or_create_global_step()
        logdir = tempfile.mkdtemp()
        with tf.contrib.summary.always_record_summaries(), \
                tf.contrib.summary.create_file_writer(
                    logdir, max_queue=0, name='t0').as_default():
            model = resnet50.ResNet50(data_format())
            logits = model(images, training=True)
            loss = tf.losses.softmax_cross_entropy(
                logits=logits, onehot_labels=labels)
            tf.contrib.summary.scalar(name='loss', tensor=loss)
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
            train_op = optimizer.minimize(loss)

        init = tf.global_variables_initializer()
        self.assertEqual(321, len(tf.global_variables()))

        batch_size = 32
        with tf.Session() as sess:
            sess.run(init)
            sess.run(tf.contrib.summary.summary_writer_initializer_op())
            np_images, np_labels = random_batch(batch_size)
            fetches = [train_op, tf.contrib.summary.all_summary_ops()]
            sess.run(fetches, feed_dict={images: np_images, labels: np_labels})

        events = summary_test_util.events_from_logdir(logdir)
        self.assertEqual(len(events), 2)
        self.assertEqual(events[1].summary.value[0].tag, 'loss')
def load_decode_model(self):
    """Load the G2P model for decoding and restore its parameters in a new session.

    Raises:
      RuntimeError: when `self.model_dir` contains no 'checkpoint' file.
    """
    model_dir = self.model_dir
    if not os.path.exists(os.path.join(model_dir, 'checkpoint')):
        raise RuntimeError("Model not found in %s" % model_dir)

    self.batch_size = 1  # We decode one word at a time.

    # Load model parameters.
    num_layers, size = data_utils.load_params(model_dir)

    # Load vocabularies
    print("Loading vocabularies from %s" % model_dir)
    grapheme_path = os.path.join(model_dir, "vocab.grapheme")
    phoneme_path = os.path.join(model_dir, "vocab.phoneme")
    self.gr_vocab = data_utils.load_vocabulary(grapheme_path)
    self.ph_vocab = data_utils.load_vocabulary(phoneme_path)
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_path, reverse=True)

    self.session = tf.Session()

    # Restore model.
    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
        size, num_layers, 0, self.batch_size, 0, 0,
        forward_only=True)
    self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # Check for saved models and restore them.
    print("Reading model parameters from %s" % model_dir)
    self.model.saver.restore(self.session, os.path.join(model_dir, "model"))
def load(self, dir_name, epoch=0, name=None):
    """load model from dir

    Parameters
    ----------
    dir_name: str
        name of the directory
    epoch: int
        epoch number that is part of the checkpoint file name
    name: str
        name the checkpoint was saved under; None (or our own name) restores
        directly, any other name copies the values variable by variable
    """
    if name is None or name == self.name:  # the name of saved model is the same as ours
        dir_name = os.path.join(dir_name, self.name)
        model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name)
        saver = tf.train.Saver(model_vars)
        saver.restore(self.sess, os.path.join(dir_name, (self.subclass_name + "_%d") % epoch))
    else:  # load a checkpoint with different name
        backup_graph = tf.get_default_graph()
        kv_dict = {}

        # load checkpoint from another saved graph
        with tf.Graph().as_default(), tf.Session() as sess:
            tf.train.import_meta_graph(
                os.path.join(dir_name, name, (self.subclass_name + "_%d") % epoch + ".meta"))
            dir_name = os.path.join(dir_name, name)
            model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name)
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(model_vars)
            saver.restore(sess, os.path.join(dir_name, (self.subclass_name + "_%d") % epoch))
            for item in tf.global_variables():
                kv_dict[item.name] = sess.run(item)

        # assign to now graph. `as_default()` only takes effect as a context
        # manager; the bare call used before did nothing.
        with backup_graph.as_default():
            model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name)
            for item in model_vars:
                old_name = item.name.replace(self.name, name)
                self.sess.run(tf.assign(item, kv_dict[old_name]))
def evaluate_checkpoint(tt='test', checkpoint=None, output_file=None, output_file_interp=None):
    """
    Evaluate model on specific checkpoint
    :param tt: 'train', 'test'
    :param checkpoint: path to checkpoint
    :param output_file: directory the predictions are written to. If None, a
        'predict_subset/<tt>' directory next to the checkpoint is used and
        created when missing.
    :param output_file_interp: not used by this function
    :return: None
    """
    # Import data
    data_set = KspaceDataSet(base_dir, file_names.values(), stack_size=50, shuffle=False,
                             data_base=FLAGS.database)

    net = load_graph()

    # Create a saver and keep all checkpoints
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    # The session is closed on exit instead of being left open.
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)

        data_set_tt = getattr(data_set, tt)

        all_acc = []
        predict_counter = 0

        if output_file is None:
            # Create output directories; tolerate a directory left by an earlier run.
            output_file = os.path.join(
                os.path.abspath(os.path.join(checkpoint, os.pardir)), 'predict_subset', tt)
            if not os.path.isdir(output_file):
                os.makedirs(output_file)

        gen_loss_adversarial = 1.0

        print("Evaluate Model using checkpoint: %s, data=%s" % (checkpoint, tt))

        # Raw array buffers are written, so the files are opened in binary
        # mode (text mode rejects them on Python 3). The `with` closes both
        # files even when evaluation fails.
        real_path = os.path.join(output_file, "000000.predict_real.bin")
        imag_path = os.path.join(output_file, "000000.predict_imag.bin")
        with open(real_path, 'wb') as f_out_real, open(imag_path, 'wb') as f_out_imag:
            while data_set_tt.epoch == 0:
                # Running over all data until epoch > 0
                feed = feed_data(data_set, net.labels, net.train_phase, tt=tt,
                                 batch_size=FLAGS.mini_batch_size)
                if feed is None:
                    break

                feed[net.adv_loss_w] = gen_loss_adversarial
                predict, result = sess.run([net.predict_g, net.evaluation], feed_dict=feed)
                all_acc.append(np.array(result))
                print('Time: %s , Accuracy for mini_batch is: %s' % (datetime.datetime.now(), result))
                f_out_real.write(predict['real'].ravel())
                f_out_imag.write(predict['imag'].ravel())

                predict_counter += FLAGS.mini_batch_size
                print("Done - " + str(predict_counter))
                if predict_counter >= FLAGS.max_predict:
                    break

    print("Total accuracy is: %f" % np.array(all_acc).mean())
def train(data, model):
    """Train `model` on the batches of `data`, saving a checkpoint periodically.

    A checkpoint is written to `save_dir` every 1000 global batches and after
    the very last batch. The learning rate decays by 0.97 per epoch.
    """
    total_batches = epochs * data.n_size

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        n = 0
        for epoch in range(epochs):
            sess.run(tf.assign(model.learning_rate, 0.002 * (0.97 ** epoch)))

            for batche in range(data.n_size):
                n += 1
                # The batch cursor restarts at 0 each epoch, so it equals `batche`.
                feed_dict = {
                    model.x_tf: data.x_batches[batche],
                    model.y_tf: data.y_batches[batche],
                }
                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op],
                    feed_dict=feed_dict)

                global_batch = epoch * data.n_size + batche
                # '\r' rewinds to the start of the line so the progress text is overwritten.
                sys.stdout.write('\r')
                sys.stdout.write("{}/{} (epoch {}) | train_loss {:.3f}".format(
                    global_batch, total_batches, epoch, train_loss))
                sys.stdout.flush()

                # save
                is_last_batch = (epoch == epochs-1 and batche == data.n_size-1)
                if global_batch % 1000 == 0 or is_last_batch:
                    checkpoint_path = os.path.join(save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=n)
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
            sys.stdout.write('\n')
def get_post_init_ops():
    """
    Build the op that copies each master variable into its 'tower*' replicas.

    Returns:
        a grouped op named 'sync_variables_from_main_tower'.
    """
    # literally all variables, because it's better to sync optimizer-internal variables as well
    all_vars = tf.global_variables() + tf.local_variables()
    var_by_name = {var.name: var for var in all_vars}

    post_init_ops = []
    for var in all_vars:
        if not var.name.startswith('tower'):
            continue
        if var.name.startswith('tower0'):
            logger.warn("[SyncMultiGPUReplicatedBuilder] variable "
                        "{} has prefix 'tower0', this is unexpected.".format(var.name))
            continue  # TODO some vars (EMA) may still startswith tower0

        # in this trainer, the master name doesn't have the towerx/ prefix
        prefix, _, realname = var.name.partition('/')
        if prefix in realname:
            logger.error("[SyncMultiGPUReplicatedBuilder] variable "
                         "{} has its prefix {} appears multiple times in its name!".format(var.name, prefix))

        copy_from = var_by_name.get(realname)
        assert copy_from is not None, var_by_name.keys()
        post_init_ops.append(var.assign(copy_from.read_value()))

    logger.info(
        "'sync_variables_from_main_tower' includes {} operations.".format(len(post_init_ops)))
    return tf.group(*post_init_ops, name='sync_variables_from_main_tower')
def evaluate():
    """ Build the evaluation graph, restore the latest checkpoint and report loss/accuracy. """
    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            m = model.Model(FLAGS, is_train=False)
        saver = tf.train.Saver(tf.global_variables())

        # read test files (or the training set when FLAGS.train_data is set)
        data_file = 'train.cPickle' if FLAGS.train_data else 'test.cPickle'
        loader = text_input.DataLoader(os.path.join(FLAGS.data_dir, data_file),
                                       batch_size=FLAGS.batch_size)
        print('Start evaluation, %d batches needed, with %d examples per batch.'
              % (loader.num_batch, FLAGS.batch_size))

        true_count = 0
        avg_loss = 0

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                raise IOError("Loading checkpoint file failed!")
            saver.restore(sess, ckpt.model_checkpoint_path)

            for _ in range(loader.num_batch):
                x, y = loader.next_batch()
                batch_true_count, batch_loss = sess.run(
                    [m.true_count_op, m.total_loss],
                    feed_dict={m.inputs: x, m.labels: y})
                true_count += batch_true_count
                avg_loss += batch_loss

            accuracy = float(true_count) / (loader.num_batch * FLAGS.batch_size)
            avg_loss = float(avg_loss) / loader.num_batch
            print('%s: test_loss = %.6f, test_accuracy = %.3f' % (datetime.now(), avg_loss, accuracy))
def testNest(self, getter1, getter2):
    """A variable created under two nested custom getters is global but not trainable."""
    with tf.variable_scope("scope1", custom_getter=getter1), \
            tf.variable_scope("scope2", custom_getter=getter2):
        tf.get_variable("w", [10, 10], tf.float32)

    global_count = len(tf.global_variables())
    trainable_count = len(tf.trainable_variables())
    self.assertEqual(1, global_count)
    self.assertEqual(0, trainable_count)
def restore_map(self,
                from_detection_checkpoint=True,
                load_all_detection_checkpoint_vars=False):
    """Returns a map of variables to load from a foreign checkpoint.

    See parent class for details.

    Args:
      from_detection_checkpoint: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.
      load_all_detection_checkpoint_vars: whether to load all variables (when
        `from_detection_checkpoint` is True). If False, only variables within
        the appropriate scopes are included. Default False.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph.
    """
    load_everything = (
        from_detection_checkpoint and load_all_detection_checkpoint_vars)
    feature_scope = self._extract_features_scope

    variables_to_restore = {}
    for variable in tf.global_variables():
        var_name = variable.op.name
        if load_everything:
            variables_to_restore[var_name] = variable
            continue
        if not var_name.startswith(feature_scope):
            continue
        if not from_detection_checkpoint:
            # Classification checkpoints store the names without the leading
            # feature extractor scope, so strip it.
            var_name = re.split('^' + feature_scope + '/', var_name)[-1]
        variables_to_restore[var_name] = variable
    return variables_to_restore
def main():
    """Run multi-scale, flip-averaged parsing inference and write the results to disk.

    Command-line flags select the output directory, the input data/list, the
    checkpoint to restore, the base input resolution (``--inres`` as "h,w"),
    the number of inference steps and the number of classes.

    For every step one image is pulled from the reader queue, evaluated at
    0.75x, 1.0x and 1.25x of the base resolution together with its mirrored
    copy, the predictions are averaged, and two files are written to the
    output directory: ``<id>_vis.png`` (colourised mask from
    ``decode_labels``) and ``<id>.png`` (raw label indices).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--data-directory', default='./datasets/examples/201906120600_201906121200_person/', type=str)
    parser.add_argument('--data-list-path', default='./datasets/examples/list/val.txt', type=str)
    parser.add_argument('--restore-from', default='./checkpoint/JPPNet-s2', type=str)
    parser.add_argument('--inres', default='384,384', type=str)
    parser.add_argument('--num-steps', default=10, type=int)
    parser.add_argument('--num-classes', default=20, type=int)
    # Unknown flags are tolerated and ignored.
    args, _ = parser.parse_known_args()
    # "h,w" string -> (h, w) tuple of ints.
    args.inres = tuple(int(x) for x in args.inres.split(','))

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    """Create the model and start the evaluation process."""
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    h, w = args.inres
    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_directory, args.data_list_path, None, False, False, coord)
        image = reader.image
        # Mirrored copy of the image (reversed along axis 1).
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    # Batch of two: index 0 is the original image, index 1 the mirrored one.
    image_batch_origin = tf.stack([image, image_rev])
    image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)])
    image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)])
    image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)])

    # Create network.
    # The first instantiation creates the variables; the other two scales
    # reuse them, so the reuse=False block must stay first.
    with tf.variable_scope('', reuse=False):
        net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=args.num_classes)

    # parsing net
    parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing']
    parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing']
    parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing']

    parsing_out1_100 = net_100.layers['fc1_human']
    parsing_out1_075 = net_075.layers['fc1_human']
    parsing_out1_125 = net_125.layers['fc1_human']

    # pose net
    resnet_fea_100 = net_100.layers['res4b22_relu']
    resnet_fea_075 = net_075.layers['res4b22_relu']
    resnet_fea_125 = net_125.layers['res4b22_relu']

    # Pose/parsing refinement heads; same create-then-reuse ordering as above.
    with tf.variable_scope('', reuse=False):
        pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose')
        pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose')
        parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing')
        parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose')
        pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose')
        parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing')
        parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose')
        pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose')
        parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing')
        parsing_out3_125, parsing_fea3_125 = parsing_refine(parsing_out2_125, pose_out2_125, parsing_fea2_125, name='fc3_parsing')

    # For each refinement stage: resize the three scales back to the input
    # image size and average them.
    parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out1_075, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out1_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)
    parsing_out2 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out2_100, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out2_075, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out2_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)
    parsing_out3 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out3_100, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out3_075, tf.shape(image_batch_origin)[1:3,]),
                                            tf.image.resize_images(parsing_out3_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)

    # Average the three stages, then split the batch back into the prediction
    # for the original image (head) and for the mirrored image (tail).
    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)
    # Un-mirror the tail prediction: swap channel pairs 14<->15, 16<->17 and
    # 18<->19, then flip it back along axis 1.
    # NOTE(review): presumably these pairs are left/right body-part classes;
    # the hard-coded 20 channels ignore --num-classes - confirm.
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    tail_list_rev = [None] * 20
    for xx in range(14):
        tail_list_rev[xx] = tail_list[xx]
    tail_list_rev[14] = tail_list[15]
    tail_list_rev[15] = tail_list[14]
    tail_list_rev[16] = tail_list[17]
    tail_list_rev[17] = tail_list[16]
    tail_list_rev[18] = tail_list[19]
    tail_list_rev[19] = tail_list[18]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))

    # Average original and un-mirrored predictions, then take the per-pixel
    # arg-max class.
    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, dim=0)
    raw_output_all = tf.argmax(raw_output_all, dimension=3)
    pred_all = tf.expand_dims(raw_output_all, dim=3)  # Create 4-d tensor.

    # Which variables to load.
    restore_var = tf.global_variables()
    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        if load(loader, sess, args.restore_from):
            print(" [*] Load SUCCESS")
        else:
            # A failed load is only reported; inference continues with the
            # freshly initialised variables.
            print(" [!] Load failed...")

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Run inference once per step (one image per step).
    # NOTE(review): image_list[step] assumes the reader yields images in list
    # order and that --num-steps does not exceed the list length - confirm.
    for step in tqdm(range(args.num_steps)):
        parsing_ = sess.run(pred_all)
        if step % 100 == 0:
            print('step {:d}'.format(step))
            print (image_list[step])

        img_split = image_list[step].split('/')
        # File name without its last four characters (e.g. ".jpg").
        img_id = img_split[-1][:-4]

        msk = decode_labels(parsing_, num_classes=args.num_classes)
        parsing_im = Image.fromarray(msk[0])
        parsing_im.save('{}/{}_vis.png'.format(args.output, img_id))
        cv2.imwrite('{}/{}.png'.format(args.output, img_id), parsing_[0,:,:,0])

    coord.request_stop()
    coord.join(threads)
def __init__(self):
    """Read the training configuration and build the full training graph.

    Builds, in order: the input placeholders, the YOLOV3 model and its three
    loss terms, a warm-up + cosine learning-rate schedule, an exponential
    moving average over the trainable variables, two training ops (output
    convolutions only / all variables), a loader and a saver, and the
    summary ops and writer.

    Side effects: creates a ``tf.Session``, and deletes and recreates the
    ``./data/log/`` directory.
    """
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)
    self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
    self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
    self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
    self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
    self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
    self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
    # Timestamp of construction, formatted as YYYY-mm-dd-HH-MM-SS.
    self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                              time.localtime(time.time()))
    self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
    self.max_bbox_per_scale = 150
    self.train_logdir = "./data/log/train"
    self.trainset = Dataset('train')
    self.testset = Dataset('test')
    # len(trainset) is used as the number of steps in one epoch/period.
    self.steps_per_period = len(self.trainset)
    self.sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True))

    # Placeholders are declared without a static shape.
    with tf.name_scope('define_input'):
        self.input_data = tf.placeholder(dtype=tf.float32,
                                         name='input_data')
        self.label_sbbox = tf.placeholder(dtype=tf.float32,
                                          name='label_sbbox')
        self.label_mbbox = tf.placeholder(dtype=tf.float32,
                                          name='label_mbbox')
        self.label_lbbox = tf.placeholder(dtype=tf.float32,
                                          name='label_lbbox')
        self.true_sbboxes = tf.placeholder(dtype=tf.float32,
                                           name='sbboxes')
        self.true_mbboxes = tf.placeholder(dtype=tf.float32,
                                           name='mbboxes')
        self.true_lbboxes = tf.placeholder(dtype=tf.float32,
                                           name='lbboxes')
        self.trainable = tf.placeholder(dtype=tf.bool, name='training')

    with tf.name_scope("define_loss"):
        self.model = YOLOV3(self.input_data, self.trainable)
        # Snapshot of the global variables that exist right after the model
        # is built, i.e. before global_step, the moving averages and the
        # optimizer slots are created. Used by self.loader below.
        self.net_var = tf.global_variables()
        self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
            self.label_sbbox, self.label_mbbox, self.label_lbbox,
            self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
        self.loss = self.giou_loss + self.conf_loss + self.prob_loss

    with tf.name_scope('learn_rate'):
        # The step counter is a float64 variable starting at 1.0.
        self.global_step = tf.Variable(1.0,
                                       dtype=tf.float64,
                                       trainable=False,
                                       name='global_step')
        warmup_steps = tf.constant(self.warmup_periods *
                                   self.steps_per_period,
                                   dtype=tf.float64,
                                   name='warmup_steps')
        train_steps = tf.constant(
            (self.first_stage_epochs + self.second_stage_epochs) *
            self.steps_per_period,
            dtype=tf.float64,
            name='train_steps')
        # Linear ramp up to learn_rate_init during warm-up, then a cosine
        # curve from learn_rate_init down to learn_rate_end at train_steps.
        self.learn_rate = tf.cond(
            pred=self.global_step < warmup_steps,
            true_fn=lambda: self.global_step / warmup_steps * self.learn_rate_init,
            false_fn=lambda: self.learn_rate_end + 0.5 *
            (self.learn_rate_init - self.learn_rate_end) * (1 + tf.cos(
                (self.global_step - warmup_steps) /
                (train_steps - warmup_steps) * np.pi)))
        global_step_update = tf.assign_add(self.global_step, 1.0)

    # Despite the scope name, this block sets up the exponential moving
    # average of the trainable variables.
    with tf.name_scope("define_weight_decay"):
        moving_ave = tf.train.ExponentialMovingAverage(
            self.moving_ave_decay).apply(tf.trainable_variables())

    with tf.name_scope("define_first_stage_train"):
        # First stage: only variables whose top-level scope is one of the
        # three output convolutions are optimised.
        self.first_stage_trainable_var_list = []
        for var in tf.trainable_variables():
            var_name = var.op.name
            var_name_mess = str(var_name).split('/')
            if var_name_mess[0] in [
                    'conv_sbbox', 'conv_mbbox', 'conv_lbbox'
            ]:
                self.first_stage_trainable_var_list.append(var)

        first_stage_optimizer = tf.train.AdamOptimizer(
            self.learn_rate).minimize(
                self.loss, var_list=self.first_stage_trainable_var_list)
        # The no_op depends on: UPDATE_OPS, the optimizer step, the
        # global-step increment and the moving-average update.
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies(
                [first_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_frozen_variables = tf.no_op()

    with tf.name_scope("define_second_stage_train"):
        # Second stage: every trainable variable is optimised.
        second_stage_trainable_var_list = tf.trainable_variables()
        second_stage_optimizer = tf.train.AdamOptimizer(
            self.learn_rate).minimize(
                self.loss, var_list=second_stage_trainable_var_list)

        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            with tf.control_dependencies(
                [second_stage_optimizer, global_step_update]):
                with tf.control_dependencies([moving_ave]):
                    self.train_op_with_all_variables = tf.no_op()

    with tf.name_scope('loader_and_saver'):
        # loader covers only the network variables captured in net_var;
        # saver covers every global variable and keeps the last 10 files.
        self.loader = tf.train.Saver(self.net_var)
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    with tf.name_scope('summary'):
        tf.summary.scalar("learn_rate", self.learn_rate)
        tf.summary.scalar("giou_loss", self.giou_loss)
        tf.summary.scalar("conf_loss", self.conf_loss)
        tf.summary.scalar("prob_loss", self.prob_loss)
        tf.summary.scalar("total_loss", self.loss)

        # Any previous log directory is wiped on every construction.
        # NOTE(review): os.mkdir requires "./data" to already exist - confirm.
        logdir = "./data/log/"
        if os.path.exists(logdir):
            shutil.rmtree(logdir)
        os.mkdir(logdir)
        self.write_op = tf.summary.merge_all()
        self.summary_writer = tf.summary.FileWriter(logdir,
                                                    graph=self.sess.graph)
def main(_):
    """Train one pruned-network configuration selected by this process's MPI rank.

    The configuration index is ``FLAGS.start_configuration_index + rank``.
    Unless training is being continued from an existing checkpoint in the
    run's ``train`` directory, the pruned layers are initialised from values
    returned by ``get_init_values_for_pruned_layers`` and the remaining model
    variables are restored from ``FLAGS.checkpoint_path``. The loop then
    trains up to ``FLAGS.max_number_of_steps``, periodically writing
    summaries, timelines, accuracy evaluations and checkpoints, and appends
    progress lines to ``train_details.txt``.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty, or if a variable to be
            re-initialised has no entry in the computed initial values.
    """
    tic = time.time()
    print('tensorflow version:', tf.__version__)
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    # init
    net_name_scope_pruned = FLAGS.net_name_scope_pruned
    net_name_scope_checkpoint = FLAGS.net_name_scope_checkpoint
    indexed_prune_scopes_for_units = valid_indexed_prune_scopes_for_units
    # Comma-separated flag -> ascending list of floats.
    kept_percentages = sorted(map(float, FLAGS.kept_percentages.split(',')))

    num_options = len(kept_percentages)
    num_units = len(indexed_prune_scopes_for_units)
    print('num_options=%d, num_blocks=%d' % (num_options, num_units))
    print('HG: total number of configurations=%d' % (num_options**num_units))

    # find the configurations to evaluate
    # NOTE(review): any other configuration_type leaves `configs` unbound and
    # the len(configs) below raises NameError.
    if FLAGS.configuration_type == 'sample':
        configs = get_sampled_configurations(num_units, num_options,
                                             FLAGS.total_num_configurations)
    elif FLAGS.configuration_type == 'special':
        configs = get_special_configurations(num_units, num_options)
    num_configurations = len(configs)

    # Get this process's MPI rank; ranks beyond the configuration count exit.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    if rank >= num_configurations:
        print("ERROR: rank(%d) > num_configurations(%d)" %
              (rank, num_configurations))
        return
    # NOTE(review): only `rank` is bounds-checked above; the index used here
    # also adds start_configuration_index.
    FLAGS.configuration_index = FLAGS.start_configuration_index + rank
    config = configs[FLAGS.configuration_index]
    print('HG: kept_percentages=%s, start_config_index=%d, num_configs=%d, rank=%d, config_index=%d' \
          %(str(kept_percentages), FLAGS.start_configuration_index, num_configurations, rank, FLAGS.configuration_index))

    # prepare for training with the specific config
    indexed_prune_scopes, kept_percentage = config_to_indexed_prune_scopes(
        config, indexed_prune_scopes_for_units, kept_percentages)
    prune_info = indexed_prune_scopes_to_prune_info(indexed_prune_scopes,
                                                    kept_percentage)

    # prepare file system
    results_dir = os.path.join(
        FLAGS.train_dir, 'id' +
        str(FLAGS.configuration_index))  #+'_'+str(FLAGS.max_number_of_steps))
    train_dir = os.path.join(results_dir, 'train')

    # Fresh start (not continuing, or nothing to continue from): compute the
    # initial values for the pruned layers and prepare the output directory.
    # The same condition is re-evaluated inside the session below, where
    # reinit_scopes and variables_init_value are consumed.
    if (not FLAGS.continue_training) or (
            not tf.train.latest_checkpoint(train_dir)):
        prune_scopes = indexed_prune_scopes_to_prune_scopes(
            indexed_prune_scopes, net_name_scope_checkpoint)
        shorten_scopes = indexed_prune_scopes_to_shorten_scopes(
            indexed_prune_scopes, net_name_scope_checkpoint)
        variables_init_value = get_init_values_for_pruned_layers(
            prune_scopes, shorten_scopes, kept_percentage)
        # Same scopes, renamed from the checkpoint's name scope to the pruned
        # network's name scope.
        reinit_scopes = [
            re.sub(net_name_scope_checkpoint, net_name_scope_pruned, v)
            for v in prune_scopes + shorten_scopes
        ]

        prepare_file_system(train_dir)

    def write_detailed_info(info):
        # Append one line of text to <train_dir>/train_details.txt.
        with open(os.path.join(train_dir, 'train_details.txt'), 'a') as f:
            f.write(info + '\n')

    info = 'train_dir:' + train_dir + '\n'
    info += 'options:' + str(kept_percentages) + '\n'
    info += 'configuration: ' + str(config) + '\n'
    info += 'indexed_prune_scopes: ' + str(indexed_prune_scopes) + '\n'
    info += 'kept_percentage: ' + str(kept_percentage)
    print(info)
    write_detailed_info(info)

    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.train_dataset_name,
                                              FLAGS.dataset_dir)
        test_dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                                   FLAGS.test_dataset_name,
                                                   FLAGS.dataset_dir)

        batch_queue = train_inputs(dataset, deploy_config, FLAGS)
        test_images, test_labels = test_inputs(test_dataset, deploy_config,
                                               FLAGS)
        images, labels = batch_queue.dequeue()

        ######################
        # Select the network #
        ######################
        network_fn_pruned = nets_factory.get_network_fn_pruned(
            FLAGS.model_name,
            prune_info=prune_info,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay)
        print('HG: prune_info:')
        pprint(prune_info)

        ####################
        # Define the model #
        ####################
        # The evaluation tower is built second with reuse_variables=True.
        logits_train, _ = network_fn_pruned(images,
                                            is_training=True,
                                            is_local_train=False,
                                            reuse_variables=False,
                                            scope=net_name_scope_pruned)
        logits_eval, _ = network_fn_pruned(test_images,
                                           is_training=False,
                                           is_local_train=False,
                                           reuse_variables=True,
                                           scope=net_name_scope_pruned)

        cross_entropy = add_cross_entropy(logits_train, labels)
        correct_prediction = add_correct_prediction(logits_eval, test_labels)

        #############################
        # Specify the loss function #
        #############################
        tf.add_to_collection('subgraph_losses', cross_entropy)
        # get regularization loss
        regularization_losses = get_regularization_losses_within_scopes()
        print_list('regularization_losses', regularization_losses)

        # total loss and its summary
        # total_loss sums whatever is in the 'subgraph_losses' collection.
        total_loss = tf.add_n(tf.get_collection('subgraph_losses'),
                              name='total_loss')
        for l in tf.get_collection('subgraph_losses') + [total_loss]:
            tf.summary.scalar(l.op.name + '/summary', l)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.variables_device()):
            global_step = tf.Variable(0, trainable=False, name='global_step')
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = configure_learning_rate(dataset.num_samples,
                                                    global_step, FLAGS)
            optimizer = configure_optimizer(learning_rate, FLAGS)
            tf.summary.scalar('learning_rate', learning_rate)

        #############################
        # Add train operation       #
        #############################
        variables_to_train = get_trainable_variables_within_scopes()
        train_op = add_train_op(optimizer,
                                total_loss,
                                global_step,
                                var_list=variables_to_train)
        print_list("variables_to_train", variables_to_train)

        # Gather update_ops: the updates for the batch_norm variables created by network_fn_pruned.
        update_ops = get_update_ops_within_scopes()
        print_list("update_ops", update_ops)

        # add train_tensor
        # Fetching train_tensor runs the train op and the update ops, and
        # yields the total loss value.
        update_ops.append(train_op)
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # add summary op
        summary_op = tf.summary.merge_all()

        print("HG: trainable_variables=", len(tf.trainable_variables()))
        print("HG: model_variables=", len(tf.model_variables()))
        print("HG: global_variables=", len(tf.global_variables()))

        sess_config = tf.ConfigProto(intra_op_parallelism_threads=16,
                                     inter_op_parallelism_threads=16)
        with tf.Session(config=sess_config) as sess:
            ###########################
            # Prepare for filewriter. #
            ###########################
            train_writer = tf.summary.FileWriter(train_dir, sess.graph)

            # if restart the training or there is no checkpoint in the train_dir
            if (not FLAGS.continue_training) or (
                    not tf.train.latest_checkpoint(train_dir)):
                #########################################
                # Reinit pruned model variable          #
                #########################################
                variables_to_reinit = get_model_variables_within_scopes(
                    reinit_scopes)
                print_list("Initialize pruned variables", variables_to_reinit)
                assign_ops = []
                for v in variables_to_reinit:
                    # Initial values are keyed by the checkpoint-scope name.
                    key = re.sub(net_name_scope_pruned,
                                 net_name_scope_checkpoint, v.op.name)
                    if key in variables_init_value:
                        value = variables_init_value.get(key)
                        # print(key, value)
                        assign_ops.append(
                            tf.assign(v,
                                      tf.convert_to_tensor(value),
                                      validate_shape=True))
                        # v.set_shape(value.shape)
                    else:
                        raise ValueError(
                            "Key not in variables_init_value, key=", key)
                assign_op = tf.group(*assign_ops)
                sess.run(assign_op)

                #################################################
                # Restore unchanged model variable.             #
                #################################################
                # Every model variable that was not re-initialised above,
                # keyed by its checkpoint-scope name.
                variables_to_restore = {
                    re.sub(net_name_scope_pruned, net_name_scope_checkpoint, v.op.name):
                    v
                    for v in get_model_variables_within_scopes()
                    if v not in variables_to_reinit
                }
                print_list("restore model variables",
                           variables_to_restore.values())
                load_checkpoint(sess,
                                FLAGS.checkpoint_path,
                                var_list=variables_to_restore)

            else:
                ###########################################
                ## Restore all variables from checkpoint ##
                ###########################################
                variables_to_restore = get_global_variables_within_scopes()
                load_checkpoint(sess, train_dir, var_list=variables_to_restore)

            #################################################
            # init unitialized global variable.             #
            #################################################
            # Initialise only what is still reported as uninitialised.
            variables_to_init = get_global_variables_within_scopes(
                sess.run(tf.report_uninitialized_variables()))
            print_list("init unitialized variables", variables_to_init)
            sess.run(tf.variables_initializer(variables_to_init))

            init_global_step_value = sess.run(global_step)
            print('initial global step: ', init_global_step_value)
            if init_global_step_value >= FLAGS.max_number_of_steps:
                print('Exit: init_global_step_value (%d) >= FLAG.max_number_of_steps (%d)' \
                      %(init_global_step_value, FLAGS.max_number_of_steps))
                return

            ###########################
            # Record CPU usage        #
            ###########################
            # Starts mpstat in the background through the shell; nothing in
            # this function stops it again.
            mpstat_output_filename = os.path.join(train_dir, "cpu-usage.log")
            os.system("mpstat -P ALL 1 > " + mpstat_output_filename +
                      " 2>&1 &")

            ###########################
            # Kicks off the training. #
            ###########################
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            saver = tf.train.Saver(max_to_keep=FLAGS.max_to_keep)
            print('HG: # of threads=', len(threads))

            # duration / duration_cnt: time and count of train-only steps
            # since the last log line. train_time / train_only_cnt: the same
            # over the whole run.
            duration = 0
            duration_cnt = 0
            train_time = 0
            train_only_cnt = 0

            print("start to train at:", datetime.now())
            for i in range(init_global_step_value,
                           FLAGS.max_number_of_steps + 1):
                # run optional meta data, or summary, while run train tensor
                #if i < FLAGS.max_number_of_steps:
                if i > init_global_step_value:
                    # train while run metadata
                    if i % FLAGS.runmeta_every_n_steps == FLAGS.runmeta_every_n_steps - 1:
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()
                        loss_value = sess.run(train_tensor,
                                              options=run_options,
                                              run_metadata=run_metadata)
                        train_writer.add_run_metadata(run_metadata,
                                                      'step%d-train' % i)

                        # Create the Timeline object, and write it to a json file
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format()
                        with open(
                                os.path.join(train_dir,
                                             'timeline_' + str(i) + '.json'),
                                'w') as f:
                            f.write(chrome_trace)

                    # train while record summary
                    elif i % FLAGS.summary_every_n_steps == 0:
                        train_summary, loss_value = sess.run(
                            [summary_op, train_tensor])
                        train_writer.add_summary(train_summary, i)

                    # train only
                    else:
                        # Only these plain steps are timed.
                        start_time = time.time()
                        loss_value = sess.run(train_tensor)
                        train_only_cnt += 1
                        train_time += time.time() - start_time
                        duration_cnt += 1
                        duration += time.time() - start_time

                    # log loss information
                    if i % FLAGS.log_every_n_steps == 0 and duration_cnt > 0:
                        log_frequency = duration_cnt
                        examples_per_sec = log_frequency * FLAGS.batch_size / duration
                        sec_per_batch = float(duration / log_frequency)
                        summary = tf.Summary()
                        summary.value.add(tag='examples_per_sec',
                                          simple_value=examples_per_sec)
                        summary.value.add(tag='sec_per_batch',
                                          simple_value=sec_per_batch)
                        train_writer.add_summary(summary, i)
                        format_str = (
                            '%s: step %d, loss = %.3f (%.1f examples/sec; %.3f sec/batch)'
                        )
                        print(format_str % (datetime.now(), i, loss_value,
                                            examples_per_sec, sec_per_batch))
                        duration = 0
                        duration_cnt = 0

                        info = format_str % (datetime.now(), i, loss_value,
                                             examples_per_sec, sec_per_batch)
                        write_detailed_info(info)
                else:
                    # First iteration (i equals the restored global step):
                    # only the loss is evaluated, no training update is run.
                    train_summary, loss_value = sess.run(
                        [summary_op, total_loss])
                    #loss_value = sess.run(total_loss)
                    train_writer.add_summary(train_summary, i)
                    format_str = ('%s: step %d, loss = %.3f')
                    print(format_str % (datetime.now(), i, loss_value))
                    info = format_str % (datetime.now(), i, loss_value)
                    write_detailed_info(info)

                # record the evaluation accuracy
                is_last_step = (i == FLAGS.max_number_of_steps)
                if i % FLAGS.evaluate_every_n_steps == 0 or is_last_step:
                    #run_meta = (i==FLAGS.evaluate_every_n_steps)
                    # NOTE(review): test_images/test_labels are passed twice
                    # in a row - confirm against evaluate_accuracy's signature.
                    test_accuracy, run_metadata = evaluate_accuracy(
                        sess,
                        coord,
                        test_dataset.num_samples,
                        test_images,
                        test_labels,
                        test_images,
                        test_labels,
                        correct_prediction,
                        FLAGS.test_batch_size,
                        run_meta=False)
                    summary = tf.Summary()
                    summary.value.add(tag='accuracy',
                                      simple_value=test_accuracy)
                    train_writer.add_summary(summary, i)
                    #if run_meta:
                    #    eval_writer.add_run_metadata(run_metadata, 'step%d-eval' % i)

                    info = ('%s: step %d, test_accuracy = %.6f') % (
                        datetime.now(), i, test_accuracy)
                    print(info)
                    write_detailed_info(info)

                    ###########################
                    # Save model parameters . #
                    ###########################
                    #saver = tf.train.Saver(var_list=get_model_variables_within_scopes([net_name_scope_pruned+'/']))
                    save_path = saver.save(
                        sess, os.path.join(train_dir, 'model.ckpt-' + str(i)))
                    print("HG: Model saved in file: %s" % save_path)

            coord.request_stop()
            coord.join(threads)
            total_time = time.time() - tic
            # Mean time of a train-only step, then extrapolated to
            # max_number_of_steps steps.
            # NOTE(review): divides by zero if no train-only step was run.
            train_speed = train_time * 1.0 / train_only_cnt
            train_time = train_speed * (
                FLAGS.max_number_of_steps
            )  # - init_global_step_value) #/train_only_cnt
            info = "HG: training speed(sec/batch): %.6f\n" % (train_speed)
            info += "HG: training time(min): %.1f, total time(min): %.1f" % (
                train_time / 60.0, total_time / 60.0)
            print(info)
            write_detailed_info(info)
# Train summaries for tensorboard train_summary_op = tf.summary.merge([loss_gen_summary,loss_discr_lab_summary, loss_discr_score_summary]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries for tensorboard dev_summary_op = tf.summary.merge([loss_discr_score_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(batch_noise, batch_img_score, batch_score, batch_img_lab, batch_lab, train_gen): """ A single training step i.e. training the score discriminator, the label discriminator and if necessary the generator. Args: batch_noise: input batch for the generator batch_img_score: input batch for the score discriminator batch_img_lab: input batch for the label discriminator batch_score: scores corresponding the input score images batch_lab: labels corresponding the input label images train_gen(bool): whether or not to train the generator
def get_vars(self, scope):
    """Return the global variables belonging to ``scope``, ordered by name.

    A variable is selected when ``"<self.directory>/<scope>"`` occurs
    anywhere in its name.
    """
    scoped_name = self.directory + "/" + scope
    matches = []
    for variable in tf.global_variables():
        if scoped_name in variable.name:
            matches.append(variable)
    matches.sort(key=lambda variable: variable.name)
    return matches
def train(x_train, y_train, vocab_processor, x_dev, y_dev, embedding):
    """Build the model named by ``params.model_version`` and train it.

    Summaries, the vocabulary and checkpoints are written under
    ``runs/<model_version>_<timestamp>/``. The dev set is evaluated every
    ``params.evaluate_every`` steps and a checkpoint is saved every
    ``FLAGS.checkpoint_every`` steps.

    Args:
        x_train: training inputs; ``x_train.shape[1]`` is the sequence length.
        y_train: training labels; ``y_train.shape[1]`` is the number of classes.
        vocab_processor: vocabulary object; the size of its ``vocabulary_``
            sets the vocabulary size, and it is saved with the run outputs.
        x_dev: dev inputs, fed to the model as one batch per evaluation.
        y_dev: dev labels.
        embedding: value fed to ``model.embedding_placeholder`` when
            ``model.embedding_init`` is run.

    Raises:
        AttributeError: if ``params.model_version`` names no known model.
    """
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)

        # NOTE(review): the CNN-based models take embedding_size from
        # FLAGS.embedding_dim, the LSTM-only ones from params.embedding_dim.
        if params.model_version == 'TextCNN':
            model = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda)
        elif params.model_version == 'CNN_LSTM':
            model = CNN_LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_hidden=params.num_hidden,
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda)
        elif params.model_version == 'LSTM_CNN':
            model = LSTM_CNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda,
                num_hidden=params.num_hidden)
        elif params.model_version == 'LSTM':
            model = LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                num_hidden=params.num_hidden,
                embedding_size=params.embedding_dim,
                l2_reg_lambda=params.l2_reg_lambda)
        elif params.model_version == 'Bi_LSTM':
            model = Bi_LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                num_hidden=params.num_hidden,
                embedding_size=params.embedding_dim,
                l2_reg_lambda=params.l2_reg_lambda)
        else:
            raise AttributeError("No model found at model_dir")

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = '{}_'.format(params.model_version) + str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        logging.critical("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", model.loss)
        acc_summary = tf.summary.scalar("accuracy", model.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=params.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables FIRST, then load the supplied embedding.
        # The global initializer resets every variable to its initial value,
        # so running it after embedding_init would discard the embedding
        # (assuming embedding_init assigns the fed matrix to the embedding
        # variable, as its name and feed suggest).
        init = tf.global_variables_initializer()
        sess.run(init)
        sess.run(model.embedding_init, feed_dict={model.embedding_placeholder: embedding})

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                model.input_x: x_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: params.dropout_keep_prob
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, model.loss, model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            logging.critical("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
                model.input_x: x_batch,
                model.input_y: y_batch,
                # Keep every unit during evaluation: dropout is a
                # training-time regulariser and would add noise to the
                # reported dev loss/accuracy.
                model.dropout_keep_prob: 1.0
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, model.loss, model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            logging.critical("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), params.batch_size, params.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % params.evaluate_every == 0:
                logging.info("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                logging.info("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                logging.info("Saved model checkpoint to {}\n".format(path))
def main(argv=None):
    """Build the multi-tower training graph and run the training loop.

    Creates ``FLAGS.checkpoint_path`` (wiping it first unless
    ``FLAGS.restore`` is set), builds one loss tower per entry of the
    module-level ``gpus`` list, averages the tower gradients and trains for at
    most ``FLAGS.max_steps`` steps. Throughput is printed every 10 steps;
    checkpoints and summaries are written every
    ``FLAGS.save_checkpoint_steps`` / ``FLAGS.save_summary_steps`` steps.

    Args:
        argv: Unused.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    # Start from an empty checkpoint directory unless we are resuming.
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    elif not FLAGS.restore:
        tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
        tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    # The geometry map has 5 channels for 'RBOX' and 8 otherwise.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # Split every input batch evenly across the towers.
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                # NOTE: rebound on every tower, so only the last tower's
                # UPDATE_OPS end up in the train op below.
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Every tower after the first reuses the variables.
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    # Join instead of concatenating: without a trailing slash on
    # FLAGS.checkpoint_path, concatenation writes checkpoints to a sibling
    # path that latest_checkpoint(FLAGS.checkpoint_path) cannot find.
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_path, 'model.ckpt')

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            feed = {
                input_images: data[0],
                input_score_maps: data[2],
                input_geo_maps: data[3],
                input_training_masks: data[4]
            }
            # model_loss / total_loss are the tensors of the last tower built.
            ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                 feed_dict=feed)
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (
                    10 * FLAGS.batch_size_per_gpu * len(gpus)) / (time.time() -
                                                                  start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, checkpoint_prefix, global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                # NOTE: this fetches train_op again, i.e. performs a second
                # update on the same batch whenever a summary is written.
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op], feed_dict=feed)
                summary_writer.add_summary(summary_str, global_step=step)
def run():
    """Train ``Model(args)`` on batches served by ``utils.DataLoader``.

    Reads its configuration from the module-level ``args``. If
    ``config.pkl`` exists in ``args.model_dir`` the weights are restored from
    the hard-coded checkpoint ``model-53.ckpt-48000`` and the global step is
    reset to 0. The learning rate is re-assigned at the start of every epoch
    (exponential decay, clipped below at ``args.learning_rate_clip``). A
    checkpoint and ``config.pkl`` are written every ``args.save_every``
    iterations.
    """
    # Create the data loader object.
    data_loader = utils.DataLoader(args.batch_size,
                                   dataset_dirs=args.train_dataset,
                                   is_argumentation=args.data_argumentation,
                                   target_image_size=args.target_image_size)
    [args.norm_mean,
     args.norm_std] = [data_loader.norm_mean, data_loader.norm_std]

    # Create a Tensorflow Model
    model = Model(args)

    # Build the learning-rate assignment ONCE and feed the value per epoch.
    # Calling tf.assign inside the epoch loop adds a new op to the graph on
    # every epoch.
    lr_value = tf.placeholder(model.lr.dtype.base_dtype, shape=[])
    assign_lr = tf.assign(model.lr, lr_value)

    # Initialize a TensorFlow session
    with tf.Session() as sess:
        # One saver for restore and every save. Constructing a Saver per
        # checkpoint grows the graph each time. max_to_keep=None keeps the
        # effect the per-save savers had: no checkpoint is ever deleted.
        saver = tf.train.Saver(max_to_keep=None)

        # Initialize all the variables in the graph
        sess.run(tf.global_variables_initializer())

        # Summaries
        train_writer = tf.summary.FileWriter(
            os.path.join(args.model_dir, 'log'), sess.graph)
        tf.summary.scalar('total_loss', model.total_loss)
        tf.summary.scalar('trans_loss', model.trans_loss)
        tf.summary.scalar('rot_loss', model.rot_loss)
        tf.summary.scalar('rot_loss0', model.rot_loss0)
        tf.summary.scalar('lamda_sigmoid', model.lamda_weights_sigmoid)
        all_summaries = tf.summary.merge_all()

        if os.path.isfile(os.path.join(args.model_dir, 'config.pkl')):
            # Train from saved model.
            # NOTE(review): the checkpoint name is hard-coded.
            ckpt_file = os.path.join(args.model_dir, 'model-53.ckpt-48000')
            print('loading model: ', ckpt_file)
            # Restore the model at the checkpoint
            saver.restore(sess, ckpt_file)
            print('model restored.')
            # Assign the global step
            sess.run(tf.assign(model.global_step, 0))

        # Training loop
        for e in range(args.num_epochs):
            # Assign the learning rate (decayed acc. to the epoch number)
            sess.run(assign_lr,
                     feed_dict={
                         lr_value:
                         max(args.learning_rate_clip,
                             args.learning_rate * (args.decay_rate**e))
                     })
            # shuffle_data
            data_loader.shuffle_data(mode='train')
            # Running sum of the total loss over this epoch's batches
            train_loss = 0.
            for b in range(data_loader.num_batches):
                # Index of this batch counted from the start of training
                iteration = e * data_loader.num_batches + b
                # Tic
                start = time.time()
                # x has the source data, y has the target data
                x, y = data_loader.next_batch(b)
                feed = {model.input_data: x, model.target_data: y}
                # Run one optimisation step and fetch losses and summaries
                batch_total_loss, batch_trans_loss, batch_rot_loss, global_step, summaries, _ = sess.run(
                    [
                        model.total_loss, model.trans_loss, model.rot_loss,
                        model.global_step, all_summaries, model.train_op
                    ], feed)
                train_writer.add_summary(summaries, iteration)
                # Toc
                end = time.time()
                # Print epoch, batch, loss and time taken
                train_loss += batch_total_loss
                if b % args.display == 0:
                    print(
                        "{}/{} (epoch {}), train_loss = {}, time/batch = {:.3f}, learning rate = {:.9f}"
                        .format(iteration,
                                args.num_epochs * data_loader.num_batches, e,
                                train_loss / (b + 1), end - start,
                                sess.run(model.lr)))

                # Save the model every args.save_every iterations, never on
                # the very first one.
                if (iteration + 1) % args.save_every == 0 and iteration > 0:
                    checkpoint_path = os.path.join(args.model_dir,
                                                   'model-{}.ckpt'.format(e))
                    saver.save(sess, checkpoint_path, global_step=global_step)
                    print("model saved to {}".format(checkpoint_path))
                    # Save the arguments in the config file
                    with open(os.path.join(args.model_dir, 'config.pkl'),
                              'wb') as f:
                        pickle.dump(args, f)
def __init__(self,
             config,
             use_lstm=False,
             num_samples=512,
             forward=False,
             scope_name='gen_seq2seq',
             dtype=tf.float32):
    """Build the bucketed seq2seq graph and one update op per bucket.

    Everything is created under ``tf.variable_scope(scope_name)``. For every
    bucket the loss is multiplied by a fed ``reward`` placeholder before
    gradients are taken, clipped by global norm and applied with plain
    gradient descent.

    Args:
        config: Object providing ``vocab_size``, ``buckets``,
            ``learning_rate``, ``learning_rate_decay_factor``,
            ``batch_size``, ``emb_dim``, ``num_layers`` and
            ``max_gradient_norm``.
        use_lstm: Use ``BasicLSTMCell`` instead of ``GRUCell``.
        num_samples: Unused in this constructor.
        forward: Unused in this constructor.
        scope_name: Variable scope for the graph; also the substring used to
            select this model's variables for training and saving.
        dtype: dtype of the learning-rate variable and weight placeholders.
    """
    self.scope_name = scope_name
    with tf.variable_scope(self.scope_name):
        self.source_vocab_size = config.vocab_size
        self.target_vocab_size = config.vocab_size
        self.buckets = config.buckets
        self.learning_rate = tf.Variable(float(config.learning_rate),
                                         trainable=False,
                                         dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * config.learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = config.batch_size
        self.emb_dim = config.emb_dim
        self.num_layers = config.num_layers
        self.max_gradient_norm = config.max_gradient_norm
        self.mc_search = tf.placeholder(tf.bool, name="mc_search")
        self.forward_only = tf.placeholder(tf.bool, name="forward_only")

        # No output projection and no custom softmax loss are passed on.
        output_projection = None
        softmax_loss_function = None

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim)
        if use_lstm:
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim)
        cell = single_cell
        if self.num_layers > 1:
            # NOTE(review): the same cell object is repeated for every layer.
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] *
                                               self.num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return rl_seq2seq.embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=self.source_vocab_size,
                num_decoder_symbols=self.target_vocab_size,
                embedding_size=self.emb_dim,
                output_projection=output_projection,
                feed_previous=do_decode,
                mc_search=self.mc_search,
                dtype=dtype)

        # Feeds for inputs, sized for the last bucket (the biggest one).
        # `range` replaces the Python-2-only `xrange`; this block already
        # used `range` for the reward placeholders.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        for i in range(self.buckets[-1][0]):
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        for i in range(self.buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype,
                               shape=[None],
                               name="weight{0}".format(i)))
        # One reward placeholder per bucket.
        self.reward = [
            tf.placeholder(tf.float32, name="reward_%i" % i)
            for i in range(len(self.buckets))
        ]

        # Our targets are decoder inputs shifted by one.
        targets = self.decoder_inputs[1:]

        self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
            self.encoder_inputs,
            self.decoder_inputs,
            targets,
            self.target_weights,
            self.buckets,
            self.emb_dim,
            self.batch_size,
            lambda x, y: seq2seq_f(
                x, y, tf.select(self.forward_only, True, False)),
            output_projection=output_projection,
            softmax_loss_function=softmax_loss_function)

        with tf.name_scope("gradient_descent"):
            self.gradient_norms = []
            self.updates = []
            # Only variables whose name contains the scope name are trained.
            self.gen_params = [
                p for p in tf.trainable_variables()
                if self.scope_name in p.name
            ]
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)
            for b in range(len(self.buckets)):
                # Weight the bucket loss by its reward before differentiating.
                adjusted_losses = tf.mul(self.losses[b], self.reward[b])
                gradients = tf.gradients(adjusted_losses, self.gen_params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, self.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients,
                                            self.gen_params),
                                        global_step=self.global_step))

        # The saver covers every global variable whose name contains the
        # scope name.
        self.gen_variables = [
            k for k in tf.global_variables() if self.scope_name in k.name
        ]
        self.saver = tf.train.Saver(self.gen_variables)
def train_textCNN2Input(x_train, x_1_train, y_train, vocab_processor, x_dev,
                        x_1_dev, y_dev, parameter, gpu_id):
    """Train a ``TextCNN2Input`` model and return the last checkpoint path.

    Builds the model in a fresh graph, trains it with Adam, evaluates on the
    dev set every ``parameter["evaluate_every"]`` steps and saves a checkpoint
    every ``parameter["checkpoint_every"]`` steps. Summaries, checkpoints and
    the vocabulary go under ``./textCNN2Inputs_runs/<timestamp>/``.

    Args:
        x_train, x_1_train: The two training inputs; ``shape[1]`` of each is
            used as the sequence length.
        y_train: Training labels; ``shape[1]`` is the number of classes.
        vocab_processor: Object with ``vocabulary_`` and ``save(path)``.
        x_dev, x_1_dev, y_dev: Dev-set counterparts of the training data.
        parameter: Dict of hyper-parameters. If it contains
            ``"init_checkpoint"``, variables are initialised from it.
        gpu_id: Passed to ``TextCNN2Input`` as ``gpu``.

    Returns:
        The path returned by the last ``saver.save`` call, or ``None`` if no
        checkpoint was written.
    """
    path = None
    import tensorflow as tf
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=parameter["allow_soft_placement"],
            log_device_placement=parameter["log_device_placement"])
        # Entering the session makes it the default session AND closes it on
        # exit; the original never released it.
        with tf.Session(config=session_conf) as sess:
            cnn = TextCNN2Input(
                sequence_length=x_train.shape[1],
                sequence_length_1=x_1_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=parameter["embedding_dim"],
                filter_sizes=list(
                    map(int, parameter["filter_sizes"].split(","))),
                num_filters=parameter["num_filters"],
                l2_reg_lambda=parameter["l2_reg_lambda"],
                gpu=gpu_id)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(parameter["learning_rate"])
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "textCNN2Inputs_runs",
                             timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=parameter["num_checkpoints"])

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "textCNN_vocab"))

            # Initialize all variables. init_from_checkpoint only rewires the
            # variable initializers and returns None, so it must not be
            # passed to sess.run, and the initializer op still has to run
            # afterwards.
            if "init_checkpoint" in parameter:
                tf.train.init_from_checkpoint(parameter['init_checkpoint'],
                                              {'/': '/'})
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, x_1_batch, y_batch, dropout_keep_prob):
                """
                A single training step
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_x_1: x_1_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, x_1_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_x_1: x_1_batch,
                    cnn.input_y: y_batch,
                    # Dropout is disabled for evaluation.
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(
                    time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = batch_iter(list(zip(x_train, x_1_train, y_train)),
                                 parameter["batch_size"],
                                 parameter["num_epochs"])
            # Training loop. For each batch...
            for batch in batches:
                x_batch, x_1_batch, y_batch = zip(*batch)
                train_step(x_batch, x_1_batch, y_batch,
                           parameter["dropout_keep_prob"])
                current_step = tf.train.global_step(sess, global_step)
                if current_step % parameter["evaluate_every"] == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, x_1_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % parameter["checkpoint_every"] == 0:
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    return path
def forward(self):
    """Build the forward graph, the span loss and the EMA assign ops.

    Layers, in order: shared-weight highway embedding of context and
    question, a shared-weight embedding encoder, context-to-query and
    query-to-context attention, three passes through one shared model-encoder
    stack, and start/end pointer logits.

    Sets ``self.c2q``, ``self.q2c``, ``self.enc``, ``self.logits``,
    ``self.yp1``, ``self.yp2`` and ``self.loss``. When ``config.decay`` is
    not ``None`` it also sets ``self.var_ema`` and ``self.assign_vars``.
    """
    config = self.config
    d, nh = config.hidden, config.num_heads

    with tf.variable_scope("Input_Embedding_Layer"):
        c_emb = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.c),
            1.0 - self.dropout)
        q_emb = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.q),
            1.0 - self.dropout)
        # The question reuses the highway weights created for the context.
        c_emb = highway(c_emb,
                        size=d,
                        scope="highway",
                        dropout=self.dropout,
                        reuse=None)
        q_emb = highway(q_emb,
                        size=d,
                        scope="highway",
                        dropout=self.dropout,
                        reuse=True)

    with tf.variable_scope("Embedding_Encoder_Layer"):
        c = residual_block(c_emb,
                           num_blocks=1,
                           num_conv_layers=4,
                           kernel_size=7,
                           mask=self.c_mask,
                           num_filters=d,
                           num_heads=nh,
                           seq_len=self.c_len,
                           scope="Encoder_Residual_Block",
                           bias=False,
                           dropout=self.dropout)
        q = residual_block(
            q_emb,
            num_blocks=1,
            num_conv_layers=4,
            kernel_size=7,
            mask=self.q_mask,
            num_filters=d,
            num_heads=nh,
            seq_len=self.q_len,
            scope="Encoder_Residual_Block",
            reuse=True,  # Share the weights between passage and question
            bias=False,
            dropout=self.dropout)

    with tf.variable_scope("Context_to_Query_Attention_Layer"):
        S = optimized_trilinear_for_attention(
            [c, q],
            self.c_maxlen,
            self.q_maxlen,
            input_keep_prob=1.0 - self.dropout)
        # Softmax over the last axis with question padding masked out.
        mask_q = tf.expand_dims(self.q_mask, 1)
        S_ = tf.nn.softmax(mask_logits(S, mask=mask_q))
        # Softmax over axis 1 with context padding masked out, transposed.
        mask_c = tf.expand_dims(self.c_mask, 2)
        S_T = tf.transpose(
            tf.nn.softmax(mask_logits(S, mask=mask_c), dim=1), (0, 2, 1))
        self.c2q = tf.matmul(S_, q)
        self.q2c = tf.matmul(tf.matmul(S_, S_T), c)
        attention_outputs = [c, self.c2q, c * self.c2q, c * self.q2c]

    with tf.variable_scope("Model_Encoder_Layer"):
        inputs = tf.concat(attention_outputs, axis=-1)
        self.enc = [conv(inputs, d, name="input_projection")]
        for i in range(3):
            if i % 2 == 0:  # dropout every 2 blocks
                self.enc[i] = tf.nn.dropout(self.enc[i], 1.0 - self.dropout)
            # All three passes share one set of "Model_Encoder" weights.
            self.enc.append(
                residual_block(self.enc[i],
                               num_blocks=7,
                               num_conv_layers=2,
                               kernel_size=5,
                               mask=self.c_mask,
                               num_filters=d,
                               num_heads=nh,
                               seq_len=self.c_len,
                               scope="Model_Encoder",
                               bias=False,
                               reuse=True if i > 0 else None,
                               dropout=self.dropout))

    with tf.variable_scope("Output_Layer"):
        start_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[2]], axis=-1),
                 1,
                 bias=False,
                 name="start_pointer"), -1)
        end_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[3]], axis=-1),
                 1,
                 bias=False,
                 name="end_pointer"), -1)
        self.logits = [
            mask_logits(start_logits, mask=self.c_mask),
            mask_logits(end_logits, mask=self.c_mask)
        ]

        logits1, logits2 = self.logits

        # Outer product of start and end probabilities; keeping only the
        # upper triangle discards pairs with end < start.
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, -1)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                         labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                          labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)

    if config.l2_norm is not None:
        variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # `regularizer` is a name defined outside this method.
        l2_loss = tf.contrib.layers.apply_regularization(
            regularizer, variables)
        self.loss += l2_loss

    if config.decay is not None:
        self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
        ema_op = self.var_ema.apply(tf.trainable_variables())
        # Evaluating the loss also triggers the moving-average update.
        with tf.control_dependencies([ema_op]):
            self.loss = tf.identity(self.loss)

            self.assign_vars = []
            for var in tf.global_variables():
                v = self.var_ema.average(var)
                # average() returns None for untracked variables. Compare
                # with None explicitly instead of testing the truthiness of
                # a TensorFlow variable.
                if v is not None:
                    self.assign_vars.append(tf.assign(var, v))
def initialize():
    """Run the initializers of global variables not yet initialized here.

    Variables already recorded in ``ALREADY_INITIALIZED`` are skipped; the
    newly initialized ones are added to it afterwards.
    """
    pending = {
        variable
        for variable in tf.global_variables()
        if variable not in ALREADY_INITIALIZED
    }
    init_op = tf.variables_initializer(pending)
    get_session().run(init_op)
    ALREADY_INITIALIZED.update(pending)
def train(self):
    """Train the model from the BERT checkpoint, evaluating periodically.

    Variable initializers are mapped to the BERT checkpoint before all global
    variables are initialized. Training metrics are printed after every
    batch. Every ``config["checkpoint_every"]`` steps the evaluation set is
    scored and, if ``config["ckpt_model_path"]`` is set, a checkpoint is
    saved there.
    """
    with tf.Session() as sess:
        tvars = tf.trainable_variables()
        assignment_map, _ = modeling.get_assignment_map_from_checkpoint(
            tvars, self.__bert_checkpoint_path)
        print("init bert model params")
        tf.train.init_from_checkpoint(self.__bert_checkpoint_path,
                                      assignment_map)
        print("init bert model params done")
        sess.run(tf.variables_initializer(tf.global_variables()))

        current_step = 0
        start = time.time()
        for epoch in range(self.config["epochs"]):
            print("----- Epoch {}/{} -----".format(epoch + 1,
                                                   self.config["epochs"]))

            for batch in self.data_obj.next_batch(self.t_in_ids,
                                                  self.t_in_masks,
                                                  self.t_seg_ids,
                                                  self.t_lab_ids,
                                                  self.t_seq_len):
                loss, true_y, predictions = self.model.train(
                    sess, batch, self.config["keep_prob"])

                f1, precision, recall = gen_metrics(
                    pred_y=predictions,
                    true_y=true_y,
                    label_to_index=self.lab_to_idx)
                print(
                    "train: step: {}, loss: {}, recall: {}, precision: {}, f1: {}"
                    .format(current_step, loss, recall, precision, f1))

                current_step += 1
                if self.data_obj and current_step % self.config[
                        "checkpoint_every"] == 0:
                    eval_losses = []
                    eval_recalls = []
                    eval_precisions = []
                    eval_f1s = []
                    for eval_batch in self.data_obj.next_batch(
                            self.e_in_ids, self.e_in_masks, self.e_seg_ids,
                            self.e_lab_ids, self.e_seq_len):
                        eval_loss, eval_true_y, eval_predictions = self.model.eval(
                            sess, eval_batch)
                        eval_losses.append(eval_loss)

                        # Same keyword as the training call above. The
                        # original passed `labels=` here, which cannot match
                        # the same signature as `label_to_index=`.
                        # NOTE(review): confirm against gen_metrics.
                        f1, precision, recall = gen_metrics(
                            pred_y=eval_predictions,
                            true_y=eval_true_y,
                            label_to_index=self.lab_to_idx)
                        eval_recalls.append(recall)
                        eval_precisions.append(precision)
                        eval_f1s.append(f1)
                    print("\n")
                    print("eval: loss: {}, recall: {}, precision: {}, f1: {}"
                          .format(mean(eval_losses), mean(eval_recalls),
                                  mean(eval_precisions), mean(eval_f1s)))
                    print("\n")

                    if self.config["ckpt_model_path"]:
                        save_path = self.config["ckpt_model_path"]
                        if not os.path.exists(save_path):
                            os.makedirs(save_path)
                        model_save_path = os.path.join(
                            save_path, self.config["model_name"])
                        self.model.saver.save(sess,
                                              model_save_path,
                                              global_step=current_step)

        end = time.time()
        print("total train time: ", end - start)
def train(opt, x_train, y_train, x_test, y_test):
    """Fine-tune a restored ``LSTM`` model, saving and testing every epoch.

    The graph is built by ``LSTM(opt).build_model()``, weights are restored
    from ``opt.rcst_model_base_path``, and an Adam train op is added
    afterwards. Only variables still uninitialized after the restore are
    initialized, so restored weights are kept.

    Only full batches of ``opt.batch_size`` are used; a trailing partial
    batch is skipped in both training and testing.

    Args:
        opt: Options object (seed, batch size, learning-rate schedule,
            ``max_epochs``, model paths).
        x_train, y_train: Training inputs and one-hot labels.
        x_test, y_test: Test inputs and one-hot labels.
    """
    tf.set_random_seed(opt.seed)
    model = LSTM(opt)
    tf_loss, tf_rcst_loss, tf_logit_outputs, tf_pixels, tf_onehot_labels = model.build_model(
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)

    # restore from the pre-trained model
    saver = tf.train.Saver(max_to_keep=opt.max_epochs, write_version=1)
    saver.restore(sess, opt.rcst_model_base_path)

    tf_learning_rate = tf.placeholder(tf.float32)
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        train_op = tf.train.AdamOptimizer(tf_learning_rate).minimize(tf_loss)

    # Reading an uninitialized variable raises FailedPreconditionError; use
    # that to find the variables the restore above did not cover.
    uninitialized_vars = []
    for var in tf.global_variables():
        try:
            sess.run(var)
        except tf.errors.FailedPreconditionError:
            uninitialized_vars.append(var)
    if uninitialized_vars:
        init_new_vars_op = tf.variables_initializer(uninitialized_vars)
        sess.run(init_new_vars_op)

    batch_size = opt.batch_size
    iter_cnt = 0
    for epoch in range(0, opt.max_epochs):
        if epoch == 0:
            current_learning_rate = opt.learning_rate
        elif epoch % opt.learning_rate_decay_every == 0:
            current_learning_rate = current_learning_rate * opt.learning_rate_decay_rate

        # training
        # Start offsets of every FULL batch. The original paired
        # range(0, N, bs) with range(bs, N, bs), whose end bound excludes N
        # and so dropped the last full batch when N is a multiple of bs.
        for start in range(0, x_train.shape[0] - batch_size + 1, batch_size):
            end = start + batch_size
            start_time = time.time()
            current_batch_pixels_train = x_train[start:end]
            current_batch_labels_train = y_train[start:end]
            feed_dict = {
                tf_learning_rate: current_learning_rate,
                tf_pixels: current_batch_pixels_train,
                tf_onehot_labels: current_batch_labels_train
            }
            _, loss_val, rcst_loss_val, logit_outputs = sess.run(
                [train_op, tf_loss, tf_rcst_loss, tf_logit_outputs],
                feed_dict)

            # Compute the training accuracy
            pred_y = np.argmax(logit_outputs, axis=1)
            true_y = np.argmax(current_batch_labels_train, axis=1)
            current_acc = sum(pred_y == true_y) / float(batch_size)

            iter_cnt += 1
            end_time = time.time()
            print(
                "iter {:4d} epoch {:3d} lr {:.5f} loss {:.4f} rcst_loss {:.4f} train_acc {:.4f} time batch {:.4f}"
                .format(iter_cnt, epoch, current_learning_rate, loss_val,
                        rcst_loss_val, current_acc, end_time - start_time))

        # validation (np.mod(epoch, 1) is always 0, so this runs every epoch)
        if np.mod(epoch, 1) == 0:
            print("epoch {} is done, saving the model ...".format(epoch))
            saver.save(sess,
                       os.path.join(opt.rcst_model_save_path, 'model_epoch'),
                       global_step=epoch)

            true_cnt = 0
            test_batch_cnt = 0
            # Same full-batch bounds as the training loop.
            for start in range(0, x_test.shape[0] - batch_size + 1,
                               batch_size):
                end = start + batch_size
                current_batch_pixels_test = x_test[start:end]
                current_batch_labels_test = y_test[start:end]
                feed_dict = {
                    tf_learning_rate: current_learning_rate,
                    tf_pixels: current_batch_pixels_test,
                    tf_onehot_labels: current_batch_labels_test
                }
                loss_test, logit_outputs = sess.run(
                    [tf_loss, tf_logit_outputs], feed_dict)

                # Compute the validation accuracy
                pred_y = np.argmax(logit_outputs, axis=1)
                true_y = np.argmax(current_batch_labels_test, axis=1)
                true_cnt += sum(pred_y == true_y)
                test_batch_cnt += 1

            num_tested = test_batch_cnt * batch_size
            # Report NaN instead of dividing by zero when the test set is
            # smaller than one batch.
            test_acc = true_cnt / float(num_tested) if num_tested else float(
                'nan')
            print("epoch {} test_acc {:.4f} test_num: {}".format(
                epoch, test_acc, num_tested))
def train(self):
    """Build the training graph, load pretrained weights and train.

    Creates the network in "TRAIN" mode, sets up a momentum optimizer
    (doubling bias gradients when ``cfg.FLAGS.double_bias`` is set),
    initializes all variables, restores the subset returned by
    ``self.net.get_variables_to_restore`` from ``cfg.FLAGS.pretrained_model``
    and runs up to ``cfg.FLAGS.max_iters`` iterations. The learning rate is
    multiplied by ``cfg.FLAGS.gamma`` once, at iteration
    ``cfg.FLAGS.step_size + 1``. A summary is written every 100 iterations
    and ``self.snapshot`` is called every ``cfg.FLAGS.snapshot_iterations``.

    An exception raised inside an iteration is printed and training
    continues with the next iteration.
    """
    # Create session
    tfconfig = tf.ConfigProto(
        allow_soft_placement=True
    )  # allow_soft_placement = true : select GPU automatically
    tfconfig.gpu_options.allow_growth = True
    sess = tf.Session(config=tfconfig)

    with sess.graph.as_default():
        tf.set_random_seed(cfg.FLAGS.rng_seed)
        layers = self.net.create_architecture(sess,
                                              "TRAIN",
                                              self.imdb.num_classes,
                                              tag='default')
        loss = layers['total_loss']
        lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
        optimizer = tf.train.MomentumOptimizer(lr, cfg.FLAGS.momentum)

        gvs = optimizer.compute_gradients(loss)

        # Double the gradient of the bias if set
        if cfg.FLAGS.double_bias:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in gvs:
                    if '/biases:' in var.name:
                        grad = tf.multiply(grad, 2.)
                    final_gvs.append((grad, var))
            train_op = optimizer.apply_gradients(final_gvs)
        else:
            train_op = optimizer.apply_gradients(gvs)

        # We will handle the snapshots ourselves
        self.saver = tf.train.Saver(max_to_keep=100000)
        # Write the train and validation information to tensorboard
        writer = tf.summary.FileWriter('default/', sess.graph)

    # Load weights
    # Fresh train directly from ImageNet weights
    print('Loading initial model weights from {:s}'.format(
        cfg.FLAGS.pretrained_model))
    variables = tf.global_variables()
    # Initialize all variables first
    sess.run(tf.variables_initializer(variables, name='init'))
    var_keep_dic = self.get_variables_in_checkpoint_file(
        cfg.FLAGS.pretrained_model)
    # Get the variables to restore, ignoring the variables to fix
    variables_to_restore = self.net.get_variables_to_restore(
        variables, var_keep_dic, sess, cfg.FLAGS.pretrained_model)

    restorer = tf.train.Saver(variables_to_restore)
    restorer.restore(sess, cfg.FLAGS.pretrained_model)
    print('Loaded.')
    # Need to fix the variables before loading, so that the RGB weights are
    # changed to BGR. For VGG16 it also changes the convolutional weights fc6
    # and fc7 to fully connected weights
    self.net.fix_variables(sess, cfg.FLAGS.pretrained_model)
    print('Fixed.')
    sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
    last_snapshot_iter = 0

    timer = Timer()
    # Named iter_num rather than `iter` to avoid shadowing the builtin.
    iter_num = last_snapshot_iter + 1
    print('START TRAINING: ...')
    while iter_num < cfg.FLAGS.max_iters + 1:
        # Advance the counter before doing any work that can raise. The
        # original incremented only after data_layer.forward(), so a
        # persistent failure there was swallowed below and the loop spun
        # forever on the same iteration.
        lr_check_iter = iter_num
        iter_num += 1
        try:
            # Learning rate
            if lr_check_iter == cfg.FLAGS.step_size + 1:
                # Add snapshot here before reducing the learning rate
                # self.snapshot(sess, iter)
                sess.run(
                    tf.assign(lr,
                              cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))

            timer.tic()
            # Get training data, one batch at a time
            blobs = self.data_layer.forward()

            if iter_num % 100 == 0:
                # Every 100 iterations also compute and write the summary
                rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = self.net.train_step_with_summary(
                    sess, blobs, train_op)
                timer.toc()

                run_metadata = tf.RunMetadata()
                writer.add_run_metadata(run_metadata, 'step%03d' % iter_num)
                writer.add_summary(summary, iter_num)
            else:
                # Compute the graph without summary
                rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(
                    sess, blobs, train_op)
                timer.toc()

            # Display training information
            if iter_num % (cfg.FLAGS.display) == 0:
                print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                      '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
                      (iter_num, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
                print('speed: {:.3f}s / iter'.format(timer.average_time))

            if iter_num % cfg.FLAGS.snapshot_iterations == 0:
                self.snapshot(sess, iter_num)
        except Exception as e:
            # Best effort: report the failure and move on.
            print('iter %d failed: %r' % (iter_num, e))
def build_attack(self, gradient_names, init_vars=True, max_to_keep=5, lr=None):
    """Add the attack ops to ``self.model.graph``.

    Creates trainable stand-ins for every model input and target, runs the
    model's forward pass on them, and builds a loss equal to the summed
    squared difference between the resulting gradients and gradients fed
    through placeholders. A gradient-descent op then updates the stand-in
    ``'x'`` input and raw ``'y'`` target to reduce that loss.

    The created tensors and ops are registered in ``self.model.input``,
    ``self.model.output`` and ``self.model.op``.

    NOTE(review): most tensors are registered via a ``.cid`` attribute while
    the input variables use ``.name``. ``.cid`` is not a standard TensorFlow
    attribute; confirm where it comes from and whether the mix is intended.

    Args:
        gradient_names: Substrings; a global variable is included when any of
            them occurs in its ``cid``.
        init_vars: Forwarded to ``self.model.build_essential``.
        max_to_keep: Forwarded to ``self.model.build_essential``.
        lr: Learning rate of the attack optimizer; falls back to
            ``self.model.lr`` when falsy.
    """

    def str_match_list(v, vl):
        # True if any element of `vl` is contained in `v`.
        for e in vl:
            if e in v:
                return True
        return False

    with self.model.graph.as_default():
        # One randomly initialised variable per model input, with a leading
        # batch dimension.
        input_tensor = {}
        for input_name in self.model.inputs_shape:
            input_tensor[input_name] = \
                tf.Variable(
                    tf.random.normal(
                        [self.batch_size] + list(self.model.inputs_shape[input_name]), dtype=tf.float32),
                    name=input_name
                )
            self.model.output[input_name] = input_tensor[input_name].name

        # Targets are kept as raw variables; the model sees their softmax.
        target_tensor = {}
        target_tensor_raw = {}
        for target_name in self.model.targets_shape:
            target_tensor_raw[target_name] = tf.Variable(tf.random.normal(
                [self.batch_size] + list(self.model.targets_shape[target_name]), dtype=tf.float32), name=target_name)
            target_tensor[target_name] = tf.nn.softmax(
                target_tensor_raw[target_name], axis=-1)
            self.model.output[target_name] = target_tensor[target_name].cid

        loss = self.model.forward(inputs=input_tensor, targets=target_tensor, trainable=False)

        assert loss is not None

        # [variable, d(loss)/d(variable)] for every selected global variable.
        gradients = [[e, tf.gradients(loss, e)[0]] for e in tf.global_variables() if str_match_list(e.cid, gradient_names)]

        optimizer = tf.train.GradientDescentOptimizer(lr or self.model.lr)

        attack_loss = []
        for v, g in gradients:
            # Placeholder through which the gradient to match is fed.
            tmp_g = tf.compat.v1.placeholder(tf.float32, g.shape, g.cid.split(':')[0])
            self.model.input[v.cid] = tmp_g.cid
            self.model.output[v.cid] = g.cid
            # Gradient of the squared gradient norm w.r.t. the 'y' target.
            self.model.output[v.cid + '_y'] = tf.gradients(
                tf.reduce_sum(tf.square(g)), target_tensor['y'])[0].cid
            assert g.shape == tmp_g.shape
            attack_loss.append(tf.reduce_sum(tf.square(g - tmp_g)))
        attack_loss = tf.reduce_sum(attack_loss)

        # Two-row lookup tables: row 0 is all zeros, row 1 is all ones.
        mask_tensor_x = tf.concat([
            tf.zeros([1] + list(self.model.inputs_shape['x'])),
            tf.ones([1] + list(self.model.inputs_shape['x']))
        ], axis=0)
        mask_tensor_y = tf.concat([
            tf.zeros([1] + list(self.model.targets_shape['y'])),
            tf.ones([1] + list(self.model.targets_shape['y']))
        ], axis=0)

        # Per-example selector into the tables above: 0 zeroes the example's
        # gradient (no update), 1 leaves it unchanged.
        batch_index = tf.compat.v1.placeholder(tf.int32, [self.batch_size, ], name='batch_index')
        self.model.input['batch_index'] = batch_index.cid
        update_mask_x = tf.gather(mask_tensor_x, batch_index, axis=0)
        update_mask_y = tf.gather(mask_tensor_y, batch_index, axis=0)

        self.model.output['attack_loss'] = attack_loss.cid
        self.model.output['loss'] = loss.cid

        optimizer_gradients = optimizer.compute_gradients(
            attack_loss, [input_tensor['x'], target_tensor_raw['y']])
        # compute_gradients returns tuples; make them mutable to mask them.
        optimizer_gradients = [list(e) for e in optimizer_gradients]
        optimizer_gradients[0][0] = tf.multiply(update_mask_x, optimizer_gradients[0][0])
        optimizer_gradients[1][0] = tf.multiply(update_mask_y, optimizer_gradients[1][0])

        attack_train_op = optimizer.apply_gradients(optimizer_gradients)
        self.model.op['attack_train_op'] = attack_train_op.cid

        ########################
        # TMP
        self.model.output['grad_x'] = tf.gradients(
            attack_loss, input_tensor['x'])[0].cid
        self.model.output['grad_y'] = tf.gradients(
            attack_loss, target_tensor['y'])[0].cid
        ########################

        self.model.build_essential(init_vars=init_vars, max_to_keep=max_to_keep)
# Train Summaries train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=100) # Write vocabulary vocab_processor.save(os.path.join(checkpoint_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) print("init all variables") graph_def = tf.get_default_graph().as_graph_def() graphpb_txt = str(graph_def) with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f: f.write(graphpb_txt) if FLAGS.word2vec_model : # initial matrix with random uniform
def _strip_codalab_prefix(path):
    """Return *path* without a leading ``/0x.../`` bundle directory.

    Paths that start with ``/0x`` lose their first two ``os.sep``-separated
    components (the empty root and the bundle id); any other path is
    returned unchanged.
    """
    if path.startswith('/0x'):
        return os.sep.join(path.split(os.sep)[2:])
    return path


def main():
    """Run the saved model over the input data and dump predictions as JSON.

    All settings come from the module-level ``OPTS``:

    * ``OPTS.model`` -- model directory handed to ``ModelDir``.
    * ``OPTS.elmo`` -- when set, language-model file paths are normalised
      and the ``bilm`` variables are initialised instead of restored.
    * ``OPTS.output_file`` -- always written: question id -> top answer.
    * ``OPTS.na_prob_file``, ``OPTS.always_answer_file``,
      ``OPTS.analysis_file`` -- written only when set.
    """
    print('Starting...')
    model_dir = ModelDir(OPTS.model)
    model = model_dir.get_model()
    if OPTS.elmo:
        # Fix absolute path names from other codalab runs.
        lm = model.lm_model
        lm.lm_vocab_file = _strip_codalab_prefix(lm.lm_vocab_file)
        lm.options_file = _strip_codalab_prefix(lm.options_file)
        lm.weight_file = _strip_codalab_prefix(lm.weight_file)
        # The embedding weights file is always cleared. An earlier prefix
        # rewrite for it tested weight_file by mistake and its result was
        # overwritten here anyway, so it has been removed.
        lm.embed_weights_file = None

    input_data, vocab = read_input_data(model)

    print('Loading word vectors...')
    model.set_input_spec(ParagraphAndQuestionSpec(batch_size=None), vocab)

    print('Starting Tensorflow session...')
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    pred_obj = {}
    na_prob_obj = {}
    pred_always_ans_obj = {}
    analysis_obj = {}

    # The context manager installs the session as default and closes it
    # (releasing device memory) even if prediction fails part-way.
    with tf.Session(config=config) as sess:
        prediction = model.get_prediction()
        # Take 0-th here because we know we only truncate to one paragraph
        start_logits_tf = prediction.start_logits[0]
        end_logits_tf = prediction.end_logits[0]
        none_logit_tf = prediction.none_logit[0]

        if OPTS.elmo:
            # See elmo/run_on_user_text.py
            all_vars = (tf.global_variables()
                        + tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS))
            lm_var_names = {x.name for x in all_vars
                            if x.name.startswith("bilm")}
            model_vars = [x for x in all_vars if x.name not in lm_var_names]
            model_dir.restore_checkpoint(sess, model_vars)
            sess.run(tf.variables_initializer(
                [x for x in all_vars if x.name in lm_var_names]))
        else:
            model_dir.restore_checkpoint(sess)

        for context_raw, context_toks, ex in tqdm(input_data):
            encoded = model.encode(ex, is_train=False)
            start_logits, end_logits, none_logit = sess.run(
                [start_logits_tf, end_logits_tf, none_logit_tf],
                feed_dict=encoded)
            beam, p_na = logits_to_probs(
                context_raw, context_toks, start_logits, end_logits,
                none_logit, beam_size=10)
            # Each beam entry is indexed as (answer, prob, start, end).
            ans = beam[0][0]
            # Best non-empty answer; fall back to the top answer instead of
            # raising IndexError when every beam entry is empty.
            non_empty_ans = next((x[0] for x in beam if x[0]), ans)
            qid = ex[0].question_id
            pred_obj[qid] = ans
            na_prob_obj[qid] = p_na
            pred_always_ans_obj[qid] = non_empty_ans
            analysis_obj[qid] = [
                {'answer': b[0], 'span': [b[2], b[3]], 'prob': b[1]}
                for b in beam
            ]

    with open(OPTS.output_file, 'w') as f:
        json.dump(pred_obj, f)
    if OPTS.na_prob_file:
        with open(OPTS.na_prob_file, 'w') as f:
            json.dump(na_prob_obj, f)
    if OPTS.always_answer_file:
        with open(OPTS.always_answer_file, 'w') as f:
            json.dump(pred_always_ans_obj, f)
    if OPTS.analysis_file:
        with open(OPTS.analysis_file, 'w') as f:
            json.dump(analysis_obj, f, indent=2)
def __init__(self, board_width, board_height, block, init_model=None,
             transfer_model=None, cuda=False):
    """Build the policy/value network graph, its loss, optimizer and savers.

    Args:
        board_width: board width; used for the input and policy shapes.
        board_height: board height; used for the input and policy shapes.
        block: stored as ``self.nb_block`` (number of resnet blocks).
        init_model: if not None, passed to ``self.restore_model``.
        transfer_model: if ``init_model`` is None and this is not None,
            only the variables in ``self.restore_params`` are restored
            from it.
        cuda: when false, CUDA devices are hidden through environment
            variables before the session is created.
    """
    print()
    print('building network ...')
    print()

    self.planes_num = 9  # feature planes
    self.nb_block = block  # resnet blocks
    if not cuda:
        # Hide every GPU. With several GPUs it is better to assign an
        # explicit GPU id instead.
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    self.board_width = board_width
    self.board_height = board_height

    # Session.
    print(tf.__version__)
    self.session = tf.InteractiveSession()

    # Input states, shared by the train-mode and test-mode networks.
    self.input_states = tf.placeholder(
        tf.float32,
        shape=[None, self.planes_num, board_height, board_width])

    train_outputs = self.network(
        input_states=self.input_states, reuse=False, is_train=True)
    self.action_fc_train, self.evaluation_fc2_train = train_outputs
    test_outputs = self.network(
        input_states=self.input_states, reuse=True, is_train=False)
    self.action_fc_test, self.evaluation_fc2_test = test_outputs

    # Snapshot of the variables that exist once the main net is built.
    self.network_all_params = tf.global_variables()

    # ---- Loss ----
    # Value loss: labels hold the game outcome for each state and are
    # compared against the train-mode evaluation head.
    self.labels = tf.placeholder(tf.float32, shape=[None, 1])
    self.value_loss = tf.losses.mean_squared_error(
        self.labels, self.evaluation_fc2_train)

    # Policy loss: negative mean over the batch of sum(mcts_probs * action).
    self.mcts_probs = tf.placeholder(
        tf.float32, shape=[None, board_height * board_width])
    weighted_actions = tf.multiply(self.mcts_probs, self.action_fc_train)
    per_state_policy = tf.reduce_sum(weighted_actions, 1)
    self.policy_loss = tf.negative(tf.reduce_mean(per_state_policy))

    # L2 penalty over every trainable variable whose name has no 'bias'.
    l2_penalty_beta = 1e-4
    trainables = tf.trainable_variables()
    l2_terms = [tf.nn.l2_loss(v) for v in trainables
                if 'bias' not in v.name.lower()]
    l2_penalty = l2_penalty_beta * tf.add_n(l2_terms)

    # Total loss.
    self.loss = self.value_loss + self.policy_loss + l2_penalty

    # Optimizer; the learning rate is fed at run time.
    self.learning_rate = tf.placeholder(tf.float32)
    adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    self.optimizer = adam.minimize(self.loss)

    # Policy entropy, for monitoring only.
    entropy_terms = tf.exp(self.action_fc_test) * self.action_fc_test
    per_state_entropy = tf.reduce_sum(entropy_terms, 1)
    self.entropy = tf.negative(tf.reduce_mean(per_state_entropy))

    # All global variables; filtered below for transfer learning.
    self.network_params = tf.global_variables()

    # ---- Saving / restoring ----
    self.saver = tf.train.Saver()
    transfer_tags = ('conv2d', 'resnet', 'bn', 'flatten_layer')
    self.restore_params = [
        param for param in self.network_params
        if any(tag in param.name for tag in transfer_tags)
    ]
    self.saver_restore = tf.train.Saver(self.restore_params)

    self.session.run(tf.global_variables_initializer())

    if init_model is not None:
        self.restore_model(init_model)
        print('model loaded!')
    elif transfer_model is not None:
        self.saver_restore.restore(self.session, transfer_model)
        print('transfer model loaded !')
    else:
        print('can not find saved model, learn from scratch !')

    # ---- Opponent net for evaluating ----
    oppo_train_outputs = self.network(
        input_states=self.input_states, reuse=False, is_train=True,
        label='_oppo')
    (self.action_fc_train_oppo,
     self.evaluation_fc2_train_oppo) = oppo_train_outputs
    oppo_test_outputs = self.network(
        input_states=self.input_states, reuse=True, is_train=False,
        label='_oppo')
    (self.action_fc_test_oppo,
     self.evaluation_fc2_test_oppo) = oppo_test_outputs

    # The opponent's variables are taken to be the last
    # len(self.network_all_params) entries of the global variable list.
    every_variable = tf.global_variables()
    first_oppo_index = len(every_variable) - len(self.network_all_params)
    self.network_oppo_all_params = every_variable[first_oppo_index:]
[loss_summary, acc_summary, grad_summaries_merged]) train_summary_dir = os.path.join(out_dir, "summaries", "train") train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph) # Dev summaries dev_summary_op = tf.summary.merge([loss_summary, acc_summary]) dev_summary_dir = os.path.join(out_dir, "summaries", "dev") dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph) # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints")) checkpoint_prefix = os.path.join(checkpoint_dir, "model") if not os.path.exists(checkpoint_dir): os.makedirs(checkpoint_dir) saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints) # Write vocabulary vocab_processor.save(os.path.join(out_dir, "vocab")) # Initialize all variables sess.run(tf.global_variables_initializer()) def train_step(x_batch, y_batch): """ A single training step """ feed_dict = { cnn.input_x: x_batch, cnn.input_y: y_batch,
def train(self, sess, num_train_patterns, num_valid_patterns):
    """Run the training loop until ``self.config.max_epochs`` is reached.

    Gradients are accumulated over ``update_interval`` steps and their mean
    is applied through ``self.update_op``. Validation, summaries,
    checkpoints and a text log are produced at fixed step intervals.

    Args:
        sess: TensorFlow session that owns the graph.
        num_train_patterns: number of training examples; used to convert
            the step counter into epochs.
        num_valid_patterns: forwarded to ``self.validate_loss``.

    Side effects:
        Sets ``self.saver``, ``self.step`` and ``self.summarizer``; writes
        checkpoints, summaries and ``log.txt`` under
        ``self.config.train_dir``.
    """
    print('Training...')
    ckpt = tf.train.get_checkpoint_state(self.config.train_dir)
    sess.run(tf.global_variables_initializer())
    if ckpt and ckpt.model_checkpoint_path:
        # Resume: the step number is the suffix after the last '-' of the
        # checkpoint file name.
        self.saver = tf.train.Saver(max_to_keep=2)
        self.saver.restore(sess, ckpt.model_checkpoint_path)
        self.step = int(
            ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
    else:
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        self.step = 0

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    self.summarizer = tf.summary.FileWriter(self.config.train_dir,
                                            sess.graph)

    print_interval = 10  # steps
    update_interval = 10  # steps
    summary_interval = 200  # steps
    validate_interval = 200  # steps
    checkpoint_interval = 1000  # steps

    print('Start iterating...')
    start_time = time.time()
    batch_grads = None
    # try/finally ensures the queue-runner threads are stopped and joined
    # even when a step raises; previously they were only stopped on the
    # normal exit path.
    try:
        while True:
            current = self.step + 1
            # compute epochs
            epochs = 1.0 * current * self.config.batch_size / num_train_patterns
            do_print = (current % print_interval == 0)
            do_update = (current % update_interval == 0)
            do_summary = (current % summary_interval == 0)
            do_validate = (current % validate_interval == 0)
            do_checkpoint = (current % checkpoint_interval == 0)

            # training networks
            step_grads, _, step_losses = sess.run(
                [self.grad_list, self.bn_op, self.losses])
            # handle nan
            step_grads = [np.nan_to_num(grad) for grad in step_grads]
            batch_grads = self.cumulate_gradients(batch_grads, step_grads)

            # update gradients: feed the mean of the accumulated gradients
            if do_update:
                grad_dict = {}
                for k in range(len(self.grad_placeholder)):
                    placeholder = self.grad_placeholder[k][0]
                    grad_dict[placeholder] = batch_grads[k] / update_interval
                sess.run(self.update_op, feed_dict=grad_dict)
                batch_grads = None

            # validation
            if do_validate:
                self.validate_loss(sess, num_valid_patterns)

            # log
            if do_summary:
                summary_str = sess.run(self.train_summary_op)
                self.summarizer.add_summary(summary_str, self.step)

            if do_checkpoint:
                self.saver.save(
                    sess,
                    os.path.join(self.config.train_dir, 'model.ckpt'),
                    global_step=current)

            if do_print:
                now_time = time.time()
                batch_duration = now_time - start_time
                start_time = now_time
                # The format expects at least five loss values.
                log_str = 'Step %7d: %5.1f sec, epoch: %7.2f, loss: %7.3g %7.3g %7.3g %7.3g %7.3g\n' \
                    % (current, batch_duration, epochs,
                       step_losses[0], step_losses[1], step_losses[2],
                       step_losses[3], step_losses[4])
                print(log_str)
                log_file_name = os.path.join(self.config.train_dir,
                                             'log.txt')
                with open(log_file_name, 'a') as log_file:
                    log_file.write(log_str)

            if epochs >= self.config.max_epochs:
                break

            self.step += 1
    finally:
        coord.request_stop()
        coord.join(threads)
        # Flush rather than close: the writer stays on self and may still
        # be used after training.
        self.summarizer.flush()
def main():
    """Fuse 20 numbered image pairs with the trained model.

    For k = 1..20, reads ``<path1>/k.jpg`` and ``<path2>/k.jpg``, converts
    both to single-channel images sized like the second one, runs the
    restored model on the pair, shows the inputs and the result in one
    figure and saves the result under ``output_path``.
    """
    print('\nBegin to generate pictures ...\n')
    out_ext = '.jpg'

    def to_gray(img):
        """Collapse a multi-channel array with weights 0.3 / 0.59 / 0.11."""
        if len(img.shape) > 2:
            return (img[:, :, 0] * 0.3 + img[:, :, 1] * 0.59
                    + img[:, :, 2] * 0.11)
        return img

    for pair_idx in range(20):
        number = str(pair_idx + 1)
        file_name1 = path1 + '/' + number + '.jpg'
        file_name2 = path2 + '/' + number + '.jpg'

        # Scale pixel values by 1/255 on load.
        img1 = imread(file_name1) / 255.0
        img2 = imread(file_name2) / 255.0
        print('file1:', file_name1)
        print('file2:', file_name2)

        img1 = to_gray(img1)
        # The second image defines the working resolution.
        h, w = img2.shape[0], img2.shape[1]
        img2 = to_gray(img2)

        # Resize both to (h, w), then reshape to [1, h, w, 1].
        img1 = transform.resize(img1, (h, w)).reshape([1, h, w, 1])
        img2 = transform.resize(img2, (h, w)).reshape([1, h, w, 1])

        # A fresh graph and session per pair, since the model is built for
        # this pair's resolution.
        with tf.Graph().as_default(), tf.Session() as sess:
            M = Model(BATCH_SIZE=1, INPUT_H=h, INPUT_W=w, is_training=False)

            # Restore the trained model over all global variables.
            g_list = tf.global_variables()
            saver = tf.train.Saver(var_list=g_list)
            model_save_path = MODEL_SAVE_PATH
            print(model_save_path)
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, model_save_path)

            feed = {M.SOURCE1: img1, M.SOURCE2: img2}
            output = sess.run(M.generated_img, feed_dict=feed)
            output = output[0, :, :, 0]

            # Three stacked panels: source 1, source 2, fused output.
            fig = plt.figure()
            panels = [fig.add_subplot(311),
                      fig.add_subplot(312),
                      fig.add_subplot(313)]
            pictures = [img1[0, :, :, 0], img2[0, :, :, 0], output]
            for panel, picture in zip(panels, pictures):
                panel.imshow(picture, cmap='gray')
            plt.show()

            if not os.path.exists(output_path):
                os.makedirs(output_path)
            imsave(output_path + 'results_' + number + out_ext, output)

            del M
def update_model(self):
    """Perform one experience-replay training step if due, then advance ``self.t``.

    Training only happens when ``self.t`` is past ``self.learning_starts``,
    is a multiple of ``self.learning_freq`` and the replay buffer can
    supply ``self.batch_size`` samples. A training step consists of:

    3.a  sample a batch of transitions (observations, actions, rewards,
         next observations, done mask) from the replay buffer;
    3.b  on the first training step, initialise all global variables with
         the sampled observations fed in, and sync the target network;
    3.c  run ``self.train_fn`` with every placeholder populated; the
         learning rate comes from the optimizer spec's schedule at ``t``;
    3.d  run ``self.update_target_fn`` whenever ``self.t`` is a multiple of
         ``self.target_update_freq``.

    ``self.num_param_updates`` counts the training steps performed and
    ``self.t`` is incremented on every call, trained or not.
    """
    due_for_training = (
        self.t > self.learning_starts
        and self.t % self.learning_freq == 0
        and self.replay_buffer.can_sample(self.batch_size)
    )

    if due_for_training:
        # 3a. sample a batch of transitions
        transitions = self.replay_buffer.sample(self.batch_size)
        (obs_t_batch, act_batch, rew_batch,
         obs_tp1_batch, done_mask) = transitions

        # 3b. lazy initialisation on the first training step
        if not self.model_initialized:
            init_feed = {
                self.obs_t_ph: obs_t_batch,
                self.obs_tp1_ph: obs_tp1_batch,
            }
            initialize_interdependent_variables(
                self.session, tf.global_variables(), init_feed)
            self.session.run(self.update_target_fn)
            self.model_initialized = True

        # 3c. one gradient step on the sampled batch
        train_feed = {
            self.obs_t_ph: obs_t_batch,
            self.act_t_ph: act_batch,
            self.rew_t_ph: rew_batch,
            self.obs_tp1_ph: obs_tp1_batch,
            self.done_mask_ph: done_mask,
            self.learning_rate: self.optimizer_spec.lr_schedule.value(self.t),
        }
        self.session.run(self.train_fn, feed_dict=train_feed)

        # 3d. periodic target-network update
        if self.t % self.target_update_freq == 0:
            self.session.run(self.update_target_fn)

        self.num_param_updates += 1

    self.t += 1
def train(self):
    """Train the network with binarized convolution weights.

    For every batch:

    1. Each ``conv_layer_<j>`` weight tensor is discretized element-wise
       with ``self.discretize_function`` and written to the matching
       binary-weight variable. A per-output-channel scaling factor
       ``L1(D * w) / L1(D)`` is written to the matching scaling-factor
       variable.
    2. Gradients of the mean absolute error are evaluated on a batch from
       ``self.generate_data()``.
    3. Convolution weights are updated manually from the moment estimates
       ``self.m1`` / ``self.m2``, and ``self.D`` is refreshed for the next
       batch. The three fully-connected weight matrices get a plain
       gradient step.

    The learning rate is updated through ``self.update_learning_rate``
    after each epoch.
    """
    self.t_vars = tf.global_variables()
    self.original_weights = [
        var for var in self.t_vars
        if 'conv_layer' in var.name and '/w' in var.name
    ]
    self.binary_weights = [
        var for var in self.t_vars
        if 'conv_layer' in var.name and 'binary' in var.name
    ]
    self.scaling_factors = [
        var for var in self.t_vars if 'scaling_factor' in var.name
    ]
    self.training_vars = tf.trainable_variables()
    self.loss_value = tf.reduce_mean(
        tf.abs(self.actual_output - self.output))
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate, self.beta1,
                                            self.beta2, self.epsilon)
    # Build the gradient ops once. Calling compute_gradients inside the
    # batch loop added a new copy of the backward graph on every batch.
    grads_and_vars = self.optimizer.compute_gradients(
        self.loss_value, var_list=self.training_vars)

    # All evaluation goes through self.sess; the previous mix of a bare
    # global `sess`, the implicit default session and self.sess only worked
    # if all three happened to be the same object.
    self.sess.run(tf.global_variables_initializer())

    for i in range(self.no_epochs):
        iterations = int(self.train_size / self.batch_size)
        self.initialize_matrices()
        print("Epoch", i + 1)
        for k in range(iterations):
            print("Batch:", k + 1)
            print(
                "Determining binarized_weights and optimal scaling factor")
            for j in range(1, len(self.original_weights) + 1):
                string = 'conv_layer_' + str(j)
                # NOTE(review): substring match -- 'conv_layer_1' also
                # matches 'conv_layer_10'; the first match is used.
                weight_var = [
                    v for v in self.original_weights if string in v.name
                ][0]
                binary_var = [
                    v for v in self.binary_weights if string in v.name
                ][0]
                scale_var = [
                    v for v in self.scaling_factors if string in v.name
                ][0]
                v1 = self.sess.run(weight_var)
                sh = v1.shape
                v3 = np.zeros(sh)  # discretized weights
                v4 = np.zeros(sh[3])  # one scaling factor per last-axis slice
                for l in range(sh[3]):
                    for x in range(sh[0]):
                        for y in range(sh[1]):
                            for z in range(sh[2]):
                                v3[x][y][z][l] = self.discretize_function(
                                    v1[x][y][z][l])
                    v4[l] = self.L1_norm(
                        self.element_wise_mult(self.D[string][:, :, :, l],
                                               v1[:, :, :, l]))
                    v4[l] = (v4[l] * 1.0) / (self.L1_norm(
                        self.D[string][:, :, :, l]))
                # NOTE(review): each assign() still adds an op to the graph
                # on every batch; consider pre-built assign ops fed through
                # placeholders.
                self.sess.run(binary_var.assign(v3))
                self.sess.run(scale_var.assign(v4))

            print("Training................")
            (images, output) = self.generate_data()
            # vals is a list of (gradient value, variable value) pairs.
            vals = self.sess.run(
                grads_and_vars,
                feed_dict={
                    self.actual_output: output,
                    self.input_: images
                })

            # Pairs whose variable has rank >= 4 are taken to be the
            # convolution kernels, in the order of self.original_weights.
            indices = [
                idx for idx, pair in enumerate(vals)
                if len(pair[1].shape) >= 4
            ]
            for j in range(len(self.original_weights)):
                gradient = vals[indices[j]][0]
                weight = self.sess.run(self.original_weights[j])
                string = 'conv_layer_' + str(j + 1)
                self.m1[string] = self.beta1 * self.m1[string] + (
                    1.0 - self.beta1) * gradient
                self.m2[string] = self.beta2 * self.m2[string] + (
                    1.0 - self.beta2) * (self.element_wise_mult(
                        gradient, gradient))
                m1_unbiased = (self.m1[string] * 1.0) / (1.0 - self.beta1)
                m2_unbiased = (self.m2[string] * 1.0) / (1.0 - self.beta2)
                self.D[string] = (1.0 / self.learning_rate) * (
                    self.epsilon + np.sqrt(m2_unbiased))
                weight = weight - np.divide(m1_unbiased, self.D[string])
                self.sess.run(self.original_weights[j].assign(weight))

            # Rank-2 gradients, in order, belong to FCLayer1, FCLayer2 and
            # FCLayer; each gets a plain gradient-descent step.
            fc_gradients = [x[0] for x in vals if len(x[0].shape) == 2]
            fc_variables = (self.weights_FCLayer1, self.weights_FCLayer2,
                            self.weights_FCLayer)
            for position, fc_var in enumerate(fc_variables):
                weight = self.sess.run(fc_var)
                gradient = fc_gradients[position]
                weight = weight - (self.learning_rate * gradient)
                self.sess.run(fc_var.assign(weight))

        self.learning_rate = self.update_learning_rate(
            self.learning_rate, i)
def build_graph(self):
    """Build the siamese BiRNN sentence-pair classifier graph.

    The graph embeds a source and a target token sequence and encodes both
    with one shared bidirectional RNN. The two sentence vectors are
    combined through their element-wise product and absolute difference,
    passed through a tanh hidden layer, and scored with a sigmoid output.
    Placeholders, outputs, metrics, the train op, the merged summaries and
    a ``tf.train.Saver`` are stored as attributes on ``self``.

    Side effects:
        * ``self.config.embedding_size`` is overwritten when pretrained
          embeddings are loaded.
        * ``self.config.state_size`` is doubled in place.
    """
    # Reset previous graph.
    reset_graph()

    # Placeholders.
    x_source = tf.placeholder(tf.int32, shape=[None, None],
                              name="x_source")
    source_seq_length = tf.placeholder(tf.int32, shape=[None],
                                       name="source_seq_length")
    x_target = tf.placeholder(tf.int32, shape=[None, None],
                              name="x_target")
    target_seq_length = tf.placeholder(tf.int32, shape=[None],
                                       name="target_seq_length")
    labels = tf.placeholder(tf.float32, shape=[None], name="labels")
    input_dropout = tf.placeholder_with_default(1.0, shape=[],
                                                name="input_dropout")
    output_dropout = tf.placeholder_with_default(1.0, shape=[],
                                                 name="output_dropout")
    decision_threshold = tf.placeholder_with_default(
        0.5, shape=[], name="decision_threshold")

    def embedding_matrix(name, vocab_size, pretrained=None):
        """Create one embedding variable, optionally from pretrained values."""
        shape = [vocab_size, self.config.embedding_size]
        if pretrained is None:
            return tf.get_variable(name=name, shape=shape)
        return tf.get_variable(
            name=name,
            shape=shape,
            initializer=tf.constant_initializer(pretrained),
            # Frozen only when the config asks for fixed embeddings.
            trainable=not self.config.fix_pretrained)

    # Embedding layer.
    with tf.variable_scope("embeddings"):
        use_pretrained = (
            self.config.source_embeddings_path is not None
            and self.config.target_embeddings_path is not None)
        if use_pretrained:
            # NOTE(review): these four arguments are bare names, not
            # self.config attributes -- presumably module-level globals;
            # confirm they exist, otherwise this raises NameError.
            source_pretrained_embeddings,\
                target_pretrained_embeddings = get_pretrained_embeddings(
                    source_embeddings_path, target_embeddings_path,
                    source_vocab, target_vocab)
            assert source_pretrained_embeddings.shape[
                1] == target_pretrained_embeddings.shape[1]
            self.config.embedding_size = source_pretrained_embeddings.shape[
                1]
            source_embeddings = embedding_matrix(
                "source_embeddings_matrix", self.config.source_vocab_size,
                source_pretrained_embeddings)
            target_embeddings = embedding_matrix(
                "target_embeddings_matrix", self.config.target_vocab_size,
                target_pretrained_embeddings)
        else:
            source_embeddings = embedding_matrix(
                "source_embeddings_matrix", self.config.source_vocab_size)
            target_embeddings = embedding_matrix(
                "target_embeddings_matrix", self.config.target_vocab_size)

        source_rnn_inputs = tf.nn.embedding_lookup(source_embeddings,
                                                   x_source)
        target_rnn_inputs = tf.nn.embedding_lookup(target_embeddings,
                                                   x_target)
        source_rnn_inputs = tf.nn.dropout(source_rnn_inputs,
                                          keep_prob=input_dropout,
                                          name="source_seq_embeddings")
        target_rnn_inputs = tf.nn.dropout(target_rnn_inputs,
                                          keep_prob=input_dropout,
                                          name="target_seq_embeddings")

    def recurrent_layer():
        """Create one LSTM/GRU layer with output dropout applied."""
        if self.config.use_lstm:
            cell = tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                           use_peepholes=True)
        else:
            cell = tf.nn.rnn_cell.GRUCell(self.config.state_size)
        return tf.nn.rnn_cell.DropoutWrapper(
            cell, output_keep_prob=output_dropout)

    def recurrent_stack():
        """Create a single layer, or a MultiRNNCell of num_layers layers."""
        if self.config.num_layers > 1:
            # Output dropout is applied on every layer of the stack.
            # Previously the stack was rebuilt from fresh cells, so
            # output_dropout was ignored whenever num_layers > 1.
            return tf.nn.rnn_cell.MultiRNNCell([
                recurrent_layer() for _ in range(self.config.num_layers)
            ])
        return recurrent_layer()

    # BiRNN encoder; the same cells (and weights) encode both sentences.
    with tf.variable_scope("birnn") as scope:
        cell_fw = recurrent_stack()
        cell_bw = recurrent_stack()

        with tf.variable_scope(scope):
            source_rnn_outputs, source_final_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=source_rnn_inputs,
                sequence_length=source_seq_length,
                dtype=tf.float32)

        with tf.variable_scope(scope, reuse=True):
            target_rnn_outputs, target_final_state = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=target_rnn_inputs,
                sequence_length=target_seq_length,
                dtype=tf.float32)

        # Forward and backward states are concatenated below.
        # NOTE(review): this mutates the config in place, so calling
        # build_graph twice on the same config doubles the size again.
        self.config.state_size *= 2

        # Mean and max pooling only work for 1 layer BiRNN.
        if self.config.use_mean_pooling:
            source_final_state = self.average_pooling(
                source_rnn_outputs, source_seq_length)
            target_final_state = self.average_pooling(
                target_rnn_outputs, target_seq_length)
        elif self.config.use_max_pooling:
            source_final_state = self.max_pooling(source_rnn_outputs)
            target_final_state = self.max_pooling(target_rnn_outputs)
        else:
            source_final_state_fw, source_final_state_bw = source_final_state
            target_final_state_fw, target_final_state_bw = target_final_state
            if self.config.num_layers > 1:
                # Keep the state of the top layer only.
                source_final_state_fw = source_final_state_fw[-1]
                source_final_state_bw = source_final_state_bw[-1]
                target_final_state_fw = target_final_state_fw[-1]
                target_final_state_bw = target_final_state_bw[-1]
            if self.config.use_lstm:
                # Use the .h component of the LSTM state tuple.
                source_final_state_fw = source_final_state_fw.h
                source_final_state_bw = source_final_state_bw.h
                target_final_state_fw = target_final_state_fw.h
                target_final_state_bw = target_final_state_bw.h
            source_final_state = tf.concat(
                [source_final_state_fw, source_final_state_bw], axis=1)
            target_final_state = tf.concat(
                [target_final_state_fw, target_final_state_bw], axis=1)

    # Feed-forward neural network.
    with tf.variable_scope("feed_forward"):
        h_multiply = tf.multiply(source_final_state, target_final_state)
        h_abs_diff = tf.abs(
            tf.subtract(source_final_state, target_final_state))

        W_1 = tf.get_variable(
            name="W_1",
            shape=[self.config.state_size, self.config.hidden_size])
        W_2 = tf.get_variable(
            name="W_2",
            shape=[self.config.state_size, self.config.hidden_size])
        b_1 = tf.get_variable(name="b_1",
                              shape=[self.config.hidden_size],
                              initializer=tf.constant_initializer(0.0))

        h_semantic = tf.tanh(
            tf.matmul(h_multiply, W_1) + tf.matmul(h_abs_diff, W_2) + b_1)

        W_3 = tf.get_variable(name="W_3",
                              shape=[self.config.hidden_size, 1])
        b_2 = tf.get_variable(name="b_2",
                              shape=[1],
                              initializer=tf.constant_initializer(0.0))

        logits = tf.matmul(h_semantic, W_3) + b_2
        logits = tf.squeeze(logits, name="logits")

    # Sigmoid output layer.
    with tf.name_scope("output"):
        probs = tf.sigmoid(logits, name="probs")
        predicted_class = tf.cast(tf.greater(probs, decision_threshold),
                                  tf.float32,
                                  name="predicted_class")

    # Loss.
    with tf.name_scope("cross_entropy"):
        losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits,
            labels=labels,
            name="cross_entropy_per_sequence")
        mean_loss = tf.reduce_mean(losses, name="cross_entropy_loss")

    # Optimization with global-norm gradient clipping.
    with tf.name_scope("optimization"):
        global_step = tf.Variable(initial_value=0,
                                  trainable=False,
                                  name="global_step")
        optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
        trainable_variables = tf.trainable_variables()
        gradients = tf.gradients(mean_loss,
                                 trainable_variables,
                                 name="gradients")
        clipped_gradients, global_norm = tf.clip_by_global_norm(
            gradients,
            clip_norm=self.config.max_gradient_norm,
            name="clipped_gradients")
        train_op = optimizer.apply_gradients(
            zip(clipped_gradients, trainable_variables),
            global_step=global_step)

    # Evaluation metrics; each is a (value, update_op) pair.
    accuracy = tf.metrics.accuracy(labels,
                                   predicted_class,
                                   name="accuracy")
    precision = tf.metrics.precision(labels,
                                     predicted_class,
                                     name="precision")
    recall = tf.metrics.recall(labels, predicted_class, name="recall")

    # Add summaries.
    tf.summary.scalar("loss", mean_loss)
    tf.summary.scalar("global_norm", global_norm)
    tf.summary.scalar("accuracy", accuracy[0])
    tf.summary.scalar("precision", precision[0])
    tf.summary.scalar("recall", recall[0])
    tf.summary.scalar("logits" + "/sparsity", tf.nn.zero_fraction(logits))
    tf.summary.histogram("logits" + "/activations", logits)
    tf.summary.histogram("probs", probs)

    # Add histogram for trainable variables.
    for var in trainable_variables:
        tf.summary.histogram(var.op.name, var)

    # Add histogram for gradients.
    for grad, var in zip(clipped_gradients, trainable_variables):
        if grad is not None:
            tf.summary.histogram(var.op.name + "/gradients", grad)

    # Assign placeholders and operations.
    self.x_source = x_source
    self.x_target = x_target
    self.source_seq_length = source_seq_length
    self.target_seq_length = target_seq_length
    self.labels = labels
    self.input_dropout = input_dropout
    self.output_dropout = output_dropout
    self.decision_threshold = decision_threshold
    self.train_op = train_op
    self.probs = probs
    self.predicted_class = predicted_class
    self.mean_loss = mean_loss
    self.accuracy = accuracy
    self.precision = precision
    self.recall = recall
    self.summaries = tf.summary.merge_all()
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
def main(_):
    """Train a CIFAR-10 classifier, validate periodically and report test accuracy.

    Builds the graph from ``deepnn``, trains for ``FLAGS.max_steps`` steps
    with Adam under an exponentially decayed learning rate, and writes
    train/validation summaries and checkpoints under ``run_log_dir``.
    Finally it evaluates once over the whole test set and prints the
    accuracy.

    Args:
        _: unused.
    """
    tf.reset_default_graph()

    # Import data
    cifar = cf.cifar10(batchSize=FLAGS.batch_size,
                       downloadDir=FLAGS.data_dir)

    with tf.variable_scope('inputs'):
        # Flattened input images.
        x = tf.placeholder(
            tf.float32,
            [None, FLAGS.img_width * FLAGS.img_height * FLAGS.img_channels])
        # Labels, one column per class.
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])
        # Variable to state whether training or testing
        testFlag = tf.placeholder(tf.uint8, [1])

    # Build the graph for the deep net
    y_conv, img_summary = deepnn(x, testFlag)

    # Loss: softmax cross-entropy
    with tf.variable_scope("x_entropy"):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_,
                                                    logits=y_conv))

    # Adam with a learning rate scaled by 0.8 every 1000 optimizer steps.
    batch_number = tf.Variable(0, trainable=False)
    our_learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate, batch_number, 1000, 0.8)
    optimizer = tf.train.AdamOptimizer(our_learning_rate).minimize(
        cross_entropy, global_step=batch_number)

    # calculate the prediction and the accuracy
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    loss_summary = tf.summary.scalar('Loss', cross_entropy)
    acc_summary = tf.summary.scalar('Accuracy', accuracy)

    # summaries for TensorBoard visualisation
    validation_summary = tf.summary.merge([img_summary, acc_summary])
    training_summary = tf.summary.merge([img_summary, loss_summary])
    test_summary = tf.summary.merge([img_summary, acc_summary])

    # saver for checkpoints
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        summary_writer = tf.summary.FileWriter(run_log_dir + '_train',
                                               sess.graph, flush_secs=5)
        summary_writer_validation = tf.summary.FileWriter(
            run_log_dir + '_validate', sess.graph, flush_secs=5)
        # Close both writers on every exit path so pending events reach
        # disk.
        try:
            sess.run(tf.global_variables_initializer())

            # Training and validation
            for step in range(FLAGS.max_steps):
                # Training: Backpropagation using train set
                (trainImages, trainLabels) = cifar.getTrainBatch()
                (testImages, testLabels) = cifar.getTestBatch()

                _, summary_str = sess.run(
                    [optimizer, training_summary],
                    feed_dict={x: trainImages, testFlag: [0],
                               y_: trainLabels})

                # NOTE(review): the training summary uses a period of
                # log_frequency + 1 while validation uses log_frequency;
                # kept as is.
                if step % (FLAGS.log_frequency + 1) == 0:
                    summary_writer.add_summary(summary_str, step)

                # Validation: Monitoring accuracy using validation set
                if step % FLAGS.log_frequency == 0:
                    validation_accuracy, summary_str = sess.run(
                        [accuracy, validation_summary],
                        feed_dict={x: testImages, testFlag: [1],
                                   y_: testLabels})
                    print('step %d, accuracy on validation batch: %g' %
                          (step, validation_accuracy))
                    summary_writer_validation.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % FLAGS.save_model == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(run_log_dir + '_train',
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

            # Testing

            # resetting the internal batch indexes
            cifar.reset()
            evaluated_images = 0
            # Sum of batch accuracy * batch size, i.e. the expected number
            # of correct predictions. Weighting by batch size stops a
            # smaller final batch from being over-counted, which a plain
            # mean over batches did.
            weighted_accuracy_sum = 0.0

            # don't loop back when we reach the end of the test set
            while evaluated_images < cifar.nTestSamples:
                (testImages, testLabels) = cifar.getTestBatch(
                    allowSmallerBatches=True)
                batch_images = testLabels.shape[0]
                if batch_images == 0:
                    # No progress is possible; avoid spinning forever.
                    break
                batch_accuracy = sess.run(
                    accuracy,
                    feed_dict={x: testImages, testFlag: [1],
                               y_: testLabels})
                weighted_accuracy_sum += batch_accuracy * batch_images
                evaluated_images += batch_images

            if evaluated_images:
                test_accuracy = weighted_accuracy_sum / evaluated_images
            else:
                test_accuracy = 0.0
            print('test set: accuracy on test set: %0.3f' % test_accuracy)
        finally:
            summary_writer.close()
            summary_writer_validation.close()
max_gradient=10.0, # sampler arguments sampler=None, batch_size=FLAGS.batch_size, global_step=global_step, is_learn_q=FLAGS.is_learn_q) config = tf.ConfigProto(gpu_options=tf.GPUOptions( per_process_gpu_memory_fraction=FLAGS.gpu_fraction, allow_growth=True), allow_soft_placement=True, log_device_placement=False) os.mkdir(FLAGS.savedir) restore_var_list = [] if not FLAGS.is_learn_q: for var in tf.global_variables(): print "var_name: ", var.name if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \ or 'optimizers/beta2_power' in var.name\ or var.name == 'global_step:0': pass else: restore_var_list.append(var) else: for var in tf.global_variables(): print "var_name: ", var.name if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \ or 'optimizers/beta2_power' in var.name\ or 'q_logits' in var.name\ or var.name == 'global_step:0': pass
def train(self):
    """Train the discriminator and the generator in alternation.

    Builds the model inside a fresh session, then for every epoch runs
    ``updates_per_epoch`` discriminator/generator updates, records their
    summaries, saves a checkpoint every ``self.snapshot_interval`` updates
    and prints the epoch-averaged ``d_loss`` / ``g_loss``.

    The summary writer is always closed on exit (normal completion or an
    exception such as the NaN check below) so buffered events are flushed.

    Raises:
        ValueError: if any epoch-averaged logged value is NaN.
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:%d" % cfg.GPU_ID):
            # build_model returns the update counter to start from; it is
            # used as the summary step and to derive the first epoch.
            counter = self.build_model(sess)
            saver = tf.train.Saver(tf.global_variables(),
                                   keep_checkpoint_every_n_hours=2)
            summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph)

            try:
                # Only these entries of self.log_vars are fetched and
                # averaged for the per-epoch console log.
                keys = ["d_loss", "g_loss"]
                log_vars = []
                log_keys = []
                for k, v in self.log_vars:
                    if k in keys:
                        log_vars.append(v)
                        log_keys.append(k)

                generator_lr = cfg.TRAIN.GENERATOR_LR
                discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
                num_embedding = cfg.TRAIN.NUM_EMBEDDING
                lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
                number_example = self.dataset.train._num_examples
                updates_per_epoch = int(number_example / self.batch_size)
                epoch_start = int(counter / updates_per_epoch)

                # NOTE(review): when epoch_start > 0 the learning rates
                # still start from the configured values; decays for the
                # skipped epochs are not re-applied -- confirm intended.
                for epoch in range(epoch_start, self.max_epoch):
                    widgets = ["epoch #%d|" % epoch,
                               Percentage(), Bar(), ETA()]
                    pbar = ProgressBar(maxval=updates_per_epoch,
                                       widgets=widgets)
                    pbar.start()

                    # Halve both learning rates every lr_decay_step epochs.
                    if epoch % lr_decay_step == 0 and epoch != 0:
                        generator_lr *= 0.5
                        discriminator_lr *= 0.5

                    all_log_vals = []
                    for i in range(updates_per_epoch):
                        pbar.update(i)

                        images, wrong_images, embeddings, _, _ = \
                            self.dataset.train.next_batch(self.batch_size,
                                                          num_embedding)
                        feed_dict = {
                            self.images: images,
                            self.wrong_images: wrong_images,
                            self.embeddings: embeddings,
                            self.generator_lr: generator_lr,
                            self.discriminator_lr: discriminator_lr,
                        }

                        # Discriminator update.
                        feed_out = [self.discriminator_trainer,
                                    self.d_sum,
                                    self.hist_sum,
                                    log_vars]
                        _, d_sum, hist_sum, log_vals = sess.run(feed_out,
                                                                feed_dict)
                        summary_writer.add_summary(d_sum, counter)
                        summary_writer.add_summary(hist_sum, counter)
                        all_log_vals.append(log_vals)

                        # Generator update on the same batch.
                        feed_out = [self.generator_trainer, self.g_sum]
                        _, g_sum = sess.run(feed_out, feed_dict)
                        summary_writer.add_summary(g_sum, counter)

                        # Periodic checkpoint.
                        counter += 1
                        if counter % self.snapshot_interval == 0:
                            snapshot_path = "%s/%s_%s.ckpt" % \
                                (self.checkpoint_dir,
                                 self.exp_name,
                                 str(counter))
                            fn = saver.save(sess, snapshot_path)
                            print("Model saved in file: %s" % fn)

                    img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                    summary_writer.add_summary(img_sum, counter)

                    avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                    dic_logs = {}
                    for k, v in zip(log_keys, avg_log_vals):
                        dic_logs[k] = v

                    log_line = "; ".join("%s: %s" %
                                         (str(k), str(dic_logs[k]))
                                         for k in dic_logs)
                    print("Epoch %d | " % (epoch) + log_line)
                    sys.stdout.flush()
                    if np.any(np.isnan(avg_log_vals)):
                        raise ValueError("NaN detected!")
            finally:
                # Flush pending events and release the event file even
                # when training aborts (e.g. on the NaN check above).
                summary_writer.close()
def build_model(self):
    """Assemble the conditional GAN graph.

    Creates the generator and discriminator, the input placeholders, the
    four discriminator heads, the DCGAN-style losses and the two Adam
    update ops, then initializes all global variables in ``self.sess``
    and creates ``self.saver`` over them.
    """
    # Generator and discriminator are constructed with identical settings.
    net_kwargs = dict(
        max_seq_length=self.data.tags_idx.shape[1],
        vocab_size=self.vocab_size,
        embedding_size=self.FLAGS.embedding_dim,
        hidden_size=self.FLAGS.hidden,
        img_row=self.img_row,
        img_col=self.img_col)
    self.g_net = Generator(**net_kwargs)
    self.d_net = Discriminator(**net_kwargs)

    # Width of the tag vector: one slot per eye tag plus one per hair tag.
    tag_dim = len(self.data.eyes_idx) + len(self.data.hair_idx)
    image_shape = [None, self.img_row, self.img_col, 3]

    self.seq = tf.placeholder(tf.float32, [None, tag_dim], name="seq")
    self.img = tf.placeholder(tf.float32, image_shape, name="img")
    self.z = tf.placeholder(tf.float32, [None, self.FLAGS.z_dim])
    self.w_seq = tf.placeholder(tf.float32, [None, tag_dim], name="w_seq")
    self.w_img = tf.placeholder(tf.float32, image_shape, name="w_img")

    # Training-mode generator output, plus a reused inference-mode copy
    # exposed under the name 'sampler'.
    self.f_img = self.g_net(self.seq, self.z)
    self.sampler = tf.identity(
        self.g_net(self.seq, self.z, reuse=True, train=False),
        name='sampler')

    # Discriminator targets:
    #   real image,  real text  -> 1
    #   fake image,  real text  -> 0
    #   real image,  wrong text -> 0
    #   wrong image, real text  -> 0
    self.d = self.d_net(self.seq, self.img, reuse=False)
    self.d_1 = self.d_net(self.seq, self.f_img)
    self.d_2 = self.d_net(self.w_seq, self.img)
    self.d_3 = self.d_net(self.seq, self.w_img)

    def mean_logit_loss(logits, make_labels):
        # Mean sigmoid cross-entropy of `logits` against constant labels
        # produced by `make_labels` (tf.ones_like / tf.zeros_like).
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=make_labels(logits)))

    # dcgan
    self.g_loss = mean_logit_loss(self.d_1, tf.ones_like)

    real_loss = mean_logit_loss(self.d, tf.ones_like)
    fake_img_loss = mean_logit_loss(self.d_1, tf.zeros_like)
    wrong_text_loss = mean_logit_loss(self.d_2, tf.zeros_like)
    wrong_img_loss = mean_logit_loss(self.d_3, tf.zeros_like)
    # The three negative cases are averaged so they weigh as much as the
    # single positive case.
    self.d_loss = real_loss + \
        (fake_img_loss + wrong_text_loss + wrong_img_loss) / 3

    self.global_step = tf.Variable(0, name='g_global_step', trainable=False)

    # Run the collected UPDATE_OPS before either optimizer step.
    pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(pending_updates):
        d_optimizer = tf.train.AdamOptimizer(
            self.FLAGS.lr, beta1=0.5, beta2=0.9)
        self.d_updates = d_optimizer.minimize(
            self.d_loss, var_list=self.d_net.vars)
        g_optimizer = tf.train.AdamOptimizer(
            self.FLAGS.lr, beta1=0.5, beta2=0.9)
        # Only the generator step advances global_step.
        self.g_updates = g_optimizer.minimize(
            self.g_loss, var_list=self.g_net.vars,
            global_step=self.global_step)

    self.sess.run(tf.global_variables_initializer())
    self.saver = tf.train.Saver(tf.global_variables())
"""
The persistence API provided by TensorFlow can rename variables while
restoring them.

This is very convenient for models that use moving averages: TensorFlow
implements moving averages with shadow variables, and the rename-on-restore
mechanism makes it easy to map those shadow variables onto the current model.

This script gives a simple example.
"""
import tensorflow as tf


def _print_global_variable_names():
    # Print the name of every variable currently in the global collection.
    for variable in tf.global_variables():
        print(variable.name)


v = tf.Variable(0, dtype=tf.float32, name="v")

# Before the moving-average model is declared there is only the variable v.
# Prints: v:0
_print_global_variable_names()

# Add the exponential-moving-average (EMA) model.
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# After the EMA model is declared, print again.
# Prints: v:0
#         v/ExponentialMovingAverage:0
_print_global_variable_names()

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())