Exemplo n.º 1
0
    def _add_saving_op():
        """
        Build the Saver used to checkpoint or restore the network.

        Only a subset of the global variables is handed to the saver, namely
        those whose name contains one of the fragments listed below. According
        to the original author these cover the input/output layer weights and
        biases, the LSTM kernel and bias, the global step and the learning rate.

        :return: a tf.train.Saver restricted to the selected variables
        :raises ValueError: if no global variable matches any fragment
        """
        # Name fragments identifying the variables worth persisting.
        wanted_fragments = (
            '/input_w:0', '/input_b:0',
            '/output_w:0', '/output_b:0',
            'global_step:0', 'learning_rate:0',
            '/kernel:0', '/bias:0',
        )

        candidates = tf.global_variables()
        for candidate in candidates:
            logging.debug("TF variable : %s - %s", candidate.name, candidate)

        selected = [candidate for candidate in candidates
                    if any(fragment in candidate.name for fragment in wanted_fragments)]
        if not selected:
            # Nothing matched: the graph has not been constructed yet.
            raise ValueError("Trying to define the saving operation before the RNN is built")

        return tf.train.Saver(selected)
Exemplo n.º 2
0
def freeze_session(session, keep_var_names=None, output_names=None, clear_devices=True):
    """
    Freezes the state of a session into a pruned computation graph.

    Creates a new computation graph where variable nodes are replaced by
    constants taking their current value in the session. The new graph will be
    pruned so subgraphs that are not necessary to compute the requested
    outputs are removed.
    @param session The TensorFlow session to be frozen.
    @param keep_var_names A list of variable names that should not be frozen,
                          or None to freeze all the variables in the graph.
    @param output_names Names of the relevant graph outputs. The names of all
                        global variables are appended to them internally; the
                        sequence passed in is never modified.
    @param clear_devices Remove the device directives from the graph for better portability.
    @return The frozen graph definition.
    """
    from tensorflow.python.framework.graph_util import convert_variables_to_constants
    graph = session.graph
    with graph.as_default():
        variable_names = [v.op.name for v in tf.global_variables()]
        freeze_var_names = list(set(variable_names).difference(keep_var_names or []))
        # Build a fresh list instead of `+=`: the in-place form extended the
        # caller's own list on every call and failed for tuples.
        output_names = list(output_names or []) + variable_names
        input_graph_def = graph.as_graph_def()
        if clear_devices:
            for node in input_graph_def.node:
                node.device = ""
        frozen_graph = convert_variables_to_constants(session, input_graph_def,
                                                      output_names, freeze_var_names)
        return frozen_graph
Exemplo n.º 3
0
def get_model_params(variable_prefix, split_lstm_matrices=True):
  """Collect the evaluated values of the global variables into a dict.

  Variables named "Variable" / "Variable_1" (under `variable_prefix` when one
  is given) are skipped. Keys are the variable op names with "/" replaced by
  "-". Values come from `eval()`, which is called without an explicit session.

  Args:
    variable_prefix: if truthy, only variables whose op name starts with this
      prefix are returned; otherwise all global variables are considered.
    split_lstm_matrices: if true, entries whose name contains "LSTMCell" are
      replaced by four entries (suffixes -i, -j, -f, -o) obtained by splitting
      "Matrix" tensors along axis 1 and "Bias" tensors along axis 0. The same
      flag also enables reshaping "AttnV" entries to a column and squeezing
      "AttnW" entries.

  Returns:
    A dict mapping the renamed variable names to their evaluated values.
  """
  if variable_prefix:
    exclude = [ variable_prefix+"/Variable", variable_prefix+"/Variable_1" ]
    tmp = { v.op.name: v.eval() for v in tf.global_variables() if (v.op.name.startswith(variable_prefix) and v.op.name not in exclude) }
  else:
    exclude = [ "Variable", "Variable_1" ]
    tmp = { v.op.name: v.eval() for v in tf.global_variables() if v.op.name not in exclude }
  # Rename keys
  params = {name.replace("/", "-"): param for name, param in tmp.items()}
  if split_lstm_matrices:
    # Iterate over a snapshot of the keys: the loop body inserts and deletes
    # entries, which raises RuntimeError on Python 3 when looping over the
    # live key view.
    for name in list(params):
      if "LSTMCell" in name:
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        if "Matrix" in name:
          i, j, f, o = array_ops.split(1, 4, params[name])
        elif "Bias" in name:
          i, j, f, o = array_ops.split(0, 4, params[name])
        else:
          logging.error("Unknown tensor type..")
          exit(1)
        name_i = name.replace("LSTMCell", "LSTMCell-i")
        name_j = name.replace("LSTMCell", "LSTMCell-j")
        name_f = name.replace("LSTMCell", "LSTMCell-f")
        name_o = name.replace("LSTMCell", "LSTMCell-o")
        params[name_i] = i.eval()
        params[name_j] = j.eval()
        params[name_f] = f.eval()
        params[name_o] = o.eval()
        # The combined tensor is superseded by its four parts.
        del params[name]
      elif "AttnV" in name:
        # Turn the vector into a single-column matrix.
        params[name] = array_ops.reshape(params[name], [ params[name].shape[0], 1 ]).eval()
      elif "AttnW" in name:
        # remove dims of size 1
        params[name] = tf.squeeze(params[name]).eval()
  return params
Exemplo n.º 4
0
  def testBatchNorm(self, module):
    """Checks that a module built with use_batch_norm=True creates BN state.

    The module is connected twice, once with tensor-valued flags and once with
    Python booleans, then the variable count and the presence of the moving
    statistics variables are verified.
    """
    net = module(output_channels=self.output_channels,
                 kernel_shapes=self.kernel_shapes,
                 strides=self.strides,
                 paddings=self.paddings,
                 use_batch_norm=True)
    self.assertTrue(net.use_batch_norm)

    images = tf.placeholder(tf.float32, shape=(1, 100, 100, 3))

    # First connection: flags supplied as boolean placeholders.
    training_flag = tf.placeholder(tf.bool)
    local_stats_flag = tf.placeholder(tf.bool)
    net(images, is_training=training_flag, test_local_stats=local_stats_flag)

    # Second connection: flags supplied as plain Python booleans.
    net(images, is_training=False, test_local_stats=False)

    expected_count = len(self.output_channels) * 3 - 1
    self.assertEqual(len(net.get_variables()), expected_count)

    # Both moving statistics must show up among the global variables.
    global_names = [var.name for var in tf.global_variables()]
    for statistic in ("moving_variance", "moving_mean"):
      self.assertTrue(any(statistic in var_name for var_name in global_names))
Exemplo n.º 5
0
 def add_saver(self):
   """Creates `self.saver`, a V1-format Saver over every global variable."""
   all_vars = list(tf.global_variables())
   # Log the names that the saver is going to cover, one per line.
   logging.info('Generating op to save variables:\n\t%s',
                '\n\t'.join(var.name for var in all_vars))
   self.saver = tf.train.Saver(
       var_list=all_vars, write_version=saver_pb2.SaverDef.V1)
Exemplo n.º 6
0
def load_vggish_slim_checkpoint(session, checkpoint_path):
  """Loads a pre-trained VGGish-compatible checkpoint.

  This function can be used as an initialization function (referred to as
  init_fn in TensorFlow documentation) which is called in a Session after
  initializing all variables. When used as an init_fn, this will load
  a pre-trained checkpoint that is compatible with the VGGish model
  definition. Only variables defined by VGGish will be loaded.

  Args:
    session: an active TensorFlow session.
    checkpoint_path: path to a file containing a checkpoint that is
      compatible with the VGGish model definition.
  """
  # Build the inference-mode VGGish model in a throwaway graph to learn the
  # names of its variables. A set keeps the membership test below O(1) per
  # variable instead of scanning a list each time.
  with tf.Graph().as_default():
    define_vggish_slim(training=False)
    vggish_var_names = frozenset(v.name for v in tf.global_variables())

  # Get the list of all currently existing variables that match
  # the set of variable names we just computed.
  vggish_vars = [v for v in tf.global_variables() if v.name in vggish_var_names]

  # Use a Saver to restore just the variables selected above.
  saver = tf.train.Saver(vggish_vars, name='vggish_load_pretrained')
  saver.restore(session, checkpoint_path)
Exemplo n.º 7
0
 def add_saver(self):
   """Creates `self.saver`, a V1-format Saver over the non-quantized variables.

   A global variable is skipped when its name contains 'quantized'.
   """
   kept_vars = [var for var in tf.global_variables()
                if 'quantized' not in var.name]
   logging.info('Saving non-quantized variables:\n\t%s',
                '\n\t'.join(var.name for var in kept_vars))
   self.saver = tf.train.Saver(
       var_list=kept_vars, write_version=saver_pb2.SaverDef.V1)
Exemplo n.º 8
0
def train(hparams, event_dir=None, model_dir=None,
          restore_agent=True, epoch=0):
  """Train.

  Args:
    hparams: hyper-parameters; this function reads `epochs_num`,
      `eval_every_epochs`, `save_models_every_epochs`, `world_model_dir` and
      `environment_spec.simulated_env`.
    event_dir: directory for summaries, or a falsy value to write none.
    model_dir: directory for agent checkpoints, or a falsy value to neither
      restore nor save the agent.
    restore_agent: whether to restore the agent from `model_dir` before
      training (only effective when `model_dir` is given).
    epoch: index of the outer epoch, used to decide whether this round of
      training has already been completed.
  """
  with tf.name_scope("rl_train"):
    train_summary_op, _, initialization = define_train(hparams, event_dir)

    # The writer and the saver are independent of each other. Previously a
    # single `else` attached to `if model_dir` reset both, which left
    # `summary_writer` undefined when only `model_dir` was given and threw the
    # writer away when only `event_dir` was given.
    summary_writer = None
    if event_dir:
      summary_writer = tf.summary.FileWriter(
          event_dir, graph=tf.get_default_graph(), flush_secs=60)
    model_saver = None
    if model_dir:
      model_saver = tf.train.Saver(
          tf.global_variables(".*network_parameters.*"))

    # TODO(piotrmilos): This should be refactored, possibly with
    # handlers for each type of env
    if hparams.environment_spec.simulated_env:
      env_model_loader = tf.train.Saver(
          tf.global_variables("next_frame*"))
    else:
      env_model_loader = None

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      initialization(sess)
      if env_model_loader:
        trainer_lib.restore_checkpoint(
            hparams.world_model_dir, env_model_loader, sess, must_restore=True)
      start_step = 0
      if model_saver and restore_agent:
        start_step = trainer_lib.restore_checkpoint(
            model_dir, model_saver, sess)

      # Fail-friendly, don't train if already trained for this epoch
      if start_step >= ((hparams.epochs_num * (epoch + 1))):
        tf.logging.info("Skipping PPO training for epoch %d as train steps "
                        "(%d) already reached", epoch, start_step)
        return

      for epoch_index in range(hparams.epochs_num):
        summary = sess.run(train_summary_op)
        if summary_writer:
          summary_writer.add_summary(summary, epoch_index)
        # NOTE(review): on evaluation epochs the same summary is written a
        # second time below; kept as in the original, confirm it is intended.
        if (hparams.eval_every_epochs and
            epoch_index % hparams.eval_every_epochs == 0):
          if summary_writer and summary:
            summary_writer.add_summary(summary, epoch_index)
          else:
            tf.logging.info("Eval summary not saved")
        # Checkpoint periodically and always after the last epoch.
        if (model_saver and hparams.save_models_every_epochs and
            (epoch_index % hparams.save_models_every_epochs == 0 or
             (epoch_index + 1) == hparams.epochs_num)):
          ckpt_path = os.path.join(
              model_dir, "model.ckpt-{}".format(epoch_index + 1 + start_step))
          model_saver.save(sess, ckpt_path)
Exemplo n.º 9
0
 def _create_initializers(self):
   """Rebuilds saver/init ops whenever the number of global variables changed.

   Nothing happens when the cached variable count still matches the graph.
   """
   if self._var_count == len(tf.global_variables()):
     return

   # Make sure the checkpoint directory exists before a saver may use it.
   save_dir = None
   if self._save_path:
     save_dir = os.path.dirname(self._save_path)
   if save_dir and not tf.gfile.IsDirectory(save_dir):
     tf.gfile.MakeDirs(save_dir)

   self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
   self._init = tf.global_variables_initializer()
   self._local_init = tf.local_variables_initializer()
   self._check_inited = tf.assert_variables_initialized()
   # Remember the count so the next call can detect newly added variables.
   self._var_count = len(tf.global_variables())

   if self._summary_writer:
     self._summaries = tf.summary.merge_all()
     self._summary_writer.add_graph(tf.get_default_graph())
Exemplo n.º 10
0
  def testBatchNormScale(self):
    """With batch_norm_scale=True each BatchNorm moving_mean has a gamma."""
    height, width = 299, 299
    num_classes = 1000
    images = tf.placeholder(tf.float32, (1, height, width, 3))
    with tf.contrib.slim.arg_scope(
        inception.inception_resnet_v2_arg_scope(batch_norm_scale=True)):
      inception.inception_resnet_v2(images, num_classes, is_training=False)

    gamma_names = {
        var.op.name for var in tf.global_variables('.*/BatchNorm/gamma:0$')}
    self.assertGreater(len(gamma_names), 0)

    # Swap the trailing 'moving_mean' for 'gamma' and look the result up.
    suffix_len = len('moving_mean')
    for mean_var in tf.global_variables('.*/BatchNorm/moving_mean:0$'):
      expected_gamma = mean_var.op.name[:-suffix_len] + 'gamma'
      self.assertIn(expected_gamma, gamma_names)
Exemplo n.º 11
0
    def get_train_op(self,
                     loss,
                     learning_rate,
                     optimizer=None,
                     clip_norm=None,
                     learnable_scopes=None,
                     optimizer_scope_name=None):
        """ Get train operation for given loss

        Args:
            loss: loss, tf tensor or scalar
            learning_rate: scalar or placeholder
            optimizer: optimizer class (or any callable) invoked as
                ``optimizer(learning_rate)``; defaults to tf.train.AdamOptimizer
            clip_norm: clip gradients norm by clip_norm (None disables clipping)
            learnable_scopes: iterable of name fragments; a global variable is
                trained when its name contains at least one of them
                (None for all global variables)
            optimizer_scope_name: name of the variable scope wrapping the
                optimizer ops; 'Optimizer' when None

        Returns:
            train_op
        """
        if optimizer_scope_name is None:
            opt_scope = tf.variable_scope('Optimizer')
        else:
            opt_scope = tf.variable_scope(optimizer_scope_name)
        with opt_scope:
            if learnable_scopes is None:
                variables_to_train = tf.global_variables()
            else:
                # Pick every variable at most once. Looping scope-by-scope
                # appended a variable again for each additional scope it
                # matched, giving it duplicate gradient updates.
                variables_to_train = [
                    var for var in tf.global_variables()
                    if any(scope_name in var.name for scope_name in learnable_scopes)
                ]

            if optimizer is None:
                optimizer = tf.train.AdamOptimizer

            # For batch norm it is necessary to update running averages
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(extra_update_ops):

                def clip_if_not_none(grad):
                    # Variables that do not influence the loss get a None
                    # gradient, which must be passed through untouched.
                    if grad is not None:
                        return tf.clip_by_norm(grad, clip_norm)
                    return None

                opt = optimizer(learning_rate)
                grads_and_vars = opt.compute_gradients(loss, var_list=variables_to_train)
                if clip_norm is not None:
                    grads_and_vars = [(clip_if_not_none(grad), var)
                                      for grad, var in grads_and_vars]
                train_op = opt.apply_gradients(grads_and_vars)
        return train_op
Exemplo n.º 12
0
    def assign_weight(self):
        '''
        Zero the masked units of each layer directly in the session variables.

        The masks come from mask_class_multi_by_value() when multi-class
        pruning is enabled and more than one target class is set, otherwise
        from mask_unit_by_value() for the first target class. Each mask entry
        carries a layer "name" and, under the "shape" key, a mask whose zero
        entries mark the units to clear.
        '''
        print("assign weights......")
        if (self.multiPruning == True and len(self.target_class_id) > 1):
            masks = self.mask_class_multi_by_value()
        else:
            masks = self.mask_unit_by_value(self.target_class_id[0])

        for layer in masks:
            layer_name = layer["name"]

            if layer_name[0] == "C":  # convolutional layer
                kernel_name = "Conv" + layer_name.strip("Conv") + "/composite_function/kernel:0"
                with self.graph.as_default():
                    for var in tf.global_variables():
                        if var.name != kernel_name:
                            continue
                        kernel = self.sess.run(var)
                        unit_mask = np.array(layer["shape"])
                        # The mask selects positions along the last kernel axis.
                        kernel[:, :, :, unit_mask == 0] = 0
                        self.sess.run(tf.assign(var, kernel))

            if layer_name[0] == "F":  # fully connected layer
                layer_num = layer_name.strip("FC")
                weight_name = "FC" + layer_num + "/W:0"
                bias_name = "FC" + layer_num + "/bias:0"
                with self.graph.as_default():
                    for var in tf.global_variables():
                        if var.name == weight_name:
                            weights = self.sess.run(var)
                            unit_mask = np.array(layer["shape"])
                            # The mask selects columns of the weight matrix.
                            weights[:, unit_mask == 0] = 0
                            self.sess.run(tf.assign(var, weights))
                        if var.name == bias_name:
                            bias = self.sess.run(var)
                            unit_mask = np.array(layer["shape"])
                            bias[unit_mask == 0] = 0
                            self.sess.run(tf.assign(var, bias))
        print("assign finished!")
        '''
Exemplo n.º 13
0
def optimize(loss, learning_rate, hparams, use_tpu=False):
  """Builds the op that minimizes `loss`.

  The loss is first passed through weight_decay_and_noise() and named
  "total_loss". Variable sizes are logged (and optionally summarized), then
  the train op is created with tf.contrib.layers.optimize_loss using a
  ConditionalOptimizer, wrapped in a CrossShardOptimizer when `use_tpu`.

  Returns:
    The training op.
  """
  loss = tf.identity(
      weight_decay_and_noise(loss, hparams, learning_rate), name="total_loss")

  # Report trainable variables first, then everything that is not trainable.
  log_variable_sizes(verbose=hparams.summarize_vars)
  frozen_vars = list(
      set(tf.global_variables()) - set(tf.trainable_variables()))
  log_variable_sizes(frozen_vars, tag="Non-trainable variables",
                     verbose=hparams.summarize_vars)
  if hparams.summarize_vars:
    summarize_variables()
    summarize_variables(frozen_vars, tag="Non-trainable variables")

  # Variables stored as float16 references are reported separately.
  half_precision_vars = [
      var for var in tf.global_variables() if var.dtype == dtypes.float16_ref
  ]
  log_variable_sizes(
      half_precision_vars, "Diet Variables", verbose=hparams.summarize_vars)

  optimizer = ConditionalOptimizer(
      hparams.optimizer, learning_rate, hparams, use_tpu)
  if use_tpu:
    optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

  summary_names = []
  if common_layers.should_generate_summaries():
    tf.summary.scalar("learning_rate", learning_rate)
    summary_names.append("loss")
    if hparams.summarize_grads:
      tf.logging.info("Summarizing gradients")
      summary_names.extend(
          ["gradients", "gradient_norm", "global_gradient_norm"])

  if hparams.clip_grad_norm:
    tf.logging.info("Clipping gradients, norm: %0.5f", hparams.clip_grad_norm)
  if hparams.grad_noise_scale:
    tf.logging.info("Adding noise to gradients, noise scale: %0.5f",
                    hparams.grad_noise_scale)

  # Falsy clip/noise settings are turned into None before being passed on.
  return tf.contrib.layers.optimize_loss(
      name="training",
      loss=loss,
      global_step=tf.train.get_or_create_global_step(),
      learning_rate=learning_rate,
      clip_gradients=hparams.clip_grad_norm or None,
      gradient_noise_scale=hparams.grad_noise_scale or None,
      optimizer=optimizer,
      summaries=summary_names,
      colocate_gradients_with_ops=True)
Exemplo n.º 14
0
def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=None, global_step=None, printable=False):
    """Save parameters into ckpt file.

    Parameters
    ------------
    sess : Session.
    mode_name : string, name of the model, default is ``model.ckpt``.
    save_dir : string, path / file directory to the ckpt, default is ``checkpoint``.
    var_list : list of variables, if ``None`` or empty, save all global variables.
    global_step : int or None, step number.
    printable : bool, if True, print all params info.

    Examples
    ---------
    - see ``tl.files.load_ckpt()``.
    """
    assert sess is not None
    ckpt_file = os.path.join(save_dir, mode_name)
    # `None` replaces the former mutable `[]` default; any empty container
    # (including the old `[]`) still means "save every global variable".
    if var_list is None or len(var_list) == 0:
        var_list = tf.global_variables()

    print("[*] save %s n_params: %d" % (ckpt_file, len(var_list)))

    if printable:
        for idx, v in enumerate(var_list):
            print("  param {:3}: {:15}   {}".format(idx, v.name, str(v.get_shape())))

    saver = tf.train.Saver(var_list)
    saver.save(sess, ckpt_file, global_step=global_step)
def get_global_variable_by_name(name):
	"""Return the first global variable whose ``name`` attribute equals `name`.

	name : the full variable name to look for

	Raises IndexError when no global variable carries that name.
	"""
	matching = [var for var in tf.global_variables() if var.name == name]
	return matching[0]
Exemplo n.º 16
0
 def testNotInLocalVariables(self):
   """A model variable is global and a MODEL_VARIABLE, but never local."""
   with self.test_session(), tf.variable_scope('A'):
     model_var = tf.contrib.framework.model_variable('a', [5])
     self.assertTrue(model_var in tf.global_variables())
     model_collection = tf.get_collection(tf.GraphKeys.MODEL_VARIABLES)
     self.assertTrue(model_var in model_collection)
     self.assertFalse(model_var in tf.local_variables())
Exemplo n.º 17
0
def train_speech_to_text_network():
    """Train the speech-to-text network with CTC loss for 16 epochs.

    The learning rate starts at 0.001 and is multiplied by 0.97 per epoch.
    The module-level batch cursor ``pointer`` is reset at the start of every
    epoch, and a checkpoint named 'speech.module' is written every fifth epoch.
    """
    global pointer

    logit = speech_to_text_network()

    # CTC loss: non-zero label entries become the sparse targets, shifted by -1.
    nonzero_idx = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    sparse_target = tf.SparseTensor(indices=nonzero_idx,
                                    values=tf.gather_nd(Y, nonzero_idx) - 1,
                                    shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(logit, sparse_target, sequence_len, time_major=False)

    # Optimizer with a non-trainable learning-rate variable that is reassigned
    # at the beginning of each epoch.
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    trainable = list(tf.trainable_variables())
    grads = optimizer.compute_gradients(loss, var_list=trainable)
    train_step = optimizer.apply_gradients(grads)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        for epoch in range(16):
            sess.run(tf.assign(lr, 0.001 * (0.97 ** epoch)))

            pointer = 0
            for batch in range(n_batch):
                wav_batch, label_batch = get_next_batches(batch_size)
                feed = {X: wav_batch, Y: label_batch}
                train_loss, _ = sess.run([loss, train_step], feed_dict=feed)
                print(epoch, batch, train_loss)

            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)
Exemplo n.º 18
0
	def train(self, data=0, steps=-1, dropout=None, display_step=10, test_step=200, batch_size=10,
	          do_resume=False):  # epochs=-1,
		"""Run the training loop, with periodic reporting, testing and snapshots.

		data: if truthy, replaces self.data before training.
		steps: number of training steps; -1 means 9999999.
		dropout: value fed to the keep_prob placeholder while training;
			a falsy value means 1.0 (keep all).
		display_step: report loss/accuracy every this many steps.
		test_step: call self.test() every this many steps.
		batch_size: number of examples requested from self.next_batch().
		do_resume: if True and a checkpoint exists in checkpoint_dir, restore it.

		Relies on the names checkpoint_dir, save_step and start, which are not
		defined in this method (presumably module-level; verify).
		"""
		if data: self.data = data
		steps = 9999999 if steps == -1 else steps
		session = self.session

		tf.add_check_numerics_ops()
		# Each try/except below falls back to the pre-1.0 TensorFlow name.
		# Only AttributeError (missing API) triggers the fallback, so genuine
		# failures are no longer masked by a bare `except`.
		try:
			self.summaries = tf.summary.merge_all()
		except AttributeError:
			self.summaries = tf.merge_all_summaries()
		try:
			self.summary_writer = tf.summary.FileWriter(current_logdir(), session.graph)
		except AttributeError:
			self.summary_writer = tf.train.SummaryWriter(current_logdir(), session.graph)
		if not dropout: dropout = 1.  # keep all
		x = self.x
		y = self.y
		keep_prob = self.keep_prob
		try:
			saver = tf.train.Saver(tf.global_variables())
		except AttributeError:
			saver = tf.train.Saver(tf.all_variables())
		snapshot = self.name + str(get_last_tensorboard_run_nr())
		checkpoint = tf.train.latest_checkpoint(checkpoint_dir)

		# Initialize BEFORE restoring. Running the initializer after
		# saver.restore() overwrote the restored weights, so do_resume
		# silently had no effect.
		try:
			session.run([tf.global_variables_initializer()])
		except AttributeError:
			session.run([tf.initialize_all_variables()])
		if do_resume and checkpoint:
			print("LOADING " + checkpoint + " !!!")
			saver.restore(session, checkpoint)

		step = 0  # show first
		while step < steps:
			batch_xs, batch_ys = self.next_batch(batch_size, session)

			# Fit training using batch data
			feed_dict = {x: batch_xs, y: batch_ys, keep_prob: dropout, self.train_phase: True}
			loss, _ = session.run([self.cost, self.optimizer], feed_dict=feed_dict)
			if step % display_step == 0:
				seconds = int(time.time()) - start
				# Calculate batch accuracy, loss (no dropout, inference phase)
				feed = {x: batch_xs, y: batch_ys, keep_prob: 1., self.train_phase: False}
				acc, summary = session.run([self.accuracy, self.summaries], feed_dict=feed)
				# The train summary is deliberately not written here:
				# only test summaries for smoother curve
				print("\rStep {:d} Loss= {:.6f} Accuracy= {:.3f} Time= {:d}s".format(step, loss, acc, seconds), end=' ')
				if str(loss) == "nan": return print("\nLoss gradiant explosion, exiting!!!")  # restore!
			if step % test_step == 0: self.test(step)
			if step % save_step == 0 and step > 0:
				print("SAVING snapshot %s" % snapshot)
				saver.save(session, checkpoint_dir + snapshot + ".ckpt", self.global_step)

			step += 1
		print("\nOptimization Finished!")
		self.test(step, number=10000)  # final test
Exemplo n.º 19
0
    def _match_vars(self, func):
        """Pair graph variables with checkpoint tensors and report mismatches.

        For every global variable whose save name exists in the checkpoint,
        ``func(reader, name, variable)`` is called, unless the name is listed
        in ``self.ignore``, in which case it is only logged. Names present on
        one side but not the other are collected and logged, skipping those
        for which ``is_training_name`` is true.
        """
        reader, chkpt_vars = SaverRestore._read_checkpoint_vars(self.path)
        consumed = set()

        # Pass 1: graph variables that the checkpoint cannot provide.
        missing_in_ckpt = MismatchLogger('graph', 'checkpoint')
        for var in tf.global_variables():
            save_name = get_savename_from_varname(var.name, varname_prefix=self.prefix)
            in_checkpoint = reader.has_tensor(save_name)

            if in_checkpoint and save_name in self.ignore:
                logger.info("Variable {} in the graph will not be loaded from the checkpoint!".format(save_name))
                continue

            if in_checkpoint:
                func(reader, save_name, var)
                consumed.add(save_name)
                continue

            op_name = var.op.name
            if not is_training_name(op_name):
                missing_in_ckpt.add(op_name)
        missing_in_ckpt.log()

        # Pass 2: checkpoint tensors that no graph variable consumed.
        missing_in_graph = MismatchLogger('checkpoint', 'graph')
        if len(consumed) < len(chkpt_vars):
            for leftover in sorted(chkpt_vars - consumed):
                if not is_training_name(leftover):
                    missing_in_graph.add(leftover)
        missing_in_graph.log()
Exemplo n.º 20
0
  def initialize(self, sess):
    """Initializes all variables, then loads the pretrained weights.

    Returns:
      A tuple (learning rate, last snapshot iteration, step sizes,
      np_paths, ss_paths). For this fresh start the iteration is 0 and both
      path lists are empty.
    """
    print('Loading initial model weights from {:s}'.format(self.pretrained_model))

    # Give every global variable a value before selectively restoring.
    all_vars = tf.global_variables()
    sess.run(tf.variables_initializer(all_vars, name='init'))

    # Restore only what the network selects from the checkpoint contents.
    ckpt_var_map = self.get_variables_in_checkpoint_file(self.pretrained_model)
    restorable = self.net.get_variables_to_restore(all_vars, ckpt_var_map)
    tf.train.Saver(restorable).restore(sess, self.pretrained_model)
    print('Loaded.')

    # Per the original author: fixes up the variables left out of the restore
    # (RGB weights changed to BGR; for VGG16 the convolutional fc6/fc7 weights
    # become fully connected weights).
    self.net.fix_variables(sess, self.pretrained_model)
    print('Fixed.')

    return cfg.TRAIN.LEARNING_RATE, 0, list(cfg.TRAIN.STEPSIZE), [], []
Exemplo n.º 21
0
 def getLoadVars(self):
     """Return the global variables to load, always excluding 'Adam' ones.

     When self.resLoad is truthy, only variables whose name contains
     'class_weight', 'class_bias' or 'conv1' are kept.
     """
     candidates = [var for var in tf.global_variables() if "Adam" not in var.name]
     if self.resLoad:
         wanted = ("class_weight", "class_bias", "conv1")
         candidates = [var for var in candidates
                       if any(tag in var.name for tag in wanted)]
     return candidates
Exemplo n.º 22
0
  def testTrainWithSummary(self):
    """Runs one training step in graph mode and checks the written summary."""
    with tf.Graph().as_default():
      image_ph = tf.placeholder(tf.float32, image_shape(None), name='images')
      label_ph = tf.placeholder(tf.float32, [None, 1000], name='labels')

      tf.train.get_or_create_global_step()
      logdir = tempfile.mkdtemp()
      # Build model, loss, summary and train op with summaries always recorded
      # into a writer rooted at the temporary log directory.
      with tf.contrib.summary.always_record_summaries(), \
          tf.contrib.summary.create_file_writer(
              logdir, max_queue=0, name='t0').as_default():
        model = resnet50.ResNet50(data_format())
        logits = model(image_ph, training=True)
        loss = tf.losses.softmax_cross_entropy(
            logits=logits, onehot_labels=label_ph)
        tf.contrib.summary.scalar(name='loss', tensor=loss)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
        train_op = optimizer.minimize(loss)

      init_op = tf.global_variables_initializer()
      self.assertEqual(321, len(tf.global_variables()))

      batch_size = 32
      with tf.Session() as sess:
        sess.run(init_op)
        sess.run(tf.contrib.summary.summary_writer_initializer_op())
        np_images, np_labels = random_batch(batch_size)
        fetches = [train_op, tf.contrib.summary.all_summary_ops()]
        sess.run(fetches, feed_dict={image_ph: np_images, label_ph: np_labels})

      events = summary_test_util.events_from_logdir(logdir)
      self.assertEqual(len(events), 2)
      self.assertEqual(events[1].summary.value[0].tag, 'loss')
Exemplo n.º 23
0
  def load_decode_model(self):
    """Builds the forward-only G2P model and restores it from self.model_dir.

    Raises:
      RuntimeError: if self.model_dir contains no 'checkpoint' file.
    """
    checkpoint_index = os.path.join(self.model_dir, 'checkpoint')
    if not os.path.exists(checkpoint_index):
      raise RuntimeError("Model not found in %s" % self.model_dir)

    # Decoding handles one word at a time.
    self.batch_size = 1
    num_layers, size = data_utils.load_params(self.model_dir)

    print("Loading vocabularies from %s" % self.model_dir)
    grapheme_path = os.path.join(self.model_dir, "vocab.grapheme")
    phoneme_path = os.path.join(self.model_dir, "vocab.phoneme")
    self.gr_vocab = data_utils.load_vocabulary(grapheme_path)
    self.ph_vocab = data_utils.load_vocabulary(phoneme_path)
    # The phoneme vocabulary is loaded a second time in reversed form.
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_path, reverse=True)

    self.session = tf.Session()

    print("Creating %d layers of %d units." % (num_layers, size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
        size, num_layers, 0,
        self.batch_size, 0, 0,
        forward_only=True)
    self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    print("Reading model parameters from %s" % self.model_dir)
    saved_model_path = os.path.join(self.model_dir, "model")
    self.model.saver.restore(self.session, saved_model_path)
Exemplo n.º 24
0
    def load(self, dir_name, epoch=0, name=None):
        """load model from dir

        When `name` is None or equals ``self.name`` the checkpoint is restored
        straight into this model's variables. Otherwise the checkpoint saved
        under `name` is loaded in a temporary graph and its values are assigned
        to this model's variables, matched by substituting `name` for
        ``self.name`` in each variable name.

        Parameters
        ----------
        dir_name: str
            name of the directory
        epoch: int
            epoch number used in the checkpoint file name
        name: str or None
            name under which the checkpoint was saved; None means ``self.name``
        """
        ckpt_basename = (self.subclass_name + "_%d") % epoch

        if name is None or name == self.name:  # the name of saved model is the same as ours
            dir_name = os.path.join(dir_name, self.name)
            model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name)
            saver = tf.train.Saver(model_vars)
            saver.restore(self.sess, os.path.join(dir_name, ckpt_basename))
        else:  # load a checkpoint with different name
            backup_graph = tf.get_default_graph()
            kv_dict = {}

            # load checkpoint from another saved graph
            with tf.Graph().as_default(), tf.Session() as sess:
                tf.train.import_meta_graph(os.path.join(dir_name, name, ckpt_basename + ".meta"))
                dir_name = os.path.join(dir_name, name)
                model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name)
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver(model_vars)
                saver.restore(sess, os.path.join(dir_name, ckpt_basename))
                # Snapshot every variable value of the temporary graph by name.
                for item in tf.global_variables():
                    kv_dict[item.name] = sess.run(item)

            # assign to now graph. `as_default()` only takes effect when
            # entered as a context manager; the bare call used before was a
            # no-op.
            with backup_graph.as_default():
                model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, self.name)
                for item in model_vars:
                    old_name = item.name.replace(self.name, name)
                    self.sess.run(tf.assign(item, kv_dict[old_name]))
Exemplo n.º 25
0
def evaluate_checkpoint(tt='test', checkpoint=None, output_file=None, output_file_interp=None):
    """
    Evaluate model on specific checkpoint
    :param tt: 'train', 'test' - name of the data_set attribute to iterate over
    :param checkpoint: path to checkpoint
    :param output_file: directory the raw prediction dumps are written to. If None,
                        '<checkpoint parent dir>/predict_subset/<tt>' is used.
                        The directory is created when it does not exist.
    :param output_file_interp: not used by this function, kept for backward compatibility
    :return: None
    """
    # Import data
    data_set = KspaceDataSet(base_dir, file_names.values(), stack_size=50, shuffle=False, data_base=FLAGS.database)

    net = load_graph()

    # Create a saver and keep all checkpoints
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

    data_set_tt = getattr(data_set, tt)

    all_acc = []
    predict_counter = 0
    if output_file is None:
        # Default output directory lives next to the checkpoint
        output_file = os.path.join(os.path.abspath(os.path.join(checkpoint, os.pardir)), 'predict_subset', tt)
    if not os.path.isdir(output_file):
        # Re-running on the same checkpoint must not fail because the directory exists
        os.makedirs(output_file)

    gen_loss_adversarial = 1.0

    print("Evaluate Model using checkpoint: %s, data=%s" % (checkpoint, tt))
    # The dumps are raw array bytes, so the files are opened in binary mode.
    # The context managers release the session and both files even on error.
    with tf.Session() as sess, \
            open(os.path.join(output_file, "000000.predict_real.bin"), 'wb') as f_out_real, \
            open(os.path.join(output_file, "000000.predict_imag.bin"), 'wb') as f_out_imag:
        saver.restore(sess, checkpoint)

        while data_set_tt.epoch == 0:
            # Running over all data until epoch > 0
            feed = feed_data(data_set, net.labels, net.train_phase,
                             tt=tt, batch_size=FLAGS.mini_batch_size)
            if feed is None:
                break

            feed[net.adv_loss_w] = gen_loss_adversarial
            predict, result = sess.run([net.predict_g, net.evaluation], feed_dict=feed)

            all_acc.append(np.array(result))
            print('Time: %s , Accuracy for mini_batch is: %s' % (datetime.datetime.now(), result))
            f_out_real.write(predict['real'].ravel().tobytes())
            f_out_imag.write(predict['imag'].ravel().tobytes())

            predict_counter += FLAGS.mini_batch_size
            print("Done - " + str(predict_counter))
            if predict_counter >= FLAGS.max_predict:
                break

    print("Total accuracy is: %f" % np.array(all_acc).mean())
Exemplo n.º 26
0
def train(data, model):
    """Train ``model`` on ``data`` and periodically checkpoint it.

    Runs ``epochs`` passes (module-level name) over ``data.n_size`` batches.
    The learning rate is reassigned at the start of every epoch to
    ``0.002 * 0.97 ** epoch``. A progress line is rewritten on stdout after each
    batch. A checkpoint is written to ``save_dir`` (module-level name) whenever
    the zero-based batch counter is a multiple of 1000, and after the very
    last batch.

    :param data: object exposing ``n_size``, ``x_batches`` and ``y_batches``
    :param model: object exposing ``learning_rate``, ``x_tf``, ``y_tf``,
                  ``cost``, ``final_state`` and ``train_op``
    """
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())
        step_count = 0  # one-based count of executed batches, used as checkpoint suffix
        for epoch in range(epochs):
            decayed_lr = 0.002 * (0.97 ** epoch)
            sess.run(tf.assign(model.learning_rate, decayed_lr))
            for batch_idx in range(data.n_size):
                step_count += 1
                batch_feed = {
                    model.x_tf: data.x_batches[batch_idx],
                    model.y_tf: data.y_batches[batch_idx],
                }
                train_loss, _, _ = sess.run(
                    [model.cost, model.final_state, model.train_op],
                    feed_dict=batch_feed)

                # zero-based position of this batch within the whole run
                seen = epoch * data.n_size + batch_idx
                progress = "{}/{} (epoch {}) | train_loss {:.3f}".format(
                    seen, epochs * data.n_size, epoch, train_loss)
                sys.stdout.write('\r')
                sys.stdout.write(progress)
                sys.stdout.flush()

                # save
                is_final_batch = epoch == epochs-1 and batch_idx == data.n_size-1
                if seen % 1000 == 0 or is_final_batch:
                    checkpoint_path = os.path.join(save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step_count)
                    sys.stdout.write('\n')
                    print("model saved to {}".format(checkpoint_path))
            sys.stdout.write('\n')
Exemplo n.º 27
0
 def get_post_init_ops():
     """
     Build one grouped op that overwrites every 'tower*'-prefixed variable with
     the value of the variable that has the same name minus that prefix.

     Variables whose name does not start with 'tower' are left alone; variables
     starting with 'tower0' are skipped with a warning.

     :return: a tf.group op named 'sync_variables_from_main_tower'
     """
     # literally all variables, because it's better to sync optimizer-internal variables as well
     every_var = tf.global_variables() + tf.local_variables()
     lookup = {var.name: var for var in every_var}
     sync_ops = []
     for replica in every_var:
         full_name = replica.name
         if not full_name.startswith('tower'):
             continue
         if full_name.startswith('tower0'):
             logger.warn("[SyncMultiGPUReplicatedBuilder] variable "
                         "{} has prefix 'tower0', this is unexpected.".format(full_name))
             continue        # TODO some vars (EMA) may still startswith tower0
         # in this trainer, the master name doesn't have the towerx/ prefix
         tower_prefix, _, master_name = full_name.partition('/')
         if tower_prefix in master_name:
             logger.error("[SyncMultiGPUReplicatedBuilder] variable "
                          "{} has its prefix {} appears multiple times in its name!".format(full_name, tower_prefix))
         master = lookup.get(master_name)
         assert master is not None, lookup.keys()
         sync_ops.append(replica.assign(master.read_value()))
     logger.info(
         "'sync_variables_from_main_tower' includes {} operations.".format(len(sync_ops)))
     return tf.group(*sync_ops, name='sync_variables_from_main_tower')
Exemplo n.º 28
0
def evaluate():
    """ Build evaluation graph and run.

    Builds the model under the 'cnn' variable scope, restores the latest
    checkpoint found in FLAGS.train_dir and reports average loss and accuracy
    over every batch of the train set (when FLAGS.train_data is set) or of the
    test set.

    Raises:
        IOError: if no checkpoint can be found in FLAGS.train_dir.
    """
    with tf.Graph().as_default():
        with tf.variable_scope('cnn'):
            m = model.Model(FLAGS, is_train=False)
        saver = tf.train.Saver(tf.global_variables())

        # read test files
        data_file = 'train.cPickle' if FLAGS.train_data else 'test.cPickle'
        loader = text_input.DataLoader(os.path.join(FLAGS.data_dir, data_file), batch_size=FLAGS.batch_size)
        # single-argument print(...) behaves the same on Python 2 and Python 3
        print('Start evaluation, %d batches needed, with %d examples per batch.' % (loader.num_batch, FLAGS.batch_size))

        true_count = 0
        avg_loss = 0

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                raise IOError("Loading checkpoint file failed!")
            saver.restore(sess, ckpt.model_checkpoint_path)

            for _ in range(loader.num_batch):
                x, y = loader.next_batch()
                true_count_value, loss_value = sess.run([m.true_count_op, m.total_loss],
                    feed_dict={m.inputs:x, m.labels:y})
                true_count += true_count_value
                avg_loss += loss_value

            # NOTE(review): the denominator assumes every batch holds exactly
            # FLAGS.batch_size examples - confirm against DataLoader.
            accuracy = float(true_count) / (loader.num_batch * FLAGS.batch_size)
            avg_loss = float(avg_loss) / loader.num_batch
            print('%s: test_loss = %.6f, test_accuracy = %.3f' % (datetime.now(), avg_loss, accuracy))
Exemplo n.º 29
0
  def testNest(self, getter1, getter2):
    """Creates one variable under two nested scopes, each with a custom getter.

    Checks that exactly one global variable and no trainable variable results.
    """
    # A single `with` enters the scopes left to right, i.e. "scope2" inside "scope1".
    with tf.variable_scope("scope1", custom_getter=getter1), \
        tf.variable_scope("scope2", custom_getter=getter2):
      tf.get_variable("w", [10, 10], tf.float32)

    num_global = len(tf.global_variables())
    self.assertEqual(1, num_global)
    num_trainable = len(tf.trainable_variables())
    self.assertEqual(0, num_trainable)
Exemplo n.º 30
0
  def restore_map(self,
                  from_detection_checkpoint=True,
                  load_all_detection_checkpoint_vars=False):
    """Builds the checkpoint-name -> variable mapping used to warm-start the model.

    See parent class for details.

    Args:
      from_detection_checkpoint: True when the checkpoint is a full detection
        checkpoint whose variable names match the graph; False when it is a
        classification checkpoint, in which case the leading feature-extractor
        scope is removed from each key.
      load_all_detection_checkpoint_vars: only honoured together with
        `from_detection_checkpoint`; when both are True every global variable
        is mapped, otherwise only variables whose name starts with the
        feature-extractor scope are. Default False.

    Returns:
      A dict mapping variable names (to load from a checkpoint) to variables in
      the model graph.
    """
    take_everything = (from_detection_checkpoint and
                       load_all_detection_checkpoint_vars)
    name_to_variable = {}
    for graph_var in tf.global_variables():
      key = graph_var.op.name
      if take_everything:
        name_to_variable[key] = graph_var
        continue

      scope = self._extract_features_scope
      if not key.startswith(scope):
        continue
      if not from_detection_checkpoint:
        # Keep only what follows the leading "<scope>/" part of the name.
        key = re.split('^' + scope + '/', key)[-1]
      name_to_variable[key] = graph_var

    return name_to_variable
def main():
    """Run multi-scale, flip-augmented parsing inference and save the label maps.

    Parses the command-line options, builds the input reader and three
    weight-sharing copies of the network (input scales 1.0, 0.75 and 1.25),
    averages their parsing outputs, restores the checkpoint given by
    ``--restore-from`` and, for ``--num-steps`` steps, writes a ``decode_labels``
    visualisation (``<id>_vis.png``) and the raw label map (``<id>.png``) into
    ``--output``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--output', default='output', type=str)
    parser.add_argument('--data-directory', default='./datasets/examples/201906120600_201906121200_person/', type=str)
    parser.add_argument('--data-list-path', default='./datasets/examples/list/val.txt', type=str)
    parser.add_argument('--restore-from', default='./checkpoint/JPPNet-s2', type=str)
    parser.add_argument('--inres', default='384,384', type=str)
    parser.add_argument('--num-steps', default=10, type=int)
    parser.add_argument('--num-classes', default=20, type=int)
    # Unknown arguments are tolerated and ignored.
    args, _ = parser.parse_known_args()
    # "H,W" string -> (H, W) tuple of ints
    args.inres = tuple(int(x) for x in args.inres.split(','))

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    """Create the model and start the evaluation process."""

    # Create queue coordinator.
    coord = tf.train.Coordinator()
    h, w = args.inres
    # Load reader.
    with tf.name_scope("create_inputs"):
        # NOTE(review): the meaning of the positional None/False/False arguments
        # is defined by ImageReader, which is not visible here - confirm.
        reader = ImageReader(args.data_directory, args.data_list_path, None, False, False, coord)
        image = reader.image
        # Flip along axis 1 (presumably the width axis of an HxWxC image - confirm).
        image_rev = tf.reverse(image, tf.stack([1]))
        image_list = reader.image_list

    # Batch of two: index 0 is the image as read, index 1 its flipped copy.
    image_batch_origin = tf.stack([image, image_rev])
    # The same batch at 100%, 75% and 125% of the requested input resolution.
    image_batch = tf.image.resize_images(image_batch_origin, [int(h), int(w)])
    image_batch075 = tf.image.resize_images(image_batch_origin, [int(h * 0.75), int(w * 0.75)])
    image_batch125 = tf.image.resize_images(image_batch_origin, [int(h * 1.25), int(w * 1.25)])

    # Create network.
    # The first instance creates the variables; the other two reuse them.
    with tf.variable_scope('', reuse=False):
        net_100 = JPPNetModel({'data': image_batch}, is_training=False, n_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net_075 = JPPNetModel({'data': image_batch075}, is_training=False, n_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net_125 = JPPNetModel({'data': image_batch125}, is_training=False, n_classes=args.num_classes)


    # parsing net
    parsing_fea1_100 = net_100.layers['res5d_branch2b_parsing']
    parsing_fea1_075 = net_075.layers['res5d_branch2b_parsing']
    parsing_fea1_125 = net_125.layers['res5d_branch2b_parsing']

    parsing_out1_100 = net_100.layers['fc1_human']
    parsing_out1_075 = net_075.layers['fc1_human']
    parsing_out1_125 = net_125.layers['fc1_human']

    # pose net
    resnet_fea_100 = net_100.layers['res4b22_relu']
    resnet_fea_075 = net_075.layers['res4b22_relu']
    resnet_fea_125 = net_125.layers['res4b22_relu']

    # Pose / refinement heads: built once with fresh variables for the 100%
    # scale, then rebuilt with reuse=True for the 75% and 125% scales.
    with tf.variable_scope('', reuse=False):
        pose_out1_100, pose_fea1_100 = pose_net(resnet_fea_100, 'fc1_pose')
        pose_out2_100, pose_fea2_100 = pose_refine(pose_out1_100, parsing_out1_100, pose_fea1_100, name='fc2_pose')
        parsing_out2_100, parsing_fea2_100 = parsing_refine(parsing_out1_100, pose_out1_100, parsing_fea1_100, name='fc2_parsing')
        parsing_out3_100, parsing_fea3_100 = parsing_refine(parsing_out2_100, pose_out2_100, parsing_fea2_100, name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_075, pose_fea1_075 = pose_net(resnet_fea_075, 'fc1_pose')
        pose_out2_075, pose_fea2_075 = pose_refine(pose_out1_075, parsing_out1_075, pose_fea1_075, name='fc2_pose')
        parsing_out2_075, parsing_fea2_075 = parsing_refine(parsing_out1_075, pose_out1_075, parsing_fea1_075, name='fc2_parsing')
        parsing_out3_075, parsing_fea3_075 = parsing_refine(parsing_out2_075, pose_out2_075, parsing_fea2_075, name='fc3_parsing')

    with tf.variable_scope('', reuse=True):
        pose_out1_125, pose_fea1_125 = pose_net(resnet_fea_125, 'fc1_pose')
        pose_out2_125, pose_fea2_125 = pose_refine(pose_out1_125, parsing_out1_125, pose_fea1_125, name='fc2_pose')
        parsing_out2_125, parsing_fea2_125 = parsing_refine(parsing_out1_125, pose_out1_125, parsing_fea1_125, name='fc2_parsing')
        parsing_out3_125, parsing_fea3_125 = parsing_refine(parsing_out2_125, pose_out2_125, parsing_fea2_125, name='fc3_parsing')


    # For each of the three parsing stages: resize the outputs of all scales to
    # the spatial size of the un-resized input batch and average them.
    parsing_out1 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out1_100, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out1_075, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out1_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)
    parsing_out2 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out2_100, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out2_075, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out2_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)
    parsing_out3 = tf.reduce_mean(tf.stack([tf.image.resize_images(parsing_out3_100, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out3_075, tf.shape(image_batch_origin)[1:3,]),
                                           tf.image.resize_images(parsing_out3_125, tf.shape(image_batch_origin)[1:3,])]), axis=0)

    # Average the three stages, then split the batch of two back into the
    # prediction for the image as read (head) and for its flipped copy (tail).
    raw_output = tf.reduce_mean(tf.stack([parsing_out1, parsing_out2, parsing_out3]), axis=0)
    head_output, tail_output = tf.unstack(raw_output, num=2, axis=0)
    # NOTE(review): the channel count is hard-coded to 20 here and below,
    # independent of --num-classes; other values will not work as written.
    tail_list = tf.unstack(tail_output, num=20, axis=2)
    tail_list_rev = [None] * 20
    # Channels 0-13 are kept in place; channel pairs (14,15), (16,17), (18,19)
    # are swapped (presumably left/right body-part labels that exchange roles
    # in the flipped image - confirm against the label definition).
    for xx in range(14):
        tail_list_rev[xx] = tail_list[xx]
    tail_list_rev[14] = tail_list[15]
    tail_list_rev[15] = tail_list[14]
    tail_list_rev[16] = tail_list[17]
    tail_list_rev[17] = tail_list[16]
    tail_list_rev[18] = tail_list[19]
    tail_list_rev[19] = tail_list[18]
    tail_output_rev = tf.stack(tail_list_rev, axis=2)
    # Reverse axis 1 again so the tail prediction lines up with the head one.
    tail_output_rev = tf.reverse(tail_output_rev, tf.stack([1]))


    # Average both predictions, restore a batch axis and take the per-pixel
    # argmax over the channel axis.
    raw_output_all = tf.reduce_mean(tf.stack([head_output, tail_output_rev]), axis=0)
    raw_output_all = tf.expand_dims(raw_output_all, dim=0)
    raw_output_all = tf.argmax(raw_output_all, dimension=3)
    pred_all = tf.expand_dims(raw_output_all, dim=3) # Create 4-d tensor.

    # Which variables to load.
    restore_var = tf.global_variables()
    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    # NOTE(review): when load() reports failure the script only prints a message
    # and carries on with the freshly initialised variables.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        if load(loader, sess, args.restore_from):
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)


    # Iterate over inference steps (one sess.run per step).
    # NOTE(review): assumes the reader yields images in image_list order so that
    # image_list[step] names the image predicted at this step - confirm.
    for step in tqdm(range(args.num_steps)):
        parsing_ = sess.run(pred_all)
        if step % 100 == 0:
            print('step {:d}'.format(step))
            print (image_list[step])
        img_split = image_list[step].split('/')
        # File name without its last four characters (e.g. a ".jpg"/".png" suffix).
        img_id = img_split[-1][:-4]

        msk = decode_labels(parsing_, num_classes=args.num_classes)
        parsing_im = Image.fromarray(msk[0])
        parsing_im.save('{}/{}_vis.png'.format(args.output, img_id))
        # Raw class indices, one value per pixel.
        cv2.imwrite('{}/{}.png'.format(args.output, img_id), parsing_[0,:,:,0])

    coord.request_stop()
    coord.join(threads)
Exemplo n.º 32
0
    def __init__(self):
        """Read the training configuration and build the complete training graph.

        Creates, in this order: the datasets and session, the input
        placeholders, the YOLOV3 model and its loss, the learning-rate
        schedule, the moving-average op, the two training ops (frozen backbone
        / all variables), the loader and saver, and the summary writer.

        Side effect: the directory ``./data/log/`` is deleted if it exists and
        created again.
        """
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        # "FISRT" is the spelling of the configuration key itself.
        self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                  time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale = 150
        # Not read anywhere else in this method.
        self.train_logdir = "./data/log/train"
        self.trainset = Dataset('train')
        self.testset = Dataset('test')
        # len(trainset) is used as the number of steps in one epoch.
        self.steps_per_period = len(self.trainset)
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))

        # Placeholders are declared without a shape.
        with tf.name_scope('define_input'):
            self.input_data = tf.placeholder(dtype=tf.float32,
                                             name='input_data')
            self.label_sbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_sbbox')
            self.label_mbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_mbbox')
            self.label_lbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32,
                                               name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32,
                                               name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32,
                                               name='lbboxes')
            self.trainable = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            # Snapshot taken right after the model is built: it holds only the
            # variables that exist at this point, i.e. none of the ones created
            # further down (global_step, moving averages, optimizer state).
            # self.loader below is restricted to this list.
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                self.label_sbbox, self.label_mbbox, self.label_lbbox,
                self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            # Step counter kept as float64 (starting at 1.0) so it can be used
            # directly in the schedule arithmetic below.
            self.global_step = tf.Variable(1.0,
                                           dtype=tf.float64,
                                           trainable=False,
                                           name='global_step')
            warmup_steps = tf.constant(self.warmup_periods *
                                       self.steps_per_period,
                                       dtype=tf.float64,
                                       name='warmup_steps')
            train_steps = tf.constant(
                (self.first_stage_epochs + self.second_stage_epochs) *
                self.steps_per_period,
                dtype=tf.float64,
                name='train_steps')
            # Linear ramp up to learn_rate_init while global_step < warmup_steps,
            # afterwards a half-cosine from learn_rate_init down to
            # learn_rate_end, which is reached at train_steps.
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: self.global_step / warmup_steps * self.
                learn_rate_init,
                false_fn=lambda: self.learn_rate_end + 0.5 *
                (self.learn_rate_init - self.learn_rate_end) * (1 + tf.cos(
                    (self.global_step - warmup_steps) /
                    (train_steps - warmup_steps) * np.pi)))
            # Shared by both training ops below.
            global_step_update = tf.assign_add(self.global_step, 1.0)

        # Despite the scope name, this builds the op that maintains exponential
        # moving averages of all trainable variables.
        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(
                self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            # First stage optimises only the variables whose top-level scope is
            # one of the three conv_*bbox scopes; everything else stays frozen.
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in [
                        'conv_sbbox', 'conv_mbbox', 'conv_lbbox'
                ]:
                    self.first_stage_trainable_var_list.append(var)

            first_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=self.first_stage_trainable_var_list)
            # The no_op depends on: UPDATE_OPS collection, the optimizer step,
            # the global_step increment and the moving-average update.
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            # Second stage optimises every trainable variable, with its own
            # Adam optimizer instance.
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=second_stage_trainable_var_list)

            # Same dependency chain as for the first stage.
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            # loader: only the variables captured in self.net_var above.
            # saver: every global variable existing now, keeping 10 checkpoints.
            self.loader = tf.train.Saver(self.net_var)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate", self.learn_rate)
            tf.summary.scalar("giou_loss", self.giou_loss)
            tf.summary.scalar("conf_loss", self.conf_loss)
            tf.summary.scalar("prob_loss", self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)

            # Start from an empty log directory: any previous content is removed.
            logdir = "./data/log/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer = tf.summary.FileWriter(logdir,
                                                        graph=self.sess.graph)
Exemplo n.º 33
0
def main(_):
    tic = time.time()
    print('tensorflow version:', tf.__version__)
    tf.logging.set_verbosity(tf.logging.INFO)
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    # init
    net_name_scope_pruned = FLAGS.net_name_scope_pruned
    net_name_scope_checkpoint = FLAGS.net_name_scope_checkpoint
    indexed_prune_scopes_for_units = valid_indexed_prune_scopes_for_units
    kept_percentages = sorted(map(float, FLAGS.kept_percentages.split(',')))

    num_options = len(kept_percentages)
    num_units = len(indexed_prune_scopes_for_units)
    print('num_options=%d, num_blocks=%d' % (num_options, num_units))
    print('HG: total number of configurations=%d' % (num_options**num_units))

    # find the  configurations to evaluate
    if FLAGS.configuration_type == 'sample':
        configs = get_sampled_configurations(num_units, num_options,
                                             FLAGS.total_num_configurations)
    elif FLAGS.configuration_type == 'special':
        configs = get_special_configurations(num_units, num_options)
    num_configurations = len(configs)

    #Getting MPI rank integer
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    if rank >= num_configurations:
        print("ERROR: rank(%d) > num_configurations(%d)" %
              (rank, num_configurations))
        return
    FLAGS.configuration_index = FLAGS.start_configuration_index + rank
    config = configs[FLAGS.configuration_index]
    print('HG: kept_percentages=%s, start_config_index=%d, num_configs=%d, rank=%d, config_index=%d' \
           %(str(kept_percentages), FLAGS.start_configuration_index, num_configurations, rank, FLAGS.configuration_index))

    # prepare for training with the specific config
    indexed_prune_scopes, kept_percentage = config_to_indexed_prune_scopes(
        config, indexed_prune_scopes_for_units, kept_percentages)
    prune_info = indexed_prune_scopes_to_prune_info(indexed_prune_scopes,
                                                    kept_percentage)

    # prepare file system
    results_dir = os.path.join(
        FLAGS.train_dir, 'id' +
        str(FLAGS.configuration_index))  #+'_'+str(FLAGS.max_number_of_steps))
    train_dir = os.path.join(results_dir, 'train')

    if (not FLAGS.continue_training) or (
            not tf.train.latest_checkpoint(train_dir)):
        prune_scopes = indexed_prune_scopes_to_prune_scopes(
            indexed_prune_scopes, net_name_scope_checkpoint)
        shorten_scopes = indexed_prune_scopes_to_shorten_scopes(
            indexed_prune_scopes, net_name_scope_checkpoint)
        variables_init_value = get_init_values_for_pruned_layers(
            prune_scopes, shorten_scopes, kept_percentage)
        reinit_scopes = [
            re.sub(net_name_scope_checkpoint, net_name_scope_pruned, v)
            for v in prune_scopes + shorten_scopes
        ]

        prepare_file_system(train_dir)

    def write_detailed_info(info):
        with open(os.path.join(train_dir, 'train_details.txt'), 'a') as f:
            f.write(info + '\n')

    info = 'train_dir:' + train_dir + '\n'
    info += 'options:' + str(kept_percentages) + '\n'
    info += 'configuration: ' + str(config) + '\n'
    info += 'indexed_prune_scopes: ' + str(indexed_prune_scopes) + '\n'
    info += 'kept_percentage: ' + str(kept_percentage)
    print(info)
    write_detailed_info(info)

    with tf.Graph().as_default():

        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.train_dataset_name,
                                              FLAGS.dataset_dir)
        test_dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                                   FLAGS.test_dataset_name,
                                                   FLAGS.dataset_dir)

        batch_queue = train_inputs(dataset, deploy_config, FLAGS)
        test_images, test_labels = test_inputs(test_dataset, deploy_config,
                                               FLAGS)
        images, labels = batch_queue.dequeue()

        ######################
        # Select the network#
        ######################

        network_fn_pruned = nets_factory.get_network_fn_pruned(
            FLAGS.model_name,
            prune_info=prune_info,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay)
        print('HG: prune_info:')
        pprint(prune_info)

        ####################
        # Define the model #
        ####################
        logits_train, _ = network_fn_pruned(images,
                                            is_training=True,
                                            is_local_train=False,
                                            reuse_variables=False,
                                            scope=net_name_scope_pruned)
        logits_eval, _ = network_fn_pruned(test_images,
                                           is_training=False,
                                           is_local_train=False,
                                           reuse_variables=True,
                                           scope=net_name_scope_pruned)

        cross_entropy = add_cross_entropy(logits_train, labels)
        correct_prediction = add_correct_prediction(logits_eval, test_labels)

        #############################
        # Specify the loss function #
        #############################
        tf.add_to_collection('subgraph_losses', cross_entropy)
        # get regularization loss
        regularization_losses = get_regularization_losses_within_scopes()
        print_list('regularization_losses', regularization_losses)

        # total loss and its summary
        total_loss = tf.add_n(tf.get_collection('subgraph_losses'),
                              name='total_loss')
        for l in tf.get_collection('subgraph_losses') + [total_loss]:
            tf.summary.scalar(l.op.name + '/summary', l)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.variables_device()):
            global_step = tf.Variable(0, trainable=False, name='global_step')

        with tf.device(deploy_config.optimizer_device()):
            learning_rate = configure_learning_rate(dataset.num_samples,
                                                    global_step, FLAGS)
            optimizer = configure_optimizer(learning_rate, FLAGS)
            tf.summary.scalar('learning_rate', learning_rate)

        #############################
        # Add train operation       #
        #############################
        variables_to_train = get_trainable_variables_within_scopes()
        train_op = add_train_op(optimizer,
                                total_loss,
                                global_step,
                                var_list=variables_to_train)
        print_list("variables_to_train", variables_to_train)

        # Gather update_ops: the updates for the batch_norm variables created by network_fn_pruned.
        update_ops = get_update_ops_within_scopes()
        print_list("update_ops", update_ops)

        # add train_tensor
        update_ops.append(train_op)
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # add summary op
        summary_op = tf.summary.merge_all()

        print("HG: trainable_variables=", len(tf.trainable_variables()))
        print("HG: model_variables=", len(tf.model_variables()))
        print("HG: global_variables=", len(tf.global_variables()))

        sess_config = tf.ConfigProto(intra_op_parallelism_threads=16,
                                     inter_op_parallelism_threads=16)
        with tf.Session(config=sess_config) as sess:
            ###########################
            # Prepare for filewriter. #
            ###########################
            train_writer = tf.summary.FileWriter(train_dir, sess.graph)

            # if restart the training or there is no checkpoint in the train_dir
            if (not FLAGS.continue_training) or (
                    not tf.train.latest_checkpoint(train_dir)):
                #########################################
                # Reinit  pruned model variable  #
                #########################################
                variables_to_reinit = get_model_variables_within_scopes(
                    reinit_scopes)
                print_list("Initialize pruned variables", variables_to_reinit)
                assign_ops = []
                for v in variables_to_reinit:
                    key = re.sub(net_name_scope_pruned,
                                 net_name_scope_checkpoint, v.op.name)
                    if key in variables_init_value:
                        value = variables_init_value.get(key)
                        # print(key, value)
                        assign_ops.append(
                            tf.assign(v,
                                      tf.convert_to_tensor(value),
                                      validate_shape=True))
                        # v.set_shape(value.shape)
                    else:
                        raise ValueError(
                            "Key not in variables_init_value, key=", key)
                assign_op = tf.group(*assign_ops)
                sess.run(assign_op)

                #################################################
                # Restore unchanged model variable. #
                #################################################
                variables_to_restore = {
                    re.sub(net_name_scope_pruned, net_name_scope_checkpoint,
                           v.op.name): v
                    for v in get_model_variables_within_scopes()
                    if v not in variables_to_reinit
                }
                print_list("restore model variables",
                           variables_to_restore.values())
                load_checkpoint(sess,
                                FLAGS.checkpoint_path,
                                var_list=variables_to_restore)

            else:
                ###########################################
                ## Restore all variables from checkpoint ##
                ###########################################
                variables_to_restore = get_global_variables_within_scopes()
                load_checkpoint(sess, train_dir, var_list=variables_to_restore)

            #################################################
            # init unitialized global variable. #
            #################################################
            variables_to_init = get_global_variables_within_scopes(
                sess.run(tf.report_uninitialized_variables()))
            print_list("init unitialized variables", variables_to_init)
            sess.run(tf.variables_initializer(variables_to_init))

            init_global_step_value = sess.run(global_step)
            print('initial global step: ', init_global_step_value)
            if init_global_step_value >= FLAGS.max_number_of_steps:
                print('Exit: init_global_step_value (%d) >= FLAG.max_number_of_steps (%d)' \
                    %(init_global_step_value, FLAGS.max_number_of_steps))
                return

            ###########################
            # Record CPU usage  #
            ###########################
            mpstat_output_filename = os.path.join(train_dir, "cpu-usage.log")
            os.system("mpstat -P ALL 1 > " + mpstat_output_filename +
                      " 2>&1 &")

            ###########################
            # Kicks off the training. #
            ###########################
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            saver = tf.train.Saver(max_to_keep=FLAGS.max_to_keep)
            print('HG: # of threads=', len(threads))

            duration = 0
            duration_cnt = 0
            train_time = 0
            train_only_cnt = 0

            print("start to train at:", datetime.now())
            for i in range(init_global_step_value,
                           FLAGS.max_number_of_steps + 1):
                # run optional meta data, or summary, while run train tensor
                #if i < FLAGS.max_number_of_steps:
                if i > init_global_step_value:
                    # train while run metadata
                    if i % FLAGS.runmeta_every_n_steps == FLAGS.runmeta_every_n_steps - 1:
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_metadata = tf.RunMetadata()

                        loss_value = sess.run(train_tensor,
                                              options=run_options,
                                              run_metadata=run_metadata)
                        train_writer.add_run_metadata(run_metadata,
                                                      'step%d-train' % i)

                        # Create the Timeline object, and write it to a json file
                        fetched_timeline = timeline.Timeline(
                            run_metadata.step_stats)
                        chrome_trace = fetched_timeline.generate_chrome_trace_format(
                        )
                        with open(
                                os.path.join(train_dir,
                                             'timeline_' + str(i) + '.json'),
                                'w') as f:
                            f.write(chrome_trace)

                    # train while record summary
                    elif i % FLAGS.summary_every_n_steps == 0:
                        train_summary, loss_value = sess.run(
                            [summary_op, train_tensor])
                        train_writer.add_summary(train_summary, i)

                    # train only
                    else:
                        start_time = time.time()
                        loss_value = sess.run(train_tensor)
                        train_only_cnt += 1
                        train_time += time.time() - start_time
                        duration_cnt += 1
                        duration += time.time() - start_time

                    # log loss information
                    if i % FLAGS.log_every_n_steps == 0 and duration_cnt > 0:
                        log_frequency = duration_cnt
                        examples_per_sec = log_frequency * FLAGS.batch_size / duration
                        sec_per_batch = float(duration / log_frequency)
                        summary = tf.Summary()
                        summary.value.add(tag='examples_per_sec',
                                          simple_value=examples_per_sec)
                        summary.value.add(tag='sec_per_batch',
                                          simple_value=sec_per_batch)
                        train_writer.add_summary(summary, i)
                        format_str = (
                            '%s: step %d, loss = %.3f (%.1f examples/sec; %.3f sec/batch)'
                        )
                        print(format_str % (datetime.now(), i, loss_value,
                                            examples_per_sec, sec_per_batch))
                        duration = 0
                        duration_cnt = 0

                        info = format_str % (datetime.now(), i, loss_value,
                                             examples_per_sec, sec_per_batch)
                        write_detailed_info(info)
                else:
                    # run only total loss when i=0
                    train_summary, loss_value = sess.run(
                        [summary_op,
                         total_loss])  #loss_value = sess.run(total_loss)
                    train_writer.add_summary(train_summary, i)
                    format_str = ('%s: step %d, loss = %.3f')
                    print(format_str % (datetime.now(), i, loss_value))
                    info = format_str % (datetime.now(), i, loss_value)
                    write_detailed_info(info)

                # record the evaluation accuracy
                is_last_step = (i == FLAGS.max_number_of_steps)
                if i % FLAGS.evaluate_every_n_steps == 0 or is_last_step:
                    #run_meta = (i==FLAGS.evaluate_every_n_steps)
                    test_accuracy, run_metadata = evaluate_accuracy(
                        sess,
                        coord,
                        test_dataset.num_samples,
                        test_images,
                        test_labels,
                        test_images,
                        test_labels,
                        correct_prediction,
                        FLAGS.test_batch_size,
                        run_meta=False)
                    summary = tf.Summary()
                    summary.value.add(tag='accuracy',
                                      simple_value=test_accuracy)
                    train_writer.add_summary(summary, i)
                    #if run_meta:
                    #    eval_writer.add_run_metadata(run_metadata, 'step%d-eval' % i)

                    info = ('%s: step %d, test_accuracy = %.6f') % (
                        datetime.now(), i, test_accuracy)
                    print(info)
                    write_detailed_info(info)

                    ###########################
                    # Save model parameters . #
                    ###########################
                    #saver = tf.train.Saver(var_list=get_model_variables_within_scopes([net_name_scope_pruned+'/']))
                    save_path = saver.save(
                        sess, os.path.join(train_dir, 'model.ckpt-' + str(i)))
                    print("HG: Model saved in file: %s" % save_path)

            coord.request_stop()
            coord.join(threads)
            total_time = time.time() - tic
            train_speed = train_time * 1.0 / train_only_cnt
            train_time = train_speed * (
                FLAGS.max_number_of_steps
            )  # - init_global_step_value) #/train_only_cnt
            info = "HG: training speed(sec/batch): %.6f\n" % (train_speed)
            info += "HG: training time(min): %.1f, total time(min): %.1f" % (
                train_time / 60.0, total_time / 60.0)
            print(info)
            write_detailed_info(info)
Exemplo n.º 34
0
		
		# Train summaries for tensorboard
		train_summary_op = tf.summary.merge([loss_gen_summary,loss_discr_lab_summary, loss_discr_score_summary])
		train_summary_dir = os.path.join(out_dir, "summaries", "train")
		train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)
		
		# Dev summaries for tensorboard
		dev_summary_op = tf.summary.merge([loss_discr_score_summary])
		dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
		dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

		checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
		checkpoint_prefix = os.path.join(checkpoint_dir, "model")
		if not os.path.exists(checkpoint_dir):
			os.makedirs(checkpoint_dir)
		saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

		# Initialize all variables
		sess.run(tf.global_variables_initializer())

		def train_step(batch_noise, batch_img_score, batch_score, batch_img_lab, batch_lab, train_gen):
			"""
			A single training step i.e. training the score discriminator, the label discriminator and if 
			necessary the generator.
			Args:
				batch_noise: input batch for the generator
				batch_img_score: input batch for the score discriminator
				batch_img_lab: input batch for the label discriminator 
				batch_score: scores corresponding the input score images
				batch_lab: labels corresponding the input label images
				train_gen(bool): whether or not to train the generator 
Exemplo n.º 35
0
 def get_vars(self, scope):
     """Return the global variables belonging to ``scope``, sorted by name.

     A variable is selected when its name contains ``<self.directory>/<scope>``
     as a substring.
     """
     scope_path = "/".join([self.directory, scope])
     return sorted(
         (var for var in tf.global_variables() if scope_path in var.name),
         key=lambda var: var.name)
Exemplo n.º 36
0
def train(x_train, y_train, vocab_processor, x_dev, y_dev, embedding):
    """Build the model selected by ``params.model_version`` and train it.

    The function creates a fresh graph and session, builds the requested
    network, trains it with Adam on mini-batches produced by
    ``data_helpers.batch_iter``, periodically evaluates on the dev set and
    periodically writes checkpoints under ``runs/<model>_<timestamp>/``.

    Args:
        x_train: training inputs; ``x_train.shape[1]`` is used as the
            sequence length.
        y_train: training targets; ``y_train.shape[1]`` is used as the number
            of classes.
        vocab_processor: object exposing ``vocabulary_`` (its length is the
            vocabulary size) and ``save(path)``.
        x_dev: dev-set inputs, evaluated in a single batch.
        y_dev: dev-set targets.
        embedding: value fed to ``model.embedding_placeholder`` when running
            ``model.embedding_init``.

    Raises:
        AttributeError: if ``params.model_version`` is not one of the
            supported model names.
    """
    # NOTE(review): hyper-parameters are read from both ``FLAGS`` and
    # ``params`` (e.g. FLAGS.embedding_dim vs params.embedding_dim,
    # FLAGS.checkpoint_every vs params.evaluate_every) -- confirm the two
    # sources are kept in sync.

    # Training
    # ==================================================

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)

        if params.model_version == 'TextCNN':
            model = TextCNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda)

        elif params.model_version == 'CNN_LSTM':
            model = CNN_LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_hidden=params.num_hidden,
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda)

        elif params.model_version == 'LSTM_CNN':
            model = LSTM_CNN(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, params.filter_sizes.split(","))),
                num_filters=params.num_filters,
                l2_reg_lambda=params.l2_reg_lambda,
                num_hidden=params.num_hidden)

        elif params.model_version == 'LSTM':
            model = LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                num_hidden=params.num_hidden,
                embedding_size=params.embedding_dim,
                l2_reg_lambda=params.l2_reg_lambda)

        elif params.model_version == 'Bi_LSTM':
            model = Bi_LSTM(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                num_hidden=params.num_hidden,
                embedding_size=params.embedding_dim,
                l2_reg_lambda=params.l2_reg_lambda)

        else:
            raise AttributeError("No model found at model_dir")

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = '{}_'.format(params.model_version) + str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        logging.critical("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", model.loss)
        acc_summary = tf.summary.scalar("accuracy", model.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=params.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables FIRST, then load the embedding. The global
        # initializer resets every variable, so running it after
        # ``embedding_init`` (presumably an assign op on the embedding matrix
        # -- confirm in the model class) would discard the loaded values.
        sess.run(tf.global_variables_initializer())
        sess.run(model.embedding_init, feed_dict={model.embedding_placeholder: embedding})

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                model.input_x: x_batch,
                model.input_y: y_batch,
                model.dropout_keep_prob: params.dropout_keep_prob
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, model.loss, model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            logging.critical("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
                model.input_x: x_batch,
                model.input_y: y_batch,
                # Dropout must be disabled for evaluation: keep every unit.
                model.dropout_keep_prob: 1.0
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, model.loss, model.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            logging.critical("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        # Generate batches
        batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), params.batch_size, params.num_epochs)
        try:
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % params.evaluate_every == 0:
                    logging.info("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    logging.info("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    logging.info("Saved model checkpoint to {}\n".format(path))
        finally:
            # Flush pending summaries and release the session even when
            # training is interrupted.
            train_summary_writer.close()
            dev_summary_writer.close()
            sess.close()
Exemplo n.º 37
0
def main(argv=None):
    """Build the multi-GPU training graph and run the training loop.

    Prepares ``FLAGS.checkpoint_path`` (wiping it unless ``FLAGS.restore`` is
    set), creates the input placeholders, builds one tower per entry of the
    module-level ``gpus`` list, averages the tower gradients, and then trains
    for ``FLAGS.max_steps`` steps while periodically logging, checkpointing
    and writing summaries.

    Args:
        argv: unused; accepted so the function can serve as an app entry point.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    # The geometry map has 5 channels for RBOX and 8 otherwise.
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split the batch evenly across the towers
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                # NOTE(review): this name is rebound on every iteration, so
                # only the last tower's update ops (and losses) are used below.
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                # Every tower after the first reuses the variables.
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    # Join with os.path so the checkpoint lands inside the directory even when
    # FLAGS.checkpoint_path has no trailing separator; otherwise
    # tf.train.latest_checkpoint(FLAGS.checkpoint_path) cannot find it later.
    checkpoint_prefix = os.path.join(FLAGS.checkpoint_path, 'model.ckpt')

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))

        start = time.time()
        try:
            for step in range(FLAGS.max_steps):
                data = next(data_generator)
                feed_dict = {
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                }
                ml, tl, _ = sess.run([model_loss, total_loss, train_op],
                                     feed_dict=feed_dict)
                if np.isnan(tl):
                    print('Loss diverged, stop training')
                    break

                if step % 10 == 0:
                    avg_time_per_step = (time.time() - start) / 10
                    avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                               len(gpus)) / (time.time() - start)
                    start = time.time()
                    print(
                        'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                        .format(step, ml, tl, avg_time_per_step,
                                avg_examples_per_second))

                if step % FLAGS.save_checkpoint_steps == 0:
                    saver.save(sess, checkpoint_prefix, global_step=global_step)

                if step % FLAGS.save_summary_steps == 0:
                    # NOTE(review): this runs train_op a second time on the
                    # same batch; kept as in the original.
                    _, tl, summary_str = sess.run(
                        [train_op, total_loss, summary_op],
                        feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, global_step=step)
        finally:
            # Flush buffered summary events before leaving.
            summary_writer.close()
def run():
    """Train the model described by the module-level ``args``.

    Creates the data loader and the model, optionally restores a saved
    checkpoint when ``config.pkl`` exists in ``args.model_dir``, then trains
    for ``args.num_epochs`` epochs. Summaries are written to
    ``<model_dir>/log`` on every batch; a checkpoint plus the pickled ``args``
    are written every ``args.save_every`` global batches.
    """
    # Create the data loader object.
    data_loader = utils.DataLoader(args.batch_size,
                                   dataset_dirs=args.train_dataset,
                                   is_argumentation=args.data_argumentation,
                                   target_image_size=args.target_image_size)
    [args.norm_mean,
     args.norm_std] = [data_loader.norm_mean, data_loader.norm_std]

    # Create a Tensorflow Model
    model = Model(args)

    # Initialize a TensorFlow session
    with tf.Session() as sess:
        # A single saver is used for both restoring and saving. Building a new
        # Saver per checkpoint would add save/restore ops to the graph every
        # time. max_to_keep=None keeps every checkpoint file on disk.
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)

        # Initialize all the variables in the graph
        sess.run(tf.global_variables_initializer())

        # Summaries
        train_writer = tf.summary.FileWriter(
            os.path.join(args.model_dir, 'log'), sess.graph)
        tf.summary.scalar('total_loss', model.total_loss)
        tf.summary.scalar('trans_loss', model.trans_loss)
        tf.summary.scalar('rot_loss', model.rot_loss)
        tf.summary.scalar('rot_loss0', model.rot_loss0)
        tf.summary.scalar('lamda_sigmoid', model.lamda_weights_sigmoid)
        all_summaries = tf.summary.merge_all()

        try:
            if os.path.isfile(os.path.join(args.model_dir, 'config.pkl')):
                # Train from saved model.
                # NOTE(review): the checkpoint name is hard-coded.
                ckpt_file = os.path.join(args.model_dir, 'model-53.ckpt-48000')
                print('loading model: ', ckpt_file)
                # Restore the model at the checkpoint
                saver.restore(sess, ckpt_file)
                print('model restored.')

            # Assign the global step
            sess.run(tf.assign(model.global_step, 0))

            # Training loop
            for e in range(args.num_epochs):
                # Assign the learning rate (decayed acc. to the epoch number)
                sess.run(
                    tf.assign(
                        model.lr,
                        max(args.learning_rate_clip,
                            args.learning_rate * (args.decay_rate**e))))
                # shuffle_data
                data_loader.shuffle_data(mode='train')

                # Running sum of the batch losses in this epoch
                train_loss = 0.

                for b in range(data_loader.num_batches):
                    # Index of this batch counted from the start of training
                    batch_index = e * data_loader.num_batches + b

                    # Tic
                    start = time.time()
                    # Get the source and target data of the current batch
                    # x has the source data, y has the target data
                    x, y = data_loader.next_batch(b)

                    feed = {model.input_data: x, model.target_data: y}
                    # Fetch the losses, global step and summaries while
                    # running one training step on this batch
                    batch_total_loss, batch_trans_loss, batch_rot_loss, global_step, summaries, _ = sess.run(
                        [
                            model.total_loss, model.trans_loss, model.rot_loss,
                            model.global_step, all_summaries, model.train_op
                        ], feed)

                    train_writer.add_summary(summaries, batch_index)

                    # Toc
                    end = time.time()
                    # Print epoch, batch, loss and time taken
                    train_loss += batch_total_loss

                    if b % args.display == 0:
                        print(
                            "{}/{} (epoch {}), train_loss = {}, time/batch = {:.3f}, learning rate = {:.9f}"
                            .format(batch_index,
                                    args.num_epochs * data_loader.num_batches,
                                    e, train_loss / (b + 1), end - start,
                                    sess.run(model.lr)))

                    # Save the model if the current epoch and batch number
                    # match the frequency
                    if (batch_index + 1) % args.save_every == 0 and (
                            batch_index > 0):
                        checkpoint_path = os.path.join(
                            args.model_dir, 'model-{}.ckpt'.format(e))
                        saver.save(sess,
                                   checkpoint_path,
                                   global_step=global_step)
                        print("model saved to {}".format(checkpoint_path))

                        # Save the arguments in the config file
                        with open(os.path.join(args.model_dir, 'config.pkl'),
                                  'wb') as f:
                            pickle.dump(args, f)
        finally:
            # Flush buffered summary events before leaving.
            train_writer.close()
Exemplo n.º 39
0
    def __init__(self,
                 config,
                 use_lstm=False,
                 num_samples=512,
                 forward=False,
                 scope_name='gen_seq2seq',
                 dtype=tf.float32):
        """Build the generator seq2seq graph inside ``scope_name``.

        Creates the input placeholders sized by the largest bucket, the
        bucketed seq2seq outputs/losses, one reward-weighted gradient update
        op per bucket, and a saver restricted to this scope's variables.

        Args:
            config: object providing ``vocab_size``, ``buckets``,
                ``learning_rate``, ``learning_rate_decay_factor``,
                ``batch_size``, ``emb_dim``, ``num_layers`` and
                ``max_gradient_norm``.
            use_lstm: use ``BasicLSTMCell`` instead of ``GRUCell``.
            num_samples: not referenced in this constructor.
            forward: not referenced in this constructor.
            scope_name: variable scope under which everything is built; also
                used as the substring filter for this model's variables.
            dtype: dtype of the learning rate variable and the target weight
                placeholders; also passed to the seq2seq function.
        """
        self.scope_name = scope_name
        with tf.variable_scope(self.scope_name):
            # Plain hyper-parameters copied from the config.
            self.source_vocab_size = config.vocab_size
            self.target_vocab_size = config.vocab_size
            self.buckets = config.buckets

            # The two variables below are unnamed; keep their creation order
            # (learning rate first, then global step).
            self.learning_rate = tf.Variable(float(config.learning_rate),
                                             trainable=False,
                                             dtype=dtype)
            decayed_rate = (self.learning_rate *
                            config.learning_rate_decay_factor)
            self.learning_rate_decay_op = self.learning_rate.assign(
                decayed_rate)
            self.global_step = tf.Variable(0, trainable=False)

            self.batch_size = config.batch_size
            self.emb_dim = config.emb_dim
            self.num_layers = config.num_layers
            self.max_gradient_norm = config.max_gradient_norm

            # Boolean switches fed at run time.
            self.mc_search = tf.placeholder(tf.bool, name="mc_search")
            self.forward_only = tf.placeholder(tf.bool, name="forward_only")

            # No sampled softmax here, so neither a projection nor a custom
            # loss function is supplied.
            output_projection = None
            softmax_loss_function = None

            # Recurrent cell: GRU by default, LSTM on request, stacked when
            # more than one layer is configured.
            base_cell = tf.nn.rnn_cell.GRUCell(self.emb_dim)
            if use_lstm:
                base_cell = tf.nn.rnn_cell.BasicLSTMCell(self.emb_dim)
            if self.num_layers > 1:
                cell = tf.nn.rnn_cell.MultiRNNCell([base_cell] *
                                                   self.num_layers)
            else:
                cell = base_cell

            def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
                """Embedding + attention seq2seq over the shared cell."""
                return rl_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs,
                    decoder_inputs,
                    cell,
                    num_encoder_symbols=self.source_vocab_size,
                    num_decoder_symbols=self.target_vocab_size,
                    embedding_size=self.emb_dim,
                    output_projection=output_projection,
                    feed_previous=do_decode,
                    mc_search=self.mc_search,
                    dtype=dtype)

            # Input feeds, sized by the last (largest) bucket.
            self.encoder_inputs = [
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(step))
                for step in range(self.buckets[-1][0])
            ]
            self.decoder_inputs = []
            self.target_weights = []
            for step in range(self.buckets[-1][1] + 1):
                decoder_feed = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="decoder{0}".format(step))
                self.decoder_inputs.append(decoder_feed)
                weight_feed = tf.placeholder(dtype,
                                             shape=[None],
                                             name="weight{0}".format(step))
                self.target_weights.append(weight_feed)

            # One reward placeholder per bucket.
            self.reward = []
            for bucket_id in range(len(self.buckets)):
                self.reward.append(
                    tf.placeholder(tf.float32, name="reward_%i" % bucket_id))

            # Our targets are decoder inputs shifted by one.
            targets = self.decoder_inputs[1:]

            self.outputs, self.losses, self.encoder_state = rl_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                self.buckets,
                self.emb_dim,
                self.batch_size,
                lambda x, y: seq2seq_f(
                    x, y, tf.select(self.forward_only, True, False)),
                output_projection=output_projection,
                softmax_loss_function=softmax_loss_function)

            with tf.name_scope("gradient_descent"):
                self.gradient_norms = []
                self.updates = []
                # Only trainable variables whose name contains the scope name.
                self.gen_params = [
                    var for var in tf.trainable_variables()
                    if self.scope_name in var.name
                ]
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                for bucket_id in range(len(self.buckets)):
                    # Weight the bucket loss by its reward, then clip the
                    # gradients by global norm before applying them.
                    weighted_loss = tf.mul(self.losses[bucket_id],
                                           self.reward[bucket_id])
                    raw_gradients = tf.gradients(weighted_loss,
                                                 self.gen_params)
                    clipped_gradients, global_norm = tf.clip_by_global_norm(
                        raw_gradients, self.max_gradient_norm)
                    self.gradient_norms.append(global_norm)
                    update_op = opt.apply_gradients(
                        zip(clipped_gradients, self.gen_params),
                        global_step=self.global_step)
                    self.updates.append(update_op)

            # Saver limited to the global variables of this scope.
            self.gen_variables = [
                var for var in tf.global_variables()
                if self.scope_name in var.name
            ]
            self.saver = tf.train.Saver(self.gen_variables)
Exemplo n.º 40
0
def train_textCNN2Input(x_train, x_1_train,y_train, vocab_processor, x_dev,x_1_dev, y_dev, parameter,gpu_id):
    """Train a two-input TextCNN and return the path of the last checkpoint.

    A ``TextCNN2Input`` model is built in a fresh graph, optimised with Adam on
    batches produced by ``batch_iter``, evaluated on the dev data every
    ``parameter["evaluate_every"]`` steps and checkpointed every
    ``parameter["checkpoint_every"]`` steps below
    ``./textCNN2Inputs_runs/<timestamp>/``.

    Args:
        x_train: First training input; ``x_train.shape[1]`` is passed to the
            model as ``sequence_length``.
        x_1_train: Second training input; ``x_1_train.shape[1]`` is passed as
            ``sequence_length_1``.
        y_train: Training labels; ``y_train.shape[1]`` is passed as
            ``num_classes``.
        vocab_processor: Object exposing ``vocabulary_`` and ``save(path)``.
        x_dev: First dev input, fed as a single batch at evaluation time.
        x_1_dev: Second dev input.
        y_dev: Dev labels.
        parameter: Dict of settings. Keys read here: ``allow_soft_placement``,
            ``log_device_placement``, ``embedding_dim``, ``filter_sizes``
            (comma separated string), ``num_filters``, ``l2_reg_lambda``,
            ``learning_rate``, ``num_checkpoints``, ``batch_size``,
            ``num_epochs``, ``dropout_keep_prob``, ``evaluate_every``,
            ``checkpoint_every`` and, optionally, ``init_checkpoint``.
        gpu_id: Forwarded to ``TextCNN2Input`` as ``gpu``.

    Returns:
        The value returned by the last ``saver.save`` call, or ``None`` when
        no checkpoint was written.
    """
    path=None
    import tensorflow as tf
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          allow_soft_placement=parameter["allow_soft_placement"],
          log_device_placement=parameter["log_device_placement"])
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN2Input(
                sequence_length=x_train.shape[1],
                sequence_length_1=x_1_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=parameter["embedding_dim"],
                filter_sizes=list(map(int, parameter["filter_sizes"].split(","))),
                num_filters=parameter["num_filters"],
                l2_reg_lambda=parameter["l2_reg_lambda"],
                gpu=gpu_id)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(parameter["learning_rate"])
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "textCNN2Inputs_runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=parameter["num_checkpoints"])

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "textCNN_vocab"))

            # Initialize all variables
            if "init_checkpoint" in parameter:
                # init_from_checkpoint returns None: it only rewires the
                # initializers of the matching variables so that they load from
                # the checkpoint. Its result must therefore not be handed to
                # sess.run, and the initializers still have to be executed.
                tf.train.init_from_checkpoint(parameter['init_checkpoint'],{'/':'/'})
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, x_1_batch,y_batch,dropout_keep_prob):
                """
                A single training step
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_x_1: x_1_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch,x_1_batch, y_batch, writer=None):
                """
                Evaluates model on a dev set
                """
                feed_dict = {
                  cnn.input_x: x_batch,
                  cnn.input_x_1: x_1_batch,
                  cnn.input_y: y_batch,
                  cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)


            # Generate batches
            batches = batch_iter(
                list(zip(x_train,x_1_train, y_train)), parameter["batch_size"], parameter["num_epochs"])
            # Training loop. For each batch...
            for batch in batches:
                x_batch,x_1_batch, y_batch = zip(*batch)
                train_step(x_batch,x_1_batch, y_batch, parameter["dropout_keep_prob"])
                current_step = tf.train.global_step(sess, global_step)
                if current_step % parameter["evaluate_every"] == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev,x_1_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % parameter["checkpoint_every"] == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
    return  path
Exemplo n.º 41
0
    def forward(self):
        """Build the forward graph, the loss and the optional EMA ops.

        The graph is assembled in five variable scopes: input embedding,
        embedding encoder, context-to-query attention, model encoder and
        output layer. The method returns nothing; it stores its results on
        the instance: ``c2q``, ``q2c``, ``enc``, ``logits``, ``yp1``, ``yp2``
        and ``loss``. When ``config.decay`` is set it also stores ``var_ema``
        and ``assign_vars``.
        """
        config = self.config
        # Only d and nh are used below; N, PL and QL are not read in this method.
        N, PL, QL, d, nh = config.batch_size if not self.demo else 1, self.c_maxlen, self.q_maxlen, config.hidden, config.num_heads

        with tf.variable_scope("Input_Embedding_Layer"):
            # Word embeddings for context (c) and question (q), with dropout.
            c_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.c),
                1.0 - self.dropout)
            q_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.word_mat, self.q),
                1.0 - self.dropout)
            # Same "highway" scope for both calls; the second one passes reuse=True.
            c_emb = highway(c_emb,
                            size=d,
                            scope="highway",
                            dropout=self.dropout,
                            reuse=None)
            q_emb = highway(q_emb,
                            size=d,
                            scope="highway",
                            dropout=self.dropout,
                            reuse=True)

        with tf.variable_scope("Embedding_Encoder_Layer"):
            c = residual_block(c_emb,
                               num_blocks=1,
                               num_conv_layers=4,
                               kernel_size=7,
                               mask=self.c_mask,
                               num_filters=d,
                               num_heads=nh,
                               seq_len=self.c_len,
                               scope="Encoder_Residual_Block",
                               bias=False,
                               dropout=self.dropout)
            q = residual_block(
                q_emb,
                num_blocks=1,
                num_conv_layers=4,
                kernel_size=7,
                mask=self.q_mask,
                num_filters=d,
                num_heads=nh,
                seq_len=self.q_len,
                scope="Encoder_Residual_Block",
                reuse=True,  # Share the weights between passage and question
                bias=False,
                dropout=self.dropout)

        with tf.variable_scope("Context_to_Query_Attention_Layer"):
            # S is the similarity tensor computed from the encoded context and question.
            S = optimized_trilinear_for_attention([c, q],
                                                  self.c_maxlen,
                                                  self.q_maxlen,
                                                  input_keep_prob=1.0 -
                                                  self.dropout)
            # S_: softmax of S masked with q_mask (default softmax axis).
            mask_q = tf.expand_dims(self.q_mask, 1)
            S_ = tf.nn.softmax(mask_logits(S, mask=mask_q))
            # S_T: softmax of S masked with c_mask along dim 1, then transposed to (0, 2, 1).
            mask_c = tf.expand_dims(self.c_mask, 2)
            S_T = tf.transpose(
                tf.nn.softmax(mask_logits(S, mask=mask_c), dim=1), (0, 2, 1))
            self.c2q = tf.matmul(S_, q)
            self.q2c = tf.matmul(tf.matmul(S_, S_T), c)
            attention_outputs = [c, self.c2q, c * self.c2q, c * self.q2c]

        with tf.variable_scope("Model_Encoder_Layer"):
            inputs = tf.concat(attention_outputs, axis=-1)
            # self.enc[0] is the projected attention output; three passes through the
            # "Model_Encoder" scope (reused after the first pass) append enc[1..3].
            self.enc = [conv(inputs, d, name="input_projection")]
            for i in range(3):
                if i % 2 == 0:  # dropout every 2 blocks
                    self.enc[i] = tf.nn.dropout(self.enc[i],
                                                1.0 - self.dropout)
                self.enc.append(
                    residual_block(self.enc[i],
                                   num_blocks=7,
                                   num_conv_layers=2,
                                   kernel_size=5,
                                   mask=self.c_mask,
                                   num_filters=d,
                                   num_heads=nh,
                                   seq_len=self.c_len,
                                   scope="Model_Encoder",
                                   bias=False,
                                   reuse=True if i > 0 else None,
                                   dropout=self.dropout))

        with tf.variable_scope("Output_Layer"):
            # Start logits come from enc[1] and enc[2]; end logits from enc[1] and enc[3].
            start_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[2]], axis=-1),
                     1,
                     bias=False,
                     name="start_pointer"), -1)
            end_logits = tf.squeeze(
                conv(tf.concat([self.enc[1], self.enc[3]], axis=-1),
                     1,
                     bias=False,
                     name="end_pointer"), -1)
            self.logits = [
                mask_logits(start_logits, mask=self.c_mask),
                mask_logits(end_logits, mask=self.c_mask)
            ]
            logits1, logits2 = [l for l in self.logits]

            # Outer product of the start and end probabilities.
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # num_lower=0, num_upper=-1: keep the main diagonal and everything above it.
            outer = tf.matrix_band_part(outer, 0, -1)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)

        if config.l2_norm is not None:
            # NOTE(review): `regularizer` is not defined in this method; presumably a
            # module-level name - confirm it is in scope.
            variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer, variables)
            self.loss += l2_loss

        if config.decay is not None:
            self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
            ema_op = self.var_ema.apply(tf.trainable_variables())
            # Re-create self.loss under a control dependency on the EMA update op.
            with tf.control_dependencies([ema_op]):
                self.loss = tf.identity(self.loss)

                # Ops that copy each available moving average back into its variable.
                self.assign_vars = []
                for var in tf.global_variables():
                    v = self.var_ema.average(var)
                    # Skips variables for which average() gives a falsy result
                    # (presumably None when the EMA does not track them).
                    if v:
                        self.assign_vars.append(tf.assign(var, v))
Exemplo n.º 42
0
def initialize():
    """Run the initializers of global variables not yet handled by this helper.

    Every variable initialized here is recorded in ``ALREADY_INITIALIZED`` so
    that later calls leave it alone.
    """
    pending = set(tf.global_variables()).difference(ALREADY_INITIALIZED)
    session = get_session()
    init_op = tf.variables_initializer(pending)
    session.run(init_op)
    ALREADY_INITIALIZED.update(pending)
Exemplo n.º 43
0
    def train(self):
        """Train the model, periodically evaluating and checkpointing it.

        Variables are initialised from the BERT checkpoint at
        ``self.__bert_checkpoint_path``. The method then iterates
        ``self.config["epochs"]`` times over the training batches produced by
        ``self.data_obj.next_batch``. Every ``self.config["checkpoint_every"]``
        steps the evaluation data is scored, averaged metrics are printed and,
        when ``self.config["ckpt_model_path"]`` is set, the model is saved
        there under ``self.config["model_name"]``.
        """
        with tf.Session() as sess:
            tvars = tf.trainable_variables()
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, self.__bert_checkpoint_path)
            print("init bert model params")
            # Rewires the initializers of the mapped variables; the values are
            # actually loaded when the initializer op below is run.
            tf.train.init_from_checkpoint(self.__bert_checkpoint_path,
                                          assignment_map)
            print("init bert model params done")
            sess.run(tf.variables_initializer(tf.global_variables()))

            current_step = 0
            start = time.time()
            for epoch in range(self.config["epochs"]):
                print("----- Epoch {}/{} -----".format(epoch + 1,
                                                       self.config["epochs"]))

                for batch in self.data_obj.next_batch(self.t_in_ids,
                                                      self.t_in_masks,
                                                      self.t_seg_ids,
                                                      self.t_lab_ids,
                                                      self.t_seq_len):

                    loss, true_y, predictions = self.model.train(
                        sess, batch, self.config["keep_prob"])

                    f1, precision, recall = gen_metrics(
                        pred_y=predictions,
                        true_y=true_y,
                        label_to_index=self.lab_to_idx)
                    print(
                        "train: step: {}, loss: {}, recall: {}, precision: {}, f1: {}"
                        .format(current_step, loss, recall, precision, f1))

                    current_step += 1
                    if self.data_obj and current_step % self.config[
                            "checkpoint_every"] == 0:

                        eval_losses = []
                        eval_recalls = []
                        eval_precisions = []
                        eval_f1s = []
                        for eval_batch in self.data_obj.next_batch(
                                self.e_in_ids, self.e_in_masks, self.e_seg_ids,
                                self.e_lab_ids, self.e_seq_len):
                            eval_loss, eval_true_y, eval_predictions = self.model.eval(
                                sess, eval_batch)

                            eval_losses.append(eval_loss)

                            # Same keyword as the training call above: the two
                            # calls target the same helper, so they must agree
                            # on the name of the label-mapping argument.
                            f1, precision, recall = gen_metrics(
                                pred_y=eval_predictions,
                                true_y=eval_true_y,
                                label_to_index=self.lab_to_idx)
                            eval_recalls.append(recall)
                            eval_precisions.append(precision)
                            eval_f1s.append(f1)
                        print("\n")
                        print(
                            "eval:  loss: {}, recall: {}, precision: {}, f1: {}"
                            .format(mean(eval_losses), mean(eval_recalls),
                                    mean(eval_precisions), mean(eval_f1s)))
                        print("\n")

                        if self.config["ckpt_model_path"]:
                            save_path = self.config["ckpt_model_path"]
                            if not os.path.exists(save_path):
                                os.makedirs(save_path)
                            model_save_path = os.path.join(
                                save_path, self.config["model_name"])
                            self.model.saver.save(sess,
                                                  model_save_path,
                                                  global_step=current_step)

            end = time.time()
            print("total train time: ", end - start)
Exemplo n.º 44
0
def train(opt, x_train, y_train, x_test, y_test):
    """Fine-tune a restored LSTM model and evaluate it after every epoch.

    The model variables are restored from ``opt.rcst_model_base_path``, an
    Adam training op is added afterwards, and only the variables that are
    still uninitialised at that point are initialised. Training and
    evaluation use full batches of ``opt.batch_size`` samples; a trailing
    partial batch is ignored.

    Args:
        opt: Options object. Attributes read here: ``seed``, ``max_epochs``,
            ``rcst_model_base_path``, ``rcst_model_save_path``,
            ``learning_rate``, ``learning_rate_decay_every``,
            ``learning_rate_decay_rate`` and ``batch_size``.
        x_train: Training inputs, sliced along the first axis.
        y_train: Training labels; ``argmax`` over axis 1 gives the class.
        x_test: Test inputs.
        y_test: Test labels, same layout as ``y_train``.
    """
    tf.set_random_seed(opt.seed)

    model = LSTM(opt)
    tf_loss, tf_rcst_loss, tf_logit_outputs, tf_pixels, tf_onehot_labels = model.build_model(
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)

    # restore from the pre-trained model
    saver = tf.train.Saver(max_to_keep=opt.max_epochs, write_version=1)
    saver.restore(sess, opt.rcst_model_base_path)

    tf_learning_rate = tf.placeholder(tf.float32)
    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
        train_op = tf.train.AdamOptimizer(tf_learning_rate).minimize(tf_loss)

    # Initialise only what the restore above did not cover (variables created
    # after the saver, e.g. by the optimizer).
    uninitialized_vars = []
    for var in tf.global_variables():
        try:
            sess.run(var)
        except tf.errors.FailedPreconditionError:
            uninitialized_vars.append(var)
    if len(uninitialized_vars):
        init_new_vars_op = tf.variables_initializer(uninitialized_vars)
        sess.run(init_new_vars_op)

    iter_cnt = 0

    for epoch in range(0, opt.max_epochs):
        if epoch == 0:
            current_learning_rate = opt.learning_rate
        elif epoch != 0 and epoch % opt.learning_rate_decay_every == 0:
            current_learning_rate = current_learning_rate * opt.learning_rate_decay_rate

        # training
        # The end bounds run up to and including len(x_train); stopping the
        # range at len(x_train) would drop the last full batch whenever the
        # data size is an exact multiple of the batch size.
        for start, end in zip(
                range(0, x_train.shape[0], opt.batch_size),
                range(opt.batch_size, x_train.shape[0] + 1, opt.batch_size)):
            start_time = time.time()

            current_batch_pixels_train = x_train[start:end]
            current_batch_labels_train = y_train[start:end]

            feed_dict = {
                tf_learning_rate: current_learning_rate,
                tf_pixels: current_batch_pixels_train,
                tf_onehot_labels: current_batch_labels_train
            }

            _, loss_val, rcst_loss_val, logit_outputs = sess.run(
                [train_op, tf_loss, tf_rcst_loss, tf_logit_outputs], feed_dict)

            # compute the training accuracy
            pred_y = np.argmax(logit_outputs, axis=1)
            true_y = np.argmax(current_batch_labels_train, axis=1)
            current_acc = sum(pred_y == true_y) / float(opt.batch_size)

            iter_cnt += 1
            end_time = time.time()
            print(
                "iter {:4d}  epoch {:3d}  lr {:.5f}  loss {:.4f}  rcst_loss {:.4f}  train_acc {:.4f}  time batch {:.4f}"
                .format(iter_cnt, epoch, current_learning_rate, loss_val,
                        rcst_loss_val, current_acc, end_time - start_time))

        # validation
        # epoch mod 1 is always 0, so this runs after every epoch.
        if np.mod(epoch, 1) == 0:
            print("epoch {} is done, saving the model ...".format(epoch))
            saver.save(sess,
                       os.path.join(opt.rcst_model_save_path, 'model_epoch'),
                       global_step=epoch)

            true_cnt = 0
            test_batch_cnt = 0
            # Same inclusive end bound as in the training loop.
            for start, end in zip(
                    range(0, x_test.shape[0], opt.batch_size),
                    range(opt.batch_size, x_test.shape[0] + 1, opt.batch_size)):

                current_batch_pixels_test = x_test[start:end]
                current_batch_labels_test = y_test[start:end]

                feed_dict = {
                    tf_learning_rate: current_learning_rate,
                    tf_pixels: current_batch_pixels_test,
                    tf_onehot_labels: current_batch_labels_test
                }

                loss_test, logit_outputs = sess.run(
                    [tf_loss, tf_logit_outputs], feed_dict)

                # compute the validation accuracy
                pred_y = np.argmax(logit_outputs, axis=1)
                true_y = np.argmax(current_batch_labels_test, axis=1)
                true_cnt += sum(pred_y == true_y)
                test_batch_cnt += 1

            test_num = test_batch_cnt * opt.batch_size
            # With fewer test samples than one batch nothing was evaluated;
            # report NaN instead of dividing by zero.
            test_acc = true_cnt / float(test_num) if test_num else float('nan')
            print("epoch {}  test_acc {:.4f}  test_num: {}".format(
                epoch, test_acc, test_num))
Exemplo n.º 45
0
    def train(self):
        """Build the training graph, load pretrained weights and run the training loop.

        A session is created, the network is built through
        ``self.net.create_architecture`` in "TRAIN" mode and a momentum
        optimizer is attached to its ``total_loss``. All variables are
        initialised, the restorable ones are loaded from
        ``cfg.FLAGS.pretrained_model``, and the loop then runs until the
        iteration counter exceeds ``cfg.FLAGS.max_iters``. Snapshots are taken
        every ``cfg.FLAGS.snapshot_iterations`` iterations via
        ``self.snapshot``.
        """

        # Create session
        tfconfig = tf.ConfigProto(
            allow_soft_placement=True
        )  # allow_soft_placement = true : select GPU automatically
        tfconfig.gpu_options.allow_growth = True
        # tfconfig.gpu_options.per_process_gpu_memory_fraction = 0.90
        sess = tf.Session(config=tfconfig)

        with sess.graph.as_default():

            tf.set_random_seed(cfg.FLAGS.rng_seed)
            layers = self.net.create_architecture(sess,
                                                  "TRAIN",
                                                  self.imdb.num_classes,
                                                  tag='default')
            loss = layers['total_loss']
            # The learning rate is a non-trainable variable so it can be reassigned later.
            lr = tf.Variable(cfg.FLAGS.learning_rate, trainable=False)
            momentum = cfg.FLAGS.momentum
            optimizer = tf.train.MomentumOptimizer(lr, momentum)

            gvs = optimizer.compute_gradients(loss)

            # Double bias
            # Double the gradient of the bias if set
            if cfg.FLAGS.double_bias:
                final_gvs = []
                with tf.variable_scope('Gradient_Mult'):
                    for grad, var in gvs:
                        scale = 1.
                        # Only variables whose name contains '/biases:' are scaled.
                        if cfg.FLAGS.double_bias and '/biases:' in var.name:
                            scale *= 2.
                        if not np.allclose(scale, 1.0):
                            grad = tf.multiply(grad, scale)
                        final_gvs.append((grad, var))
                train_op = optimizer.apply_gradients(final_gvs)
            else:
                train_op = optimizer.apply_gradients(gvs)

            # We will handle the snapshots ourselves
            self.saver = tf.train.Saver(max_to_keep=100000)
            # Write the train and validation information to tensorboard
            writer = tf.summary.FileWriter('default/', sess.graph)
            # valwriter = tf.summary.FileWriter(self.tbvaldir)

        # Load weights
        # Fresh train directly from ImageNet weights
        print('Loading initial model weights from {:s}'.format(
            cfg.FLAGS.pretrained_model))
        variables = tf.global_variables()
        # Initialize all variables first
        sess.run(tf.variables_initializer(variables, name='init'))
        var_keep_dic = self.get_variables_in_checkpoint_file(
            cfg.FLAGS.pretrained_model)
        # Get the variables to restore, ignoring the variables to fix
        variables_to_restore = self.net.get_variables_to_restore(
            variables, var_keep_dic, sess, cfg.FLAGS.pretrained_model)

        restorer = tf.train.Saver(variables_to_restore)
        restorer.restore(sess, cfg.FLAGS.pretrained_model)
        print('Loaded.')
        # Need to fix the variables before loading, so that the RGB weights are changed to BGR
        # For VGG16 it also changes the convolutional weights fc6 and fc7 to
        # fully connected weights
        self.net.fix_variables(sess, cfg.FLAGS.pretrained_model)
        print('Fixed.')
        sess.run(tf.assign(lr, cfg.FLAGS.learning_rate))
        last_snapshot_iter = 0

        timer = Timer()
        # NOTE: `iter` shadows the builtin of the same name inside this method.
        iter = last_snapshot_iter + 1
        # Assigned but not read anywhere else in this method.
        last_summary_time = time.time()
        print('START TRAINING: ...')
        while iter < cfg.FLAGS.max_iters + 1:
            # Any exception raised in the body is printed and the loop continues.
            # The counter is advanced only after the data fetch, so a failure
            # before `iter += 1` repeats the same iteration number.
            try:
                # Learning rate
                if iter == cfg.FLAGS.step_size + 1:
                    # Add snapshot here before reducing the learning rate
                    # self.snapshot(sess, iter)
                    sess.run(
                        tf.assign(lr,
                                  cfg.FLAGS.learning_rate * cfg.FLAGS.gamma))

                timer.tic()
                # Get training data, one batch at a time
                blobs = self.data_layer.forward()
                iter += 1
                # Every 100th iteration also fetches a summary; the others run without it
                if iter % 100 == 0:
                    rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss, summary = self.net.train_step_with_summary(
                        sess, blobs, train_op)
                    timer.toc()

                    # This RunMetadata object is freshly created and is not passed
                    # to any session run within this method.
                    run_metadata = tf.RunMetadata()
                    writer.add_run_metadata(run_metadata, 'step%03d' % iter)
                    writer.add_summary(summary, iter)
                else:
                    rpn_loss_cls, rpn_loss_box, loss_cls, loss_box, total_loss = self.net.train_step(
                        sess, blobs, train_op)
                    timer.toc()

                # Display training information
                if iter % (cfg.FLAGS.display) == 0:
                    print('iter: %d / %d, total loss: %.6f\n >>> rpn_loss_cls: %.6f\n '
                        '>>> rpn_loss_box: %.6f\n >>> loss_cls: %.6f\n >>> loss_box: %.6f\n ' % \
                        (iter, cfg.FLAGS.max_iters, total_loss, rpn_loss_cls, rpn_loss_box, loss_cls, loss_box))
                    print('speed: {:.3f}s / iter'.format(timer.average_time))

                if iter % cfg.FLAGS.snapshot_iterations == 0:
                    self.snapshot(sess, iter)
            except Exception as e:
                print(e)
Exemplo n.º 46
0
    def build_attack(self,
                     gradient_names,
                     init_vars=True,
                     max_to_keep=5,
                     lr=None):
        """Add the attack ops to the model graph and register their names.

        Trainable stand-ins are created for every model input and target, the
        model loss is evaluated on them, and a gradient-descent op is built
        that minimises the squared distance between the resulting gradients
        and gradients fed through placeholders. Identifiers of the created
        tensors and ops are stored in ``self.model.input``,
        ``self.model.output`` and ``self.model.op``.

        Args:
            gradient_names: Substrings; a global variable takes part when its
                ``cid`` contains at least one of them.
            init_vars: Forwarded to ``self.model.build_essential``.
            max_to_keep: Forwarded to ``self.model.build_essential``.
            lr: Learning rate of the attack optimizer; ``self.model.lr`` is
                used when this is falsy.
        """
        def contains_any(text, fragments):
            # True as soon as one fragment occurs inside text.
            return any(fragment in text for fragment in fragments)

        with self.model.graph.as_default():

            # Randomly initialised variables standing in for the model inputs.
            input_tensor = {}
            for input_name in self.model.inputs_shape:
                input_shape = [self.batch_size] + list(
                    self.model.inputs_shape[input_name])
                input_var = tf.Variable(
                    tf.random.normal(input_shape, dtype=tf.float32),
                    name=input_name)
                input_tensor[input_name] = input_var
                self.model.output[input_name] = input_var.name

            # Raw target variables and their softmax over the last axis.
            target_tensor = {}
            target_tensor_raw = {}
            for target_name in self.model.targets_shape:
                target_shape = [self.batch_size] + list(
                    self.model.targets_shape[target_name])
                raw_target = tf.Variable(
                    tf.random.normal(target_shape, dtype=tf.float32),
                    name=target_name)
                target_tensor_raw[target_name] = raw_target
                soft_target = tf.nn.softmax(raw_target, axis=-1)
                target_tensor[target_name] = soft_target
                self.model.output[target_name] = soft_target.cid

            loss = self.model.forward(inputs=input_tensor,
                                      targets=target_tensor,
                                      trainable=False)

            assert loss is not None

            # [variable, d(loss)/d(variable)] for every selected global variable.
            gradients = []
            for candidate in tf.global_variables():
                if contains_any(candidate.cid, gradient_names):
                    gradients.append(
                        [candidate, tf.gradients(loss, candidate)[0]])

            optimizer = tf.train.GradientDescentOptimizer(lr or self.model.lr)

            # One squared-distance term per selected variable.
            attack_loss = []
            for v, g in gradients:
                tmp_g = tf.compat.v1.placeholder(tf.float32,
                                                 shape=g.shape,
                                                 name=g.cid.split(':')[0])
                self.model.input[v.cid] = tmp_g.cid
                self.model.output[v.cid] = g.cid
                squared_norm = tf.reduce_sum(tf.square(g))
                self.model.output[v.cid + '_y'] = tf.gradients(
                    squared_norm, target_tensor['y'])[0].cid
                assert g.shape == tmp_g.shape
                attack_loss.append(tf.reduce_sum(tf.square(g - tmp_g)))

            attack_loss = tf.reduce_sum(attack_loss)

            # Row 0 of each mask is all zeros, row 1 all ones.
            x_row_shape = [1] + list(self.model.inputs_shape['x'])
            mask_tensor_x = tf.concat(
                [tf.zeros(x_row_shape), tf.ones(x_row_shape)], axis=0)

            y_row_shape = [1] + list(self.model.targets_shape['y'])
            mask_tensor_y = tf.concat(
                [tf.zeros(y_row_shape), tf.ones(y_row_shape)], axis=0)

            batch_index = tf.compat.v1.placeholder(tf.int32,
                                                   shape=[self.batch_size],
                                                   name='batch_index')
            self.model.input['batch_index'] = batch_index.cid
            # batch_index picks, per sample, which mask row scales its gradient.
            update_mask_x = tf.gather(mask_tensor_x, batch_index, axis=0)
            update_mask_y = tf.gather(mask_tensor_y, batch_index, axis=0)

            self.model.output['attack_loss'] = attack_loss.cid
            self.model.output['loss'] = loss.cid

            optimizer_gradients = [
                list(pair) for pair in optimizer.compute_gradients(
                    attack_loss, [input_tensor['x'], target_tensor_raw['y']])
            ]

            # Entry 0 belongs to x, entry 1 to the raw y variable.
            optimizer_gradients[0][0] = tf.multiply(update_mask_x,
                                                    optimizer_gradients[0][0])
            optimizer_gradients[1][0] = tf.multiply(update_mask_y,
                                                    optimizer_gradients[1][0])

            attack_train_op = optimizer.apply_gradients(optimizer_gradients)
            self.model.op['attack_train_op'] = attack_train_op.cid

            ########################
            # TMP
            self.model.output['grad_x'] = tf.gradients(
                attack_loss, input_tensor['x'])[0].cid
            self.model.output['grad_y'] = tf.gradients(
                attack_loss, target_tensor['y'])[0].cid
            ########################

            self.model.build_essential(init_vars=init_vars,
                                       max_to_keep=max_to_keep)
Exemplo n.º 47
0
    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

    # Write vocabulary
    vocab_processor.save(os.path.join(checkpoint_dir, "vocab"))

    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    
    print("init all variables")
    graph_def = tf.get_default_graph().as_graph_def()
    graphpb_txt = str(graph_def)
    with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
        f.write(graphpb_txt)

    if FLAGS.word2vec_model :
        # initial matrix with random uniform
Exemplo n.º 48
0
def main():
  """Run a saved QA model over the input data and write predictions as JSON.

  Loads the model named by ``OPTS.model``, restores its checkpoint into a new
  TensorFlow session and, for every example yielded by ``read_input_data``,
  decodes a beam of candidate answers with ``logits_to_probs``.

  Outputs:
    * ``OPTS.output_file``: question id -> top beam answer (always written).
    * ``OPTS.na_prob_file``: question id -> no-answer probability (optional).
    * ``OPTS.always_answer_file``: question id -> first non-empty beam answer,
      or ``''`` when every beam entry is empty (optional).
    * ``OPTS.analysis_file``: question id -> list of answer/span/prob records
      for the whole beam (optional).
  """
  print('Starting...')
  model_dir = ModelDir(OPTS.model)
  model = model_dir.get_model()
  if OPTS.elmo:
    # Fix absolute path names from other codalab runs: drop the leading
    # '/0x...' component so the remaining path is relative.
    lm = model.lm_model
    if lm.lm_vocab_file.startswith('/0x'):
      lm.lm_vocab_file = os.sep.join(lm.lm_vocab_file.split(os.sep)[2:])
    if lm.options_file.startswith('/0x'):
      lm.options_file = os.sep.join(lm.options_file.split(os.sep)[2:])
    if lm.weight_file.startswith('/0x'):
      lm.weight_file = os.sep.join(lm.weight_file.split(os.sep)[2:])
    # The embedding weights file is unconditionally cleared, so there is no
    # point in rewriting its path first (the old rewrite tested the wrong
    # attribute and could never trigger).
    lm.embed_weights_file = None

  input_data, vocab = read_input_data(model)

  print('Loading word vectors...')
  model.set_input_spec(ParagraphAndQuestionSpec(batch_size=None), vocab)

  print('Starting Tensorflow session...')
  config = tf.ConfigProto(allow_soft_placement=True)
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  with sess.as_default():
    prediction = model.get_prediction()
    # Take 0-th here because we know we only truncate to one paragraph
    start_logits_tf = prediction.start_logits[0]
    end_logits_tf = prediction.end_logits[0]
    none_logit_tf = prediction.none_logit[0]
  if OPTS.elmo:
    # See elmo/run_on_user_text.py
    # Variables whose name starts with "bilm" are freshly initialized instead
    # of being restored from the checkpoint.
    all_vars = tf.global_variables() + tf.get_collection(tf.GraphKeys.SAVEABLE_OBJECTS)
    lm_var_names = {x.name for x in all_vars if x.name.startswith("bilm")}
    model_vars = [x for x in all_vars if x.name not in lm_var_names]
    model_dir.restore_checkpoint(sess, model_vars)
    sess.run(tf.variables_initializer([x for x in all_vars if x.name in lm_var_names]))
  else:
    model_dir.restore_checkpoint(sess)

  pred_obj = {}
  na_prob_obj = {}
  pred_always_ans_obj = {}
  analysis_obj = {}

  for context_raw, context_toks, ex in tqdm(input_data):
    encoded = model.encode(ex, is_train=False)
    start_logits, end_logits, none_logit = sess.run(
        [start_logits_tf, end_logits_tf, none_logit_tf],
        feed_dict=encoded)
    beam, p_na = logits_to_probs(
        context_raw, context_toks, start_logits, end_logits, none_logit,
        beam_size=10)

    ans = beam[0][0]
    # First beam entry with a non-empty answer; fall back to '' instead of
    # raising IndexError when every candidate in the beam is empty.
    non_empty_ans = next((x[0] for x in beam if x[0]), '')
    qid = ex[0].question_id

    pred_obj[qid] = ans
    na_prob_obj[qid] = p_na
    pred_always_ans_obj[qid] = non_empty_ans
    analysis_obj[qid] = [
        {'answer': b[0], 'span': [b[2], b[3]], 'prob': b[1]} for b in beam
    ]

  with open(OPTS.output_file, 'w') as f:
    json.dump(pred_obj, f)
  if OPTS.na_prob_file:
    with open(OPTS.na_prob_file, 'w') as f:
      json.dump(na_prob_obj, f)
  if OPTS.always_answer_file:
    with open(OPTS.always_answer_file, 'w') as f:
      json.dump(pred_always_ans_obj, f)
  if OPTS.analysis_file:
    with open(OPTS.analysis_file, 'w') as f:
      json.dump(analysis_obj, f, indent=2)
Exemplo n.º 49
0
    def __init__(self,
                 board_width,
                 board_height,
                 block,
                 init_model=None,
                 transfer_model=None,
                 cuda=False):
        """Build the policy/value graph, its losses, savers and an opponent copy.

        :param board_width: board width, used for placeholder shapes
        :param board_height: board height, used for placeholder shapes
        :param block: stored as ``self.nb_block`` (number of resnet blocks)
        :param init_model: checkpoint passed to ``self.restore_model`` if given
        :param transfer_model: checkpoint restored through ``self.saver_restore``
                               when ``init_model`` is None
        :param cuda: when falsy, CUDA devices are hidden via environment variables
        """
        print()
        print('building network ...')
        print()

        # feature planes / resnet blocks
        self.planes_num = 9
        self.nb_block = block

        # Hide every GPU from TensorFlow unless the caller asked for CUDA.
        use_cpu_only = not cuda
        if use_cpu_only:
            os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        self.board_width = board_width
        self.board_height = board_height

        # Session first, then the graph.
        print(tf.__version__)
        self.session = tf.InteractiveSession()

        # Network input: one stack of feature planes per state.
        state_shape = [None, self.planes_num, board_height, board_width]
        self.input_states = tf.placeholder(tf.float32, shape=state_shape)

        # Same network built twice: training mode creates the variables,
        # test mode reuses them.
        train_heads = self.network(
            input_states=self.input_states, reuse=False, is_train=True)
        self.action_fc_train, self.evaluation_fc2_train = train_heads
        test_heads = self.network(
            input_states=self.input_states, reuse=True, is_train=False)
        self.action_fc_test, self.evaluation_fc2_test = test_heads

        # Snapshot taken before any optimizer or opponent variables exist.
        self.network_all_params = tf.global_variables()

        # --- Loss -----------------------------------------------------------
        # Value head: mean squared error against the game-outcome labels.
        self.labels = tf.placeholder(tf.float32, shape=[None, 1])
        self.value_loss = tf.losses.mean_squared_error(
            self.labels, self.evaluation_fc2_train)

        # Policy head: negative mean of sum(mcts_probs * action output).
        move_count = board_height * board_width
        self.mcts_probs = tf.placeholder(tf.float32, shape=[None, move_count])
        weighted_actions = tf.multiply(self.mcts_probs, self.action_fc_train)
        per_state_policy = tf.reduce_sum(weighted_actions, 1)
        self.policy_loss = tf.negative(tf.reduce_mean(per_state_policy))

        # L2 penalty over every trainable variable whose name lacks 'bias'.
        l2_penalty_beta = 1e-4
        regularized_losses = [
            tf.nn.l2_loss(variable)
            for variable in tf.trainable_variables()
            if 'bias' not in variable.name.lower()
        ]
        l2_penalty = l2_penalty_beta * tf.add_n(regularized_losses)

        self.loss = self.value_loss + self.policy_loss + l2_penalty

        # --- Optimizer ------------------------------------------------------
        self.learning_rate = tf.placeholder(tf.float32)
        adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        self.optimizer = adam.minimize(self.loss)

        # Policy entropy on the test-mode output, for monitoring only.
        entropy_terms = tf.exp(self.action_fc_test) * self.action_fc_test
        per_state_entropy = tf.reduce_sum(entropy_terms, 1)
        self.entropy = tf.negative(tf.reduce_mean(per_state_entropy))

        self.network_params = tf.global_variables()

        # --- Saving / restoring ---------------------------------------------
        self.saver = tf.train.Saver()

        # Subset of variables restored for transfer learning, picked by name.
        transfer_tags = ('conv2d', 'resnet', 'bn', 'flatten_layer')
        self.restore_params = [
            variable for variable in self.network_params
            if any(tag in variable.name for tag in transfer_tags)
        ]
        self.saver_restore = tf.train.Saver(self.restore_params)

        self.session.run(tf.global_variables_initializer())

        if init_model is not None:
            self.restore_model(init_model)
            print('model loaded!')
        elif transfer_model is not None:
            self.saver_restore.restore(self.session, transfer_model)
            print('transfer model loaded !')
        else:
            print('can not find saved model, learn from scratch !')

        # --- Opponent network used for evaluation ---------------------------
        oppo_train_heads = self.network(
            input_states=self.input_states,
            reuse=False,
            is_train=True,
            label='_oppo')
        (self.action_fc_train_oppo,
         self.evaluation_fc2_train_oppo) = oppo_train_heads
        oppo_test_heads = self.network(
            input_states=self.input_states,
            reuse=True,
            is_train=False,
            label='_oppo')
        (self.action_fc_test_oppo,
         self.evaluation_fc2_test_oppo) = oppo_test_heads

        # The opponent variables were created last, so they are the trailing
        # len(self.network_all_params) entries of the global variable list.
        every_variable = tf.global_variables()
        first_oppo_index = len(every_variable) - len(self.network_all_params)
        self.network_oppo_all_params = every_variable[first_oppo_index:]
Exemplo n.º 50
0
            [loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
Exemplo n.º 51
0
	def train(self, sess, num_train_patterns, num_valid_patterns):
		"""Run the training loop until ``self.config.max_epochs`` is reached.

		Variables are initialized and, if a checkpoint exists in
		``self.config.train_dir``, restored from it (the step counter is parsed
		from the checkpoint file name). Gradients are accumulated over
		``update_interval`` steps and applied as their average. Validation,
		summaries, checkpoints and a text log are emitted at fixed step
		intervals.

		:param sess: TensorFlow session used for every ``run`` call
		:param num_train_patterns: number of training patterns, used to convert
			steps into epochs
		:param num_valid_patterns: forwarded to ``self.validate_loss``
		"""

		print('Training...')

		ckpt = tf.train.get_checkpoint_state(self.config.train_dir)
		init_op = tf.global_variables_initializer()
		sess.run(init_op)
		if ckpt and ckpt.model_checkpoint_path:
			self.saver = tf.train.Saver(max_to_keep=2)
			self.saver.restore(sess, ckpt.model_checkpoint_path)
			# checkpoint files are named '<prefix>-<step>'
			self.step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
		else:
			self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
			self.step = 0

		coord = tf.train.Coordinator()
		threads = tf.train.start_queue_runners(sess=sess, coord=coord)
		self.summarizer = tf.summary.FileWriter(self.config.train_dir, sess.graph)

		print_interval = 10 # steps
		update_interval = 10 # steps
		summary_interval = 200 # steps
		validate_interval = 200 # steps
		checkpoint_interval = 1000 # steps

		# loop-invariant output locations
		checkpoint_path = os.path.join(self.config.train_dir, 'model.ckpt')
		log_file_name = os.path.join(self.config.train_dir, 'log.txt')

		print('Start iterating...')

		start_time = time.time()

		batch_grads = None

		# try/finally guarantees the queue-runner threads are stopped and joined
		# even when a step raises; otherwise they would keep running.
		try:
			while True:

				# compute epochs

				next_step = self.step + 1
				epochs = 1.0*next_step*self.config.batch_size/num_train_patterns
				do_print = (next_step % print_interval == 0)
				do_update = (next_step % update_interval == 0)
				do_summary = (next_step % summary_interval == 0)
				do_validate = (next_step % validate_interval == 0)
				do_checkpoint = (next_step % checkpoint_interval == 0)

				# training networks

				step_grads,_,step_losses = sess.run([self.grad_list, self.bn_op, self.losses])
				step_grads = [np.nan_to_num(grad) for grad in step_grads] # handle nan
				batch_grads = self.cumulate_gradients(batch_grads, step_grads)

				# update gradients: feed the averaged accumulated gradients

				if do_update:
					grad_dict = {}
					for k, placeholder in enumerate(self.grad_placeholder):
						grad_dict[placeholder[0]] = batch_grads[k] / update_interval
					sess.run(self.update_op, feed_dict=grad_dict)
					batch_grads = None

				# validation

				if do_validate:
					self.validate_loss(sess, num_valid_patterns)

				# log

				if do_summary:
					summary_str = sess.run(self.train_summary_op)
					self.summarizer.add_summary(summary_str, self.step)

				if do_checkpoint:
					self.saver.save(sess, checkpoint_path, global_step=next_step)

				if do_print:
					now_time = time.time()
					batch_duration = now_time - start_time
					start_time = now_time
					log_str = 'Step %7d: %5.1f sec, epoch: %7.2f, loss: %7.3g %7.3g %7.3g %7.3g %7.3g\n' \
						% (next_step, batch_duration, epochs, step_losses[0], step_losses[1], step_losses[2], step_losses[3], step_losses[4])
					print(log_str)
					with open(log_file_name, 'a') as log_file:
						log_file.write(log_str)

				if epochs >= self.config.max_epochs:
					break

				self.step += 1
		finally:
			coord.request_stop()
			coord.join(threads)
Exemplo n.º 52
0
def main():
	"""Fuse 20 image pairs with the trained model, display and save each result.

	For i in 1..20 the files ``<path1>/<i>.jpg`` and ``<path2>/<i>.jpg`` are
	read, converted to single-channel images, resized to the second image's
	height and width and fed to ``Model``. The weights are restored from
	``MODEL_SAVE_PATH``. Every result is shown next to its two inputs and
	written to ``output_path + 'results_<i>.jpg'``.
	"""
	print('\nBegin to generate pictures ...\n')
	Format = '.jpg'

	def to_gray(img):
		# Weighted sum of the first three channels; images that are already
		# two-dimensional are returned unchanged.
		if len(img.shape) > 2:
			return img[:, :, 0] * 0.3 + img[:, :, 1] * 0.59 + img[:, :, 2] * 0.11
		return img

	# The output directory does not depend on the loop, so create it once.
	if not os.path.exists(output_path):
		os.makedirs(output_path)

	for i in range(20):
		file_name1 = path1 + '/' + str(i + 1) + Format
		file_name2 = path2 + '/' + str(i + 1) + Format

		# scale pixel values by 1/255
		img1 = imread(file_name1) / 255.0
		img2 = imread(file_name2) / 255.0
		print('file1:', file_name1)
		print('file2:', file_name2)

		# The second image defines the common spatial size.
		h, w = img2.shape[0], img2.shape[1]
		img1 = transform.resize(to_gray(img1), (h, w))
		img2 = transform.resize(to_gray(img2), (h, w))
		img1 = img1.reshape([1, h, w, 1])
		img2 = img2.reshape([1, h, w, 1])

		# A fresh graph per pair, because the model is built for this h and w.
		with tf.Graph().as_default(), tf.Session() as sess:
			M = Model(BATCH_SIZE=1, INPUT_H=h, INPUT_W=w, is_training=False)

			# restore the trained model and run the fusion
			g_list = tf.global_variables()
			saver = tf.train.Saver(var_list=g_list)
			print(MODEL_SAVE_PATH)
			sess.run(tf.global_variables_initializer())
			saver.restore(sess, MODEL_SAVE_PATH)
			output = sess.run(M.generated_img, feed_dict={M.SOURCE1: img1, M.SOURCE2: img2})

			output = output[0, :, :, 0]

			fig = plt.figure()
			f1 = fig.add_subplot(311)
			f2 = fig.add_subplot(312)
			f3 = fig.add_subplot(313)
			f1.imshow(img1[0, :, :, 0], cmap='gray')
			f2.imshow(img2[0, :, :, 0], cmap='gray')
			f3.imshow(output, cmap='gray')
			plt.show()
			# pyplot keeps every figure alive until it is closed explicitly;
			# close it so 20 figures do not pile up over the loop.
			plt.close(fig)

			imsave(output_path + 'results_' + str(i + 1) + Format, output)

			del M
Exemplo n.º 53
0
  def update_model(self):
    """Perform one experience-replay training step, if it is due.

    Training only happens when ``self.t`` is past ``self.learning_starts``,
    ``self.t`` is a multiple of ``self.learning_freq`` and the replay buffer
    can supply ``self.batch_size`` samples. A training step consists of:

      3.a  sampling a batch of transitions (current observations, actions,
           rewards, next observations, done mask) from the replay buffer;
      3.b  initializing the model on first use with
           ``initialize_interdependent_variables`` and syncing the target
           network;
      3.c  running ``self.train_fn`` with the batch and the learning rate
           given by ``self.optimizer_spec.lr_schedule.value(self.t)``;
      3.d  syncing the target network every ``self.target_update_freq``
           parameter updates, counted by ``self.num_param_updates``.

    ``self.t`` is incremented on every call, whether or not training ran.
    """
    should_train = (self.t > self.learning_starts and
                    self.t % self.learning_freq == 0 and
                    self.replay_buffer.can_sample(self.batch_size))
    if should_train:
      # 3a.
      obs_t_batch, act_batch, rew_batch, obs_tp1_batch, done_mask = \
          self.replay_buffer.sample(self.batch_size)

      # 3b.
      if not self.model_initialized:
        initialize_interdependent_variables(
            self.session, tf.global_variables(), {
                self.obs_t_ph: obs_t_batch,
                self.obs_tp1_ph: obs_tp1_batch,
            })
        self.session.run(self.update_target_fn)
        self.model_initialized = True

      # 3c.
      self.session.run(self.train_fn, feed_dict={
          self.obs_t_ph: obs_t_batch,
          self.act_t_ph: act_batch,
          self.rew_t_ph: rew_batch,
          self.obs_tp1_ph: obs_tp1_batch,
          self.done_mask_ph: done_mask,
          self.learning_rate: self.optimizer_spec.lr_schedule.value(self.t),
      })
      self.num_param_updates += 1

      # 3d. Key the sync on the number of parameter updates, not on self.t:
      # self.t only reaches this point on multiples of learning_freq, so a
      # test on self.t would make the real period depend on learning_freq.
      if self.num_param_updates % self.target_update_freq == 0:
        self.session.run(self.update_target_fn)

    self.t += 1
Exemplo n.º 54
0
 def train(self):
     """Train the network with binarized convolution weights.

     For every batch the method
       1. recomputes, per ``conv_layer_<j>``, the discretized weights (via
          ``self.discretize_function``) and one scaling factor per slice of
          the last weight axis, and assigns them to the matching 'binary'
          and 'scaling_factor' variables;
       2. evaluates the gradients of the mean absolute error between
          ``self.actual_output`` and ``self.output``;
       3. updates the convolution weights by hand with running first and
          second gradient moments (``self.m1`` / ``self.m2``), storing the
          per-weight denominator in ``self.D``;
       4. updates the three fully connected weight matrices with a plain
          gradient step.

     After each epoch the learning rate is replaced by
     ``self.update_learning_rate(self.learning_rate, epoch_index)``.
     """
     self.t_vars = tf.global_variables()
     self.original_weights = [
         var for var in self.t_vars
         if ('conv_layer' in var.name and '/w' in var.name)
     ]
     self.binary_weights = [
         var for var in self.t_vars
         if ('conv_layer' in var.name and 'binary' in var.name)
     ]
     self.scaling_factors = [
         var for var in self.t_vars if 'scaling_factor' in var.name
     ]
     self.training_vars = tf.trainable_variables()
     self.loss_value = tf.reduce_mean(
         tf.abs(self.actual_output - self.output))
     self.optimizer = tf.train.AdamOptimizer(self.learning_rate, self.beta1,
                                             self.beta2, self.epsilon)
     # global_variables_initializer replaces the deprecated
     # initialize_all_variables.
     tf.global_variables_initializer().run()
     # Build the gradient ops once. Calling compute_gradients inside the
     # batch loop would add a new gradient subgraph on every batch.
     grads_and_vars = self.optimizer.compute_gradients(
         self.loss_value, var_list=self.training_vars)
     # NOTE(review): the .assign(...) calls below still create new ops on
     # every batch; feeding placeholders would avoid that - TODO confirm.
     for i in range(self.no_epochs):
         iterations = int(self.train_size / self.batch_size)
         self.initialize_matrices()
         print("Epoch", i + 1)
         for k in range(iterations):
             print("Batch:", k + 1)
             print(
                 "Determining binarized_weights and optimal scaling factor")
             for j in range(1, (len(self.original_weights) + 1)):
                 string = 'conv_layer_' + str(j)
                 v1 = [
                     v for v in self.original_weights if string in v.name
                 ][0]
                 v2 = [v for v in self.binary_weights
                       if string in v.name][0]
                 scales = [
                     v for v in self.scaling_factors if string in v.name
                 ][0]
                 # Use the session the rest of this method runs in.
                 v1 = v1.eval(session=self.sess)
                 sh = v1.shape
                 v3 = np.zeros((sh))
                 v4 = np.zeros((sh[3]))
                 for l in range(sh[3]):
                     # discretize every weight of slice l element by element
                     for x in range(sh[0]):
                         for y in range(sh[1]):
                             for z in range(sh[2]):
                                 v3[x][y][z][l] = self.discretize_function(
                                     v1[x][y][z][l])
                     # scaling factor: L1 norm of D * W over L1 norm of D
                     v4[l] = self.L1_norm(
                         self.element_wise_mult(self.D[string][:, :, :, l],
                                                v1[:, :, :, l]))
                     v4[l] = (v4[l] * 1.0) / (self.L1_norm(
                         self.D[string][:, :, :, l]))
                 v2.assign(v3).eval()
                 scales.assign(v4).eval()
             print("Training................")
             (images, output) = self.generate_data()
             vals = self.sess.run(
                 grads_and_vars,
                 feed_dict={
                     self.actual_output: output,
                     self.input_: images
                 })  # Here vals is a list of (gradient,variable) pairs
             # positions of the entries whose variable has rank >= 4
             indices = [
                 j for j, pair in enumerate(vals)
                 if len(pair[1].shape) >= 4
             ]
             for j in range(len(self.original_weights)):
                 gradient = vals[indices[j]][0]
                 weight = self.original_weights[j].eval()
                 string = 'conv_layer_' + str(j + 1)
                 self.m1[string] = self.beta1 * self.m1[string] + (
                     1.0 - self.beta1) * gradient
                 self.m2[string] = self.beta2 * self.m2[string] + (
                     1.0 - self.beta2) * (self.element_wise_mult(
                         gradient, gradient))
                 m1_unbiased = ((self.m1[string] * 1.0) /
                                (1.0 - self.beta1))
                 m2_unbiased = ((self.m2[string] * 1.0) /
                                (1.0 - self.beta2))
                 self.D[string] = (1.0 / self.learning_rate) * (
                     self.epsilon + np.sqrt(m2_unbiased))
                 weight = weight - (np.divide(m1_unbiased, self.D[string]))
                 self.original_weights[j].assign(weight).eval()
             # Rank-2 gradients, in order, belong to the three fully
             # connected layers; collect them once instead of three times.
             fc_gradients = [x[0] for x in vals if (len(x[0].shape) == 2)]
             fc_layers = (self.weights_FCLayer1, self.weights_FCLayer2,
                          self.weights_FCLayer)
             for idx, fc_weights in enumerate(fc_layers):
                 weight = fc_weights.eval()
                 gradient = fc_gradients[idx]
                 weight = weight - (self.learning_rate * gradient)
                 fc_weights.assign(weight).eval()
         self.learning_rate = self.update_learning_rate(
             self.learning_rate, i)
Exemplo n.º 55
0
    def build_graph(self):
        # Reset previous graph.
        reset_graph()

        # Placeholders.
        x_source = tf.placeholder(tf.int32,
                                  shape=[None, None],
                                  name="x_source")

        source_seq_length = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name="source_seq_length")

        x_target = tf.placeholder(tf.int32,
                                  shape=[None, None],
                                  name="x_target")

        target_seq_length = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name="target_seq_length")

        labels = tf.placeholder(tf.float32, shape=[None], name="labels")

        input_dropout = tf.placeholder_with_default(1.0,
                                                    shape=[],
                                                    name="input_dropout")

        output_dropout = tf.placeholder_with_default(1.0,
                                                     shape=[],
                                                     name="output_dropout")

        decision_threshold = tf.placeholder_with_default(
            0.5, shape=[], name="decision_threshold")

        # Embedding layer.
        with tf.variable_scope("embeddings"):
            if self.config.source_embeddings_path is not None and self.config.target_embeddings_path is not None:
                source_pretrained_embeddings,\
                target_pretrained_embeddings = get_pretrained_embeddings(
                    source_embeddings_path,
                    target_embeddings_path,
                    source_vocab,
                    target_vocab)
                assert source_pretrained_embeddings.shape[
                    1] == target_pretrained_embeddings.shape[1]
                self.config.embedding_size = source_pretrained_embeddings.shape[
                    1]
                if self.config.fix_pretrained:
                    source_embeddings = tf.get_variable(
                        name="source_embeddings_matrix",
                        shape=[
                            self.config.source_vocab_size,
                            self.config.embedding_size
                        ],
                        initializer=tf.constant_initializer(
                            source_pretrained_embeddings),
                        trainable=False)
                    target_embeddings = tf.get_variable(
                        name="target_embeddings_matrix",
                        shape=[
                            self.config.target_vocab_size,
                            self.config.embedding_size
                        ],
                        initializer=tf.constant_initializer(
                            target_pretrained_embeddings),
                        trainable=False)
                else:
                    source_embeddings = tf.get_variable(
                        name="source_embeddings_matrix",
                        shape=[
                            self.config.source_vocab_size,
                            self.config.embedding_size
                        ],
                        initializer=tf.constant_initializer(
                            source_pretrained_embeddings))
                    target_embeddings = tf.get_variable(
                        name="target_embeddings_matrix",
                        shape=[
                            self.config.target_vocab_size,
                            self.config.embedding_size
                        ],
                        initializer=tf.constant_initializer(
                            target_pretrained_embeddings))
            else:
                source_embeddings = tf.get_variable(
                    name="source_embeddings_matrix",
                    shape=[
                        self.config.source_vocab_size,
                        self.config.embedding_size
                    ])
                target_embeddings = tf.get_variable(
                    name="target_embeddings_matrix",
                    shape=[
                        self.config.target_vocab_size,
                        self.config.embedding_size
                    ])

            source_rnn_inputs = tf.nn.embedding_lookup(source_embeddings,
                                                       x_source)
            target_rnn_inputs = tf.nn.embedding_lookup(target_embeddings,
                                                       x_target)
            source_rnn_inputs = tf.nn.dropout(source_rnn_inputs,
                                              keep_prob=input_dropout,
                                              name="source_seq_embeddings")
            target_rnn_inputs = tf.nn.dropout(target_rnn_inputs,
                                              keep_prob=input_dropout,
                                              name="target_seq_embeddings")

        # BiRNN encoder.
        with tf.variable_scope("birnn") as scope:
            if self.config.use_lstm:
                cell_fw = tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                                  use_peepholes=True)
                cell_bw = tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                                  use_peepholes=True)
            else:
                cell_fw = tf.nn.rnn_cell.GRUCell(self.config.state_size)
                cell_bw = tf.nn.rnn_cell.GRUCell(self.config.state_size)

            cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                cell_fw, output_keep_prob=output_dropout)
            cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                cell_bw, output_keep_prob=output_dropout)

            if self.config.num_layers > 1:
                if self.config.use_lstm:
                    cell_fw = tf.nn.rnn_cell.MultiRNNCell([
                        tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                                use_peepholes=True)
                        for _ in range(self.config.num_layers)
                    ])
                    cell_bw = tf.nn.rnn_cell.MultiRNNCell([
                        tf.nn.rnn_cell.LSTMCell(self.config.state_size,
                                                use_peepholes=True)
                        for _ in range(self.config.num_layers)
                    ])
                else:
                    cell_fw = tf.nn.rnn_cell.MultiRNNCell([
                        tf.nn.rnn_cell.GRUCell(self.config.state_size)
                        for _ in range(self.config.num_layers)
                    ])
                    cell_bw = tf.nn.rnn_cell.MultiRNNCell([
                        tf.nn.rnn_cell.GRUCell(self.config.state_size)
                        for _ in range(self.config.num_layers)
                    ])

            with tf.variable_scope(scope):
                source_rnn_outputs, source_final_state = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=cell_fw,
                    cell_bw=cell_bw,
                    inputs=source_rnn_inputs,
                    sequence_length=source_seq_length,
                    dtype=tf.float32)

            with tf.variable_scope(scope, reuse=True):
                target_rnn_outputs, target_final_state = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=cell_fw,
                    cell_bw=cell_bw,
                    inputs=target_rnn_inputs,
                    sequence_length=target_seq_length,
                    dtype=tf.float32)

            self.config.state_size *= 2
            # Mean and max pooling only work for 1 layer BiRNN.
            if self.config.use_mean_pooling:
                source_final_state = self.average_pooling(
                    source_rnn_outputs, source_seq_length)
                target_final_state = self.average_pooling(
                    target_rnn_outputs, target_seq_length)
            elif self.config.use_max_pooling:
                source_final_state = self.max_pooling(source_rnn_outputs)
                target_final_state = self.max_pooling(target_rnn_outputs)
            else:
                source_final_state_fw, source_final_state_bw = source_final_state
                target_final_state_fw, target_final_state_bw = target_final_state
                if self.config.num_layers > 1:
                    source_final_state_fw = source_final_state_fw[-1]
                    source_final_state_bw = source_final_state_bw[-1]
                    target_final_state_fw = target_final_state_fw[-1]
                    target_final_state_bw = target_final_state_bw[-1]
                if self.config.use_lstm:
                    source_final_state_fw = source_final_state_fw.h
                    source_final_state_bw = source_final_state_bw.h
                    target_final_state_fw = target_final_state_fw.h
                    target_final_state_bw = target_final_state_bw.h
                source_final_state = tf.concat(
                    [source_final_state_fw, source_final_state_bw], axis=1)
                target_final_state = tf.concat(
                    [target_final_state_fw, target_final_state_bw], axis=1)

        # Feed-forward neural network.
        with tf.variable_scope("feed_forward"):
            h_multiply = tf.multiply(source_final_state, target_final_state)
            h_abs_diff = tf.abs(
                tf.subtract(source_final_state, target_final_state))

            W_1 = tf.get_variable(
                name="W_1",
                shape=[self.config.state_size, self.config.hidden_size])
            W_2 = tf.get_variable(
                name="W_2",
                shape=[self.config.state_size, self.config.hidden_size])
            b_1 = tf.get_variable(name="b_1",
                                  shape=[self.config.hidden_size],
                                  initializer=tf.constant_initializer(0.0))

            h_semantic = tf.tanh(
                tf.matmul(h_multiply, W_1) + tf.matmul(h_abs_diff, W_2) + b_1)

            W_3 = tf.get_variable(name="W_3",
                                  shape=[self.config.hidden_size, 1])
            b_2 = tf.get_variable(name="b_2",
                                  shape=[1],
                                  initializer=tf.constant_initializer(0.0))

            logits = tf.matmul(h_semantic, W_3) + b_2
            logits = tf.squeeze(logits, name="logits")

            # Sigmoid output layer.
            with tf.name_scope("output"):
                probs = tf.sigmoid(logits, name="probs")
                predicted_class = tf.cast(tf.greater(probs,
                                                     decision_threshold),
                                          tf.float32,
                                          name="predicted_class")

        # Loss.
        with tf.name_scope("cross_entropy"):
            losses = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits,
                labels=labels,
                name="cross_entropy_per_sequence")
            mean_loss = tf.reduce_mean(losses, name="cross_entropy_loss")

        # Optimization.
        with tf.name_scope("optimization"):
            global_step = tf.Variable(initial_value=0,
                                      trainable=False,
                                      name="global_step")
            optimizer = tf.train.AdamOptimizer(self.config.learning_rate)
            trainable_variables = tf.trainable_variables()
            gradients = tf.gradients(mean_loss,
                                     trainable_variables,
                                     name="gradients")
            clipped_gradients, global_norm = tf.clip_by_global_norm(
                gradients,
                clip_norm=self.config.max_gradient_norm,
                name="clipped_gradients")
            train_op = optimizer.apply_gradients(zip(clipped_gradients,
                                                     trainable_variables),
                                                 global_step=global_step)

        # Evaluation metrics.
        accuracy = tf.metrics.accuracy(labels,
                                       predicted_class,
                                       name="accuracy")
        precision = tf.metrics.precision(labels,
                                         predicted_class,
                                         name="precision")
        recall = tf.metrics.recall(labels, predicted_class, name="recall")

        # Add summaries.
        tf.summary.scalar("loss", mean_loss)
        tf.summary.scalar("global_norm", global_norm)
        tf.summary.scalar("accuracy", accuracy[0])
        tf.summary.scalar("precision", precision[0])
        tf.summary.scalar("recall", recall[0])
        tf.summary.scalar("logits" + "/sparsity", tf.nn.zero_fraction(logits))
        tf.summary.histogram("logits" + "/activations", logits)
        tf.summary.histogram("probs", probs)

        # Add histogram for trainable variables.
        for var in trainable_variables:
            tf.summary.histogram(var.op.name, var)

        # Add histogram for gradients.
        for grad, var in zip(clipped_gradients, trainable_variables):
            if grad is not None:
                tf.summary.histogram(var.op.name + "/gradients", grad)

        # Assign placeholders and operations.
        self.x_source = x_source
        self.x_target = x_target
        self.source_seq_length = source_seq_length
        self.target_seq_length = target_seq_length
        self.labels = labels
        self.input_dropout = input_dropout
        self.output_dropout = output_dropout
        self.decision_threshold = decision_threshold
        self.train_op = train_op
        self.probs = probs
        self.predicted_class = predicted_class
        self.mean_loss = mean_loss
        self.accuracy = accuracy
        self.precision = precision
        self.recall = recall
        self.summaries = tf.summary.merge_all()
        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
Exemplo n.º 56
0
def main(_):
    """Train, periodically validate, checkpoint and finally test a CIFAR-10 net.

    Builds the graph (input placeholders, the network returned by ``deepnn``,
    a softmax cross-entropy loss, and Adam driven by
    ``tf.train.exponential_decay`` with ``decay_steps=1000`` and
    ``decay_rate=0.8``), runs ``FLAGS.max_steps`` training steps while writing
    TensorBoard summaries and checkpoints under ``run_log_dir + '_train'``,
    and then prints the accuracy measured over the whole test set.

    Args:
        _: Ignored positional argument (presumably the argv list handed over
            by an ``app.run``-style launcher -- confirm against the caller).
    """
    tf.reset_default_graph()

    # Import data
    cifar = cf.cifar10(batchSize=FLAGS.batch_size, downloadDir=FLAGS.data_dir)

    with tf.variable_scope('inputs'):
        # Flattened input images.
        x = tf.placeholder(tf.float32, [None, FLAGS.img_width * FLAGS.img_height * FLAGS.img_channels])
        # Per-class label vectors (their argmax is used as the true class).
        y_ = tf.placeholder(tf.float32, [None, FLAGS.num_classes])
        # Fed as [0] for training steps and [1] for validation/test steps.
        testFlag = tf.placeholder(tf.uint8, [1])

    # Build the graph for the deep net
    y_conv, img_summary = deepnn(x, testFlag)

    # Loss: mean softmax cross-entropy over the batch.
    with tf.variable_scope("x_entropy"):
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

    # Adam with a decaying learning rate; `batch_number` is incremented by
    # minimize() and drives the decay schedule.
    batch_number = tf.Variable(0, trainable=False)
    our_learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, batch_number, 1000, 0.8)
    optimizer = tf.train.AdamOptimizer(our_learning_rate).minimize(cross_entropy, global_step=batch_number)

    # Fraction of samples in the fed batch whose predicted class is correct.
    correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(y_conv, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    loss_summary = tf.summary.scalar('Loss', cross_entropy)
    acc_summary = tf.summary.scalar('Accuracy', accuracy)

    # summaries for TensorBoard visualisation
    validation_summary = tf.summary.merge([img_summary, acc_summary])
    training_summary = tf.summary.merge([img_summary, loss_summary])

    # saver for checkpoints
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        summary_writer = tf.summary.FileWriter(run_log_dir + '_train', sess.graph, flush_secs=5)
        summary_writer_validation = tf.summary.FileWriter(run_log_dir + '_validate', sess.graph, flush_secs=5)

        try:
            sess.run(tf.global_variables_initializer())

            # Training and validation
            for step in range(FLAGS.max_steps):
                # Training: Backpropagation using train set
                (trainImages, trainLabels) = cifar.getTrainBatch()

                _, summary_str = sess.run([optimizer, training_summary],
                                          feed_dict={x: trainImages, testFlag: [0], y_: trainLabels})

                # NOTE(review): the training summary is written every
                # (log_frequency + 1) steps while validation uses
                # log_frequency; kept as in the original -- confirm intent.
                if step % (FLAGS.log_frequency + 1) == 0:
                    summary_writer.add_summary(summary_str, step)

                # Validation: Monitoring accuracy using validation set
                if step % FLAGS.log_frequency == 0:
                    # Only fetch a test batch when it is actually evaluated,
                    # instead of loading (and discarding) one on every step.
                    (testImages, testLabels) = cifar.getTestBatch()
                    validation_accuracy, summary_str = sess.run(
                        [accuracy, validation_summary],
                        feed_dict={x: testImages, testFlag: [1], y_: testLabels})
                    print('step %d, accuracy on validation batch: %g' % (step, validation_accuracy))
                    summary_writer_validation.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % FLAGS.save_model == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(run_log_dir + '_train', 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

            # Testing

            # resetting the internal batch indexes
            cifar.reset()
            evaluated_images = 0
            weighted_accuracy_sum = 0.0

            # Don't loop back when we reach the end of the test set. `<`
            # (rather than `!=`) guarantees termination even if the loader
            # ever hands back more samples than expected.
            while evaluated_images < cifar.nTestSamples:
                (testImages, testLabels) = cifar.getTestBatch(allowSmallerBatches=True)
                batch_samples = testLabels.shape[0]
                if batch_samples == 0:
                    # An empty batch would never advance the counter.
                    break

                batch_accuracy = sess.run(accuracy,
                                          feed_dict={x: testImages, testFlag: [1], y_: testLabels})

                # The last batch may be smaller, so weight every batch by its
                # size; a plain mean of batch accuracies would over-weight it.
                weighted_accuracy_sum += batch_accuracy * batch_samples
                evaluated_images += batch_samples

            if evaluated_images:
                test_accuracy = weighted_accuracy_sum / evaluated_images
            else:
                # No test samples at all: report NaN instead of dividing by 0.
                test_accuracy = float('nan')
            print('test set: accuracy on test set: %0.3f' % test_accuracy)
        finally:
            # Flush pending events and release the event files.
            summary_writer.close()
            summary_writer_validation.close()
Exemplo n.º 57
0
    max_gradient=10.0,
    # sampler arguments
    sampler=None,
    batch_size=FLAGS.batch_size,
    global_step=global_step,
    is_learn_q=FLAGS.is_learn_q)

# Session configuration: GPU memory is limited to FLAGS.gpu_fraction of the
# device with on-demand growth enabled, ops may fall back to another device
# (soft placement), and device placement is not logged.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(
    per_process_gpu_memory_fraction=FLAGS.gpu_fraction, allow_growth=True),
                        allow_soft_placement=True,
                        log_device_placement=False)

# Create the output directory; os.mkdir raises if it already exists.
os.mkdir(FLAGS.savedir)
# Variables selected from the global collection, skipping optimizer state
# (names containing 'Adam' or the beta1/beta2 power accumulators) and the
# global step. Presumably handed to a Saver for restoring -- the consumer is
# not visible in this snippet.
restore_var_list = []
if not FLAGS.is_learn_q:
    for var in tf.global_variables():
        print "var_name: ", var.name
        if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \
                or 'optimizers/beta2_power' in var.name\
                or var.name == 'global_step:0':
            pass
        else:
            restore_var_list.append(var)
else:
    # Same filter as above, additionally skipping variables whose name
    # contains 'q_logits'.
    # NOTE(review): the snippet ends right after the `pass` branch; no
    # append for the remaining variables is visible here.
    for var in tf.global_variables():
        print "var_name: ", var.name
        if 'Adam' in var.name or 'optimizers/beta1_power' in var.name \
                or 'optimizers/beta2_power' in var.name\
                or 'q_logits' in var.name\
                or var.name == 'global_step:0':
            pass
Exemplo n.º 58
0
    def train(self):
        """Run the alternating discriminator / generator training loop.

        The model is built inside a new session, then every epoch performs
        ``updates_per_epoch`` iterations.  Each iteration runs one
        discriminator update followed by one generator update with the same
        feed dict, writes their summaries, and saves a checkpoint whenever the
        step counter is a multiple of ``self.snapshot_interval``.  At the end
        of an epoch an image summary is written and the epoch-averaged
        ``d_loss`` / ``g_loss`` values are printed.

        Raises:
            ValueError: if any of the epoch-averaged logged values is NaN.
        """
        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess, \
                tf.device("/gpu:%d" % cfg.GPU_ID):
            # build_model returns the step counter to start counting from.
            counter = self.build_model(sess)
            saver = tf.train.Saver(tf.global_variables(),
                                   keep_checkpoint_every_n_hours=2)
            summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph)

            # Only these entries of self.log_vars are fetched and reported.
            keys = ["d_loss", "g_loss"]
            tracked = [(name, tensor) for name, tensor in self.log_vars
                       if name in keys]
            log_keys = [name for name, _ in tracked]
            log_vars = [tensor for _, tensor in tracked]

            generator_lr = cfg.TRAIN.GENERATOR_LR
            discriminator_lr = cfg.TRAIN.DISCRIMINATOR_LR
            num_embedding = cfg.TRAIN.NUM_EMBEDDING
            lr_decay_step = cfg.TRAIN.LR_DECAY_EPOCH
            updates_per_epoch = int(
                self.dataset.train._num_examples / self.batch_size)
            # Start from the epoch implied by the initial step counter.
            first_epoch = int(counter / updates_per_epoch)

            for epoch in range(first_epoch, self.max_epoch):
                pbar = ProgressBar(maxval=updates_per_epoch,
                                   widgets=["epoch #%d|" % epoch,
                                            Percentage(), Bar(), ETA()])
                pbar.start()

                # Halve both learning rates every lr_decay_step epochs
                # (never at epoch 0).
                if epoch % lr_decay_step == 0 and epoch != 0:
                    generator_lr *= 0.5
                    discriminator_lr *= 0.5

                all_log_vals = []
                for update_idx in range(updates_per_epoch):
                    pbar.update(update_idx)

                    images, wrong_images, embeddings, _, _ = \
                        self.dataset.train.next_batch(self.batch_size,
                                                      num_embedding)
                    feed_dict = {
                        self.images: images,
                        self.wrong_images: wrong_images,
                        self.embeddings: embeddings,
                        self.generator_lr: generator_lr,
                        self.discriminator_lr: discriminator_lr,
                    }

                    # Discriminator step (also fetches the tracked losses).
                    _, d_sum, hist_sum, log_vals = sess.run(
                        [self.discriminator_trainer,
                         self.d_sum,
                         self.hist_sum,
                         log_vars],
                        feed_dict)
                    summary_writer.add_summary(d_sum, counter)
                    summary_writer.add_summary(hist_sum, counter)
                    all_log_vals.append(log_vals)

                    # Generator step on the same batch.
                    _, g_sum = sess.run(
                        [self.generator_trainer, self.g_sum], feed_dict)
                    summary_writer.add_summary(g_sum, counter)

                    # Periodic checkpoint.
                    counter += 1
                    if counter % self.snapshot_interval == 0:
                        snapshot_path = "%s/%s_%s.ckpt" % (
                            self.checkpoint_dir, self.exp_name, str(counter))
                        fn = saver.save(sess, snapshot_path)
                        print("Model saved in file: %s" % fn)

                img_sum = self.epoch_sum_images(sess, cfg.TRAIN.NUM_COPY)
                summary_writer.add_summary(img_sum, counter)

                # Average every tracked value over the epoch's iterations.
                avg_log_vals = np.mean(np.array(all_log_vals), axis=0)
                dic_logs = dict(zip(log_keys, avg_log_vals))

                log_line = "; ".join(
                    "%s: %s" % (str(name), str(value))
                    for name, value in dic_logs.items())
                print("Epoch %d | " % (epoch) + log_line)
                sys.stdout.flush()

                if np.any(np.isnan(avg_log_vals)):
                    raise ValueError("NaN detected!")
Exemplo n.º 59
0
	def build_model(self):
		"""Assemble the text-conditioned GAN graph and initialise its variables.

		Creates the generator and discriminator, the input placeholders, the
		sampler output, the four discriminator outputs, the sigmoid
		cross-entropy losses, one Adam update op per network, and finally runs
		the global initializer on ``self.sess`` and creates ``self.saver``.
		"""
		# Both networks are constructed from the same settings.
		net_kwargs = dict(
			max_seq_length=self.data.tags_idx.shape[1],
			vocab_size=self.vocab_size,
			embedding_size=self.FLAGS.embedding_dim,
			hidden_size=self.FLAGS.hidden,
			img_row=self.img_row,
			img_col=self.img_col)
		self.g_net = Generator(**net_kwargs)
		self.d_net = Discriminator(**net_kwargs)

		# Tag vectors have one slot per eye entry plus one per hair entry.
		tag_dim = len(self.data.eyes_idx) + len(self.data.hair_idx)
		img_shape = [None, self.img_row, self.img_col, 3]

		self.seq = tf.placeholder(tf.float32, [None, tag_dim], name="seq")
		self.img = tf.placeholder(tf.float32, img_shape, name="img")
		self.z = tf.placeholder(tf.float32, [None, self.FLAGS.z_dim])

		# "w_" placeholders carry the wrong (mismatched) text / image.
		self.w_seq = tf.placeholder(tf.float32, [None, tag_dim], name="w_seq")
		self.w_img = tf.placeholder(tf.float32, img_shape, name="w_img")

		real_img, real_seq = self.img, self.seq

		# Generated image for training, plus a second generator call with
		# reuse=True / train=False exposed under the name 'sampler'.
		self.f_img = self.g_net(real_seq, self.z)
		sample_out = self.g_net(real_seq, self.z, reuse=True, train=False)
		self.sampler = tf.identity(sample_out, name='sampler')

		# TODO
		# Target label for each (image, text) pairing:
		#   real  img, right text -> 1
		#   fake  img, right text -> 0
		#   real  img, wrong text -> 0
		#   wrong img, right text -> 0
		self.d = self.d_net(real_seq, real_img, reuse=False)
		self.d_1 = self.d_net(real_seq, self.f_img)
		self.d_2 = self.d_net(self.w_seq, self.img)
		self.d_3 = self.d_net(real_seq, self.w_img)

		def mean_bce(logits, make_labels):
			# Mean sigmoid cross-entropy of `logits` against constant labels
			# shaped like the logits (all ones or all zeros).
			return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
				logits=logits, labels=make_labels(logits)))

		# dcgan-style losses: the generator wants its images scored as 1.
		self.g_loss = mean_bce(self.d_1, tf.ones_like)

		# The discriminator scores the real pair as 1 and averages the three
		# negative pairings.
		real_term = mean_bce(self.d, tf.ones_like)
		fake_term = mean_bce(self.d_1, tf.zeros_like)
		wrong_text_term = mean_bce(self.d_2, tf.zeros_like)
		wrong_img_term = mean_bce(self.d_3, tf.zeros_like)
		self.d_loss = real_term + (fake_term + wrong_text_term + wrong_img_term) / 3

		self.global_step = tf.Variable(0, name='g_global_step', trainable=False)

		# Make both update ops depend on everything in the UPDATE_OPS
		# collection; only the generator update advances global_step.
		pending_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
		with tf.control_dependencies(pending_updates):
			d_optimizer = tf.train.AdamOptimizer(self.FLAGS.lr, beta1=0.5, beta2=0.9)
			self.d_updates = d_optimizer.minimize(self.d_loss, var_list=self.d_net.vars)
			g_optimizer = tf.train.AdamOptimizer(self.FLAGS.lr, beta1=0.5, beta2=0.9)
			self.g_updates = g_optimizer.minimize(
				self.g_loss, var_list=self.g_net.vars, global_step=self.global_step)

		self.sess.run(tf.global_variables_initializer())
		self.saver = tf.train.Saver(tf.global_variables())
"""
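TensorFlow's persistence API supports renaming variables when restoring.
This is very convenient for models that use moving averages, because
TensorFlow implements moving averages with shadow variables; the
rename-on-restore mechanism makes it easy to map those shadow variables
onto the current model.

This script gives a simple example.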
"""
import tensorflow as tf

# step = tf.Variable(0,dtype=tf.float32)
v = tf.Variable(0, dtype=tf.float32, name="v")
# Before the moving-average model is declared there is only one variable, v.
# Prints: v:0
for var in tf.global_variables():
    print(var.name)

# Add the EMA (exponential moving average) model with decay 0.99 and apply
# it to all global variables.
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# Print again after the EMA model has been declared.
# Prints: v:0
#         v/ExponentialMovingAverage:0
for var in tf.global_variables():
    print(var.name)

# Saver created with no explicit variable list.
saver = tf.train.Saver()
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)