def __call__(self, flow=None):
    """Constructs the layer in the TensorFlow graph.

    Args:
      flow: This argument is ignored. (Default value = None)

    Returns:
      Output of this layer.
    """
    with tf.variable_op_scope([flow], self.name, 'Embedding', reuse=self.reuse):
        if not self.reuse:
            self._table_loader = tf.placeholder(tf.float32, shape=self._init_values.shape, name='loader')
            self._lookup_table = tf.get_variable('lookup_table', initializer=self._table_loader, trainable=self.trainable)
            self.params.append(self._lookup_table)
            tf.initialize_variables(self.params).run(feed_dict={self._table_loader: self._init_values})
            self.reuse = True
        flow = tf.placeholder(tf.int64, [None] + self._input_shape, 'input')
        tf.add_to_collection(GraphKeys.MODEL_INPUTS, flow)
        flow = tf.nn.embedding_lookup(self._lookup_table, flow)
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, flow)
        return flow
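# A minimal standalone sketch of the placeholder-initializer pattern used in
# the Embedding layer above: feeding the initial table through a placeholder
# keeps the (potentially large) embedding matrix out of the serialized
# GraphDef instead of baking it in as a constant. The names here
# (`init_values`, `loader`) are illustrative, not from the original code.
import numpy as np
import tensorflow as tf

init_values = np.random.rand(1000, 64).astype(np.float32)  # e.g. a pretrained table
loader = tf.placeholder(tf.float32, shape=init_values.shape, name='loader')
lookup_table = tf.get_variable('lookup_table', initializer=loader)

with tf.Session() as sess:
    # The feed is only needed once, when the initializer op actually runs.
    sess.run(tf.initialize_variables([lookup_table]),
             feed_dict={loader: init_values})
    ids = tf.constant([1, 5, 42], dtype=tf.int64)
    print(sess.run(tf.nn.embedding_lookup(lookup_table, ids)).shape)  # (3, 64)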
def testInitializeFromValue(self): with self.test_session() as sess: init = tf.constant(0.1) w = tf.get_variable("v", initializer=init) sess.run(tf.initialize_variables([w])) self.assertAllClose(w.eval(), 0.1) with self.assertRaisesRegexp(ValueError, "shape"): # We disallow explicit shape specification when initializer is constant. tf.get_variable("u", [1], initializer=init) with tf.variable_scope("foo", initializer=init): # Constant initializer can be passed through scopes if needed. v = tf.get_variable("v") sess.run(tf.initialize_variables([v])) self.assertAllClose(v.eval(), 0.1) # Check that non-float32 initializer creates a non-float32 variable. init = tf.constant(1, dtype=tf.int32) t = tf.get_variable("t", initializer=init) self.assertEqual(t.dtype.base_dtype, tf.int32) # Raise error if `initializer` dtype and `dtype` are not identical. with self.assertRaisesRegexp(ValueError, "don't match"): tf.get_variable("s", initializer=init, dtype=tf.float64)
def testVarScopeRegularizer(self):
    with self.test_session() as sess:
        init = tf.constant_initializer(0.3)

        def regularizer1(v):
            return tf.reduce_mean(v) + 0.1

        def regularizer2(v):
            return tf.reduce_mean(v) + 0.2

        with tf.variable_scope("tower", regularizer=regularizer1) as tower:
            with tf.variable_scope("foo", initializer=init):
                v = tf.get_variable("v", [])
                sess.run(tf.initialize_variables([v]))
                losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                self.assertEqual(1, len(losses))
                self.assertAllClose(losses[0].eval(), 0.4)
            with tf.variable_scope(tower, initializer=init) as vs:
                u = tf.get_variable("u", [])
                vs.set_regularizer(regularizer2)
                w = tf.get_variable("w", [])
                # The next 3 variables are not regularized, to test disabling regularization.
                x = tf.get_variable("x", [], regularizer=tf.no_regularizer)
                with tf.variable_scope("baz", regularizer=tf.no_regularizer):
                    y = tf.get_variable("y", [])
                vs.set_regularizer(tf.no_regularizer)
                z = tf.get_variable("z", [])
                # Check results.
                losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
                self.assertEqual(3, len(losses))
                sess.run(tf.initialize_variables([u, w, x, y, z]))
                self.assertAllClose(losses[0].eval(), 0.4)
                self.assertAllClose(losses[1].eval(), 0.4)
                self.assertAllClose(losses[2].eval(), 0.5)
        with tf.variable_scope("foo", reuse=True):
            v = tf.get_variable("v", [])  # "v" is already there, reused
            losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            self.assertEqual(3, len(losses))  # No new loss added.
def evaluate_model(self, accuracy, num_steps, feed_vars=(), feed_data=None, summary_tag=None, print_every=0): """Evaluates the given model. Args: accuracy: The metric that is being evaluated. num_steps: The number of steps to run in the evaluator. feed_vars: A list or tuple of the variables that will be fed. feed_data: A generator that produces tuples of the same length as feed_vars. summary_tag: If provided, the final result of running the model will be published to this tag. print_every: Print a summary every so many steps, use 0 to disable. Returns: The accuracy. """ test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES) if test_vars: tf.initialize_variables(test_vars).run() result = self.run_model([accuracy], num_steps, feed_vars=feed_vars, feed_data=feed_data, print_every=print_every, allow_initialize=False) if summary_tag and self._summary_writer: summary = tf.Summary( value=[tf.Summary.Value(tag=summary_tag, simple_value=float(result[1]))]) event = tf.Event(wall_time=time.time(), summary=summary, step=int(result[0])) self._summary_writer.add_event(event) return result[1]
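# A runnable sketch of the manual-summary trick evaluate_model relies on:
# building a tf.Summary proto by hand and wrapping it in an Event lets you
# publish a single scalar outside the usual summary-op machinery. The tag,
# value and step below are placeholders.
import time
import tensorflow as tf

writer = tf.train.SummaryWriter('/tmp/eval_logs')
summary = tf.Summary(value=[tf.Summary.Value(tag='eval/accuracy', simple_value=0.87)])
event = tf.Event(wall_time=time.time(), summary=summary, step=1000)
writer.add_event(event)
writer.flush()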
def train(self, session, text, num_steps):
    """Train embeddings on the given text."""
    generator = bigram_batch.SkipgramGenerator(text, self._batch_size, self._num_skips)
    is_own = lambda x: x.name.startswith(self._scope_name)
    # Wrap the filter in list() so this also works on Python 3, where
    # filter() returns an iterator rather than a list.
    tf.initialize_variables(list(filter(is_own, tf.all_variables()))).run()
    print('Initialized')
    average_loss = 0
    step = 0
    while step < num_steps:
        batches_labels = zip(*generator.next())
        for step, (batch, label) in enumerate(batches_labels, step):
            feed_dict = {self._train_dataset: batch,
                         self._train_labels: label.reshape(label.shape[0], 1)}
            _, l = session.run([self._optimizer, self._loss], feed_dict=feed_dict)
            average_loss += l
            if step % 2000 == 0:
                if step > 0:
                    # The average loss is an estimate of the loss over the last
                    # 2000 batches.
                    average_loss = average_loss / 2000
                print('Average loss at step %d: %f' % (step, average_loss))
                average_loss = 0
def test_variable(self): with self.test_session() as sess: x = tf.Variable(2.0, name="CustomName") y = tf.constant(3.0) z = x * y z_new = copy(z) tf.initialize_variables([x]).run() self.assertEqual(z_new.eval(), 6.0)
def test_tensor_variable(self): with self.test_session() as sess: x = tf.constant(2.0) y = tf.constant(3.0) z = x * y qx = tf.Variable(4.0, name="CustomName") z_new = copy(z, {x: qx}) tf.initialize_variables([qx]).run() self.assertEqual(z_new.eval(), 12.0)
def init_vars(self, init_hp, session, reset_hp=False): print(init_hp) init_feed = dict() init_feed[self.ph_hypers] = init_hp if os.path.exists(self.save_path): # Restore variables from disk. self.saver.restore(session, self.save_path) if reset_hp: tf.initialize_variables(var_list=self.reset_vars).run(feed_dict=init_feed) else: tf.initialize_all_variables().run(feed_dict=init_feed)
def var_collection_example(): g1 = tf.Graph() with g1.as_default(): with tf.name_scope('scope1') as scope1: a = tf.Variable(tf.constant(1.0, shape=[1]), name='a') b = tf.Variable(tf.constant(2.0, shape=[1]), name='b') with tf.name_scope('scope2') as scope2: c = tf.Variable(tf.constant(3.0, shape=[1]), name='c') g2 = tf.Graph() with g2.as_default(): with tf.name_scope('scope1') as scope1: a = tf.Variable(tf.constant(4.0, shape=[1]), name='a') b = tf.Variable(tf.constant(5.0, shape=[1]), name='b') with tf.name_scope('scope2') as scope2: c = tf.Variable(tf.constant(6.0, shape=[1]), name='c') vars_g1 = var_collect.collect_all(graph=g1) vars_g1_scope1 = var_collect.collect_scope('scope1', graph=g1) var_g1_scope1_a = var_collect.collect_name('scope1/a', graph=g1) vars_g2 = var_collect.collect_all(graph=g2) vars_g2_dict = var_collect.collect_list( ['scope1/a', 'scope1/b', 'scope2/c'], graph=g2) sess = tf.Session(graph=g1) sess.run(tf.initialize_variables(vars_g1)) y_hat = [var.eval(sess)[0] for var in vars_g1] y = [1.0, 2.0, 3.0] print 'Graph g1: ' print 'y: [' + ', '.join([str(l) for l in y]) + ']' print 'y_hat: [' + ', '.join([str(l) for l in y_hat]) + ']' sess.close() sess = tf.Session(graph=g2) sess.run(tf.initialize_variables(vars_g2)) y_hat = [var.eval(sess)[0] for var in vars_g2] y = [4.0, 5.0, 6.0] print 'Graph g2: ' print 'y: [' + ', '.join([str(l) for l in y]) + ']' print 'y_hat: [' + ', '.join([str(l) for l in y_hat]) + ']' var_collect.print_var_list(vars_g1, name='vars_g1') var_collect.print_var_list(vars_g2, name='vars_g2') var_collect.print_var_list(vars_g1_scope1, name='vars_g1_scope1') var_collect.print_var_list([var_g1_scope1_a], name='vars_g1_scope1_a') print 'vars_g2_dict = {' for key, value in vars_g2_dict.items(): print ' {}: {},'.format(key, value.eval(sess)[0]) print '}' sess.close()
def testInitFromNonTensorValue(self): with self.test_session() as sess: v = tf.get_variable("v", initializer=4, dtype=tf.int32) sess.run(tf.initialize_variables([v])) self.assertAllClose(v.eval(), 4) w = tf.get_variable("w", initializer=numpy.array([1, 2, 3]), dtype=tf.int32) sess.run(tf.initialize_variables([w])) self.assertAllClose(w.eval(), [1, 2, 3]) with self.assertRaises(TypeError): tf.get_variable("x", initializer={})
def testVarScopeInitializer(self):
    with self.test_session() as sess:
        init = tf.constant_initializer(0.3)
        with variable_scope.variable_scope("tower") as tower:
            with variable_scope.variable_scope("foo", initializer=init):
                v = variable_scope.get_variable("v", [])
                sess.run(tf.initialize_variables([v]))
                self.assertAllClose(v.eval(), 0.3)
            with variable_scope.variable_scope(tower, initializer=init):
                w = variable_scope.get_variable("w", [])
                sess.run(tf.initialize_variables([w]))
                self.assertAllClose(w.eval(), 0.3)
def __init__(self, settings, session):
    self.s = session
    self.action_type = settings["action"]["type"]
    if self.action_type == "discrete":
        self.num_actions = settings["action"]["num_actions"]
    else:
        assert False, "Unknown action type: %s" % (self.action_type,)
    self.create_variables(settings)
    self.s.run(tf.initialize_variables(self.variables()))
    self.s.run(tf.initialize_variables(self.gradients()))
def test_local_variable(self): with self.test_session() as sess: self.assertEquals([], tf.local_variables()) value0 = 42 tf.contrib.framework.local_variable(value0) value1 = 43 tf.contrib.framework.local_variable(value1) variables = tf.local_variables() self.assertEquals(2, len(variables)) self.assertRaises(tf.OpError, sess.run, variables) tf.initialize_variables(variables).run() self.assertAllEqual(set([value0, value1]), set(sess.run(variables)))
def __call__(self, flow):
    """Applies this layer to the input `Tensor` and returns the output `Tensor`.

    Args:
      flow: The input `Tensor`.

    Returns:
      Output of this layer.
    """
    with tf.variable_op_scope([flow], self.name, 'Conv', reuse=self.reuse):
        if not self.reuse:
            full_shape = self._filter_shape + [flow.get_shape()[-1].value, self._n_output_channels]
            self.filter = tf.get_variable('filter', full_shape, initializer=self._weight_init,
                                          regularizer=self._weight_regularizer, trainable=self.trainable)
            self.params.append(self.filter)
            tf.add_to_collection(tf.GraphKeys.WEIGHTS, self.filter)
            if self._has_bias:
                self.bias = tf.get_variable('bias', self._n_output_channels, initializer=self._bias_init,
                                            regularizer=self._bias_regularizer, trainable=self.trainable)
                self.params.append(self.bias)
                tf.add_to_collection(tf.GraphKeys.BIASES, self.bias)
            tf.initialize_variables(self.params).run()
            self.reuse = True
        flow = tf.nn.conv2d(flow, self.filter, [1] + self._strides + [1], self._padding, self._use_cudnn_on_gpu)
        # Only add the bias when the layer actually created one.
        if self._has_bias:
            flow = tf.nn.bias_add(flow, self.bias)
        if self._activation_fn is not None:
            flow = self._activation_fn(flow)
        tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, flow)
        return flow
def create_model(session, forward_only, batch_size=FLAGS["batch_size"], model_name = SAVE_NAME): """Create translation model and initialize or load parameters in session.""" model = seq2seq_model.Seq2SeqModel( FLAGS["source_vocab_size"], FLAGS["target_vocab_size"], _buckets, FLAGS["size"], FLAGS["num_layers"], FLAGS["max_gradient_norm"], batch_size, FLAGS["learning_rate"], FLAGS["learning_rate_decay_factor"], forward_only=forward_only) ckpt = tf.train.get_checkpoint_state("src/model/forex_trader/"+model_name) if ckpt: # if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path): print("Reading model parameters from %s" % ckpt.model_checkpoint_path) model.saver.restore(session, ckpt.model_checkpoint_path) if not forward_only: # set new learning rate print("Old Learning Rate: ",model.learning_rate.eval(session=session)) new_learning_rate = tf.Variable(float(FLAGS["learning_rate"]), trainable=False) op = tf.assign(model.learning_rate, new_learning_rate) op_init = tf.initialize_variables([new_learning_rate]) session.run([op_init]) session.run([op]) print("New Learning Rate: ",model.learning_rate.eval(session=session)) else: print("Creating model with fresh parameters.") session.run(tf.initialize_all_variables()) return model
def _create_state(self): """Prepare stateful variables modified during the recurrence.""" # Both the queue and the stack are flattened stack_size * batch_size # tensors. `stack_size` many blocks of `batch_size` values stack_shape = (self.stack_size * self.batch_size, self.model_dim) self.stack = tf.Variable(tf.zeros(stack_shape, dtype=tf.float32), trainable=False, name="stack") self.queue = tf.Variable(tf.zeros((self.stack_size * self.batch_size,), dtype=tf.float32), trainable=False, name="queue") self.buff_cursors = tf.Variable(tf.zeros((self.batch_size,), dtype=tf.float32), trainable=False, name="buff_cursors") self.cursors = tf.Variable(tf.ones((self.batch_size,), dtype=tf.float32) * - 1, trainable=False, name="cursors") # TODO make parameterizable self.tracking_value = tf.Variable(tf.zeros((self.batch_size, self.tracking_dim), dtype=tf.float32), trainable=False, name="tracking_value") # Create an Op which will (re-)initialize the auxiliary variables # declared above. self._aux_vars = [self.stack, self.queue, self.buff_cursors, self.cursors, self.tracking_value] self.variable_initializer = tf.initialize_variables(self._aux_vars)
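# The point of building `variable_initializer` once is that re-running it is a
# cheap way to reset non-trainable state between batches. A self-contained
# sketch of that reset pattern with a single toy variable:
import tensorflow as tf

counter = tf.Variable(tf.zeros([]), trainable=False, name='aux_counter')
increment = counter.assign_add(1.0)
reset_aux = tf.initialize_variables([counter])

with tf.Session() as sess:
    sess.run(reset_aux)
    sess.run(increment)
    sess.run(increment)
    print(sess.run(counter))  # 2.0
    sess.run(reset_aux)       # re-running the initializer clears the state
    print(sess.run(counter))  # 0.0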
def __init__(self, settings):
    self.settings = update_settings(DEFAULT_SETTINGS, settings)
    # network and training
    self.q_network = parse_block(settings["model"])
    self.optimizer = parse_optimizer(settings["optimizer"])
    out_sh = self.q_network.output_shape()
    assert len(out_sh) == 2 and out_sh[0] is None, \
        "Output of the Discrete DeepQ must be (None, num_actions), where None corresponds to batch_size"
    self.num_actions = out_sh[1]
    self.minibatch_size = self.settings["minibatch_size"]
    self.train_every_nth = self.settings['train_every_nth']
    self.discount_rate = self.settings["discount_rate"]
    self.transitions_so_far = 0
    self.exploration_period = self.settings['exploration_period']
    self.random_action_probability = self.settings['random_action_probability']
    self.replay_buffer = deque()
    self.store_every_nth = self.settings['store_every_nth']
    self.replay_buffer_size = self.settings['replay_buffer_size']
    self.target_network_update_rate = self.settings['target_network_update_rate']
    self.summary_writer = None
    self.s = tf.Session()
    self.create_variables()
    self.s.run(tf.initialize_variables(
        self.q_network.variables() + self.target_q_network.variables()))
def init_gradients(self, loss, var_train): if self.play_mode: return with tf.device(self.args.device): var_refs = [v.ref() for v in var_train] train_gradients = tf.gradients( loss, var_refs, gate_gradients=False, aggregation_method=None, colocate_gradients_with_ops=False) acc_gradient_list = [] train_step_list = [] new_grad_vars = [] self.grad_list = [] var_list = [] for grad, var in zip(train_gradients, self.global_vars): acc_gradient = tf.Variable(tf.zeros(grad.get_shape()), trainable=False) acc_gradient_list.append(acc_gradient) train_step_list.append(acc_gradient.assign_add(grad)) new_grad_vars.append((tf.convert_to_tensor(acc_gradient, dtype=tf.float32), var)) self.grad_list.append(acc_gradient) var_list.append(var) self.train_step = tf.group(*train_step_list) self.reset_acc_gradients = tf.initialize_variables(acc_gradient_list) self.apply_grads = self.global_optimizer.apply_gradients(new_grad_vars) sync_list = [] for i in range(0, len(self.global_vars)): sync_list.append(var_train[i].assign(self.global_vars[i])) self.sync = tf.group(*sync_list)
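# Minimal self-contained sketch of the accumulate-then-reset pattern above:
# gradients are summed into a non-trainable accumulator over several steps,
# applied once, and the accumulator is cleared by re-running its initializer.
# The model here is a toy stand-in.
import tensorflow as tf

w = tf.Variable(2.0)
loss = tf.square(w - 5.0)
grad = tf.gradients(loss, [w])[0]

acc_gradient = tf.Variable(tf.zeros(grad.get_shape()), trainable=False)
train_step = acc_gradient.assign_add(grad)
reset_acc_gradients = tf.initialize_variables([acc_gradient])

opt = tf.train.GradientDescentOptimizer(0.1)
apply_grads = opt.apply_gradients([(tf.convert_to_tensor(acc_gradient), w)])

with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    for _ in range(4):              # accumulate over four mini-steps
        sess.run(train_step)
    sess.run(apply_grads)           # one update with the summed gradient
    sess.run(reset_acc_gradients)   # clear the accumulator for the next round
    print(sess.run(w))              # 2.0 - 0.1 * (4 * -6.0) = 4.4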
def test_tf_resize_new_values(self): var = tf.Variable(range(20)) self.session.run(tf.initialize_variables([var])) tf_resize(self.session, var, new_values=np.array(range(10))) self.assertEqual(len(self.session.run(var)), 10)
def initialize_op(self): """Returns an op for initializing tensorflow variables.""" all_vars = self._row_factors + self._col_factors if self._row_weights is not None: assert self._col_weights is not None all_vars.extend(self._row_weights + self._col_weights) return tf.initialize_variables(all_vars)
def _run_init_test_vars_op(self): test_vars = tf.get_collection(bookkeeper.GraphKeys.TEST_VARIABLES) if test_vars: if test_vars != self._test_vars: self._test_vars = list(test_vars) self._test_var_init_op = tf.initialize_variables(test_vars) return self._test_var_init_op.run()
def __init__(self, placeholder, session, noise_std=1.0, name='NoisyInput'): super(NoisyInputLayer, self).__init__(placeholder, name) self._noise_std = noise_std self._session = session with self.name_scope(): self._predict = tf.Variable(noise_std, name='predict') self._session.run(tf.initialize_variables([self._predict]))
def fit(self, dataset, n_epochs={1: 10, 3: 10, 5: 10}, n_mini_batch=1, learning_rate=0.01):
    """Learn the parameters by performing n_epochs training loops."""
    self.data = dataset
    user_placeholder, M_placeholder = placeholder_inputs()
    self.sess.run(tf.initialize_all_variables())
    tot_epochs = 0
    for k in n_epochs.keys():
        print("perform CD", k)
        for epochs in range(n_epochs[k]):
            tot_epochs += 1
            print("epochs", tot_epochs)
            user_batchs = chunks(range(self.num_users), n_mini_batch)
            b = 0
            for batch in user_batchs:
                b += 1
                print("batch :", b, "/", n_mini_batch)
                # The learning rate is divided by the batch size.
                # The last batch does not necessarily have the same size as
                # the others, so we have to build train_op here.
                train_op = self.train(user_placeholder, M_placeholder, k, learning_rate / len(batch))
                update_op = self.update_weight()
                # re-initialize the gradient accumulators
                self.sess.run(tf.initialize_variables([self.delta_w, self.delta_vb, self.delta_hb]))
                for u in batch:
                    feed_dict = fill_feed_dict(self.data, u, user_placeholder, M_placeholder)
                    # update the gradient
                    self.sess.run(train_op, feed_dict=feed_dict)
                # update the weights for this mini-batch
                self.sess.run(update_op)
def reset_module(self, module): temp = set(tf.all_variables()) module.backward(module.loss) self.sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))
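# Standalone sketch of the snapshot trick reset_module uses: record
# tf.all_variables() before adding new ops, then initialize only the set
# difference. This initializes freshly created variables (e.g. optimizer
# slots) without clobbering ones that already hold trained values.
import tensorflow as tf

with tf.Session() as sess:
    v = tf.Variable(1.0, name='old')
    sess.run(tf.initialize_variables([v]))

    temp = set(tf.all_variables())
    train_op = tf.train.AdamOptimizer(1e-4).minimize(tf.square(v))  # adds slot vars
    sess.run(tf.initialize_variables(set(tf.all_variables()) - temp))

    sess.run(train_op)   # runs without ever re-initializing `v`
    print(sess.run(v))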
def train_dnn(data_folder, model_file): # Output of dnn using input x y = DNN(x) print "Loading training pickles..." train_set = import_data.load_dataset(data_folder + '/train_data.pickle', data_folder + '/train_labels.pickle', context_frames=context_frames) # Create the dir for the model if not os.path.isdir('%s/models/%s'%(save_loc,start_date)): try: os.makedirs('%s/models/%s'%(save_loc,start_date)) except OSError: if not os.path.isdir('%s/models/%s'%(save_loc,start_date)): raise # Create the session global sess sess = tf.InteractiveSession() global summary_op global train_writer global saver saver = tf.train.Saver() # Op for merging all summaries summary_op = tf.merge_all_summaries() # Summary Writer train_writer = tf.train.SummaryWriter('%ssummaries/%s'%(save_loc, start_date), sess.graph) # Cost function cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, y_)) # Optimizer # For gradient descent, learning rate = 0.002 (see Hinton et al.) # For AdamOptimizer, learning rate = 0.0001 (better than default (exp 1.2)) if (optimizer_name == 'Adam'): # Hacky solution for making sure that the beta2_power var # is always initialized temp = set(tf.all_variables()) optimizer = tf.train.AdamOptimizer(1e-4).minimize(cost) sess.run(tf.initialize_variables(set(tf.all_variables()) - temp)) else: optimizer = tf.train.GradientDescentOptimizer(0.02).minimize(cost) if model_file: saver.restore(sess, model_file) print "Model restored" else: # Initialization init_op = tf.initialize_all_variables() sess.run(init_op) print("Training network. Date: %s" % start_date) train(train_set, y, cost, optimizer) save_path = saver.save(sess, "%s/models/%s/model.ckpt"%(save_loc, start_date)) print("Model saved in file: %s" % save_path) print("Summaries written to summaries/%s" % start_date) evaluate_dnn(data_folder, y)
def __init__(self, session, optimizer_critic, optimizer_actor, critic_network, actor_network, gamma_lmbda, state_dim, num_actions, summary_writer=None, summary_every=5): self.session = session self.summary_writer = summary_writer self.optimizer_critic = optimizer_critic self.optimizer_actor = optimizer_actor self.actor_network = actor_network self.critic_network = critic_network self.state_dim = state_dim self.num_actions = num_actions self.gamma_lmbda = tf.constant(gamma_lmbda) # initialize the graph on tensorflow self.create_variables() var_lists = tf.get_collection(tf.GraphKeys.VARIABLES) self.session.run(tf.initialize_variables(var_lists)) # make sure the variables in graph are initialized self.session.run(tf.assert_variables_initialized()) if self.summary_writer is not None: self.summary_writer.add_graph(self.session.graph) self.summary_every = summary_every
def guarantee_initialized_variables(self, session, list_of_variables=None):
    if list_of_variables is None:
        list_of_variables = tf.all_variables()
    # report_uninitialized_variables returns variable *names* (as bytes), not
    # the variables themselves; calling tf.get_variable on those names would
    # try to create brand-new variables. Map the names back to the Variable
    # objects instead.
    uninitialized_names = set(session.run(
        tf.report_uninitialized_variables(list_of_variables)))
    uninitialized_variables = [v for v in list_of_variables
                               if v.op.name.encode('utf-8') in uninitialized_names]
    session.run(tf.initialize_variables(uninitialized_variables))
    return uninitialized_variables
def testInitializeFromValue(self):
    with self.test_session() as sess:
        init = tf.constant(0.1)
        w = tf.get_variable("v", initializer=init)
        sess.run(tf.initialize_variables([w]))
        self.assertAllClose(w.eval(), 0.1)
        with self.assertRaisesRegexp(ValueError, "shape"):
            # We disallow explicit shape specification when initializer is constant.
            tf.get_variable("u", [1], initializer=init)
        with tf.variable_scope("foo", initializer=init):
            # Constant initializer can be passed through scopes if needed.
            v = tf.get_variable("v")
            sess.run(tf.initialize_variables([v]))
            self.assertAllClose(v.eval(), 0.1)
def restoreBaseReg(self, sess, restore):
    outfilename = os.path.join(self.conf.cachedir, self.conf.baseregoutname)
    traindatafilename = os.path.join(self.conf.cachedir, self.conf.baseregdataname)
    latest_ckpt = tf.train.get_checkpoint_state(self.conf.cachedir, latest_filename=self.conf.baseregckptname)
    if not latest_ckpt or not restore:
        self.baseregstartat = 0
        self.baseregtrainData = {'train_err': [], 'val_err': [], 'step_no': [], 'train_dist': [], 'val_dist': []}
        sess.run(tf.initialize_variables(PoseTools.get_vars('base')))
        print("Not loading base variables. Initializing them")
        return False
    else:
        self.baseregsaver.restore(sess, latest_ckpt.model_checkpoint_path)
        matchObj = re.match(outfilename + '-(\d*)', latest_ckpt.model_checkpoint_path)
        self.baseregstartat = int(matchObj.group(1)) + 1
        with open(traindatafilename, 'rb') as tdfile:
            inData = pickle.load(tdfile)
            if not isinstance(inData, dict):
                self.baseregtrainData, loadconf = inData
                print("Parameters that don't match for base:")
                PoseTools.compare_conf(self.conf, loadconf)
            else:
                print("No config was stored for base. Not comparing conf")
                self.baseregtrainData = inData
        print("Loading base variables from %s" % latest_ckpt.model_checkpoint_path)
        return True
def main(_):
    """Load a word embedding."""
    if not FLAGS.model_file or not FLAGS.vocab_file:
        print("--model_file and --vocab_file must be specified.")
        sys.exit(1)

    # get the word to id mapping
    word2id = {}
    with open(FLAGS.vocab_file, "r") as file:
        for i, line in enumerate(reader(file, delimiter=" ")):
            word2id[line[0]] = i

    # load word embeddings
    with tf.Graph().as_default(), tf.Session() as session:
        #with tf.device("/cpu:0"):
        w_in = tf.Variable(tf.zeros([len(word2id), FLAGS.embedding_size]), trainable=False, name="w_in")
        saver = tf.train.Saver({"w_in": w_in})
        saver.restore(session, FLAGS.model_file)
        tensor = tf.concat(0, [w_in.value(), tf.zeros([2, FLAGS.embedding_size])])
        embeddings = tf.Variable(tensor, trainable=True, name="embeddings")
        word_ids = tf.constant([[0, 1, 2], [3, 4, 71291]])
        word_emb = tf.nn.embedding_lookup(embeddings, word_ids)
        #word_emb = tf.Print(word_emb, [word_emb[0]])
        init = tf.initialize_variables([embeddings])
        session.run(init)
        word_emb = session.run(word_emb)
        print(word_emb)
# What do I need to do:
# - take charid (not a 1-hot vector) => create a new 'bigram' class which uses
#   BatchGenerator to create batches of charids
# - graph requirement: 1 TF train_input placeholder, with
#   shape = [batch_size * (num_unrollings + 1)]
_, l, predictions, lr = session.run(
    [optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)

# LSTM...tbc...
# initialize all other variables
is_lstm = lambda x: x.name.startswith("lstm_scope")
tf.initialize_variables(filter(is_lstm, tf.all_variables())).run()

# train lstm
print('Initialized')
mean_loss = 0
for step in range(num_steps):
    batches = train_batches.next()
    feed_dict = dict()
    for i in range(num_unrollings + 1):
        feed_dict[train_data[i]] = batches[i]
    # TEST - CHECK TENSOR SHAPES AND TYPES
    if step == 0:
        print('step==0')
rnn_classification_loss = weighted_sum_cross_entropy(z_logits, z) tf.summary.scalar('rnn_classification_loss', rnn_classification_loss) tf.summary.scalar('l2_z', l2) total_loss = FLAGS.w_c*rnn_classification_loss + FLAGS.w_i*inverse_loss + FLAGS.w_l2*l2 tf.summary.scalar('loss', total_loss) rnn_acc = match_all(z_, z) tf.summary.scalar('rnn_acc', rnn_acc) learning_rate = 0.0001 with tf.variable_scope('optimizer'): optimizer= minimize(total_loss, { 'learning rate' : learning_rate}, algo='adam') sess.run(tf.initialize_variables(list(set(tf.all_variables()) - set(prev_vars)) )) train_writer = tf.summary.FileWriter('cifar10_rnn/train', graph=sess.graph) test_writer = tf.summary.FileWriter('cifar10_rnn/test') summary_op = tf.summary.merge_all() n_epoch = 1000 n_batch = 200 n_display = 10000 saver2 = tf.train.Saver()
def run(self, logdir=None, variables=None, use_coordinator=True, *args, **kwargs): """A simple wrapper to run inference. 1. Initialize algorithm via ``initialize``. 2. (Optional) Build a ``tf.train.SummaryWriter`` for TensorBoard. 3. (Optional) Initialize TensorFlow variables. 4. (Optional) Start queue runners. 5. Run ``update`` for ``self.n_iter`` iterations. 6. While running, ``print_progress``. 7. Finalize algorithm via ``finalize``. 8. (Optional) Stop queue runners. To customize the way inference is run, run these steps individually. Parameters ---------- logdir : str, optional Directory where event file will be written. For details, see `tf.train.SummaryWriter`. Default is to write nothing. variables : list, optional A list of TensorFlow variables to initialize during inference. Default is to initialize all variables (this includes reinitializing variables that were already initialized). To avoid initializing any variables, pass in an empty list. use_coordinator : bool, optional Whether to start and stop queue runners during inference using a TensorFlow coordinator. For example, queue runners are necessary for batch training with the ``n_minibatch`` argument or with file readers. *args Passed into ``initialize``. **kwargs Passed into ``initialize``. """ self.initialize(*args, **kwargs) if logdir is not None: self.train_writer = tf.train.SummaryWriter(logdir, tf.get_default_graph()) if variables is None: init = tf.initialize_all_variables() else: init = tf.initialize_variables(variables) # Feed placeholders in case initialization depends on them. feed_dict = {} for key, value in six.iteritems(self.data): if isinstance(key, tf.Tensor): feed_dict[key] = value init.run(feed_dict) if use_coordinator: # Start input enqueue threads. self.coord = tf.train.Coordinator() self.threads = tf.train.start_queue_runners(coord=self.coord) for _ in range(self.n_iter): info_dict = self.update() self.print_progress(info_dict) self.finalize() if use_coordinator: # Ask threads to stop. self.coord.request_stop() self.coord.join(self.threads)
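# The `variables` argument above simply selects which initializer op is built;
# a small demonstration of that branch with toy variables (names are
# illustrative). Passing an empty list yields a no-op, initializing nothing.
import tensorflow as tf

a = tf.Variable(1.0, name='a')
b = tf.Variable(2.0, name='b')

variables = [a]   # e.g. only (re-)initialize `a`; `b` keeps its old state
if variables is None:
    init = tf.initialize_all_variables()
else:
    init = tf.initialize_variables(variables)

with tf.Session():
    init.run()
    print(a.eval())  # 1.0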
def run(args, server, renderOnly=False): env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes, renderOnly=renderOnly) trainer = A3C(env, args.task, args.visualise, renderOnly=renderOnly) # Variable names that start with "local" are not saved in checkpoints. if use_tf12_api: variables_to_save = [v for v in tf.global_variables() if not v.name.startswith("local")] init_op = tf.variables_initializer(variables_to_save) init_all_op = tf.global_variables_initializer() else: variables_to_save = [v for v in tf.all_variables() if not v.name.startswith("local")] init_op = tf.initialize_variables(variables_to_save) init_all_op = tf.initialize_all_variables() saver = FastSaver(variables_to_save) var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name) logger.info('Trainable vars:') for v in var_list: logger.info(' %s %s', v.name, v.get_shape()) def init_fn(ses): logger.info("Initializing all parameters.") ses.run(init_all_op) config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)]) logdir = os.path.join(args.log_dir, 'train') if use_tf12_api: summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task) else: summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task) logger.info("Events directory: %s_%s", logdir, args.task) sv = tf.train.Supervisor(is_chief=(args.task == 0), logdir=logdir, saver=saver, summary_op=None, init_op=init_op, init_fn=init_fn, summary_writer=summary_writer, ready_op=tf.report_uninitialized_variables(variables_to_save), global_step=trainer.global_step, save_model_secs=30, save_summaries_secs=30) num_global_steps = 100000000 logger.info( "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. " + "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified.") with sv.managed_session(server.target, config=config) as sess, sess.as_default(): sess.run(trainer.sync) trainer.start(sess, summary_writer) global_step = sess.run(trainer.global_step) logger.info("Starting training at step=%d", global_step) while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps): #logger.info("About to process") trainer.process(sess) global_step = sess.run(trainer.global_step) # Ask for all the services to stop. sv.stop() logger.info('reached %s steps. worker stopped.', global_step)
def main(_): if not FLAGS.dataset_dir: raise ValueError('You must supply the dataset directory with --dataset_dir') tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default(): ####################### # Config model_deploy # ####################### deploy_config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=FLAGS.task, num_replicas=FLAGS.worker_replicas, num_ps_tasks=FLAGS.num_ps_tasks) # Create global_step with tf.device(deploy_config.variables_device()): global_step = slim.create_global_step() ###################### # Select the dataset # ###################### dataset = dataset_biasCNN.get_dataset( FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) dataset_val = dataset_biasCNN.get_dataset( FLAGS.dataset_name, 'validation', FLAGS.dataset_dir) ###################### # Select the network # ###################### if FLAGS.weights_initializer is None: weights_initializer = None # default value will be defined in argscope, it is xavier_initializer elif FLAGS.weights_initializer=='zeros': weights_initializer = tf.zeros_initializer() elif FLAGS.weights_initializer=='ones': weights_initializer = tf.ones_initializer() elif FLAGS.weights_initializer=='trunc_normal': weights_initializer = tf.truncated_normal_initializer() elif FLAGS.weights_initializer=='xavier': weights_initializer = initializers.xavier_initializer() elif FLAGS.weights_initializer=='var_scaling': weights_initializer = initializers.variance_scaling_initializer() else: raise ValueError('weight initializer not found') if FLAGS.biases_initializer is None: biases_initializer = None # default value will be defined in argscope, it is zeros_initializer elif FLAGS.biases_initializer=='zeros': biases_initializer = tf.zeros_initializer() elif FLAGS.biases_initializer=='ones': biases_initializer = tf.ones_initializer() elif FLAGS.biases_initializer=='trunc_normal': biases_initializer = tf.truncated_normal_initializer() elif FLAGS.biases_initializer=='xavier': biases_initializer = initializers.xavier_initializer() elif FLAGS.biases_initializer=='var_scaling': biases_initializer = initializers.variance_scaling_initializer() else: raise ValueError('biases initializer not found') network_fn = nets_factory.get_network_fn( FLAGS.model_name, num_classes=(dataset.num_classes - FLAGS.labels_offset), weight_decay=FLAGS.weight_decay, weights_initializer=weights_initializer, biases_initializer=biases_initializer, is_training=True) network_fn_val = nets_factory.get_network_fn( FLAGS.model_name, num_classes=(dataset.num_classes - FLAGS.labels_offset), weights_initializer=weights_initializer, biases_initializer=biases_initializer, is_training=False) ##################################### # Select the preprocessing function # ##################################### preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name image_preprocessing_fn = preprocessing_biasCNN.get_preprocessing( preprocessing_name, is_training=True, flipLR = FLAGS.flipLR, random_scale = FLAGS.random_scale, is_windowed = FLAGS.is_windowed) image_preprocessing_fn_val = preprocessing_biasCNN.get_preprocessing( preprocessing_name, is_training=False, flipLR = FLAGS.flipLR, random_scale = FLAGS.random_scale, is_windowed=FLAGS.is_windowed) ############################################################## # Create a dataset provider that loads data from the dataset # ############################################################## with tf.device(deploy_config.inputs_device()): provider = 
slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=20 * FLAGS.batch_size, common_queue_min=10 * FLAGS.batch_size) [image, label] = provider.get(['image', 'label']) label -= FLAGS.labels_offset train_image_size = FLAGS.train_image_size or network_fn.default_image_size image = image_preprocessing_fn(image, train_image_size, train_image_size) images, labels = tf.train.batch( [image, label], batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size) labels = slim.one_hot_encoding( labels, dataset.num_classes - FLAGS.labels_offset) batch_queue = slim.prefetch_queue.prefetch_queue( [images, labels], capacity=2 * deploy_config.num_clones) ############################################ # Create a provider for the validation set # ############################################ provider_val = slim.dataset_data_provider.DatasetDataProvider( dataset_val, shuffle=True, common_queue_capacity=2 * FLAGS.batch_size_val, common_queue_min=FLAGS.batch_size_val) [image_val, label_val] = provider_val.get(['image', 'label']) label_val -= FLAGS.labels_offset eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size image_val = image_preprocessing_fn_val(image_val, eval_image_size, eval_image_size) images_val, labels_val = tf.train.batch( [image_val, label_val], batch_size=FLAGS.batch_size_val, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size_val) labels_val_onehot = slim.one_hot_encoding( labels_val, dataset.num_classes - FLAGS.labels_offset) ############################### # Define the model (training) # ############################### def clone_fn(batch_queue): """Allows data parallelism by creating multiple clones of network_fn.""" images, labels = batch_queue.dequeue() with tf.variable_scope('my_scope'): logits, end_points = network_fn(images) ############################# # Specify the loss function # ############################# if 'AuxLogits' in end_points: slim.losses.softmax_cross_entropy( end_points['AuxLogits'], labels, label_smoothing=FLAGS.label_smoothing, weights=0.4, scope='aux_loss') tf.losses.softmax_cross_entropy( labels, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0) return end_points # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) first_clone_scope = deploy_config.clone_scope(0) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by network_fn. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Add summaries for end_points. end_points = clones[0].outputs for end_point in end_points: x = end_points[end_point] # adding in a picture of the activations at each layer, this is a good way to double check that the rotated images look rotated to our eyes if 'conv' in end_point: dims = x.get_shape() for ii in range(5): summaries.add(tf.summary.image('image_out/' + end_point + '/image_' + str(ii), tf.slice(x,[ii,0,0,0],[1,dims[1],dims[2],1]))) summaries.add(tf.summary.histogram('activations/' + end_point, x)) summaries.add(tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x))) # Add summaries for losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) # Add summaries for variables. 
for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) ################################# # Configure the moving averages # ################################# if FLAGS.moving_average_decay: moving_average_variables = slim.get_model_variables() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) else: moving_average_variables, variable_averages = None, None if FLAGS.quantize_delay >= 0: tf.contrib.quantize.create_training_graph( quant_delay=FLAGS.quantize_delay) ######################################### # Configure the optimization procedure. # ######################################### with tf.device(deploy_config.optimizer_device()): learning_rate = _configure_learning_rate(dataset.num_samples, global_step) optimizer = _configure_optimizer(learning_rate) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) if FLAGS.sync_replicas: # If sync_replicas is enabled, the averaging will be done in the chief # queue runner. optimizer = tf.train.SyncReplicasOptimizer( opt=optimizer, replicas_to_aggregate=FLAGS.replicas_to_aggregate, total_num_replicas=FLAGS.worker_replicas, variable_averages=variable_averages, variables_to_average=moving_average_variables) elif FLAGS.moving_average_decay: # Update ops executed locally by trainer. update_ops.append(variable_averages.apply(moving_average_variables)) # Variables to train. variables_to_train = _get_variables_to_train() # and returns a train_tensor and summary_op total_loss, clones_gradients = model_deploy.optimize_clones( clones, optimizer, var_list=variables_to_train) # Add total_loss to summary. summaries.add(tf.summary.scalar('total_loss', total_loss)) # Create gradient updates. grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') # Add the summaries from the first clone. These contain the summaries # created by model_fn and either optimize_clones() or _gather_clone_loss(). summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) ################################# # Define the model (validation) # ################################# # get the validation set logits (predictions) with tf.variable_scope('my_scope',reuse=True): logits_val, _ = network_fn_val(images_val) predictions_val = tf.argmax(logits_val, 1) # Define loss on validation set, add a summary tf.losses.softmax_cross_entropy( labels_val_onehot, logits_val, label_smoothing=FLAGS.label_smoothing, weights=1.0, loss_collection = 'eval_losses') for loss in tf.get_collection('eval_losses'): summaries.add(tf.summary.scalar('eval_losses/%s' % loss.op.name, loss)) # Define the validation set metrics: # Will define each metric twice as separate operation. # One set will be made resettable, the other set will be streaming. 
with tf.name_scope('eval_metrics'): eval_acc_value, eval_acc_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val) eval_recall_5_value, eval_recall_5_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) # add these variables as summaries for tensorboard summaries.add(tf.summary.scalar('eval_recall_5', eval_recall_5_value)) summaries.add(tf.summary.scalar('eval_acc', eval_acc_value)) with tf.name_scope('eval_metrics_streaming'): eval_acc_streaming_value, eval_acc_streaming_op = tf.metrics.accuracy(predictions=predictions_val,labels=labels_val) eval_recall_5_streaming_value, eval_recall_5_streaming_op = slim.metrics.streaming_recall_at_k(predictions=logits_val, labels=labels_val,k=5) # add these variables as summaries for tensorboard summaries.add(tf.summary.scalar('eval_recall_5_streaming', eval_recall_5_streaming_value)) summaries.add(tf.summary.scalar('eval_acc_streaming', eval_acc_streaming_value)) # also add summaries of all the local variables used to compute the eval metrics... for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics'): summaries.add(tf.summary.scalar('%s' % metric.op.name, metric)) for metric in tf.get_collection(tf.GraphKeys.METRIC_VARIABLES, 'eval_metrics_streaming'): summaries.add(tf.summary.scalar('%s' % metric.op.name, metric)) # gather up all the variables that are used to compute eval metrics stream_vars = [i for i in tf.local_variables() if i.name.split('/')[0]=='eval_metrics'] # make an operation that'll let us re-initialize just these vars. reset_op = tf.initialize_variables(stream_vars) # make an operation that'll let us run evaluation (all metrics) eval_op = list([eval_acc_op, eval_recall_5_op, eval_acc_streaming_op, eval_recall_5_streaming_op]) # Gather validation summaries summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES)) # Merge all summaries together (this includes training summaries too). summary_op = tf.summary.merge(list(summaries), name='summary_op') # Create a non-default saver so we don't delete all the old checkpoints. my_saver = tf_saver.Saver(max_to_keep=FLAGS.max_checkpoints_to_keep, keep_checkpoint_every_n_hours=FLAGS.keep_checkpoint_every_n_hours,) # Create a non-default dictionary of options for train_step_fn # This is a hack that lets us pass everything we need to run evaluation, into the training loop function with ops.name_scope('train_step'): train_step_kwargs = {} if FLAGS.max_number_of_steps: should_stop_op = math_ops.greater_equal(global_step, FLAGS.max_number_of_steps) else: should_stop_op = constant_op.constant(False) train_step_kwargs['should_stop'] = should_stop_op if FLAGS.log_every_n_steps > 0: train_step_kwargs['should_log'] = math_ops.equal( math_ops.mod(global_step, FLAGS.log_every_n_steps), 0) train_step_kwargs['should_val'] = math_ops.equal( math_ops.mod(global_step, FLAGS.val_every_n_steps),0) train_step_kwargs['should_reset_eval_metrics'] = math_ops.equal( math_ops.mod(global_step, tf.to_int64(math_ops.multiply(FLAGS.reset_eval_metrics_every_n_vals, FLAGS.val_every_n_steps))),0) train_step_kwargs['eval_op'] = eval_op train_step_kwargs['reset_op'] = reset_op ########################### # Kicks off the training. 
# ########################### slim.learning.train( train_tensor, logdir=FLAGS.train_dir, master=FLAGS.master, is_chief=(FLAGS.task == 0), init_fn=_get_init_fn(), summary_op=summary_op, number_of_steps=FLAGS.max_number_of_steps, log_every_n_steps=FLAGS.log_every_n_steps, save_summaries_secs=FLAGS.save_summaries_secs, save_interval_secs=FLAGS.save_interval_secs, sync_optimizer=optimizer if FLAGS.sync_replicas else None, saver=my_saver, train_step_fn=learning_biasCNN.train_step_fn, train_step_kwargs = train_step_kwargs)
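# Self-contained sketch of the resettable-metric pattern wired up above: the
# local accumulator variables that tf.metrics creates under a name scope can
# be gathered and re-initialized to start a fresh evaluation window.
import tensorflow as tf

labels = tf.placeholder(tf.int64, [None])
predictions = tf.placeholder(tf.int64, [None])

with tf.name_scope('eval_metrics'):
    acc_value, acc_update = tf.metrics.accuracy(predictions=predictions, labels=labels)

stream_vars = [i for i in tf.local_variables() if i.name.split('/')[0] == 'eval_metrics']
reset_op = tf.initialize_variables(stream_vars)

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())
    sess.run(acc_update, {predictions: [1, 1], labels: [1, 0]})
    print(sess.run(acc_value))   # 0.5
    sess.run(reset_op)           # start a fresh evaluation window
    sess.run(acc_update, {predictions: [1], labels: [1]})
    print(sess.run(acc_value))   # 1.0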
# Convert the output signal into probability values.
prediction = tf.nn.softmax(wx_plus_b)

# Originally a quadratic cost function was used.
with tf.name_scope('loss'):
    loss = tf.reduce_mean(tf.square(y - prediction))
    tf.summary.scalar('loss', loss)

# Use gradient descent.
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(0.2).minimize(loss)

# Initialize the variables. Versions before 0.12 only provide
# tf.initialize_all_variables(); newer ones use tf.global_variables_initializer().
if int((tf.__version__).split('.')[1]) < 12 and int(
        (tf.__version__).split('.')[0]) < 1:
    init = tf.initialize_all_variables()
else:
    init = tf.global_variables_initializer()

# Store the results in a list of booleans (a 1*100 boolean matrix).
# argmax returns the position of the largest value in a 1-D tensor.
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))
    with tf.name_scope('accuracy'):
        # Compute the accuracy; first cast the boolean matrix to a float matrix.
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

# Merge all the summaries.
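# The version check above is easy to get wrong when repeated; a small helper
# capturing it in one place. This is a sketch: it assumes tf.__version__ is a
# plain "major.minor.patch" string.
import tensorflow as tf

def make_init_op():
    major, minor = (int(x) for x in tf.__version__.split('.')[:2])
    if major < 1 and minor < 12:
        return tf.initialize_all_variables()    # pre-0.12 API
    return tf.global_variables_initializer()    # 0.12+ API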
def test_all_initialized(self): with self.test_session() as sess: x = tf.Variable(tf.zeros([])) sess.run(tf.initialize_variables([x])) self.assertEqual([], tdc._init_uninitialized(sess))
def build_graph(cluster, image_url, return_list): prob_list = return_list num_workers = cluster.num_tasks('worker') # default picture for testing if image_url == None: image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/7e/Bow_bow.jpg/800px-Bow_bow.jpg" image_string = urllib.urlopen(image_url).read() #image_string = tf.read_file("/home/philiptkd/Downloads/Dependency_Tree.png") # I lost internet image_size = inception.inception_v1_dist.default_image_size # shared done list, ready list, and image with tf.device("/job:ps/task:0"): done_list = tf.get_variable("done_list", [num_workers+1], tf.int32, tf.zeros_initializer) ready_list = tf.get_variable("ready_list", [num_workers], tf.int32, tf.zeros_initializer) with tf.device("/job:worker/task:0"): # image image = tf.image.decode_jpeg(image_string, channels=3) processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False) processed_images = tf.expand_dims(processed_image, 0) shared_image = tf.Variable(processed_images, name="shared_image") #download the inception v1 checkpoint if we need to url = "http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz" checkpoints_dir = '/tmp/checkpoints' if not tf.gfile.Exists(checkpoints_dir): tf.gfile.MakeDirs(checkpoints_dir) if not tf.gfile.Exists(checkpoints_dir+'/inception_v1_2016_08_28.tar.gz'): dataset_utils.download_and_uncompress_tarball(url, checkpoints_dir) # end download server = tf.train.Server(cluster, job_name="ps", task_index=0) sess = tf.Session(target=server.target) # Create the model, use the default arg scope to configure the batch norm parameters. with slim.arg_scope(inception.inception_v1_dist_arg_scope()): logits, _ = inception.inception_v1_dist(shared_image, num_workers, num_classes=1001, is_training=False, reuse=tf.AUTO_REUSE) probabilities = tf.nn.softmax(logits) # initialization function that uses saved parameters init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) sess.run(tf.initialize_variables([done_list, ready_list, shared_image])) # initialize variables that aren't model parameters init_fn(sess) # wait for workers to acknowledge variables have been initialized while sess.run(tf.reduce_sum(ready_list)) < num_workers: pass # do the thing print("before getting probs") run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() np_image, probabilities = sess.run([shared_image, probabilities], options=run_options, run_metadata=run_metadata) print("after getting probs") # see who did what for device in run_metadata.step_stats.dev_stats: print(device.device) for node in device.node_stats: print(" ", node.node_name) # indicate that the ps task is done sess.run(tf.scatter_update(done_list, [0], 1)) # wait until all tasks are done num_done = 1 while num_done < num_workers+1: num_done = sess.run(tf.reduce_sum(done_list)) sess.close() probabilities = probabilities[0, 0:] sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])] names = imagenet.create_readable_names_for_imagenet_labels() for i in range(5): index = sorted_inds[i] probability = 'Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]) prob_list.append(probability) print(probability)
def run(args, server): env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes) if args.teacher: teacher = model.LSTMPolicy(env.observation_space.shape, env.action_space.n, name="global") teacher_init_op = teacher.load_model_from_checkpoint( args.checkpoint_path) trainer = A3C(env, args.task, args.visualise, teacher=teacher, name="student") else: teacher = None trainer = A3C(env, args.task, args.visualise, teacher=teacher) # Variable names that start with "local" are not saved in checkpoints. if use_tf12_api: variables_to_save = trainer.global_var_list all_trainable_variables = [ v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if trainer.scope in v.name ] init_op = tf.variables_initializer(variables_to_save) init_all_op = tf.variables_initializer(all_trainable_variables) else: variables_to_save = trainer.global_var_list init_op = tf.initialize_variables(variables_to_save) all_trainable_variables = [ v for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) if trainer.scope in v.name ] init_all_op = tf.initialize_variables(all_trainable_variables) saver = FastSaver(variables_to_save) logger.info('Trainable vars:') for v in all_trainable_variables: logger.info('{} {}'.format(v.name, v.get_shape())) def init_fn(ses): logger.info("Initializing all parameters.") ses.run([init_all_op]) def get_init_fn(): if args.teacher: return tf.contrib.framework.assign_from_checkpoint_fn( args.checkpoint_path, teacher.var_list, ignore_missing_vars=True) else: return lambda sess: init_fn(sess) config = tf.ConfigProto(device_filters=[ "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task) ]) logdir = os.path.join(args.log_dir, 'train') if use_tf12_api: summary_writer = tf.summary.FileWriter(logdir + "_{}".format(args.task)) else: summary_writer = tf.train.SummaryWriter(logdir + "_{}".format(args.task)) logger.info("Events directory: {}_{}".format(logdir, args.task)) sv = tf.train.Supervisor( is_chief=(args.task == 0), logdir=logdir, saver=saver, summary_op=None, init_op=init_op, init_fn=get_init_fn(), summary_writer=summary_writer, ready_op=tf.report_uninitialized_variables(variables_to_save), global_step=trainer.global_step, save_model_secs=30, save_summaries_secs=30) num_global_steps = 100000000 logger.info( "Starting session. If this hangs, we're most likely waiting to connect to the parameter server. " + "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified." ) with sv.managed_session(server.target, config=config) as sess, sess.as_default(): sess.run(trainer.sync) trainer.start(sess, summary_writer) global_step = sess.run(trainer.global_step) logger.info("Starting training at step={}".format(global_step)) while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps): trainer.process(sess) global_step = sess.run(trainer.global_step) # Ask for all the services to stop. sv.stop() logger.info('reached {} steps. worker stopped.'.format(global_step))
def train(sess, repModel, evalModel, permModel, save_dir, dataset, oracle, freeze, min_feats="1", max_feats="10", inner_batch_size=5, inner_iters=20, learning_rate=0.0001, meta_step_size=0.1, meta_batch_size=1, meta_iters=15001, reptile_fn=Reptile, perm_epochs=501, perm_lr=0.00001, feature_split=0, name="Model", name_affix="", save_path="exp1", log_fn=id_print, job_id=0): if not os.path.exists(save_dir): os.mkdir(save_dir) if saving_raw: raw_path = "./results/" + save_path + "/" + name if not os.path.exists(raw_path): os.system(f"mkdir -p {raw_path}") if not os.path.exists(os.path.join(raw_path, name_affix)): os.mkdir(os.path.join(raw_path, name_affix)) # Save scratch model vars, so they can be reset during training evalVars = [] for v in tf.trainable_variables(): if "EvalMod" in v.name: evalVars.append(v) # init reptile process reptile = reptile_fn(sess) saver = tf.train.Saver() # Loading Data start = time.time() train_gen = dataset.generate(dataset.totalLabels, ast.literal_eval(min_feats), ast.literal_eval(max_feats), inner_batch_size, inner_batch_size, meta_batch_size, test=False, oracle=oracle) if oracle: test_data = dataset.test_data_oracle else: test_data = dataset.test_data log_fn( job_id, f"------------------Finished loading data in {time.time()-start}--------------------------" ) log_fn( job_id, f"Training data shape: X{dataset.train_x.shape} y{dataset.train_y.shape}" ) log_fn(job_id, f"Test data shape: X{dataset.val_x.shape} y{dataset.val_y.shape}") log_fn(job_id, "") # Declare the tensorflow graph # Each component has an optimizer and update ops if permModel is not None: # Optimizer here used for pretraining perm_opt = tf.train.AdamOptimizer(learning_rate=perm_lr) perm_gradients, perm_variables = zip( *perm_opt.compute_gradients(permModel.loss)) perm_train_op = perm_opt.apply_gradients( zip(perm_gradients, perm_variables)) if not freeze: rep_opt = tf.train.AdamOptimizer(learning_rate=learning_rate) rep_gradients, rep_variables = zip( *rep_opt.compute_gradients(repModel.totalLoss)) rep_train_op = rep_opt.apply_gradients( zip(rep_gradients, rep_variables)) repModel.setTrainOP(rep_train_op) else: rep_opt = tf.train.AdamOptimizer(learning_rate=learning_rate) rep_train_op = rep_opt.minimize( repModel.totalLoss, var_list=[ var for var in tf.trainable_variables() if "Perm" not in var.name and "RepMod" in var.name ]) repModel.setTrainOP(rep_train_op) if evalModel is not None: eval_opt = tf.train.AdamOptimizer(learning_rate=learning_rate) eval_gradients, eval_variables = zip( *eval_opt.compute_gradients(evalModel.totalLoss)) eval_train_op = eval_opt.apply_gradients( zip(eval_gradients, eval_variables)) evalModel.setTrainOP(eval_train_op) #Initializing variables sess.run(tf.initializers.global_variables()) if evalModel is not None: init_eval_vars = tf.initialize_variables(evalVars) #Pre-train permutation model if permModel is not None: log_fn(job_id, "") log_fn( job_id, "-------------------------------Training Chameleon network:-------------------------------" ) start = time.time() # Sample a random task for pre training from meta data set "data" def sampleTask(data, minF, maxF, maxK, meta_batch=1): meta_x = [] meta_y = [] num_feat = np.random.randint(minF, maxF + 1) for b in range(meta_batch): features = np.random.choice(range(len(data[0])), num_feat, replace=False) out = np.transpose(np.array([data[:, i] for i in features])) order = np.eye(maxK)[features.astype(int)] meta_x.append(out) meta_y.append(order) return meta_x, meta_y trainLoss_buffer = [] train_losses = [] train_indexes = [] 
for epoch in range(1, perm_epochs + 1): X_pre = dataset.train_x[:, dataset.train_f] np.random.shuffle(X_pre) loss_per_epoch = [] val_loss_per_epoch = [] val_acc_per_epoch = [] for minibatch in range( int(len(X_pre) / (inner_batch_size * dataset.totalLabels))): # CUrrently only mb = 1 X = X_pre[(inner_batch_size * dataset.totalLabels) * minibatch:(inner_batch_size * dataset.totalLabels) * minibatch + (inner_batch_size * dataset.totalLabels)] X_perm, order = sampleTask(X, len(X[0]), len(X[0]), len(dataset.train_f), 32) loss, _, out = sess.run( [permModel.loss, perm_train_op, permModel.out], feed_dict={ permModel.task: X_perm, permModel.label: order }) #, permModel.train_mode:True}) loss_per_epoch.append(loss) trainLoss_buffer.append(np.mean(loss_per_epoch)) train_losses.append(np.mean(loss_per_epoch)) train_indexes.append(epoch) if epoch % 50 == 0: log_fn( job_id, f"Epoch {epoch}: Permutation loss: {np.mean(trainLoss_buffer):.3f}" ) trainLoss_buffer = [] if perm_epochs != 0 and not freeze: if saving_plots: savePlot(train_losses, "Plots", dataset.path.split("/")[-1], "Permutation", "Chameleon", xticks=train_indexes, xaxis="Meta Epochs", yaxis="Loss", run=name) if saving_raw: np.save( os.path.join(raw_path, name_affix) + "/perm_loss.npy", np.array([train_indexes, train_losses])) log_fn(job_id, f"Finished pre-training in {time.time()-start:.2f}s") log_fn(job_id, "") if evalModel is not None: log_fn( job_id, "-------------------------------Evaluating Test Data with Scratch Training:---------------" ) reptile.testScratch(evalModel, inner_iters, init_eval_vars, data=test_data, train_f=len(dataset.train_f)) log_fn( job_id, f"Scratch Evaluation: -- Test Loss {reptile.scratch_loss} -- Test Acc {reptile.scratch_acc}" ) log_fn(job_id, "") if saving_plots: savePlot([reptile.scratch_loss, reptile.scratch_loss], "Plots", dataset.path.split("/")[-1], "Final_Metatest_ValLoss", "Scratch", xticks=[0, meta_iters], xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot([reptile.scratch_acc, reptile.scratch_acc], "Plots", dataset.path.split("/")[-1], "Final_Metatest_ValAcc", "Scratch", xticks=[0, meta_iters], xaxis="Meta Epochs", yaxis="Accuracy", run=name) if saving_raw: np.save( os.path.join(raw_path, name_affix) + "/Scratch_Metatest_ValLoss.npy", np.array([reptile.scratch_loss, reptile.scratch_loss])) np.save( os.path.join(raw_path, name_affix) + "/Scratch_Metatest_ValAcc.npy", np.array([reptile.scratch_acc, reptile.scratch_acc])) if permModel is not None: log_fn( job_id, "-------------------------------Training Chameleon and Base Model with reptile:-----------" ) else: log_fn( job_id, "--------------------------------------Training Base Model with reptile:------------------" ) # Evaluate the initialized model val_final = [] val_index = [] train_final = [] train_index = [] train_buffer = [] full_start = time.time() start = time.time() if oracle: # make sure oracle is padded to testfeats t_f = dataset.totalFeatures else: t_f = len(dataset.train_f) reptile.evaluate(repModel=repModel, permModel=permModel, inner_iters=inner_iters, data=test_data, train_f=t_f, exp_name=dataset.path.split("/")[-1], meta_epoch=0, name=name) log_fn( job_id, f"Val Epoch {0}: Initial Train Loss: {reptile.eval_train_losses[0]:.2f} -- Final Train Loss: {reptile.eval_train_losses[-1]:.2f} -- Val Loss: {reptile.eval_test_loss:.2f} -- Val Acc: {reptile.eval_test_acc:.2f} in {time.time()-start:.2f}s" ) log_fn(job_id, "") val_final.append([ reptile.eval_train_losses[0], reptile.eval_train_losses[-1], reptile.eval_test_loss, 
reptile.eval_test_acc ]) # Perform reptile joint training on the model for meta_epoch in range(1, meta_iters + 1): start = time.time() # Perform one train step reptile.train_step(repModel=repModel, permModel=permModel, inner_iters=inner_iters, meta_step_size=meta_step_size, data=train_gen, train_f=t_f, exp_name=dataset.path.split("/")[-1], meta_epoch=meta_epoch, name=name) train_final.append([ reptile.run_train_losses[0], reptile.run_train_losses[-1], reptile.run_test_loss_before, reptile.run_test_loss_after ]) train_index.append(meta_epoch) train_buffer.append([ reptile.run_train_losses[0], reptile.run_train_losses[-1], reptile.run_test_loss_before, reptile.run_test_loss_after ]) # log_fn Train Step if meta_epoch % 100 == 0: train_buffer = np.mean(train_buffer, axis=0) log_fn( job_id, f"Train Epoch {meta_epoch}: Initial Train Loss: {train_buffer[0]:.2f} -- Final Train Loss: {train_buffer[1]:.2f} -- Initial Val Loss: {train_buffer[2]:.2f} -- Final Val Loss: {train_buffer[3]:.2f}" ) train_buffer = [] # Validates performance on test data if meta_epoch % 100 == 0: reptile.evaluate(repModel=repModel, permModel=permModel, inner_iters=inner_iters, data=test_data, train_f=t_f, exp_name=dataset.path.split("/")[-1], meta_epoch=meta_epoch, name=name) val_final.append([ reptile.eval_train_losses[0], reptile.eval_train_losses[-1], reptile.eval_test_loss, reptile.eval_test_acc ]) val_index.append(meta_epoch) log_fn( job_id, f"Val Epoch {0}: Initial Train Loss: {reptile.eval_train_losses[0]:.2f} -- Final Train Loss: {reptile.eval_train_losses[-1]:.2f} -- Val Loss: {reptile.eval_test_loss:.2f} -- Val Acc: {reptile.eval_test_acc:.2f} in {time.time()-start:.2f}s" ) log_fn(job_id, f"Finished joint training in {time.time()-full_start}s") log_fn(job_id, "") if permModel is not None: permName = "Chameleon+Reptile" else: permName = "Reptile" if freeze: permName += "_Frozen" if perm_epochs == 0: permName += "_Untrained" log_fn(job_id, "Final Shape", np.array(train_final).shape) log_fn(job_id, "Final Shape", np.array(val_final).shape) log_fn(job_id, val_index) if saving_plots: savePlot(np.array(train_final)[:, 0], "Plots", dataset.path.split("/")[-1], "Initial_Metatrain_Loss", permName, xticks=train_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(train_final)[:, 1], "Plots", dataset.path.split("/")[-1], "Final_Metatrain_Loss", permName, xticks=train_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(train_final)[:, 2], "Plots", dataset.path.split("/")[-1], "Initial_Metatrain_ValLoss", permName, xticks=train_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(train_final)[:, 3], "Plots", dataset.path.split("/")[-1], "Final_Metatrain_ValLoss", permName, xticks=train_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(val_final)[:, 0], "Plots", dataset.path.split("/")[-1], "Initial_Metatest_Loss", permName, xticks=val_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(val_final)[:, 1], "Plots", dataset.path.split("/")[-1], "Final_Metatest_Loss", permName, xticks=val_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(val_final)[:, 2], "Plots", dataset.path.split("/")[-1], "Final_Metatest_ValLoss", permName, xticks=val_index, xaxis="Meta Epochs", yaxis="Loss", run=name) savePlot(np.array(val_final)[:, 3], "Plots", dataset.path.split("/")[-1], "Final_Metatest_ValAcc", permName, xticks=val_index, xaxis="Meta Epochs", yaxis="Accuracy", run=name) if saving_raw: np.save( os.path.join(raw_path, name_affix) + 
"/TrainIndexes.npy", train_index) np.save( os.path.join(raw_path, name_affix) + "/Initial_Metatrain_Loss.npy", np.array(train_final)[:, 0]) np.save( os.path.join(raw_path, name_affix) + "/Final_Metatrain_Loss.npy", np.array(train_final)[:, 1]) np.save( os.path.join(raw_path, name_affix) + "/Initial_Metatrain_ValLoss.npy", np.array(train_final)[:, 2], ) np.save( os.path.join(raw_path, name_affix) + "/Final_Metatrain_ValLoss.npy", np.array(train_final)[:, 3]) np.save( os.path.join(raw_path, name_affix) + "/ValIndexes.npy", val_index) np.save( os.path.join(raw_path, name_affix) + "/Initial_Metatest_Loss.npy", np.array(val_final)[:, 0]) np.save( os.path.join(raw_path, name_affix) + "/Final_Metatest_Loss.npy", np.array(val_final)[:, 1]) np.save( os.path.join(raw_path, name_affix) + "/Final_Metatest_ValLoss.npy", np.array(val_final)[:, 2]) np.save( os.path.join(raw_path, name_affix) + "/Final_Metatest_ValAcc.npy", np.array(val_final)[:, 3])
# Helper ops
nonzero_indicator1 = tf.to_float(tf.not_equal(W_fc1, tf.zeros_like(W_fc1)))
nonzero_indicator2 = tf.to_float(tf.not_equal(W_fc2, tf.zeros_like(W_fc2)))
nonzero_indicator3 = tf.to_float(tf.not_equal(W_fc3, tf.zeros_like(W_fc3)))
count_parameters1 = tf.reduce_sum(nonzero_indicator1)
count_parameters2 = tf.reduce_sum(nonzero_indicator2)
count_parameters3 = tf.reduce_sum(nonzero_indicator3)

# Create a saver for writing training checkpoints.
saver = tf.train.Saver()

# Run training in a session
sess = tf.Session()
sess.run(tf.initialize_all_variables())
sess.run(tf.initialize_variables(tf.get_collection(
    tf.GraphKeys.PRUNING_MASKS)))

def print_mask_parameter_counts():
    print("# Mask Parameter Counts")
    print(" - Mask1: {0}".format(
        sess.run(
            tf.reduce_sum(
                tf.to_float(
                    tf.not_equal(indicator_matrix1,
                                 tf.zeros_like(indicator_matrix1)))))))
    print(" - Mask2: {0}".format(
        sess.run(
            tf.reduce_sum(
                tf.to_float(
                    tf.not_equal(indicator_matrix2,
def reinit(self):
    init = tf.initialize_variables(tf.trainable_variables())
    self.sess.run(init)
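# `tf.initialize_variables` is the pre-1.0 name; from TF 0.12 onwards the
# same grouped init op is exposed as `tf.variables_initializer` (the
# `use_tf12_api` switch in the A3C snippet below makes the same
# distinction). A sketch of the reinit helper against the newer alias:

import tensorflow as tf  # graph-mode TF 1.x assumed

def reinit(sess):
    # variables_initializer returns a single op that re-runs the
    # initializer of every listed variable; running it resets the weights.
    sess.run(tf.variables_initializer(tf.trainable_variables()))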
def train(self, config): global_step1 = tf.Variable(0, name='global_step_train1', trainable=False) global_step2 = tf.Variable(0, name='global_step_train2', trainable=False) global_step3 = tf.Variable(0, name='global_step_train3', trainable=False) train_optim_ver = tf.train.AdamOptimizer( config.learning_rate).minimize(self.loss_fine_ver, global_step=global_step1) train_optim_hor = tf.train.AdamOptimizer( config.learning_rate).minimize(self.loss_fine_hor, global_step=global_step2) train_optim_views = tf.train.AdamOptimizer( config.learning_rate).minimize(self.loss_fine_views, global_step=global_step3) t_vars = tf.trainable_variables() self.var_list1 = [var for var in t_vars if 'first_two_' in var.name] self.var_list2 = [var for var in t_vars if 'last' in var.name] self.var_list3 = [var for var in t_vars if 'ver' in var.name] self.var_list4 = [var for var in t_vars if 'hor' in var.name] self.var_list5 = [var for var in t_vars if 'views' in var.name] self.var_list6 = [var for var in t_vars if 'shared_' in var.name] self.var_list7 = [var for var in t_vars if 'shread_' in var.name] tf.initialize_all_variables().run() if config.is_finetune: # Initialize Spanet and load pretrained network tmp = self.var_list1 + self.var_list2 self.saver = tf.train.Saver(var_list=tmp, max_to_keep=1) tf.initialize_variables(tmp).run() #load trained network if self.loadnet(self.checkpoint_dir, 'spaSR'): #Load Spatial SR network print('Load pretrained spatial network') else: print(' Load Fail!!') tmp = self.var_list3 + self.var_list4 + self.var_list5 + self.var_list6 + self.var_list7 self.saver = tf.train.Saver(var_list=tmp, max_to_keep=1) tf.initialize_variables(tmp).run() if self.loadnet(self.checkpoint_dir, 'allviews'): #Load Spatial SR network print('Load pretrained angular network') else: print(' Load Fail!!') self.saver = tf.train.Saver(max_to_keep=1) else: self.saver = tf.train.Saver(max_to_keep=1) if self.loadnet(self.checkpoint_dir, 'finetune'): #Load Spatial SR network print('Load pretrained angular network') else: print(' Load Fail!!') train_ver_input, train_hor_input, train_views_input, train_ver_sr_gt, train_hor_sr_gt, train_views_sr_gt, train_ver_ang_gt, train_hor_ang_gt, train_views_ang_gt = load_traindata( ) [ val_ver_input, val_hor_input, val_views_input, val_ver_sr_gt, val_hor_sr_gt, val_views_sr_gt, val_ver_ang_gt, val_hor_ang_gt, val_views_ang_gt ] = load_valdata() batch_idxs_views = train_views_input.shape[-1] / self.batch_size val_batch_idxs_views = val_views_input.shape[-1] / self.batch_size for epoch in xrange(config.epochs): rand_idx_ver = np.random.permutation( range(train_ver_input.shape[-1])) rand_idx_hor = np.random.permutation( range(train_hor_input.shape[-1])) rand_idx_views = np.random.permutation( range(train_views_input.shape[-1])) val_rand_idx_ver = np.random.permutation( range(val_ver_input.shape[-1])) val_rand_idx_hor = np.random.permutation( range(val_hor_input.shape[-1])) val_rand_idx_views = np.random.permutation( range(val_views_input.shape[-1])) train_spa_MSE = 0.0 train_ang_MSE = 0.0 train_total_MSE = 0.0 val_spa_MSE = 0.0 val_ang_MSE = 0.0 val_total_MSE = 0.0 for idx in xrange(0, batch_idxs_views): if epoch == 0: f_train_epoch = open( os.path.join("logs", self.date, 'train_epoch.log'), 'w') f_val = open(os.path.join("logs", self.date, 'val.log'), 'w') else: f_train_epoch = open( os.path.join("logs", self.date, 'train_epoch.log'), 'aw') f_val = open(os.path.join("logs", self.date, 'val.log'), 'aw') randview = np.random.permutation(range(3)) for view in randview: if view == 
0: batch_files = rand_idx_ver[idx * config.batch_size:(idx + 1) * config.batch_size] batches = [ get_image(train_ver_input[0, batch], train_ver_sr_gt[0, batch], train_ver_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) spa_gt1 = batches[:, :, :, 2] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 3] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) ang_gt = batches[:, :, :, 4] ang_gt = np.expand_dims(ang_gt, axis=-1) _, total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run( [ train_optim_ver, self.loss_fine_ver, self.loss_spa1, self.loss_spa2, self.loss_ver ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_ang_gt: ang_gt }) self.count += 1 train_ang_MSE += ang_MSE train_total_MSE += total_MSE train_spa_MSE = (spa1_MSE + spa2_MSE) / 2. + train_spa_MSE elif view == 1: batch_files = rand_idx_hor[idx * config.batch_size:(idx + 1) * config.batch_size] batches = [ get_image(train_hor_input[0, batch], train_hor_sr_gt[0, batch], train_hor_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) spa_gt1 = batches[:, :, :, 2] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 3] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) ang_gt = batches[:, :, :, -1] ang_gt = np.expand_dims(ang_gt, axis=-1) _, total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run( [ train_optim_hor, self.loss_fine_hor, self.loss_spa1, self.loss_spa2, self.loss_hor ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_ang_gt: ang_gt }) self.count += 1 train_ang_MSE += ang_MSE train_total_MSE += total_MSE train_spa_MSE = (spa1_MSE + spa2_MSE) / 2. 
+ train_spa_MSE else: batch_files = rand_idx_views[idx * config.batch_size:(idx + 1) * config.batch_size] batches = [ get_image(train_views_input[0, batch], train_views_sr_gt[0, batch], train_views_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) input3 = batches[:, :, :, 2] input3 = np.expand_dims(input3, axis=-1) input4 = batches[:, :, :, 3] input4 = np.expand_dims(input4, axis=-1) spa_gt1 = batches[:, :, :, 4] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 5] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) spa_gt3 = batches[:, :, :, 6] spa_gt3 = np.expand_dims(spa_gt3, axis=-1) spa_gt4 = batches[:, :, :, 7] spa_gt4 = np.expand_dims(spa_gt4, axis=-1) ang_gt = batches[:, :, :, -1] ang_gt = np.expand_dims(ang_gt, axis=-1) _, total_MSE, spa1_MSE, spa2_MSE, spa3_MSE, spa4_MSE, ang_MSE = self.sess.run( [ train_optim_views, self.loss_fine_views, self.loss_spa1, self.loss_spa2, self.loss_spa3, self.loss_spa4, self.loss_views ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_input3: input3, self.train_input4: input4, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_spa_gt3: spa_gt3, self.train_spa_gt4: spa_gt4, self.train_ang_gt: ang_gt }) self.count += 1 train_ang_MSE += ang_MSE train_spa_MSE = (spa1_MSE + spa2_MSE + spa3_MSE + spa4_MSE) / 4. + train_spa_MSE train_total_MSE += total_MSE print( 'Epoch train[%2d] total MSE: %.4f spa MSE: %.4f ang MSE: %.4f \n' % (epoch, train_total_MSE / (3 * batch_idxs_views), train_spa_MSE / (3 * batch_idxs_views), train_ang_MSE / (3 * batch_idxs_views))) #Validation for val_idx in xrange(0, val_batch_idxs_views): randview = np.random.permutation(range(3)) for view in randview: if view == 0: batch_files = val_rand_idx_ver[val_idx * config.batch_size: (val_idx + 1) * config.batch_size] batches = [ get_image(val_ver_input[0, batch], val_ver_sr_gt[0, batch], val_ver_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) spa_gt1 = batches[:, :, :, 2] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 3] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) ang_gt = batches[:, :, :, 4] ang_gt = np.expand_dims(ang_gt, axis=-1) total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run( [ self.loss_fine_ver, self.loss_spa1, self.loss_spa2, self.loss_ver ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_ang_gt: ang_gt }) val_ang_MSE += ang_MSE val_total_MSE += total_MSE val_spa_MSE = spa1_MSE + spa2_MSE + train_spa_MSE elif view == 1: batch_files = val_rand_idx_hor[val_idx * config.batch_size: (val_idx + 1) * config.batch_size] batches = [ get_image(val_hor_input[0, batch], val_hor_sr_gt[0, batch], val_hor_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) spa_gt1 = batches[:, :, :, 2] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 3] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) ang_gt = batches[:, :, :, -1] 
ang_gt = np.expand_dims(ang_gt, axis=-1) total_MSE, spa1_MSE, spa2_MSE, ang_MSE = self.sess.run( [ self.loss_fine_hor, self.loss_spa1, self.loss_spa2, self.loss_hor ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_ang_gt: ang_gt }) val_ang_MSE += ang_MSE val_total_MSE += total_MSE val_spa_MSE = spa1_MSE + spa2_MSE + train_spa_MSE else: batch_files = val_rand_idx_views[val_idx * config.batch_size: (val_idx + 1) * config.batch_size] batches = [ get_image(val_views_input[0, batch], val_views_sr_gt[0, batch], val_views_ang_gt[0, batch], self.image_wid) for batch in batch_files ] batches = np.array(batches).astype(np.float32) input1 = batches[:, :, :, 0] input1 = np.expand_dims(input1, axis=-1) input2 = batches[:, :, :, 1] input2 = np.expand_dims(input2, axis=-1) input3 = batches[:, :, :, 2] input3 = np.expand_dims(input3, axis=-1) input4 = batches[:, :, :, 3] input4 = np.expand_dims(input4, axis=-1) spa_gt1 = batches[:, :, :, 4] spa_gt1 = np.expand_dims(spa_gt1, axis=-1) spa_gt2 = batches[:, :, :, 5] spa_gt2 = np.expand_dims(spa_gt2, axis=-1) spa_gt3 = batches[:, :, :, 6] spa_gt3 = np.expand_dims(spa_gt3, axis=-1) spa_gt4 = batches[:, :, :, 7] spa_gt4 = np.expand_dims(spa_gt4, axis=-1) ang_gt = batches[:, :, :, -1] ang_gt = np.expand_dims(ang_gt, axis=-1) total_MSE, spa1_MSE, spa2_MSE, spa3_MSE, spa4_MSE, ang_MSE = self.sess.run( [ self.loss_fine_views, self.loss_spa1, self.loss_spa2, self.loss_spa3, self.loss_spa4, self.loss_views ], feed_dict={ self.train_input1: input1, self.train_input2: input2, self.train_input3: input3, self.train_input4: input4, self.train_spa_gt1: spa_gt1, self.train_spa_gt2: spa_gt2, self.train_spa_gt3: spa_gt3, self.train_spa_gt4: spa_gt4, self.train_ang_gt: ang_gt }) val_ang_MSE += ang_MSE val_spa_MSE = spa1_MSE + spa2_MSE + spa3_MSE + spa4_MSE + train_spa_MSE val_total_MSE += total_MSE print( 'Epoch val[%2d] total MSE: %.4f spa MSE: %.4f ang MSE: %.4f \n' % (epoch, val_total_MSE / (3 * val_batch_idxs_views), val_spa_MSE / (3 * val_batch_idxs_views), val_ang_MSE / (3 * val_batch_idxs_views))) if np.mod(epoch, 100) == 0: f_train_epoch.write( 'epoch %06d mean_total_MSE %.6f mean_spa_MSE %.6f mean_ang_MSE %.6f\n' % (epoch, train_total_MSE / (3 * batch_idxs_views), train_spa_MSE / (3 * batch_idxs_views), train_ang_MSE / (3 * batch_idxs_views))) f_train_epoch.close() f_val.write( 'epoch %06d mean_total_MSE %.6f mean_spa_MSE %.6f mean_ang_MSE %.6f\n' % (epoch, val_total_MSE / (3 * batch_idxs_views), val_spa_MSE / (3 * batch_idxs_views), val_ang_MSE / (3 * batch_idxs_views))) f_val.close() self.save(config.checkpoint_dir, 0)
def initialize(self, assign_dict): # This is where the `self._hidden` map is created. # The `tensorflow.Variable`s of the map are initialized # to the values given by the user in `assign_dict`. if Model._current_model == self: raise ModelError( "Can't call `model.initialize()` inside the model block") if self._observed is None: raise ModelError( "Can't initialize latent variables before `model.observed()` has been called." ) if self._hidden is not None: raise ModelError( "Can't call `model.initialize()` twice. Use `model.assign()` to change the state." ) if not isinstance(assign_dict, dict) or not assign_dict: raise ValueError( "Argument to `model.initialize()` must be a dictionary with more than one element" ) for key in assign_dict.keys(): if not isinstance(key, tf.Tensor): raise ValueError( "Key in the initialization dict is not a tf.Tensor: {}". format(repr(key))) hidden = set(self._description.keys()).difference(set(self._observed)) if hidden != set(assign_dict.keys()): raise ModelError( "Not all latent variables have been passed in a call to `model.initialize().\n\ Missing variables: {}".format( hidden.difference(assign_dict.keys()))) # Add variables to the execution graph with self.session.graph.as_default(): self._hidden = dict() for var in hidden: self._hidden[var] = tf.Variable(var.dtype.as_numpy_dtype( assign_dict[var]), name=var.name.split(':')[0]) self.session.run(tf.initialize_variables(list(self._hidden.values()))) # Sort the hidden variables so we can access them in a consistant order self._hidden_sorted = sorted(self._hidden.keys(), key=lambda v: v.name) for h in self._hidden.values(): with self.session.graph.as_default(): var = tf.Variable(h.dtype.as_numpy_dtype(), name=h.name.split(':')[0] + '_placeholder') setter = h.assign(var) self._setters[h] = (setter, var) all_vars = self._hidden.copy() all_vars.update(self._observed) self._rewrite_graph(all_vars) with self.session.graph.as_default(): # observed_logps contains one element per data point observed_logps = [ self._get_rewritten(self._description[v].logp) for v in self._observed ] # hidden_logps contains a single value hidden_logps = [ self._get_rewritten(self._description[v].logp) for v in self._hidden ] # Handle the case where we don't have observed variables. # We define the probability to not observe anything as 1. if not observed_logps: observed_logps = [tf.constant(0, dtype=config.dtype)] self._pdf = tf.exp(tf.add_n(observed_logps)) self._nll = -tf.add_n( [tf.reduce_sum(logp) for logp in observed_logps] + hidden_logps) variables = [self._hidden[k] for k in self._hidden_sorted] self._nll_grad = tf.gradients(self._nll, variables) for i, (v, g) in enumerate(zip(variables, self._nll_grad)): if g is None: self._nll_grad[i] = tf.constant(0, dtype=config.dtype) logger.warn('Model is independent of variable {}'.format( v.name.split(':')[0])) self.initialized = True
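# `initialize` above builds one placeholder-fed assign op per latent
# variable and caches it in `self._setters`, so later state changes reuse
# an existing graph node instead of adding a fresh assign (and growing the
# graph) on every update. A minimal sketch of that setter pattern in
# isolation; the variable and names here are illustrative:

import tensorflow as tf

v = tf.Variable(0.0, name='v')
v_feed = tf.placeholder(tf.float32, name='v_setter')
v_assign = v.assign(v_feed)  # built once, reused for every update

with tf.Session() as sess:
    sess.run(tf.variables_initializer([v]))
    for value in (1.0, 2.5, -3.0):
        # Each update runs the same cached op; the graph stays fixed.
        sess.run(v_assign, feed_dict={v_feed: value})
    print(sess.run(v))  # -3.0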
def main(_): game = pyspiel.load_game(FLAGS.game) # Information state length info_state_shape = game.information_state_tensor_shape() flat_info_state_length = np.prod(info_state_shape) # Output num_actions = game.num_distinct_actions() with tf.Session() as sess: net_input = tf.placeholder( tf.float32, [None, flat_info_state_length], name="input") # pylint: disable=unused-variable output = tf.placeholder(tf.float32, [None, num_actions], name="output") legals_mask = tf.placeholder( tf.float32, [None, num_actions], name="legals_mask") policy_net = tf.layers.dense(net_input, 128, activation=tf.nn.relu) policy_net = tf.layers.dense(policy_net, 128, activation=tf.nn.relu) policy_net = tf.layers.dense(policy_net, num_actions) # Note: subtracting the max here is to help with numerical stability. # However, there can still be numerical problems. If you are doing a softmax # here, it can return NaN when the max for the policy net is high on one of # the illegal actions, because policy_net - max will be small for legal # actions, giving all exp(small) == 0 in the denominator, returning NaN at # the end. One fix is to set the logits to -inf and define a custom cross # entropy op that ignores over the illegal actions. policy_net = policy_net - tf.reduce_max(policy_net, axis=-1, keepdims=True) masked_exp_logit = tf.multiply(tf.exp(policy_net), legals_mask) renormalizing_factor = tf.reduce_sum( masked_exp_logit, axis=-1, keepdims=True) # pylint: disable=unused-variable policy_softmax = tf.where( tf.equal(legals_mask, 0.), tf.zeros_like(masked_exp_logit), tf.divide(masked_exp_logit, renormalizing_factor), name="policy_softmax") policy_targets = tf.placeholder(shape=[None, num_actions], dtype=tf.float32) policy_cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits_v2( logits=policy_net, labels=policy_targets), axis=0) # We make one sample. sampled_actions = tf.random.categorical( tf.log(policy_softmax), 1, name="sampled_actions") # pylint: disable=unused-variable optimizer = tf.train.AdamOptimizer(0.0001).minimize( policy_cost, name="train") # pylint: disable=unused-variable init = tf.initialize_variables(tf.all_variables(), name="init_all_vars_op") print("Writing file: {}/{}".format(FLAGS.dir, FLAGS.filename)) tf.train.write_graph( sess.graph_def, FLAGS.dir, FLAGS.filename, as_text=False)
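# The comment in main() suggests an alternative to renormalizing after
# exp: push illegal logits toward -inf before the softmax. A minimal
# sketch of that fix; `masked_softmax` is an illustrative helper, not part
# of the exported graph above:

import tensorflow as tf

def masked_softmax(logits, legals_mask):
    # A large negative constant stands in for -inf: exp() underflows to 0
    # for illegal actions, so the denominator is driven by legal actions
    # only and the 0/0 NaN case cannot arise.
    neg_inf = tf.fill(tf.shape(logits), -1e9)
    masked_logits = tf.where(tf.equal(legals_mask, 0.), neg_inf, logits)
    return tf.nn.softmax(masked_logits)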
def _precompute_image_features(img, layers, shape, save_dir): # type: (np.ndarray, Union[Tuple[str], List[str]], Union[Tuple[int], List[int]]) -> Dict[str, np.ndarray] """ Precompute the features of the image by passing it through the vgg network and storing the computed layers. :param img: the image of which the features would be precomputed. It must have shape (height, width, 3) :param layers: A list of string specifying which layers would we be returning. Check vgg.py for layer names. :param shape: shape of the image placeholder. :param vgg_data: The vgg network represented as a dictionary. It can be obtained by vgg.pre_read_net. :param mean_pixel: The mean pixel value for the vgg network. It can be obtained by vgg.read_net or just hardcoded. :param use_mrf: Whether we're using mrf loss. If true, it does not calculate and store the gram matrix. :param use_semantic_masks: Whether we're using semantic masks. If true, it does not calculate and store the gram matrix. :return: A dictionary containing the precomputed feature for each layer. """ features_dict = {} g = tf.Graph() # Choose to use cpu here because we only need to compute this once and using cpu would provide us more memory # than the gpu and therefore allow us to process larger style images using the extra memory. This will not have # an effect on the training speed later since the gram matrix size is not related to the size of the image. with g.as_default(), g.device('/cpu:0'), tf.Session(config=tf.ConfigProto( device_count={'GPU': 0})) as sess: with tf.name_scope("classifier"): with tf.variable_scope("classifier", reuse=False): image = tf.placeholder(tf.uint8, shape=shape) image_float = tf.image.convert_image_dtype(image, dtype=tf.float32) vgg = vgg19_mat.Vgg19( vgg19_npy_path='imagenet-vgg-verydeep-19.mat') vgg.build(image_float, None) net = vgg.net() style_pre = np.array([img]) style_pre = style_pre.astype(np.uint8) if '0.12.0' in tf.__version__: all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) else: all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) if save_dir is not None: discrim_tvars = [ var for var in all_vars if var.name.startswith("classifier") ] saver = tf.train.Saver(discrim_tvars) ckpt = tf.train.get_checkpoint_state(save_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise AssertionError( "Cannot load from save directory.") # var_not_saved = [ item for item in all_vars if item not in discrim_tvars ] print('Var not saved', var_not_saved) sess.run(tf.initialize_variables(var_not_saved)) else: sess.run(tf.initialize_all_variables()) for layer in layers: # Calculate and store gramian. features = net[layer].eval(feed_dict={image: style_pre}) features = np.reshape(features, (-1, features.shape[3])) gram = np.matmul(features.T, features) / features.size features_dict[layer] = gram return features_dict
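# The per-layer style feature computed above is the (channels x channels)
# Gram matrix: spatial positions are flattened out and channel activations
# correlated, normalized by the element count. The same arithmetic on a
# toy array:

import numpy as np

features = np.random.rand(1, 7, 7, 64).astype(np.float32)  # fake vgg layer
flat = np.reshape(features, (-1, features.shape[3]))        # (H*W, C)
gram = np.matmul(flat.T, flat) / flat.size                  # (C, C)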
def stylize(network, content, styles, shape, iterations, save_dir=None, content_weight=5.0, style_weight=100.0, tv_weight=100.0, style_blend_weights=None, learning_rate=10.0, initial=None, use_mrf=False, use_semantic_masks=False, mask_resize_as_feature=True, output_semantic_mask=None, style_semantic_masks=None, semantic_masks_weight=1.0, print_iterations=None, checkpoint_iterations=None, semantic_masks_num_layers=4, content_img_style_weight_mask=None): """ Stylize images. :param network: Path to pretrained vgg19 network. It can be downloaded at http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat :param content: The content image. If left blank, it will enter texture generation mode (style synthesis without context loss). :param styles: A list of style images as numpy arrays. :param shape: The shape of the output image. It should be with format (1, height, width, 3) :param iterations: The number of iterations to run. :param content_weight: The weight for content loss. The larger the weight, the more the output will look like the content image. :param style_weight: The weight for style loss. The larger the weight, the more the output will have a style that looks like the style images. :param tv_weight: The weight for total-variation loss. The larger the weight, the smoother the output will be. :param style_blend_weights: If inputting multiple style images, this controls the balance between their styles. If left as None, it will treat all style images as equal. :param learning_rate: As name suggests. :param initial: The initial starting point for the output. If left blank, the initial would just be noise. :param use_mrf: Whether we use markov-random-field loss instead of gramian loss. mrf_util.py contains more info. :param use_semantic_masks: Whether we use semantic masks as additional semantic information. Please check the paper "Semantic Style Transfer and Turning Two-Bit Doodles into Fine Artworks" for more information. :param mask_resize_as_feature: If true, resize the mask and use the resized mask as additional feature besides the vgg network layers. If false, pass the masks (must have exactly 3 masks) into the vgg network and use the outputted layers as additional features. :param output_semantic_mask: The semantic masks you would like to apply to the outputted image.The mask should have shape (batch_size, height, width, semantic_masks_num_layers) Unlike the neural doodle paper, here I use one black-and-white image for each semantic mask (the paper had semantic masks represented as rgb images, limiting the semantic channels to 3). :param style_semantic_masks: A list of semantic masks you would like to apply to each style image. The mask should have shape (batch_size, height, width, semantic_masks_num_layers) :param semantic_masks_weight: How heavily you'd like to weight the semantic masks as compared to other sources of semantic information obtained through passing the image through vgg network. Default is 1.0. :param print_iterations: Print loss information every n iterations. :param checkpoint_iterations: Save a checkpoint as well as the best image so far every n iterations. :param semantic_masks_num_layers: The number of semantic masks each image have. :param content_img_style_weight_mask: One black-and-white mask specifying how much we should "stylize" each pixel in the outputted image. The areas where the mask has higher value would be stylized more than other areas. 
A completely white mask would mean that we stylize the output image just as before, while a completely dark mask would mean that we do not stylize the output image at all, so it should look pretty much the same as content image. If you do not wish to use this feature, just leave it as None. :return: a tuple where the first item is either the current iteration or None, indicating it has finished training. The second item is the image that has the lowest loss so far. The tuples are yielded every 'checkpoint_iterations' iterations as well as the last iteration. :rtype: iterator[tuple[int|None,image]] """ global STYLE_LAYERS if content is not None: STYLE_LAYERS = STYLE_LAYERS_WITH_CONTENT if use_mrf: raise NotImplementedError STYLE_LAYERS = STYLE_LAYERS_MRF # Easiest way to be compatible with no-mrf versions. if use_semantic_masks: raise NotImplementedError assert semantic_masks_weight is not None assert output_semantic_mask is not None assert style_semantic_masks is not None if content_img_style_weight_mask is not None: if shape[1] != content_img_style_weight_mask.shape[1] or shape[ 2] != content_img_style_weight_mask.shape[2]: raise AssertionError( "The shape of style_weight_mask is incorrect. It must have the same height and width " "as the output image. The output image has shape: %s and the style weight mask has " "shape: %s" % (str(shape), str(content_img_style_weight_mask.shape))) if content_img_style_weight_mask.dtype != np.float32: raise AssertionError( 'The dtype of style_weight_mask must be float32. it is now %s' % str(content_img_style_weight_mask.dtype)) # Append a (1,) in front of the shapes of the style images. So the style_shapes contains (1, height, width, 3). # 3 corresponds to rgb. style_shapes = [(1, ) + style.shape for style in styles] if style_blend_weights is None: style_blend_weights = [1.0 / len(styles) for _ in styles] content_features = {} style_features = [{} for _ in styles] output_semantic_mask_features = {} for i in range(len(styles)): # Using precompute_image_features, which calculates on cpu and thus allow larger images. style_features[i] = _precompute_image_features(styles[i], STYLE_LAYERS, style_shapes[i], save_dir) # The default behavior of tensorflow was to allocate all gpu memory. Here it is set to only use as much gpu memory # as it needs. # TODO: CHANGE IT BACK< USING CPU NOW # tf_config = tf.ConfigProto(gpu_options=tf.GPUOptions(device_count = {'GPU': 1})) # tf_config = tf.ConfigProto() tf_config.gpu_options.per_process_gpu_memory_fraction = 0.45 with tf.Graph().as_default(), tf.Session(config=tf_config) as sess: # with tf.name_scope("classifier"): # Compute content features in feed-forward mode content_image = tf.placeholder(tf.uint8, shape=shape, name='content_image') content_image_float = tf.image.convert_image_dtype(content_image, dtype=tf.float32) with tf.variable_scope("classifier", reuse=False): vgg_c = vgg19_mat.Vgg19( vgg19_npy_path='imagenet-vgg-verydeep-19.mat') vgg_c.build(content_image_float, None) net_c = vgg_c.net() content_features[CONTENT_LAYER] = net_c[CONTENT_LAYER] if content is not None: # content_pre = np.array([vgg.preprocess(content, mean_pixel)]) content_pre = np.array([content]) content_pre = content_pre.astype(dtype=np.uint8) # Compute style features in feed-forward mode. 
if content_img_style_weight_mask is not None: style_weight_mask_layer_dict = neural_doodle_util.masks_average_pool( content_img_style_weight_mask) if initial is None: initial = tf.random_normal(shape) * 0.001 else: # initial = np.array([vgg.preprocess(initial, mean_pixel)]) initial = np.array([initial]) initial = initial.astype('float32') # image = tf.Variable(initial) # image_uint8 = tf.cast(image, tf.uint8) # image_float = tf.image.convert_image_dtype(image_uint8,dtype=tf.float32) * 2 - 1 image_float = tf.Variable(initial) image = tf.image.convert_image_dtype(image_float, dtype=tf.uint8, saturate=True) with tf.variable_scope("classifier", reuse=True): vgg_o = vgg19_mat.Vgg19( vgg19_npy_path='imagenet-vgg-verydeep-19.mat') vgg_o.build(image_float, None) net_o = vgg_o.net() # content loss _, height, width, number = map( lambda i: i.value, content_features[CONTENT_LAYER].get_shape()) content_features_size = height * width * number content_loss = content_weight * (2 * tf.nn.l2_loss( net_o[CONTENT_LAYER] - content_features[CONTENT_LAYER]) / content_features_size) # style loss style_loss = 0 for i in range(len(styles)): style_losses = [] for style_layer in STYLE_LAYERS: layer = net[style_layer] if content_img_style_weight_mask is not None: # Apply_style_weight_mask_to_feature_layer, then normalize with average of that style weight mask. layer = neural_doodle_util.vgg_layer_dot_mask(style_weight_mask_layer_dict[style_layer], layer) \ / (tf.reduce_mean(style_weight_mask_layer_dict[style_layer]) + 0.000001) if use_mrf: if use_semantic_masks: # TODO: Compare the effect of concatenate masks to vgg layers versus dotting them with vgg # layers. If you change this to dot, don't forget to also change that in neural_doodle_util. layer = neural_doodle_util.concatenate_mask_layer_tf( output_semantic_mask_features[style_layer], layer) # layer = neural_doodle_util.vgg_layer_dot_mask(output_semantic_mask_features[style_layer], layer) style_losses.append( mrf_loss(style_features[i][style_layer], layer, name='%d%s' % (i, style_layer))) else: if use_semantic_masks: gram = neural_doodle_util.gramian_with_mask( layer, output_semantic_mask_features[style_layer]) else: gram = neural_util.gramian(layer) style_gram = style_features[i][style_layer] style_gram_size = get_np_array_num_elements(style_gram) style_losses.append( tf.nn.l2_loss(gram - style_gram) / style_gram_size ) # TODO: Check normalization constants. the style loss is way too big compared to the other two. style_loss += style_weight * style_blend_weights[i] * reduce( tf.add, style_losses) # total variation denoising tv_loss = tf.mul(neural_util.total_variation(image_float), tv_weight) # overall loss if content is None: # If we are doing style/texture regeration only. loss = style_loss + tv_loss else: loss = content_loss + style_loss + tv_loss # optimizer setup train_step = tf.train.AdamOptimizer(learning_rate).minimize( loss, var_list=[image_float]) def print_progress(i, feed_dict, last=False): stderr.write('Iteration %d/%d\n' % (i + 1, iterations)) if last or (print_iterations is not None and print_iterations != 0 and i % print_iterations == 0): if content is not None: stderr.write(' content loss: %g\n' % content_loss.eval(feed_dict=feed_dict)) stderr.write(' style loss: %g\n' % style_loss.eval(feed_dict=feed_dict)) stderr.write(' tv loss: %g\n' % tv_loss.eval(feed_dict=feed_dict)) stderr.write(' total loss: %g\n' % loss.eval(feed_dict=feed_dict)) # Load classifier weight. 
if '0.12.0' in tf.__version__: all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) else: all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) if save_dir is not None: # discrim_tvars = [var for var in tf.trainable_variables() if var.name.startswith("classifier")] discrim_tvars = [ var for var in all_vars if var.name.startswith("classifier") ] saver = tf.train.Saver(discrim_tvars) ckpt = tf.train.get_checkpoint_state(save_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: raise AssertionError("Cannot load from save directory.") var_not_saved = [ item for item in all_vars if item not in discrim_tvars ] print('Var not saved', var_not_saved) sess.run(tf.initialize_variables(var_not_saved)) else: sess.run(tf.initialize_all_variables()) # optimization best_loss = float('inf') best = np.zeros(shape=shape) feed_dict = {} if content is not None: feed_dict[content_image] = content_pre # sess.run(tf.initialize_all_variables(), feed_dict=feed_dict) for i in range(iterations): last_step = (i == iterations - 1) print_progress(i, feed_dict, last=last_step) train_step.run(feed_dict=feed_dict) if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step: this_loss = loss.eval(feed_dict=feed_dict) if this_loss < best_loss: best_loss = this_loss best = image.eval() # yield ( # (None if last_step else i), # vgg.unprocess(best.reshape(shape[1:]), mean_pixel) # ) # print(best) best_float32 = image_float.eval() # print(best_float32) best_str, = sess.run( [tf.image.encode_png(best[0], name="input_pngs")]) # yield ( # (None if last_step else i), # best.reshape(shape[1:]) # ) yield ((None if last_step else i), best_str)
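# Both stylize() and _precompute_image_features() use the same
# restore-some, initialize-the-rest split: classifier weights come from
# the checkpoint, and everything else (the image variable, Adam slot
# variables) gets a fresh initializer. The idiom condensed; `ckpt_path`
# here is a placeholder, not a path from the source:

import tensorflow as tf

ckpt_path = "./model/classifier.ckpt"  # assumed checkpoint location

all_vars = tf.global_variables()
saved = [v for v in all_vars if v.name.startswith("classifier")]
saver = tf.train.Saver(saved)

with tf.Session() as sess:
    saver.restore(sess, ckpt_path)
    not_saved = [v for v in all_vars if v not in saved]
    sess.run(tf.variables_initializer(not_saved))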
    beta1=0.9, beta2=0.999, epsilon=1e-08,
    use_locking=False).minimize(cost, var_list=train_params[parameters:])

uninitialized_vars = []
for var in tf.all_variables():
    try:
        sess.run(var)
    except tf.errors.FailedPreconditionError:
        uninitialized_vars.append(var)

init_new_vars_op = tf.initialize_variables(uninitialized_vars)
sess.run(init_new_vars_op)

log('TensorFlow Session starting...')

# TensorBoard summary (graph)
tf.summary.scalar('cost', cost)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter('./tensorboard_test')
writer.add_graph(sess.graph)
log('TensorBoard info in ./tensorboard_test')

# Save path depending on the training behaviour
if not args.transfer_model and args.transfer_cnn:
    save_path = (args.save_dir + '/cnn_s2p_' + appliance_name +
                 '_transf_' + args.cnn + '_pointnet_model')
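# The try/except probe above costs one session round trip per variable.
# tf.report_uninitialized_variables, used the same way in the
# _initialize_variables method further down, finds every uninitialized
# variable in a single call. A sketch of that variant:

import tensorflow as tf

def init_uninitialized(sess):
    # One run returns the names (bytes, without the ':0' suffix) of every
    # variable that does not yet hold a value.
    uninit_names = set(sess.run(tf.report_uninitialized_variables()))
    uninit = [v for v in tf.global_variables()
              if v.name.split(':')[0].encode() in uninit_names]
    sess.run(tf.variables_initializer(uninit))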
def run(args, server): env = create_env(args.env_id, client_id=str(args.task), remotes=args.remotes, envWrap=args.envWrap, designHead=args.designHead, noLifeReward=args.noLifeReward) # set one task to one cpu config = tf.ConfigProto(device_filters=[ "/job:ps", "/job:worker/task:{}/cpu:0".format(args.task) ]) logdir = os.path.join(args.log_dir, 'train') if use_tf12_api: summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task) else: summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task) trainer = A3C(env, args.task, args.visualise, args.unsup, summary_writer, args.envWrap, args.designHead, args.noReward) # logging if args.task == 0: with open(args.log_dir + '/log.txt', 'w') as fid: for key, val in constants.items(): fid.write('%s: %s\n' % (str(key), str(val))) fid.write('designHead: %s\n' % args.designHead) fid.write('input observation: %s\n' % str(env.observation_space.shape)) fid.write('env name: %s\n' % str(env.spec.id)) fid.write('unsup method type: %s\n' % str(args.unsup)) # Variable names that start with "local" are not saved in checkpoints. if use_tf12_api: variables_to_save = [ v for v in tf.global_variables() if not v.name.startswith("local") ] init_op = tf.variables_initializer(variables_to_save) init_all_op = tf.global_variables_initializer() else: variables_to_save = [ v for v in tf.all_variables() if not v.name.startswith("local") ] init_op = tf.initialize_variables(variables_to_save) init_all_op = tf.initialize_all_variables() saver = FastSaver(variables_to_save) if args.pretrain is not None: variables_to_restore = [ v for v in tf.trainable_variables() if not v.name.startswith("local") ] pretrain_saver = FastSaver(variables_to_restore) pretrain = tf.train.latest_checkpoint(args.pretrain) var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, tf.get_variable_scope().name) logger.info('Trainable vars:') for v in var_list: logger.info(' %s %s', v.name, v.get_shape()) def init_fn(ses): logger.info("Initializing all parameters.") ses.run(init_all_op) if args.pretrain is not None: variables_to_restore = [ v for v in tf.trainable_variables() if not v.name.startswith("local") ] pretrain = tf.train.latest_checkpoint(args.pretrain) print(args.pretrain) print(pretrain_saver) print(pretrain) logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("==> Restoring from given pretrained checkpoint.") logger.info(" Pretraining address: %s", pretrain) pretrain_saver.restore(ses, pretrain) logger.info("==> Done restoring model! 
Restored %d variables.", len(variables_to_restore)) logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") logger.info("----------------------------------------") ''' # set one task to one cpu config = tf.ConfigProto(device_filters=["/job:ps", "/job:worker/task:{}/cpu:0".format(args.task)]) logdir = os.path.join(args.log_dir, 'train') if use_tf12_api: summary_writer = tf.summary.FileWriter(logdir + "_%d" % args.task) else: summary_writer = tf.train.SummaryWriter(logdir + "_%d" % args.task) ''' logger.info("Events directory: %s_%s", logdir, args.task) sv = tf.train.Supervisor( is_chief=(args.task == 0), logdir=logdir, saver=saver, summary_op=None, init_op=init_op, init_fn=init_fn, summary_writer=summary_writer, ready_op=tf.report_uninitialized_variables(variables_to_save), global_step=trainer.global_step, save_model_secs=30, save_summaries_secs=30) num_global_steps = constants['MAX_GLOBAL_STEPS'] logger.info( "Starting session. If this hangs, we're mostly likely waiting to connect to the parameter server. " + "One common cause is that the parameter server DNS name isn't resolving yet, or is misspecified." ) with sv.managed_session(server.target, config=config) as sess, sess.as_default(): # Workaround for FailedPreconditionError # see: https://github.com/openai/universe-starter-agent/issues/44 and 31 sess.run(trainer.sync) trainer.start(sess, summary_writer) global_step = sess.run(trainer.global_step) logger.info("-------Starting training at gobal_step=%d", global_step) while not sv.should_stop() and (not num_global_steps or global_step < num_global_steps): #print("-------Start at:" + str(trainer.global_step) + ", to:" + str(num_global_steps)) trainer.process(sess) global_step = sess.run(trainer.global_step) # Ask for all the services to stop. sv.stop() logger.info('reached %s steps. worker stopped.', global_step)
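# The worker above keeps per-process copies under a "local" name scope and
# excludes them from the checkpoint, the init op, and the Supervisor's
# ready check; only the shared parameters are saved and gate the other
# workers. The filter in isolation:

import tensorflow as tf

variables_to_save = [v for v in tf.global_variables()
                     if not v.name.startswith("local")]
init_op = tf.variables_initializer(variables_to_save)
saver = tf.train.Saver(variables_to_save)
ready_op = tf.report_uninitialized_variables(variables_to_save)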
def render_vis(model, objective_f, param_f=None, optimizer=None, transforms=None, thresholds=(512, ), print_objectives=None, verbose=False, model_name_scope='encode'): """Flexible optimization-based feature vis. There's a lot of ways one might wish to customize optimization-based feature visualization. It's hard to create an abstraction that stands up to all the things one might wish to try. This function probably can't do *everything* you want, but it's much more flexible than a naive attempt. The basic abstraction is to split the problem into several parts. Consider the arguments: Args: model: The model to be visualized, from Alex' modelzoo. objective_f: The objective our visualization maximizes. See the objectives module for more details. param_f: Paramaterization of the image we're optimizing. See the paramaterization module for more details. Defaults to a naively paramaterized [1, 128, 128, 3] image. optimizer: Optimizer to optimize with. Either tf.train.Optimizer instance, or a function from (graph, sess) to such an instance. Defaults to Adam with lr .05. transforms: A list of stochastic transformations that get composed, which our visualization should robustly activate the network against. See the transform module for more details. Defaults to [transform.jitter(8)]. thresholds: A list of numbers of optimization steps, at which we should save (and display if verbose=True) the visualization. print_objectives: A list of objectives separate from those being optimized, whose values get logged during the optimization. verbose: Should we display the visualization when we hit a threshold? This should only be used in IPython. Returns: 2D array of optimization results containing of evaluations of supplied param_f snapshotted at specified thresholds. Usually that will mean one or multiple channel visualizations stacked on top of each other. """ gpu_options = tf.GPUOptions(allow_growth=True) config = tf.ConfigProto( allow_soft_placement=True, gpu_options=gpu_options, ) with tf.Graph().as_default() as graph, tf.Session(config=config) as sess: T = make_vis_T(model, objective_f, param_f, optimizer, transforms) loss, vis_op, t_image = T("loss"), T("vis_op"), T("input") added_vars = [x for x in tf.global_variables() \ if not x.op.name.startswith(model_name_scope)] init_new_vars_op = tf.initialize_variables(added_vars) init_new_vars_op.run() images = [] all_losses = [] for i in tqdm(range(max(thresholds) + 1)): loss_, _ = sess.run([loss, vis_op]) all_losses.append(loss_) if i in thresholds: vis = t_image.eval() images.append(vis) if verbose: print(i, loss_) return t_image.eval(), all_losses
sess.run(s_assign)
style_features = [0 for i in range(5)]
style_features = sess.run(
    [vgg.conv1_1, vgg.conv2_1, vgg.conv3_1, vgg.conv4_1, vgg.conv5_1],
    feed_dict={vgg.imgs: [style_img]})

c_assign = vgg.imgs_update.assign(np.asarray([content_img]).astype(float))
sess.run(c_assign)
content_features = sess.run(vgg.conv5_2, feed_dict={vgg.imgs: [content_img]})

result_img = np.zeros((1, 224, 224, 3)).tolist()
# r_assign = vgg.imgs_update.assign(np.asarray(result_img).astype(float))
# sess.run(r_assign)
vgg.transfer_style(content_features, style_features)
sess.run(tf.initialize_variables(set(tf.all_variables()) - vgg.temp))

for i in range(1000):
    loss = sess.run(vgg.loss, feed_dict={vgg.imgs: result_img})
    print("iteration", i, "loss", loss)
    update = sess.run(vgg.train_step, feed_dict={vgg.imgs: result_img})
    result_img = sess.run(vgg.imgs_update, feed_dict={vgg.imgs: result_img})

# import skimage.io as io
x = np.asarray(result_img[0]).astype(np.uint8)
# io.imshow(x)
# io.show()
imsave('output.jpg', x)
def restart_units(self):
    self.restart_op = tf.initialize_variables([self.v, self.u])
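# Note that tf.initialize_variables only *constructs* an op:
# restart_units stores it in self.restart_op, and nothing is reset until
# that op is actually run in a session. A self-contained sketch, assuming
# graph-mode TF 1.x:

import tensorflow as tf

v = tf.Variable(tf.random_normal([4]), name='v')
u = tf.Variable(tf.zeros([4]), name='u')
restart_op = tf.variables_initializer([v, u])  # built, not yet executed

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(v.assign(v + 1.0))  # perturb the unit state
    sess.run(restart_op)         # only now are v and u re-initialized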
for grad, var in grads:
    if (var.name == "sp_w_fc1:0"):
        idx_in1 = tf.cast(tf.constant(idx_fc1), tf.float32)
        grads[count] = (tf.multiply(idx_in1, grad), var)
    if (var.name == "sp_w_fc2:0"):
        idx_in2 = tf.cast(tf.constant(idx_fc2), tf.float32)
        grads[count] = (tf.multiply(idx_in2, grad), var)
    count += 1

train_step = trainer.apply_gradients(grads)
correct_prediction = tf.equal(tf.argmax(logit, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

for var in tf.all_variables():
    if sess.run(tf.is_variable_initialized(var)) == False:
        sess.run(tf.initialize_variables([var]))

for i in range(20000):
    batch = mnist.train.next_batch(50)
    idx_in1_value = sess.run(idx_in1)
    grads_fc1_value = sess.run(grads,
                               feed_dict={
                                   x: batch[0],
                                   y_: batch[1],
                                   keep_prob: 0.5
                               })
    if i % 100 == 0:
        train_acc = sess.run(accuracy,
                             feed_dict={
                                 x: batch[0],
                                 y_: batch[1],
def _initialize_variables(self):
    uninitialized_var_names = [
        bytes.decode(var)
        for var in self._sess.run(tf.report_uninitialized_variables())
    ]
    uninitialized_vars = [
        var for var in tf.global_variables()
        if var.name.split(':')[0] in uninitialized_var_names
    ]
    self._sess.run(tf.initialize_variables(uninitialized_vars))
def color_sketches_net(height, width, iterations, batch_size, content_weight, tv_weight, learning_rate, generator_network='unet', use_adversarial_net=False, use_hint=False, adv_net_weight=1.0, weight_decay_lambda=1e-5, sketch_reconstruct_weight=10.0 / 255.0, print_iterations=None, checkpoint_iterations=None, save_dir="model/", do_restore_and_generate=False, do_restore_and_train=False, restore_from_noadv_to_adv=False, preprocessed_folder=None, preprocessed_file_path_list=None, content_preprocessed_folder=None, color_rebalancing_folder=None, from_screenshot=False, from_webcam=False, test_img_dir=None, test_img_hint=None, input_mode='sketch', output_mode='rgb', use_cpu=False): """ Stylize images. TODO: modify the description. This function yields tuples (iteration, image); `iteration` is None if this is the final image (the last iteration). Other tuples are yielded every `checkpoint_iterations` iterations. :param: lr_decay_steps: learning rate decays by lr_decay_rate after lr_decay steps. Default per https://arxiv.org/abs/1603.03417 :param: min_lr: The minimum learning rate. Default per https://arxiv.org/abs/1603.03417 :param: lr_decay_rate: learning rate decays by lr_decay_rate after lr_decay steps. Default per https://arxiv.org/abs/1603.03417 :param: use_semantic_masks: If it is true, the input to the generator network will be the semantic masks instead of the content image. The content image will serve as ground truth for loss (I haven't decided whether to use content or style loss). :rtype: iterator[tuple[int|None,image]] """ # Before training, make sure everything is set correctly. if use_hint: assert test_img_hint is not None height, width = get_compatible_shape(height, width) input_shape = (1, height, width, 3) print( 'The input shape is: %s. Input mode is: %s. Output mode is: %s. Using %s generator network' % (str(input_shape), input_mode, output_mode, generator_network)) content_img_preprocessed = None sketches_preprocessed = None prev_content_preprocessed_file_i = 0 # Define tensorflow placeholders and variables. with tf.Graph().as_default(): input_images = tf.placeholder( tf.float32, shape=[ batch_size, input_shape[1], input_shape[2], 1 if generator_network != 'lnet' else 3 ], name='input_sketches' if input_mode == 'sketch' else 'input_bw') if use_hint: input_hint = tf.placeholder( tf.float32, shape=[batch_size, input_shape[1], input_shape[2], 3], name='input_hint') input_concatenated = tf.concat(3, (input_images, input_hint)) if generator_network == 'unet_color': assert input_mode == 'sketch' or (input_mode == 'raw_sketch' and do_restore_and_generate) color_output = unet_color_util.net(input_concatenated) sketch_output = lnet_util.net( (color_output - 128) / 128 ) * 255 # This is the reconstructed sketch from the color output. elif generator_network == 'lnet': assert input_mode == 'color' and not use_adversarial_net and not use_hint # This step is not necessary but kept to be in sync with chainer repo. input_concatenated = (input_concatenated - 128) / 128 color_output = lnet_util.net(input_concatenated, trainable=True) * 255 elif generator_network == 'backprop': assert input_mode == 'sketch' color_output = tf.get_variable( 'backprop_input_var', shape=[batch_size, input_shape[1], input_shape[2], 3], initializer=tf.random_normal_initializer( mean=128, stddev=10.0)) + 0 * input_images sketch_output = lnet_util.net( (color_output - 128) / 128 ) * 255 # This is the reconstructed sketch from the color output. else: # TODO: change the error message. 
raise AssertionError( "Please input a valid generator network name. Possible options are: TODO. Got: %s" % (generator_network)) else: if generator_network == 'unet_color': assert input_mode == 'sketch' or (input_mode == 'raw_sketch' and do_restore_and_generate) color_output = unet_color_util.net(input_images) sketch_output = lnet_util.net( (color_output - 128) / 128 ) * 255 # This is the reconstructed sketch from the color output. elif generator_network == 'lnet': assert input_mode == 'color' and not use_adversarial_net and not use_hint # This step is not necessary but kept to be in sync with chainer repo. input_images = (input_images - 128) / 128 color_output = lnet_util.net(input_images, trainable=True) * 255 elif generator_network == 'backprop': assert input_mode == 'sketch' color_output = tf.get_variable( 'backprop_input_var', shape=[batch_size, input_shape[1], input_shape[2], 3], initializer=tf.random_normal_initializer( )) + 0 * input_images sketch_output = lnet_util.net( (color_output - 128) / 128 ) * 255 # This is the reconstructed sketch from the color output. else: raise AssertionError( "Please input a valid generator network name. Possible options are: TODO. Got: %s" % (generator_network)) generator_all_var = unet_util.get_net_all_variables() sketch_reconstruct_all_var = lnet_util.get_net_all_variables() if not do_restore_and_generate: assert preprocessed_folder is not None and preprocessed_file_path_list is not None and \ preprocessed_folder[-1] == '/' learning_rate_init = tf.constant(learning_rate) learning_rate_var = tf.get_variable(name='learning_rate_var', trainable=False, initializer=learning_rate_init) color_expected_output = tf.placeholder( tf.float32, shape=[ batch_size, input_shape[1], input_shape[2], 3 if generator_network != 'lnet' else 1 ], name='color_expected_output') # Use the mean difference loss. Used to use tf.nn.l2_loss. Don't know how big of a difference that makes. # color_loss_non_adv =tf.nn.l2_loss(color_output - color_expected_output) / batch_size color_loss_non_adv = tf.reduce_mean( tf.abs(color_output - color_expected_output)) weight_decay_loss_non_adv = conv_util.weight_decay_loss( scope='unet') * weight_decay_lambda # This is only for unet_color, not for training the lnet, sketch_expected_output = lnet_util.net( (color_expected_output - 128) / 128, reuse=True) * 255 sketch_reconstruct_loss_non_adv = tf.reduce_mean( tf.abs(sketch_output - sketch_expected_output)) * sketch_reconstruct_weight generator_loss_non_adv = color_loss_non_adv + weight_decay_loss_non_adv + sketch_reconstruct_loss_non_adv # tv_loss = tv_weight * total_variation(image) if use_adversarial_net: adv_net_input = tf.placeholder( tf.float32, shape=[batch_size, input_shape[1], input_shape[2], 3], name='adv_net_input') adv_net_prediction_image_input = adv_net_util.net( adv_net_input) adv_net_prediction_generator_input = adv_net_util.net( color_output, reuse=True) adv_net_all_var = adv_net_util.get_net_all_variables() weight_decay_loss_adv = conv_util.weight_decay_loss( scope='adv_net') * weight_decay_lambda logits_from_i = adv_net_prediction_image_input logits_from_g = adv_net_prediction_generator_input # One represent labeling the image as coming from real image. Zero represent labeling it as generated. 
adv_loss_from_i = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits_from_i, tf.ones( [batch_size], dtype=tf.int64))) * adv_net_weight adv_loss_from_g = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits_from_g, tf.zeros( [batch_size], dtype=tf.int64))) * adv_net_weight adv_loss = adv_loss_from_i + adv_loss_from_g + weight_decay_loss_adv generator_loss_through_adv = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits_from_g, tf.ones( [batch_size], dtype=tf.int64))) * adv_net_weight # Beta1 = 0.5 according to dcgan paper adv_train_step = tf.train.AdamOptimizer( learning_rate_var, beta1=0.5, beta2=0.999).minimize(adv_loss, var_list=adv_net_all_var) # adv_train_step_i = tf.train.AdamOptimizer(learning_rate_var, beta1=0.5, # beta2=0.999).minimize(adv_loss_from_i, var_list=adv_net_all_var) # adv_train_step_g = tf.train.AdamOptimizer(learning_rate_var, beta1=0.5, # beta2=0.999).minimize(adv_loss_from_g, var_list=adv_net_all_var) generator_train_step_through_adv = tf.train.AdamOptimizer( learning_rate_var, beta1=0.5, beta2=0.999).minimize(generator_loss_through_adv, var_list=generator_all_var) generator_train_step = tf.train.AdamOptimizer( learning_rate_var, beta1=0.9, beta2=0.999).minimize(generator_loss_non_adv) with tf.control_dependencies( [generator_train_step_through_adv, generator_train_step]): generator_both_train = tf.no_op( name='generator_both_train') adv_loss_real_sum = scalar_summary("adv_loss_real", adv_loss_from_i) adv_loss_fake_sum = scalar_summary("adv_loss_fake", adv_loss_from_g) adv_loss_weight_decay_sum = scalar_summary( "adv_loss_weight_decay", weight_decay_loss_adv) generator_loss_through_adv_sum = scalar_summary( "g_loss_through_adv", generator_loss_through_adv) adv_loss_sum = scalar_summary("adv_loss", adv_loss) generator_loss_l2_sum = scalar_summary( "generator_loss_non_adv", generator_loss_non_adv) generator_loss_weight_decay_sum = scalar_summary( "generator_loss_weight_decay", weight_decay_loss_non_adv) sketch_reconstruct_loss_non_adv_sum = scalar_summary( "sketch_reconstruct_loss_non_adv", sketch_reconstruct_loss_non_adv) g_sum = merge_summary([ generator_loss_through_adv_sum, generator_loss_l2_sum, generator_loss_weight_decay_sum, sketch_reconstruct_loss_non_adv_sum ]) adv_sum = merge_summary([ adv_loss_fake_sum, adv_loss_real_sum, adv_loss_weight_decay_sum, adv_loss_sum ]) else: # optimizer setup # Training using adam optimizer. Setting comes from https://arxiv.org/abs/1610.07629. 
generator_train_step = tf.train.AdamOptimizer( learning_rate_var, beta1=0.9, beta2=0.999).minimize(generator_loss_non_adv) generator_loss_l2_sum = scalar_summary("color_loss_non_adv", generator_loss_non_adv) generator_loss_weight_decay_sum = scalar_summary( "generator_loss_weight_decay", weight_decay_loss_non_adv) sketch_reconstruct_loss_non_adv_sum = scalar_summary( "sketch_reconstruct_loss_non_adv", sketch_reconstruct_loss_non_adv) g_sum = merge_summary([ generator_loss_l2_sum, generator_loss_weight_decay_sum, sketch_reconstruct_loss_non_adv_sum ]) def print_progress(i, feed_dict, adv_feed_dict, start_time, total_iterations, last=False): stderr.write('Iteration %d/%d\n' % (i + 1, total_iterations)) if last or (print_iterations and i % print_iterations == 0): current_time = time.time() if i > 0: seconds_passed = current_time - start_time seconds_remaining = float(total_iterations - i) / i * seconds_passed m, s = divmod(seconds_remaining, 60) h, m = divmod(m, 60) stderr.write( 'Estimated time remaining: %d:%02d:%02d\n' % (h, m, s)) stderr.write('Learning rate %f\n' % (learning_rate_var.eval())) # TODO: change this stderr.write( ' generator l2 loss: %g\n' % generator_loss_non_adv.eval(feed_dict=feed_dict)) stderr.write(' sketch loss: %g\n' % sketch_reconstruct_loss_non_adv.eval( feed_dict=feed_dict)) if generator_network != 'backprop': stderr.write(' w decay gen loss: %g\n' % weight_decay_loss_non_adv.eval( feed_dict=feed_dict)) # if generator_network == 'unet_both' or generator_network == 'colorful_img_both': # stderr.write(' bw loss: %g\n' % color_loss_non_adv.eval(feed_dict=feed_dict)) # stderr.write(' ab loss: %g\n' % ab_loss_non_adv.eval(feed_dict=feed_dict)) if use_adversarial_net: stderr.write( ' adv_from_i loss: %g\n' % adv_loss_from_i.eval(feed_dict=adv_feed_dict)) stderr.write( ' adv_from_g loss: %g\n' % adv_loss_from_g.eval(feed_dict=adv_feed_dict)) stderr.write('generator adv loss: %g\n' % generator_loss_through_adv.eval( feed_dict=adv_feed_dict)) stderr.write(' w decay adv loss: %g\n' % weight_decay_loss_adv.eval( feed_dict=adv_feed_dict)) # Optimization # This used to track and record only the best image, the one with the lowest loss. That is no longer # necessary; recording the image generated at each round makes it easier to debug. best_image = None start_time = time.time() if restore_from_noadv_to_adv and use_adversarial_net: saver = tf.train.Saver(generator_all_var + [learning_rate_var]) else: saver = tf.train.Saver() if use_cpu: config = tf.ConfigProto(device_count={'GPU': 0}) else: config = None with tf.Session(config=config) as sess: if do_restore_and_generate: assert batch_size == 1 ckpt = tf.train.get_checkpoint_state(save_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) else: stderr.write("No checkpoint found. Exiting program\n") return if from_screenshot: # This is the x and y offset, the coordinate where we start capturing the screenshot. kScreenX = 300 kScreenY = 300 elif from_webcam: cap = cv2.VideoCapture(0) # Set width and height.
ret = cap.set(3, 1280) ret = cap.set(4, 960) ret, frame = cap.read() print('The dimension of this camera is: %d x %d' % (frame.shape[1], frame.shape[0])) else: assert test_img_dir is not None iterator = 0 while from_screenshot or from_webcam or (iterator == 0): if from_screenshot: pass # w = gtk.gdk.get_default_root_window() # sz = w.get_size() # print "The size of the window is %d x %d" % sz # pb = gtk.gdk.Pixbuf(gtk.gdk.COLORSPACE_RGB, False, 8, input_shape[1], input_shape[2]) # pb = pb.get_from_drawable(w, w.get_colormap(), kScreenX, kScreenY, 0, 0, input_shape[1], # input_shape[2]) # content_image = pb.pixel_array elif from_webcam: ret, frame = cap.read() content_image = scipy.misc.imresize( frame, (input_shape[1], input_shape[2])) else: content_image = imread( test_img_dir, (input_shape[1], input_shape[2])) content_image = np.array([content_image]) if input_mode == 'sketch': color_expected_output = tf.placeholder( tf.float32, shape=[ batch_size, input_shape[1], input_shape[2], 3 if generator_network != 'lnet' else 1 ], name='color_expected_output') sketch_expected_output = lnet_util.net( (color_expected_output - 128) / 128, reuse=True) * 255 content_image_yuv = cv2.cvtColor( np.asarray(content_image[0, ...], dtype=np.uint8), cv2.COLOR_RGB2YUV) image_sketches = sketch_expected_output.eval( feed_dict={ color_expected_output: np.array([content_image_yuv]) }) # image_sketches = sketches_util.image_to_sketch(content_image) # image_sketches = np.expand_dims(image_sketches, axis=3) elif input_mode == 'bw': content_image_lab = colorful_img_network_util.rgb_to_lab( content_image) image_sketches = content_image_lab[..., 0:1] # image_sketches = np.expand_dims(rgb2gray(content_image), axis=3) elif input_mode == 'color': image_sketches = np.zeros(content_image.shape) # image_sketches = np.expand_dims(rgb2gray(content_image), axis=3) elif input_mode == 'raw_sketch': image_sketches = rgb2gray(content_image, keep_dim=True) else: raise AssertionError('Input mode error.') # Do some processing... image_sketches, content_image = sketches_util.generate_training_batch( image_sketches, content_image, train=False) # Now generate an image using the style_blend_weights given. if input_mode == 'color': feed_dict = {input_images: content_image} else: feed_dict = {input_images: image_sketches[..., :1]} if use_hint: image_hint = hint_imread( test_img_hint, (input_shape[1], input_shape[2])) feed_dict[input_hint] = np.array([image_hint]) generated_bw = color_output.eval(feed_dict=feed_dict) iterator += 1 if generator_network != 'lnet': # Whenever using cv2.cvtColor, be careful not to use float values... It gives out weird answers. print(generated_bw[0, 0, 0:5, :]) print(content_image[0, 0, 0:5, :]) generated_image = np.array([ cv2.cvtColor( np.asarray(generated_bw[0, ...], dtype=np.uint8), cv2.COLOR_YUV2RGB) ]) # generated_image = image_sketches[...,:1] else: generated_image = generated_bw yield (iterator, generated_image) else: # Initialize log writer summary_writer = SummaryWriter("./logs", sess.graph) # initialize pre-processed numpy array if content_preprocessed_folder is not None: if not os.path.isfile(content_preprocessed_folder + 'record.txt'): raise AssertionError( 'No preprocessed content images found in %s. To use this feature, first use some ' 'other file to call read_resize_and_save_all_imgs_in_dir.'
% (content_preprocessed_folder)) content_preprocessed_record = sketches_util.read_preprocessed_sketches_npy_record( content_preprocessed_folder) if content_preprocessed_record[0][3] != height or content_preprocessed_record[0][4] != width: raise AssertionError( 'The height and/or width of the preprocessed numpy files do not ' 'match those of the current setting.') # Read the first file print('Reading preprocessed content images.') content_img_preprocessed = np.load( content_preprocessed_record[ prev_content_preprocessed_file_i][0]) sketches_preprocessed = np.load( content_preprocessed_record[ prev_content_preprocessed_file_i][1]) # Do Training. iter_start = 0 if do_restore_and_train: ckpt = tf.train.get_checkpoint_state(save_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) iter_start = get_global_step_from_save_dir( ckpt.model_checkpoint_path) else: raise AssertionError( "No checkpoint found. Exiting program") if restore_from_noadv_to_adv and use_adversarial_net: # Simply running this doesn't seem to work. # sess.run(tf.initialize_variables(adv_net_all_var)) # Get all variables except the generator net and the learning rate if '0.12.0' in tf.__version__: all_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES) else: all_vars = tf.get_collection( tf.GraphKeys.VARIABLES) var_not_saved = [ item for item in all_vars if item not in (generator_all_var + [learning_rate_var]) ] sess.run(tf.initialize_variables(var_not_saved)) # Now change the saver back to normal saver = tf.train.Saver() raise NotImplementedError else: # # In the past I ran this. Now I have lnet, which is a pretrained network. # sess.run(tf.initialize_all_variables()) saver = tf.train.Saver(sketch_reconstruct_all_var) ckpt = tf.train.get_checkpoint_state( 'model/chainer_converted/') saver.restore(sess, ckpt.model_checkpoint_path) # Get variables not in lnet and initialize them # Get all variables except the generator net and the learning rate if '0.12.0' in tf.__version__: all_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES) else: all_vars = tf.get_collection(tf.GraphKeys.VARIABLES) var_not_saved = [ item for item in all_vars if item not in sketch_reconstruct_all_var ] sess.run(tf.initialize_variables(var_not_saved)) # Now change the saver back to normal saver = tf.train.Saver() # Get path to all content images. image_subpaths = read_preprocessed_file_path_list( preprocessed_file_path_list) # Ignore the ones at the end. if batch_size != 1 and len(image_subpaths) % batch_size != 0: image_subpaths = image_subpaths[:-(len(image_subpaths) % batch_size)] print('The size of training dataset is %d images.' % len(image_subpaths)) preprocessed_colored_folder = preprocessed_folder + 'color/' preprocessed_sketch_folder = preprocessed_folder + 'line/' content_dirs = map(lambda p: preprocessed_colored_folder + p, image_subpaths) sketch_dirs = map(lambda p: preprocessed_sketch_folder + p, image_subpaths) # # Test training GAN differently*** # generators_turn = True # # END TEST*** current_lr = learning_rate_var.eval() if abs(current_lr - learning_rate) > 0.00000001: print("Given learning rate is different from the learning rate stored. Changing lr %f -> %f" % (current_lr, learning_rate))
Changing lr %f -> %f" % (current_lr, learning_rate)) sess.run(learning_rate_var.assign(learning_rate)) for i in range(iter_start, iterations): if content_preprocessed_folder is not None: current_content_preprocessed_file_i, index_within_preprocessed = \ sketches_util.find_corresponding_sketches_npy_from_record( content_preprocessed_record, i * batch_size, batch_size) if prev_content_preprocessed_file_i != current_content_preprocessed_file_i: prev_content_preprocessed_file_i = current_content_preprocessed_file_i content_img_preprocessed = np.load( content_preprocessed_record[ current_content_preprocessed_file_i][0]) sketches_preprocessed = np.load( content_preprocessed_record[ current_content_preprocessed_file_i][1]) content_pre_list = content_img_preprocessed[ index_within_preprocessed: index_within_preprocessed + batch_size, ...].astype(np.float32) if input_mode == 'sketch': image_sketches = sketches_preprocessed[ index_within_preprocessed: index_within_preprocessed + batch_size, ...].astype(np.float32) image_sketches = np.expand_dims(image_sketches, axis=3) elif input_mode == 'bw': content_image_lab = colorful_img_network_util.rgb_to_lab( content_pre_list) image_sketches = content_image_lab[..., 0:1] # image_sketches = np.expand_dims(rgb2gray(content_pre_list), axis=3) elif input_mode == 'color': image_sketches = content_pre_list elif input_mode == 'raw_sketch': raise AssertionError( 'Input mode raw_sketch should not be trained.') else: raise AssertionError('Input mode error.') else: current_content_dirs = get_batch_paths( content_dirs, i * batch_size, batch_size) current_sketch_dirs = get_batch_paths( sketch_dirs, i * batch_size, batch_size) content_pre_list = read_and_resize_batch_images( current_content_dirs, None, None) image_sketches = read_and_resize_bw_mask_images( current_sketch_dirs, None, None, len(current_sketch_dirs), 1) # if input_mode == 'sketch': # image_sketches = sketches_util.image_to_sketch(content_pre_list) # image_sketches = np.expand_dims(image_sketches, axis=3) # elif input_mode == 'bw': # content_image_lab = colorful_img_network_util.rgb_to_lab(content_pre_list) # image_sketches = content_image_lab[...,0:1] # # image_sketches = np.expand_dims(rgb2gray(content_pre_list), axis=3) # else: # raise AssertionError('Input mode error.') # Do some processing... 
image_sketches, content_pre_list = sketches_util.generate_training_batch( image_sketches, content_pre_list, train=True) if generator_network == 'lnet': feed_dict = { color_expected_output: image_sketches[..., :1] } else: feed_dict = {color_expected_output: content_pre_list} if use_hint: # image_hint = sketches_util.generate_hint_from_image(content_pre_list) # feed_dict[input_hint] = image_hint image_hint = image_sketches[..., 1:] feed_dict[input_hint] = image_hint image_sketches = image_sketches[..., :1] if input_mode == 'color': feed_dict[input_images] = content_pre_list else: feed_dict[input_images] = image_sketches last_step = (i == iterations - 1) if use_adversarial_net: # adv_feed_dict = {input_images:image_sketches, adv_net_input: content_pre_list} # if use_hint: # adv_feed_dict[input_hint] = image_hint adv_feed_dict = copy.copy(feed_dict) adv_feed_dict[adv_net_input] = content_pre_list # TEST printing before training print_progress(i, feed_dict=feed_dict, adv_feed_dict=adv_feed_dict, start_time=start_time, total_iterations=iterations, last=last_step) # Update D network _, summary_str = sess.run([adv_train_step, adv_sum], feed_dict=adv_feed_dict) summary_writer.add_summary(summary_str, i) # Update G network _, summary_str = sess.run( [generator_both_train, g_sum], feed_dict=adv_feed_dict) summary_writer.add_summary(summary_str, i) else: adv_feed_dict = None print_progress(i, feed_dict=feed_dict, adv_feed_dict=adv_feed_dict, start_time=start_time, total_iterations=iterations, last=last_step) _, summary_str = sess.run( [generator_train_step, g_sum], feed_dict=feed_dict) summary_writer.add_summary(summary_str, i) # TEST printing after training print_progress(i, feed_dict=feed_dict, adv_feed_dict=adv_feed_dict, start_time=start_time, total_iterations=iterations, last=last_step) if (checkpoint_iterations and i % checkpoint_iterations == 0) or last_step: saver.save(sess, save_dir + 'model.ckpt', global_step=i) print('Checkpoint saved.') if test_img_dir is not None: test_image = imread(test_img_dir) test_image_shape = test_image.shape # The for loop will run once and terminate. return and yield can't be used in the same function, so this is a hacky workaround. # Set use_cpu = True to save graphics memory for _, generated_image in color_sketches_net( test_image_shape[0], test_image_shape[1], iterations, 1, content_weight, tv_weight, learning_rate, generator_network=generator_network, use_adversarial_net=False,  # use_adversarial_net=use_adversarial_net, use_hint=use_hint, save_dir=save_dir, do_restore_and_generate=True, do_restore_and_train=False, from_screenshot=False, from_webcam=False, test_img_dir=test_img_dir, test_img_hint=test_img_hint, input_mode=input_mode, output_mode=output_mode, use_cpu=use_cpu): pass best_image = generated_image # Because we now have a batch, choose the first one in the batch as our sample image. yield ((None if last_step else i), None if test_img_dir is None else best_image)
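# The adversarial branch above trains a two-class discriminator (1 = real, 0 = generated) and, in
# parallel, trains the generator to make the discriminator output 1 on its samples, grouping both
# generator updates behind a single tf.no_op. Below is a minimal sketch of that wiring, assuming the
# TF-0.x positional signature of sparse_softmax_cross_entropy_with_logits used throughout this code;
# build_adversarial_ops and its argument names are illustrative, not part of the original repo.
import tensorflow as tf

def build_adversarial_ops(d_logits_real, d_logits_fake, d_vars, g_vars, lr_var):
    batch = tf.shape(d_logits_real)[0]
    ones = tf.ones([batch], dtype=tf.int64)
    zeros = tf.zeros([batch], dtype=tf.int64)
    # Discriminator: push real images toward class 1 and generated images toward class 0.
    d_loss = (tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(d_logits_real, ones)) +
              tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(d_logits_fake, zeros)))
    # Generator: push the discriminator's verdict on generated images toward class 1.
    g_loss_adv = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(d_logits_fake, ones))
    # beta1 = 0.5 for the adversarial updates, as in the DCGAN paper and the code above.
    d_step = tf.train.AdamOptimizer(lr_var, beta1=0.5, beta2=0.999).minimize(d_loss, var_list=d_vars)
    g_step = tf.train.AdamOptimizer(lr_var, beta1=0.5, beta2=0.999).minimize(g_loss_adv, var_list=g_vars)
    return d_step, g_step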
def main(argv=None): os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list if not tf.gfile.Exists(FLAGS.checkpoint_path): tf.gfile.MkDir(FLAGS.checkpoint_path) else: if not FLAGS.restore: tf.gfile.DeleteRecursively(FLAGS.checkpoint_path) tf.gfile.MkDir(FLAGS.checkpoint_path) input_images = tf.placeholder(tf.float32, shape=[None, None, None, 39], name='input_images') input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps') if FLAGS.geometry == 'RBOX': input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps') else: input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps') input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks') input_labels = tf.placeholder(tf.float32, shape=[None, None, 4, 2], name='input_labels') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True) # add summary tf.summary.scalar('learning_rate', learning_rate) opt = tf.train.AdamOptimizer(learning_rate) # split input_images_split = tf.split(input_images, len(gpus)) input_score_maps_split = tf.split(input_score_maps, len(gpus)) input_geo_maps_split = tf.split(input_geo_maps, len(gpus)) input_training_masks_split = tf.split(input_training_masks, len(gpus)) input_labels_split = tf.split(input_labels, len(gpus)) tower_grads = [] reuse_variables = None for i, gpu_id in enumerate(gpus): with tf.device('/gpu:%d' % gpu_id): with tf.name_scope('model_%d' % gpu_id) as scope: iis = input_images_split[i] isms = input_score_maps_split[i] igms = input_geo_maps_split[i] itms = input_training_masks_split[i] il = input_labels_split[i] total_loss, model_loss, f_score, f_geometry, _ = tower_loss(iis, isms, igms, itms, il, reuse_variables) #f_score, f_geometry = i_am_testing(iis) batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope)) #print "below..." #batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name]) #print "above..." reuse_variables = True #print "below.." train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name] #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name] #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name] #print train_var #print "above..." 
train_var += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='feature_fusion') grads = opt.compute_gradients(total_loss, var_list=train_var) tower_grads.append(grads) grads = average_gradients(tower_grads) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) summary_op = tf.summary.merge_all() variable_averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay, global_step) #train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)] variables_averages_op = variable_averages.apply(tf.trainable_variables()) with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]): train_op = tf.no_op(name='train_op') ##################################################################################################################### # BLOCK MODIFIED BY ME #variables = slim.get_variables_to_restore() #var_list = [] #for v in variables: # if len(v.name.split('/')) == 1: # var_list.append(v) # elif v.name.split('/')[1] != "myconv1" or not v.name.find('custom_filter'): # var_list.append(v) # else: # pass #saver = tf.train.Saver(var_list) saver = tf.train.Saver(tf.global_variables()) saver_restore_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) # removing the first conv layer #del saver_restore_vars[1] #saver_to_restore = tf.train.Saver(saver_restore_vars) ##################################################################################################################### summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph()) init = tf.global_variables_initializer() #print '>> trainable variables: ',slim.get_trainable_variables() if FLAGS.pretrained_model_path is not None: variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(), ignore_missing_vars=True) #my_char_l = "5" #my_char_U = "" data_size = 0 train_data_indices = [] list_of_img_pos = [] with open('./cropped_annotations_5.txt', 'r') as f: annotation_file = f.readlines() #with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f: # annotation_file += f.readlines() idx = 0 for line in annotation_file: if len(line)>1 and line[:13] == './cropped_img':# and str(line[14:27]) in training_list: data_size +=1 train_data_indices.append(idx) list_of_img_pos.append(line[14:].split(".")[0]+".tiff") idx += 1 list_of_img_all = os.listdir('./cropped_img') list_of_img_neg = np.array(list(set(list_of_img_all) - set(list_of_img_pos))) #print "Char model: " + my_char_U + my_char_l #print "Data size: " + str(data_size) epoch_size = data_size / (16 * 2) #print epoch_size print "This many steps per epoch: " + str(epoch_size) #list_of_img_neg_char = os.listdir('Data/j') with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: if FLAGS.restore: ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path) print '>> Checkpoint path: ', FLAGS.checkpoint_path print '>> second stuff: ', os.path.basename(ckpt_state.model_checkpoint_path) #all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)[1] var1 = saver_restore_vars[1] del saver_restore_vars[1] var2 = saver_restore_vars[422] del saver_restore_vars[422] #names = [var.name for var in saver_restore_vars] saver_to_restore = tf.train.Saver(saver_restore_vars) #print '>> global vars: ', names.index('resnet_v1_50/conv1/weights/ExponentialMovingAverage:0')#[var.name for var in tf.global_variables()] 
model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) # originally saver.restore(sess, model_path) saver_to_restore.restore(sess, model_path) init_new_vars_op = tf.initialize_variables([var1, var2]) sess.run(init_new_vars_op) else: sess.run(init) if FLAGS.pretrained_model_path is not None: variable_restore_op(sess) #print "below:" #tvars = tf.trainable_variables() #g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name] #print g_vars #print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50') #return print FLAGS.learning_rate print reg_constant for step in range(24*epoch_size): ### Generate Data ### data = [], [], [], [], [] np.random.shuffle(train_data_indices) num_im = 0 actual_num_im = 0 list_of_chars = list(string.ascii_lowercase)+[str(x) for x in range(10)] while len(data[0]) < 32: prob = 1  # np.random.random(1)[0] if prob > 0.49: i = train_data_indices[num_im] im_fn = "./cropped_img/"+annotation_file[i][14:].split(".tiff",1)[0]+".tiff" #print im_fn im = cv2.imread(im_fn) ################################################################################ # adding rest of the channels for ids_c in range(len(list_of_chars)): crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_cropped/'+list_of_chars[ids_c]+'/' filename = crop_dir+annotation_file[i][14:].split(".tiff",1)[0]+".tiff" pad = cv2.imread(filename) pad = pad[:,:,0] pad = np.expand_dims(pad, axis=2) im = np.append(im, pad, axis = 2) ################################################################################ ################################################################################ if im is not None: r, c, _ = im.shape text_polys = [] text_tags = [] if int(annotation_file[i+1]) > 0: for idx in range(i+2,i+2+int(annotation_file[i+1])): annotation_data = annotation_file[idx] annotation_data = annotation_data.split(" ") x, y = float(annotation_data[0]), float(annotation_data[1]) w, h = float(annotation_data[2]), float(annotation_data[3]) text_polys.append([list([int(x),int(y-h)]),list([int(x+w),int(y-h)]),list([int(x+w),int(y)]),list([int(x),int(y)])]) text_tags.append(False) score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array(text_polys), np.array(text_tags)) data[0].append(im[:, :, ::-1].astype(np.float32)) data[1].append(im_fn) data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32)) data[3].append(geo_map[::4, ::4, :].astype(np.float32)) data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32)) actual_num_im += 1 num_im += 1 else: im_fn = np.random.choice(list_of_img_neg) im = cv2.imread("./cropped_img/" + im_fn) # Fix: load the chosen negative image; previously im was left over from the last positive sample. ################################################################################ # adding rest of the channels #for i in range(len(list_of_chars)): crop_dir = '/mnt/nfs/work1/elm/ray/evaluation/EAST_single_cropped/' filename = crop_dir + im_fn # Fix: use the negative image's own filename (assumes this dir mirrors cropped_img names); previously this reused annotation_file[i] from the positive branch. pad = cv2.imread(filename) pad = pad[:,:,0] pad = np.expand_dims(pad, axis=2) im = np.append(im, pad, axis = 2) ################################################################################ # im_fn = np.random.choice(list_of_img_neg_char) # im_mini = cv2.imread("Data/j/" + im_fn) # r0, c0, _ = im_mini.shape # im = np.zeros((512, 512, 3), dtype=np.uint8) # ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2 # im[ra:rb, ca:cb, :] = im_mini.copy() if im is not None: r, c, _ = im.shape score_map, geo_map, training_mask = icdar.generate_rbox((int(r), int(c)), np.array([]), np.array([])) data[0].append(im[:, :, ::-1].astype(np.float32))
data[1].append(im_fn) data[2].append(score_map[::4, ::4, np.newaxis].astype(np.float32)) data[3].append(geo_map[::4, ::4, :].astype(np.float32)) data[4].append(training_mask[::4, ::4, np.newaxis].astype(np.float32)) ### Run model ### ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]}) epoch = step / epoch_size batch_num = step % epoch_size if step % (epoch_size/3) == 0: print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(ml) print "Epoch no.: " + str(epoch) + " batch no.: " + str(batch_num) + " loss: " + str(tl) if step % (epoch_size/2) == 0: #print "Epoche: " + str(step / (epoch_size/2)) saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step) _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0], input_score_maps: data[2], input_geo_maps: data[3], input_training_masks: data[4]}) summary_writer.add_summary(summary_str, global_step=step) if False: count_right = 0 count_wrong = 0 count_posNotDetected = 0 im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1] w, h, _ = im0.shape slide_window = 300 crop_size = 512 crop_center = (256, 256) num_rows, num_cols = int(np.ceil(w/slide_window)), int(np.ceil(h/slide_window)) print num_cols labels = [] # Fix: accumulate boxes across all rotations; labels was previously used below without being initialized. for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]: im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1] boxes_one_rot = [] count = 0 while count < num_rows * num_cols: images, data2, data3, data4 = [], [], [], [] for k in range(16): i = (count + k) / num_rows j = (count + k) % num_cols temp = im[slide_window*i:slide_window*i+crop_size, \ slide_window*j:slide_window*j+crop_size, ::-1] w2, h2, _ = temp.shape if w2 < crop_size or h2 < crop_size: result = np.zeros((crop_size,crop_size,3)) result[:w2,:h2] = temp temp = result M = cv2.getRotationMatrix2D(crop_center,rot,1.0) temp = cv2.warpAffine(temp, M, (crop_size, crop_size)) images.append(temp) score_map, geo_map, training_mask = icdar.generate_rbox((int(crop_size), int(crop_size)), np.array([]), np.array([])) data2.append(score_map[::4, ::4, np.newaxis].astype(np.float32)) data3.append(geo_map[::4, ::4, :].astype(np.float32)) data4.append(training_mask[::4, ::4, np.newaxis].astype(np.float32)) score, geometry = sess.run([f_score, f_geometry], feed_dict={input_images: images, input_score_maps:data2, input_geo_maps: data3, input_training_masks: data4}) for k in range(16): i = (count + k) / num_rows j = (count + k) % num_cols boxes = detect(score_map=score[j], geo_map=geometry[j], score_map_thresh=0.01, box_thresh=0.01, nms_thres=0.01) if boxes is not None: boxes = boxes[:, :8].reshape((-1, 4, 2)) for box in boxes: M_inv = cv2.getRotationMatrix2D(crop_center,-1*rot,1) box[0] = M_inv.dot(np.array((box[0,0], box[0,1]) + (1,))) box[1] = M_inv.dot(np.array((box[1,0], box[1,1]) + (1,))) box[2] = M_inv.dot(np.array((box[2,0], box[2,1]) + (1,))) box[3] = M_inv.dot(np.array((box[3,0], box[3,1]) + (1,))) box = sort_poly(box.astype(np.int32)) box[0,0] = box[0,0] + j * slide_window box[0,1] = box[0,1] + i * slide_window box[1,0] = box[1,0] + j * slide_window box[1,1] = box[1,1] + i * slide_window box[2,0] = box[2,0] + j * slide_window box[2,1] = box[2,1] + i * slide_window box[3,0] = box[3,0] + j * slide_window box[3,1] = box[3,1] + i * slide_window boxes_one_rot.append(box) count += 16 # Fix: advance past the batch of 16 tiles processed above; without this the while loop never terminates. boxes_single_rot = np.zeros((len(boxes_one_rot), 9)) boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape((-1, 8))
boxes_single_rot[:, 8] = 1 labels += boxes_single_rot.tolist() boxes = lanms.merge_quadrangle_n9(np.array(labels), nms_thres) annotation = np.load("/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" + "D0117-5755036" + ".npy").item() ### Compute the TP, FP, FN info for each image count_right_cache = 0 boxes = boxes[:, :8].reshape((-1, 4, 2)) num_true_pos = len(annotation) for box in boxes: box = sort_poly(box.astype(np.int32)) if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5: continue k = 0 idx = 0 count_wrong += 1 while (idx < num_true_pos): if k in annotation: proposed_label = annotation[k]['vertices'] if len(proposed_label) == 4: x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], proposed_label[1][0], proposed_label[1][1], \ proposed_label[2][0], proposed_label[2][1], proposed_label[3][0], proposed_label[3][1] if checkIOU(box, [[x0,y0],[x1,y1],[x2,y2],[x3,y3]]): count_right_cache += 1 count_wrong -= 1 break idx += 1 k += 1 count_posNotDetected += num_true_pos - count_right_cache count_right += count_right_cache precision = float(count_right) / float(count_right + count_wrong) # TP / (TP + FP) recall = float(count_right) / float(count_right + count_posNotDetected) # TP / (TP + FN) fscore = 2 * (precision * recall) / (precision + recall) print "Precision, recall, fscore: " + str(precision) + ", " + str(recall) + ", " + str(fscore)
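# The (currently disabled) evaluation block above tiles a large map image with a 300-pixel stride,
# rotates each 512x512 crop before detection, and then maps every detected box back through the inverse
# rotation plus the tile offset. A small sketch of that coordinate mapping, assuming the same rotation
# center (256, 256) and stride as above; untile_box and its parameter names are illustrative.
import cv2
import numpy as np

def untile_box(box, rot_deg, tile_i, tile_j, slide_window=300, crop_center=(256, 256)):
    # box: 4x2 array of corner points detected inside a crop rotated by rot_deg degrees.
    M_inv = cv2.getRotationMatrix2D(crop_center, -1 * rot_deg, 1.0)
    box = np.array([M_inv.dot(np.array([px, py, 1.0])) for px, py in box])
    # Undo the tiling: shift by the crop's top-left corner in the full image.
    box[:, 0] += tile_j * slide_window
    box[:, 1] += tile_i * slide_window
    return box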
def main(_): nH, nW, nD = 28, 28, 1 nC = 10 nB = FLAGS.B nT = 10 with tf.device("/gpu:"+ str(FLAGS.gpu)): n_hidden = 10 n_sequence = 4 def weight_variable(shape): initial = tf.truncated_normal(shape, stddev = 0.1) return tf.Variable(initial) def bias_variable(shape): initial = tf.constant(0.1, shape=shape) return tf.Variable(initial) def RNN(x): x_list = [] for i in range(n_sequence): x_list.append(x) with tf.variable_scope('rnn'): # Define an LSTM cell with TensorFlow lstm_cell = rnn_cell.BasicLSTMCell(n_hidden) # Get the LSTM cell output outputs, states = rnn.rnn(lstm_cell, x_list, dtype=tf.float32) logits = [] for i in range(len(outputs)): with tf.variable_scope('linear'+str(i)): logits.append(linear(outputs[i], n_hidden, 2)) return logits def Hash(y): with tf.variable_scope('hash'): y1 = relu(linear(y, 10, 100)) z_logits = [] for i in range(n_sequence): with tf.variable_scope('linear'+str(i)): z_logits.append(linear(y1, 100, 2)) return z_logits def InvHash(z_logits): with tf.variable_scope('invhash'): z_concat = tf.concat(1, z_logits) with tf.variable_scope('linear1'): y1 = relu(linear(z_concat, 2*n_sequence, 100)) with tf.variable_scope('linear2'): y__logit = linear(y1, 100, 10) return y__logit def match_all(y, y_): # Evaluate model num_correct_pred = 0 for i in range(n_sequence): num_correct_pred += tf.cast(tf.equal(tf.argmax(y_[i],1), tf.argmax(y[i],1)), tf.int32) correct_pred = tf.equal(num_correct_pred, tf.constant(n_sequence,dtype=tf.int32)) accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) return accuracy def TestHash(): v = np.zeros((10,10)) for i in range(10): v[i][i] = 1.0 output = sess.run(z, feed_dict = {'y:0': v}) s_list = [] for i in range(10): s = '' for j in range(n_sequence): if output[j][i][0] > output[j][i][1]: s += '0' else: s += '1' s_list.append(s) ok = True for i in range(10): for j in range(i+1,10): if s_list[i] == s_list[j]: ok = False if ok: print("Hash is one-to-one") else: print("Hash is not one-to-one", s_list) sess = create_session() saver = tf.train.import_meta_graph('saved/model-45000.meta') saver.restore(sess, tf.train.latest_checkpoint('saved/')) g = tf.get_default_graph() prev_vars = tf.all_variables() x = g.get_tensor_by_name('x:0') y = g.get_tensor_by_name('y:0') print x,y is_training = g.get_tensor_by_name('is_training:0') y_pred = g.get_tensor_by_name('cnn_train/y_pred_logits:0') y_pred = tf.stop_gradient(y_pred) features = g.get_tensor_by_name('cnn_train/features:0') features = tf.stop_gradient(features) accuracy = g.get_tensor_by_name('cnn_train/accuracy:0') #---------------------------------------------------------------------------------------------# z_logits = RNN(features) zlogits = Hash(y) z_ = [] for i in range(n_sequence): z_.append(tf.nn.softmax(z_logits[i])) z = [] for i in range(n_sequence): z.append(softmax(zlogits[i])) y__logits = InvHash(z) eq_check_3 = cross_entropy(y__logits, y) tf.summary.scalar('inverse_loss', eq_check_3) l2 = 0 for i in range(n_sequence): l2 += tf.nn.l2_loss(z[i]) eq_check_2 = sum_cross_entropy(z_logits, z) tf.summary.scalar('rnn_classification_loss', eq_check_2) tf.summary.scalar('l2_z', l2) total_loss = eq_check_2 + 100*eq_check_3 + 0.01*l2 tf.summary.scalar('loss', total_loss) rnn_acc = match_all(z_, z) tf.summary.scalar('rnn_acc', rnn_acc) learning_rate = 0.0001 with tf.variable_scope('optimizer'): optimizer = minimize(total_loss, { 'learning rate' : learning_rate}, algo='adam') sess.run(tf.initialize_variables(list(set(tf.all_variables()) - set(prev_vars)))) writer = tf.summary.FileWriter('logs', graph=sess.graph)
summary_op = tf.summary.merge_all() n_epoch = 100 n_batch = 200 n_display = 10000 for e in range(n_epoch): for i in range(0, train.shape[0], nB): batch = next_batch(nB) feed_dict = { 'x:0': batch['data'], 'y:0': batch['labels'], 'is_training:0': True } a = sess.run([optimizer, summary_op], feed_dict=feed_dict) writer.add_summary(a[-1], e*50000 + i) if i % 4000 == 0: TestHash() writer.flush()
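# TestHash above checks that the learned n_sequence-bit codes are pairwise distinct over the 10 one-hot
# labels, and match_all counts a prediction as correct only when every step of the sequence agrees.
# A numpy sketch of both checks, assuming logits arranged as a list of n_sequence [batch, 2] arrays;
# the helper names are illustrative, not from the original code.
import numpy as np

def bits_from_logits(step_logits):
    # Argmax of each 2-way output is one bit of the code; result is [batch, n_sequence].
    return np.stack([np.argmax(l, axis=1) for l in step_logits], axis=1)

def codes_are_injective(step_logits):
    codes = [''.join(str(b) for b in row) for row in bits_from_logits(step_logits)]
    return len(set(codes)) == len(codes)

def sequence_exact_match(pred_logits, target_logits):
    # An example counts as correct only if all n_sequence bits agree (mirrors match_all).
    agree = bits_from_logits(pred_logits) == bits_from_logits(target_logits)
    return np.mean(np.all(agree, axis=1))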
def meta_loss(self, make_loss, len_unroll, net_assignments=None, second_derivatives=False): """Returns an operator computing the meta-loss. Args: make_loss: Callable which returns the optimizee loss; note that this should create its ops in the default graph. len_unroll: Number of steps to unroll. net_assignments: variable to optimizer mapping. If not None, it should be a list of (k, names) tuples, where k is a valid key in the kwargs passed at construction time and names is a list of variable names. second_derivatives: Use second derivatives (default is False). Returns: namedtuple containing (loss, update, reset, fx, x) """ # Construct an instance of the problem only to grab the variables. This # loss will never be evaluated. x, constants = _get_variables(make_loss) print("Optimizee variables") print([op.name for op in x]) print("Problem variables") print([op.name for op in constants]) # Create the optimizer networks and find the subsets of variables to assign # to each optimizer. nets, net_keys, subsets = _make_nets(x, self._config, net_assignments) # Store the networks so we can save them later. self._nets = nets # Create hidden state for each subset of variables. state = [] with tf.name_scope("states"): for i, (subset, key) in enumerate(zip(subsets, net_keys)): net = nets[key] with tf.name_scope("state_{}".format(i)): state.append( _nested_variable([ net.initial_state_for_inputs(x[j], dtype=tf.float32) for j in subset ], name="state", trainable=False)) def update(net, fx, x, state): """Parameter and RNN state update.""" with tf.name_scope("gradients"): gradients = tf.gradients(fx, x) # Stopping the gradient here corresponds to what was done in the # original L2L NIPS submission. However, it looks like things like # BatchNorm don't support second derivatives, so we still need # this term. if not second_derivatives: gradients = [tf.stop_gradient(g) for g in gradients] with tf.name_scope("deltas"): deltas, state_next = zip( *[net(g, s) for g, s in zip(gradients, state)]) state_next = list(state_next) return deltas, state_next def time_step(t, fx_array, x, state): """While loop body.""" x_next = list(x) state_next = [] with tf.name_scope("fx"): fx = _make_with_custom_variables(make_loss, x) fx_array = fx_array.write(t, fx) with tf.name_scope("dx"): for subset, key, s_i in zip(subsets, net_keys, state): x_i = [x[j] for j in subset] deltas, s_i_next = update(nets[key], fx, x_i, s_i) for idx, j in enumerate(subset): x_next[j] += deltas[idx] state_next.append(s_i_next) with tf.name_scope("t_next"): t_next = t + 1 return t_next, fx_array, x_next, state_next # Define the while loop. fx_array = tf.TensorArray(tf.float32, size=len_unroll + 1, clear_after_read=False) _, fx_array, x_final, s_final = tf.while_loop( cond=lambda t, *_: t < len_unroll, body=time_step, loop_vars=(0, fx_array, x, state), parallel_iterations=1, swap_memory=True, name="unroll") with tf.name_scope("fx"): fx_final = _make_with_custom_variables(make_loss, x_final) fx_array = fx_array.write(len_unroll, fx_final) loss = tf.reduce_sum(fx_array.pack(), name="loss") # Reset the state; should be called at the beginning of an epoch. with tf.name_scope("reset"): variables = (nest.flatten(state) + x + constants) # Empty array as part of the reset process. reset = [tf.initialize_variables(variables), fx_array.close()] # Operator to update the parameters and the RNN state after our loop, but # during an epoch.
with tf.name_scope("update"): update = (nest.flatten(_nested_assign(x, x_final)) + nest.flatten(_nested_assign(state, s_final))) # Log internal variables. for k, net in nets.iteritems(): print("Optimizer '{}' variables".format(k)) print([op.name for op in nn.get_variables_in_module(net)]) return MetaLoss(loss, update, reset, fx_final, x_final)
def train_rnn(data_folder, model_file): y, rnn_state = RNN(x) print "Loading training pickles.." # We want to keep the sentences in order to train per sentence # Sentences are padded to num_steps train_set = import_data.load_dataset(data_folder + '/train_data.pickle', data_folder + '/train_labels.pickle', keep_sentences=True, context_frames=1, seq_length=num_steps, batch_size=train_batch_size) print "Loading done" global sess global summary_op global train_writer global saver saver = tf.train.Saver() # Create the dir for the model if not os.path.isdir('%s/models/%s' % (save_loc, start_date)): try: os.makedirs('%s/models/%s' % (save_loc, start_date)) except OSError: if not os.path.isdir('%s/models/%s' % (save_loc, start_date)): raise sess = tf.InteractiveSession() summary_op = tf.merge_all_summaries() train_writer = tf.train.SummaryWriter( '%s/summaries/%s' % (save_loc, start_date), sess.graph) # Cost function cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(y, true_labels)) # Optimizer # For gradient descent, learning rate = 0.002 (see Hinton et al.) # For AdamOptimizer, learning rate = 0.001 (default) if optimizer_name == 'Adam': # Hacky solution for making sure that the beta2_power var is always initialized temp = set(tf.all_variables()) optimizer = tf.train.AdamOptimizer().minimize(cost) sess.run(tf.initialize_variables(list(set(tf.all_variables()) - temp))) else: optimizer = tf.train.GradientDescentOptimizer(0.02).minimize(cost) if model_file: saver.restore(sess, model_file) print "Model restored" else: # Initialization init_op = tf.initialize_all_variables() sess.run(init_op) print("Training network. Date: %s" % start_date) train(train_set, y, rnn_state, cost, optimizer) save_path = saver.save(sess, "%s/models/%s/model.ckpt" % (save_loc, start_date)) print("Model saved in file: %s" % save_path) print("Summaries written to %s/summaries/%s" % (save_loc, start_date)) evaluate_rnn(data_folder, y, rnn_state)
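# The "hacky solution" above — snapshotting tf.all_variables() before building the Adam op and
# initializing only the set difference afterwards — is a general idiom for initializing just the slot
# variables (e.g. Adam's beta power accumulators) that an op factory creates. A reusable sketch for the
# same TF era; initialize_new_variables is an illustrative name, not part of the original code.
import tensorflow as tf

def initialize_new_variables(sess, build_fn):
    # Snapshot the variable set, build ops, then initialize only what was added.
    existing = set(tf.all_variables())
    result = build_fn()
    new_vars = set(tf.all_variables()) - existing
    if new_vars:
        sess.run(tf.initialize_variables(list(new_vars)))
    return result

# Usage mirroring the Adam branch above:
# optimizer = initialize_new_variables(sess, lambda: tf.train.AdamOptimizer().minimize(cost))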
# DiscreteDeepQ object current_controller = DiscreteDeepQ(input_size, num_actions, brain, optimizer, session, discount_rate=0.95, target_network_update_rate=0.005, exploration_period=5000, max_experience=10000, store_every_nth=4, train_every_nth=4, summary_writer=journalist) init_all_vars_op = tf.initialize_variables(tf.all_variables(), name='init_all_vars_op') session.run(tf.initialize_all_variables()) # For saving graph state: trainable variable values for variable in tf.trainable_variables(): tf.identity(variable, name="readVariable") tf.assign(variable, tf.placeholder(tf.float32, variable.get_shape(), name="variableValue"), name="restoreVariable") tf.train.write_graph(session.graph_def, 'models/', 'graph-separated-1d.pb',
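# The loop above attaches a named read op (tf.identity) and a named restore op (tf.assign fed from a
# placeholder) to every trainable variable, so a client holding only the exported GraphDef can pull and
# push weights by operation name; TensorFlow uniquifies the repeated names as readVariable,
# readVariable_1, and so on. A hedged sketch of how a client might round-trip each variable through
# those ops, assuming the uniquified suffixes follow the order of tf.trainable_variables():
import tensorflow as tf

for i, variable in enumerate(tf.trainable_variables()):
    suffix = '' if i == 0 else '_%d' % i
    # Fetch the current value by tensor name, then feed it straight back through the assign op.
    value = session.run('readVariable%s:0' % suffix)
    session.run('restoreVariable%s' % suffix,
                feed_dict={'variableValue%s:0' % suffix: value})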