def __init__(self, dataset_path_or_instance, layers, epochs, starter_learning_rate, noise_std, train_ratio=0.8, test_ratio=0.1, validation_ratio=0.1): super(MultilayerPerceptron, self).__init__(dataset_path_or_instance, epochs, starter_learning_rate, train_ratio, test_ratio, validation_ratio) self._noise_std = noise_std self._layers = layers self._layers.insert(0, self._input_size) self._layers.append(self._output_size) self._L = len(self._layers) - 1 # size of layers ignoring input layer # build network and return cost function self._cost = self.__build_network__() # define the y function as the classification function self._y = self.__build_classifier__() # loss self._loss = -tf.reduce_mean(tf.reduce_sum(self._outputs*tf.log(self._cost), 1)) # y_true and y_pred used to get the metrics self._y_true = tf.argmax(self._outputs, 1) self._y_pred = tf.argmax(self._y, 1) # train_step for the weight parameters, optimized with Adam self._learning_rate = tf.Variable(self._starter_learning_rate, trainable=False) self._train_step = tf.train.AdamOptimizer(self._learning_rate).minimize(self._loss) # add the updates of batch normalization statistics to train_step bn_updates = tf.group(*self._bn_assigns) with tf.control_dependencies([self._train_step]): self._train_step = tf.group(bn_updates)
def build_rmsprop_optimizer(self, learning_rate, rmsprop_decay, rmsprop_constant, gradient_clip, version):
    with tf.name_scope('rmsprop'):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)

        grads_and_vars = optimizer.compute_gradients(self.loss)
        grads = [gv[0] for gv in grads_and_vars]
        params = [gv[1] for gv in grads_and_vars]
        if gradient_clip > 0:
            # tf.clip_by_global_norm returns (clipped_list, global_norm); keep only the clipped gradients.
            grads, _ = tf.clip_by_global_norm(grads, gradient_clip)

        if version == 'rmsprop':
            return optimizer.apply_gradients(zip(grads, params))
        elif version == 'graves_rmsprop':
            square_grads = [tf.square(grad) for grad in grads]

            avg_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]
            avg_square_grads = [tf.Variable(tf.ones(var.get_shape())) for var in params]

            update_avg_grads = [
                grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + ((1 - rmsprop_decay) * grad_pair[1]))
                for grad_pair in zip(avg_grads, grads)]
            update_avg_square_grads = [
                grad_pair[0].assign((rmsprop_decay * grad_pair[0]) + ((1 - rmsprop_decay) * tf.square(grad_pair[1])))
                for grad_pair in zip(avg_square_grads, grads)]
            avg_grad_updates = update_avg_grads + update_avg_square_grads

            rms = [tf.sqrt(avg_grad_pair[1] - tf.square(avg_grad_pair[0]) + rmsprop_constant)
                   for avg_grad_pair in zip(avg_grads, avg_square_grads)]

            rms_updates = [grad_rms_pair[0] / grad_rms_pair[1] for grad_rms_pair in zip(grads, rms)]
            train = optimizer.apply_gradients(zip(rms_updates, params))

            return tf.group(train, tf.group(*avg_grad_updates))
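# A minimal, self-contained sketch (TensorFlow 1.x graph mode assumed) of the clipping step used above:
# tf.clip_by_global_norm returns a (clipped_list, global_norm) pair, so only the first element is
# re-zipped with the variables before apply_gradients. The toy loss and placeholder are illustrative.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 4])
w = tf.Variable(tf.zeros([4, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - 1.0))

optimizer = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = optimizer.compute_gradients(loss)
grads = [g for g, _ in grads_and_vars]
params = [v for _, v in grads_and_vars]
clipped_grads, global_norm = tf.clip_by_global_norm(grads, 5.0)
train_op = optimizer.apply_gradients(zip(clipped_grads, params))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: np.ones((8, 4), np.float32)})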
def nabla(self, cost, n, c=1, q=0.001, a=0.001, A=100, alpha=0.602, gamma=0.101):
    cn = (c + 0.0) / (n + A) ** gamma
    an = a / (n + 1 + A) ** alpha
    qk = math.sqrt(q / (n + A) * math.log(math.log(n + A)))
    wk = normal()
    dv = []
    sess = self.sess
    g = []
    orig = self.var
    # build a random +/- perturbation for every variable
    for m in self.var:
        shape = m.shape
        nm = np.ones(shape=shape)
        for x in np.nditer(nm, op_flags=['readwrite']):
            x[...] = dist.bernoulli() * 2 * cn
        dv.append(nm)
    # evaluate the cost at var + perturbation
    l = []
    for m, d, t in zip(self.var, dv, self.var_t):
        l.append(t.assign(m + d))
    sess.run(tf.group(*l))
    f1 = sess.run(cost, self.feed)
    # evaluate the cost at var - perturbation
    l = []
    for m, d, t in zip(self.var, dv, self.var_t):
        l.append(t.assign(m - d))
    sess.run(tf.group(*l))
    f0 = sess.run(cost, self.feed)
    df = f1 - f0
    # turn the perturbations into the gradient estimate
    for m in dv:
        for x in np.nditer(m, op_flags=['readwrite']):
            x[...] = -(df + 0.0) / x / 2
    return dv
def solve(global_step):
    """add solver to losses"""
    # learning rate
    lr = _configure_learning_rate(82783, global_step)
    optimizer = _configure_optimizer(lr)
    tf.summary.scalar('learning_rate', lr)

    # compute and apply gradient
    losses = tf.get_collection(tf.GraphKeys.LOSSES)
    regular_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    regular_loss = tf.add_n(regular_losses)
    out_loss = tf.add_n(losses)
    total_loss = tf.add_n(losses + regular_losses)

    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('out_loss', out_loss)
    tf.summary.scalar('regular_loss', regular_loss)

    update_ops = []
    variables_to_train = _get_variables_to_train()
    # update_op = optimizer.minimize(total_loss)
    gradients = optimizer.compute_gradients(total_loss, var_list=variables_to_train)
    grad_updates = optimizer.apply_gradients(gradients, global_step=global_step)
    update_ops.append(grad_updates)

    # update moving mean and variance
    if FLAGS.update_bn:
        update_bns = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_bn = tf.group(*update_bns)
        update_ops.append(update_bn)

    return tf.group(*update_ops)
def central_step():
    # restore v1, slots
    op5 = tf.group(*[tf.assign(w, v) for w, v in zip(restored_vars, tmp_vars)])
    with tf.get_default_graph().control_dependencies([op5]):
        back = tf.group(*[tf.assign_sub(v, -self._lr_t * grad) for grad, v in grads_and_vars])
        with tf.get_default_graph().control_dependencies([back]):
            return tf.gradients(self.gan.trainer.d_loss, d_vars) + tf.gradients(self.gan.trainer.g_loss, g_vars)
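# A small runnable sketch (TF 1.x assumed) of the sequencing trick used in central_step above: group a
# set of assigns, then place later ops under control_dependencies so they only run after the grouped
# restore has executed. The variables v and backup are made up for illustration.
import tensorflow as tf

v = tf.Variable(0.0)
backup = tf.Variable(10.0)

restore = tf.group(tf.assign(v, backup))      # step 1: restore v from its backup copy
with tf.control_dependencies([restore]):
    step = tf.assign_add(v, 1.0)              # step 2: runs only after the restore
with tf.control_dependencies([step]):
    read = tf.identity(v)                     # step 3: reads the post-update value

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(read))  # 11.0: restored to 10.0, then incremented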
def optimize(self, learning_rate, train_layers,global_step,source_centroid,target_centroid): print '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++' print train_layers var_list = [v for v in tf.trainable_variables() if v.name.split('/')[1] in train_layers+['fc9']] finetune_list=[v for v in var_list if v.name.split('/')[1] in ['conv1','conv2','conv3','conv4','conv5','fc6','fc7']] new_list=[v for v in var_list if v.name.split('/')[1] in ['fc8','fc9']] self.Gregloss=0.0005*tf.reduce_mean([tf.nn.l2_loss(x) for x in var_list if 'weights' in x.name]) finetune_weights=[v for v in finetune_list if 'weights' in v.name] finetune_biases=[v for v in finetune_list if 'biases' in v.name] new_weights=[v for v in new_list if 'weights' in v.name] new_biases=[v for v in new_list if 'biases' in v.name] print '==============finetune_weights=======================' print finetune_weights print '==============finetune_biases=======================' print finetune_biases print '==============new_weights=======================' print new_weights print '==============new_biases=======================' print new_biases self.F_loss=self.loss+self.Gregloss+global_step*self.G_loss+global_step*self.Semanticloss train_op1=tf.train.MomentumOptimizer(learning_rate*0.1,0.9).minimize(self.F_loss, var_list=finetune_weights) train_op2=tf.train.MomentumOptimizer(learning_rate*0.2,0.9).minimize(self.F_loss, var_list=finetune_biases) train_op3=tf.train.MomentumOptimizer(learning_rate*1.0,0.9).minimize(self.F_loss, var_list=new_weights) train_op4=tf.train.MomentumOptimizer(learning_rate*2.0,0.9).minimize(self.F_loss, var_list=new_biases) train_op=tf.group(train_op1,train_op2,train_op3,train_op4) with tf.control_dependencies([train_op1,train_op2,train_op3,train_op4]): update_sc=self.source_moving_centroid.assign(source_centroid) update_tc=self.target_moving_centroid.assign(target_centroid) return tf.group(update_sc,update_tc)
def optimize(self, learning_rate, train_layers,global_step,source_centroid,target_centroid): print '+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++' print train_layers var_list=[v for v in tf.trainable_variables() if v.name.split('/')[1] in ['conv1','conv2','fc1','fc2']] self.Gregloss=5e-4*tf.reduce_mean([tf.nn.l2_loss(x) for x in var_list if 'weights' in x.name]) new_weights=[v for v in var_list if 'weights' in v.name or 'gamma' in v.name] new_biases=[v for v in var_list if 'biases' in v.name or 'beta' in v.name] print '==============new_weights=======================' print new_weights print '==============new_biases=======================' print new_biases self.F_loss=self.loss+self.Gregloss+global_step*self.Semanticloss+global_step*self.G_loss update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print '+++++++++++++++ batch norm update ops +++++++++++++++++' print update_ops with tf.control_dependencies(update_ops): train_op3=tf.train.MomentumOptimizer(learning_rate*1.0,0.9).minimize(self.F_loss, var_list=new_weights) train_op4=tf.train.MomentumOptimizer(learning_rate*2.0,0.9).minimize(self.F_loss, var_list=new_biases) train_op=tf.group(train_op3,train_op4) with tf.control_dependencies([train_op3,train_op4]): update_sc=self.source_moving_centroid.assign(source_centroid) update_tc=self.target_moving_centroid.assign(target_centroid) return tf.group(update_sc,update_tc)
def init_gradients(self, loss, var_train): if self.play_mode: return with tf.device(self.args.device): var_refs = [v.ref() for v in var_train] train_gradients = tf.gradients( loss, var_refs, gate_gradients=False, aggregation_method=None, colocate_gradients_with_ops=False) acc_gradient_list = [] train_step_list = [] new_grad_vars = [] self.grad_list = [] var_list = [] for grad, var in zip(train_gradients, self.global_vars): acc_gradient = tf.Variable(tf.zeros(grad.get_shape()), trainable=False) acc_gradient_list.append(acc_gradient) train_step_list.append(acc_gradient.assign_add(grad)) new_grad_vars.append((tf.convert_to_tensor(acc_gradient, dtype=tf.float32), var)) self.grad_list.append(acc_gradient) var_list.append(var) self.train_step = tf.group(*train_step_list) self.reset_acc_gradients = tf.initialize_variables(acc_gradient_list) self.apply_grads = self.global_optimizer.apply_gradients(new_grad_vars) sync_list = [] for i in range(0, len(self.global_vars)): sync_list.append(var_train[i].assign(self.global_vars[i])) self.sync = tf.group(*sync_list)
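# Hedged sketch (TF 1.x assumed) of the accumulate-then-apply pattern above: gradients are summed into
# non-trainable accumulator variables with a grouped assign_add step and applied later in one update.
# The scalar loss and the accumulator/apply/reset names are illustrative, not the original class's API.
import tensorflow as tf

w = tf.Variable(3.0)
loss = tf.square(w)
opt = tf.train.GradientDescentOptimizer(0.1)

grads_and_vars = opt.compute_gradients(loss, var_list=[w])
accumulators = [tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
                for _, v in grads_and_vars]
accumulate_op = tf.group(*[acc.assign_add(g) for acc, (g, _) in zip(accumulators, grads_and_vars)])
apply_op = opt.apply_gradients(
    [(tf.convert_to_tensor(acc), v) for acc, (_, v) in zip(accumulators, grads_and_vars)])
reset_op = tf.group(*[acc.assign(tf.zeros_like(acc)) for acc in accumulators])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(4):              # accumulate gradients over four "mini-batches"
        sess.run(accumulate_op)
    sess.run(apply_op)              # apply the summed gradient once
    sess.run(reset_op)              # clear the accumulators for the next round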
def running_mean(cost, tag_name, batch_size=1): with tf.name_scope("running_mean_" + tag_name): with tf.variable_scope(tag_name): cost_sum = tf.get_variable( "cost_sum", initializer=tf.zeros_initializer, dtype=tf.float64, shape=(), collections=[tf.GraphKeys.LOCAL_VARIABLES], trainable=False) batches = tf.get_variable( "cost_num_batches", initializer=tf.zeros_initializer, dtype=tf.int32, shape=(), collections=[tf.GraphKeys.LOCAL_VARIABLES], trainable=False) cost_add = tf.assign_add(cost_sum, tf.cast(cost, dtype=tf.float64)) batches_add = tf.assign_add(batches, batch_size) update_cost_mean = tf.group(cost_add, batches_add) reset_batches = tf.assign(batches, 0) reset_cost_sum = tf.assign(cost_sum, 0.0) reset_cost_mean = tf.group(reset_batches, reset_cost_sum) mean_cost = tf.divide( cost_sum, tf.cast(batches, dtype=tf.float64)) train_loss_summary = tf.summary.scalar(tag_name, mean_cost) return reset_cost_mean, update_cost_mean, train_loss_summary
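# Possible usage of the running_mean helper above (assuming it is in scope). The reset op is run at the
# start of an epoch, the update op after every batch, and the summary op whenever a log point is reached.
# The placeholder and batch costs are illustrative.
import tensorflow as tf

cost = tf.placeholder(tf.float32, shape=())
reset_cost_mean, update_cost_mean, train_loss_summary = running_mean(cost, "train_loss")

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())   # cost_sum / cost_num_batches are LOCAL_VARIABLES
    sess.run(reset_cost_mean)
    for batch_cost in [0.9, 0.7, 0.5]:
        sess.run(update_cost_mean, feed_dict={cost: batch_cost})
    summary_str = sess.run(train_loss_summary)   # summarizes the mean over the three batches (0.7)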
def testSummariesAreFlushedToDiskWithoutGlobalStep(self): output_dir = os.path.join(self.get_temp_dir(), 'flush_test_no_global_step') if tf.gfile.Exists(output_dir): # For running on jenkins. tf.gfile.DeleteRecursively(output_dir) names_to_metrics, names_to_updates = self._create_names_to_metrics( self._predictions, self._labels) for k in names_to_metrics: v = names_to_metrics[k] tf.summary.scalar(k, v) summary_writer = tf.train.SummaryWriter(output_dir) initial_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) eval_op = tf.group(*names_to_updates.values()) with self.test_session() as sess: slim.evaluation.evaluation( sess, initial_op=initial_op, eval_op=eval_op, summary_op=tf.summary.merge_all(), summary_writer=summary_writer) names_to_values = {name: names_to_metrics[name].eval() for name in names_to_metrics} self._verify_summaries(output_dir, names_to_values)
def AddTraining(self, task_context, batch_size, learning_rate=0.1, decay_steps=4000, momentum=0.9, corpus_name='documents'): """Builds a trainer to minimize the cross entropy cost function. Args: task_context: file path from which to read the task context batch_size: batch size to request from reader op learning_rate: initial value of the learning rate decay_steps: decay learning rate by 0.96 every this many steps momentum: momentum parameter used when training with momentum corpus_name: name of the task input to read parses from Returns: Dictionary of named training nodes. """ with tf.name_scope('training'): nodes = self.training nodes.update(self._AddGoldReader(task_context, batch_size, corpus_name)) nodes.update(self._BuildNetwork(nodes['feature_endpoints'], return_average=False)) nodes.update(self._AddCostFunction(batch_size, nodes['gold_actions'], nodes['logits'])) # Add the optimizer if self._only_train: trainable_params = [v for k, v in self.params.iteritems() if k in self._only_train] else: trainable_params = self.params.values() lr = self._AddLearningRate(learning_rate, decay_steps) optimizer = tf.train.MomentumOptimizer(lr, momentum, use_locking=self._use_locking) train_op = optimizer.minimize(nodes['cost'], var_list=trainable_params) for param in trainable_params: slot = optimizer.get_slot(param, 'momentum') self.inits[slot.name] = state_ops.init_variable(slot, tf.zeros_initializer) self.variables[slot.name] = slot numerical_checks = [ tf.check_numerics(param, message='Parameter is not finite.') for param in trainable_params if param.dtype.base_dtype in [tf.float32, tf.float64] ] check_op = tf.group(*numerical_checks) avg_update_op = tf.group(*self._averaging.values()) train_ops = [train_op] if self._check_parameters: train_ops.append(check_op) if self._use_averaging: train_ops.append(avg_update_op) nodes['train_op'] = tf.group(*train_ops, name='train_op') return nodes
def create_variables(self, settings): self.network_names = [ 'state_encoder', 'action_decoder', 'value_decoder', ] #### CREATE ALL THE NETWORKS self.networks = { name:parse_block(settings['networks'][name]) for name in self.network_names } #### CREATE VARIABLES TO STORE GRADIENTS self.net_grads = {} for n in self.network_names: self.net_grads[n] = [ tf.Variable(tf.zeros_like(v), name=v.name.split(':')[0]+"_grad") for v in self.networks[n].variables() ] #### CREATE COMBINED NETWORK: state -> action self.action_network = SequenceWrapper( [self.networks["state_encoder"], self.networks["action_decoder"]], scope="action_network") #### CREATE COMBINED NETWORK: state -> state_value self.value_network = SequenceWrapper( [self.networks["state_encoder"], self.networks["value_decoder"]], scope="value_network") #### COMPUTE STATE VALUE AND ACTION self.state = self.networks["state_encoder"].input_placeholder() self.action_probs = self.action_network(self.state) self.action_id = tf.argmax(self.action_probs, dimension=1) self.state_value = tf.reduce_sum(self.value_network(self.state), 1) #### COMPUTE ACTOR UPDATE self.reward = tf.placeholder(tf.float32, (None,)) self.chosen_action_id = tf.placeholder(tf.int64, (None,)) self.advantage = self.reward - tf.stop_gradient(self.state_value) self.onehot = tf.constant(np.diag( np.ones((self.num_actions,), dtype=np.float32))) self.chosen_action_mask = tf.nn.embedding_lookup(self.onehot, self.chosen_action_id) self.chosen_action_prob = tf.reduce_sum(self.action_probs * self.chosen_action_mask, 1) self.actor_loss = - tf.log(self.chosen_action_prob) * self.advantage self.update_actor_grads = tf.group(*[ self.update_network_grads('state_encoder', self.actor_loss), self.update_network_grads('action_decoder', self.actor_loss), ]) #### COMPUTE VALUE NETWORK UPDATE self.value_loss = tf.square(self.reward - self.state_value) self.update_value_grads = tf.group(*[ self.update_network_grads('state_encoder', self.value_loss), self.update_network_grads('value_decoder', self.value_loss), ])
def __init__(self, target, name, do_inverses=False): self.name = name self.target = target self.do_inverses = do_inverses self.tf_svd = SvdTuple(tf.svd(target)) self.update_counter = 0 self.init = SvdTuple( ones(target.shape[0], name=name+"_s_init"), Identity(target.shape[0], name=name+"_u_init"), Identity(target.shape[0], name=name+"_v_init"), Identity(target.shape[0], name=name+"_inv_init"), ) assert self.tf_svd.s.shape == self.init.s.shape assert self.tf_svd.u.shape == self.init.u.shape assert self.tf_svd.v.shape == self.init.v.shape # assert self.tf_svd.inv.shape == self.init.inv.shape self.cached = SvdTuple( tf.Variable(self.init.s, name=name+"_s"), tf.Variable(self.init.u, name=name+"_u"), tf.Variable(self.init.v, name=name+"_v"), tf.Variable(self.init.inv, name=name+"_inv"), ) self.s = self.cached.s self.u = self.cached.u self.v = self.cached.v self.inv = self.cached.inv self.holder = SvdTuple( tf.placeholder(default_dtype, shape=self.cached.s.shape, name=name+"_s_holder"), tf.placeholder(default_dtype, shape=self.cached.u.shape, name=name+"_u_holder"), tf.placeholder(default_dtype, shape=self.cached.v.shape, name=name+"_v_holder"), tf.placeholder(default_dtype, shape=self.cached.inv.shape, name=name+"_inv_holder") ) self.update_tf_op = tf.group( self.cached.s.assign(self.tf_svd.s), self.cached.u.assign(self.tf_svd.u), self.cached.v.assign(self.tf_svd.v), self.cached.inv.assign(self.tf_svd.inv) ) self.update_external_op = tf.group( self.cached.s.assign(self.holder.s), self.cached.u.assign(self.holder.u), self.cached.v.assign(self.holder.v), ) self.update_externalinv_op = tf.group( self.cached.inv.assign(self.holder.inv), ) self.init_ops = (self.s.initializer, self.u.initializer, self.v.initializer, self.inv.initializer)
def test_mnist(self): import tensor_dynamic.data.input_data as mnist num_labeled = 100 data = mnist.read_data_sets("../data/MNIST_data", n_labeled=num_labeled, one_hot=True) batch_size = 100 num_epochs = 1 num_examples = 60000 num_iter = (num_examples/batch_size) * num_epochs starter_learning_rate = 0.02 inputs = tf.placeholder(tf.float32, shape=(None, 784)) targets = tf.placeholder(tf.float32) with tf.Session() as s: s.as_default() i = InputLayer(inputs) l1 = LadderLayer(i, 500, 1000.0, s) l2 = LadderGammaLayer(l1, 10, 10.0, s) ladder = LadderOutputLayer(l2, 0.1, s) loss = ladder.cost_all_layers_train(targets) learning_rate = tf.Variable(starter_learning_rate, trainable=False) train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss) bn_updates = tf.group(*(l1.bn_assigns + l2.bn_assigns)) with tf.control_dependencies([train_step]): train_step = tf.group(bn_updates) pred_cost = -tf.reduce_mean(tf.reduce_sum(targets * tf.log(tf.clip_by_value(ladder.activation_predict, 1e-10, 1.0)), 1)) # cost used for prediction correct_prediction = tf.equal(tf.argmax(ladder.activation_predict, 1), tf.argmax(targets, 1)) # no of correct predictions accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float")) * tf.constant(100.0) s.run(tf.initialize_all_variables()) #print "init accuracy", s.run([accuracy], feed_dict={inputs: data.test.images, targets: data.test.labels}) min_loss = 100000. writer = tf.train.SummaryWriter("/tmp/td", s.graph_def) writer.add_graph(s.graph_def) for i in range(num_iter): images, labels = data.train.next_batch(batch_size) _, loss_val = s.run([train_step, loss], feed_dict={inputs: images, targets: labels}) if loss_val < min_loss: min_loss = loss_val print(i, loss_val) # print "acc", s.run([accuracy], feed_dict={inputs: data.test.images, targets: data.test.labels}) #acc = s.run(accuracy, feed_dict={inputs: data.test.images, targets: data.test.labels}) print "min loss", min_loss #print "final accuracy ", acc self.assertLess(min_loss, 20.0)
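# Minimal sketch (TF 1.x assumed) of the pattern used in the snippets above: build the optimizer step
# first, then attach the batch-norm statistic updates with control_dependencies + tf.group so a single
# fetched op does both. The hand-rolled running_avg update stands in for the ladder network's bn_assigns.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 1])
w = tf.Variable(1.0)
loss = tf.reduce_mean(tf.square(w * x))

running_avg = tf.Variable(0.0, trainable=False)
bn_update = tf.assign(running_avg, 0.9 * running_avg + 0.1 * tf.reduce_mean(x))

train_step = tf.train.AdamOptimizer(0.01).minimize(loss)
with tf.control_dependencies([train_step]):
    train_step = tf.group(bn_update)   # fetching train_step now also refreshes the running statistics

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_step, feed_dict={x: np.ones((4, 1), np.float32)})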
def apply_gradients(self, grads_and_vars, global_step=None, name=None): var_list = [ v for _,v in grads_and_vars] with ops.init_scope(): zt = [self._get_or_make_slot(v, v, "zt", self._name) for _,v in grads_and_vars] slots_list = [] for name in self.optimizer.get_slot_names(): for var in self.optimizer.variables(): self._get_or_make_slot(var, var, "zt", "zt") self._prepare() def _name(post, s): ss = s.split(":") return ss[0] + "_" + post + "_dontsave" zt = [self.get_slot(v, "zt") for _,v in grads_and_vars] xt = [tf.Variable(v, name=_name("gigaxt",v.name)) for _,v in grads_and_vars] tmp = [tf.Variable(v, name=_name("gigatmp",v.name)) for _,v in grads_and_vars] xslots_list = [] zslots_list = [] tmpslots_list = [] slots_vars = [] for name in self.optimizer.get_slot_names(): for var in self.optimizer.variables(): slots_vars += [var] xslots_list.append(tf.Variable(var)) zslots_list.append(self._get_or_make_slot(var, var, "zt", "zt")) tmpslots_list.append(tf.Variable(var, name=_name("gigaslottmp", var.name))) restored_vars = var_list + slots_vars zt_vars = zt + zslots_list xt_vars = xt + xslots_list tmp_vars = tmp + tmpslots_list all_grads = [ g for g, _ in grads_and_vars ] # store variables for resetting op1 = tf.group(*[tf.assign(w, v) for w,v in zip(tmp_vars, restored_vars)]) # store tmp_vars with tf.get_default_graph().control_dependencies([op1]): op2 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name) with tf.get_default_graph().control_dependencies([op2]): op3 = tf.group(*[tf.assign(w, v) for w,v in zip(xt_vars, restored_vars)]) # store xt^+1 in xt_vars with tf.get_default_graph().control_dependencies([op3]): op4 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, zt_vars)]) # restore vars to zt (different weights) with tf.get_default_graph().control_dependencies([op4]): op5 = self.optimizer2.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name) # zt+1 with tf.get_default_graph().control_dependencies([op5]): zt1_xt1 = [_restored_vars - _xt1_vars for _restored_vars, _xt1_vars in zip(restored_vars, xt_vars)] St1 = [tf.minimum(1.0, tf.norm(_zt1_vars-_zt_vars) / tf.norm(_zt1_xt1)) for _zt1_vars, _zt_vars, _zt1_xt1 in zip(restored_vars, zt_vars, zt1_xt1)] self.gan.add_metric('st1',tf.reduce_mean(tf.add_n(St1)/len(St1))) #self.gan.add_metric('xzt1',tf.norm(xt_vars[0]-zt_vars[0])) nextw = [_xt_t1 + _St1 * _zt1_xt1 for _xt_t1, _St1, _zt1_xt1 in zip(xt_vars, St1, zt1_xt1)] op6 = tf.group(*[tf.assign(w, v) for w,v in zip(zt_vars, restored_vars)]) # set zt+1 with tf.get_default_graph().control_dependencies([op6]): op7 = tf.group(*[tf.assign(w, v) for w,v in zip(restored_vars, nextw)]) # set xt+1 with tf.get_default_graph().control_dependencies([op7]): return tf.no_op()
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [v for _, v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise ValueError("Couldn't find var in g_vars or d_vars")

    w = [tf.Variable(self.config.start_at or 0.0), tf.Variable(self.config.start_at or 0.0)]

    Vidv = [self.gan.trainer.d_loss, self.gan.trainer.g_loss]
    #Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2.* self.gan.trainer.g_loss, -1/2. * self.gan.trainer.d_loss - 1/2.* self.gan.trainer.g_loss]
    Vsoc = [1/2. * self.gan.trainer.d_loss + 1/2. * self.gan.trainer.g_loss,
            1/2. * self.gan.trainer.d_loss + 1/2. * self.gan.trainer.g_loss]

    wlr = self.config.w_learn_rate or 0.01
    wt1 = [w[0] + wlr * (Vidv[0] - Vsoc[0]), w[1] + wlr * (Vidv[1] - Vsoc[1])]

    def clamped(net):
        return tf.maximum(self.config.min or 0., tf.minimum(net, self.config.max or 1.))

    self._prepare()

    wt1 = [clamped(wt1[0]), clamped(wt1[1])]
    self.gan.add_metric('wt0', wt1[0])
    self.gan.add_metric('wt1', wt1[1])
    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(w, wt1)])  # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        Vi = [(1. - w[0]) * Vidv[0] + w[0] * Vsoc[0],
              (1. - w[1]) * Vidv[1] + w[1] * Vsoc[1]]
        if self.config.reverse_w:
            Vi = [(w[0]) * Vidv[0] + (1.0 - w[0]) * Vsoc[0],
                  (w[1]) * Vidv[1] + (1.0 - w[1]) * Vsoc[1]]
        self.gan.add_metric('w0', w[0])
        self.gan.add_metric('w1', w[1])

        new_grads = tf.gradients(Vi[0], d_vars) + tf.gradients(Vi[1], g_vars)
        self.gan.trainer.d_loss = Vi[0]
        self.gan.trainer.g_loss = Vi[1]
        new_grads_and_vars = list(zip(new_grads, var_list)).copy()
        op3 = self.optimizer.apply_gradients(new_grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):
            if self.config.w_l1:
                # return to selfish state
                wt1 = [wt1[0] + self.config.w_l1 * ((self.config.l1_default or 0.0) - wt1[0]),
                       wt1[1] + self.config.w_l1 * ((self.config.l1_default or 0.0) - wt1[1])]
                op4 = tf.group(*[tf.assign(w, v) for w, v in zip(w, wt1)])  # store variables
                with tf.get_default_graph().control_dependencies([op4]):
                    self.gan.add_metric('l1w0', w[0])
                    self.gan.add_metric('l1w1', w[1])
                    return tf.no_op()
            else:
                return tf.no_op()
def _add_ema(model, decay): """Create ops needed to track EMA when training. :param model: The model with a `.sess` we want to track. :param decay: float, Decay to use in the EMA :returns: ema_op: The update op. This applies the ema to each variable. Should be set as a control dependency on the training op. load: Op to copy emas to the variables. restore_var: Op to copy the original variables back from the EMA ones. Note: If you run the load op multiple times then the backup variables will be replaced by the ema variables. Currently there was a bug I haven't been able to fix. I haven't found why but sometimes when you run it with a tf.cond you get this error. `tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[0] does not have value` The stop gap is to remove this which means if you run load multiple times it will over write the backup variables with ema values. The load op is set up to automatically save the normal parameters when you load the ema's in. """ ema = tf.train.ExponentialMovingAverage(decay=decay) model_vars = model.sess.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) with tf.variable_scope("BackupVariables"): backup_vars = [ tf.get_variable( var.op.name, dtype=var.value().dtype, trainable=False, initializer=var.initialized_value() ) for var in model_vars ] ema_op = ema.apply(model_vars) save_back_up = tf.group(*( tf.assign(back, var.read_value()) for var, back in zip(model_vars, backup_vars) ), name='save_backups') with tf.control_dependencies([save_back_up]): load = tf.group(*( tf.assign(var, ema.average(var).read_value()) for var in model_vars ), name="load_ema") restore_vars = tf.group(*( tf.assign(var, back.read_value()) for var, back in zip(model_vars, backup_vars) ), name="restore_backups") return ema_op, load, restore_vars
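# A compact, hedged illustration (TF 1.x assumed) of the EMA swap built above: apply the EMA during
# training, save the raw weights to a backup, load the averaged weights for evaluation, then restore.
# The single scalar variable stands in for the model's trainable variables.
import tensorflow as tf

w = tf.Variable(2.0, name="w")
ema = tf.train.ExponentialMovingAverage(decay=0.9)
ema_op = ema.apply([w])                                   # run after every training step

backup = tf.Variable(0.0, trainable=False, name="w_backup")
save_backup = tf.assign(backup, w)
with tf.control_dependencies([save_backup]):
    load_ema = tf.group(tf.assign(w, ema.average(w)))     # swap EMA weights in; originals saved first
restore = tf.group(tf.assign(w, backup))                  # swap the original weights back

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.assign(w, 4.0))   # pretend a training step moved w
    sess.run(ema_op)              # shadow value moves from 2.0 toward 4.0
    sess.run(load_ema)            # evaluate with the averaged weights
    sess.run(restore)             # continue training with the raw weights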
def _create(self): gan = self.gan generator = self.gan.generator config = self.config d_vars = self.d_vars or gan.discriminator.variables() loss = self.loss or gan.loss d_loss, g_loss = loss.sample self.d_log = -tf.log(tf.abs(d_loss+TINY)) d_optimizer = self.build_optimizer(config, 'd_', config.d_trainer, self.d_lr, d_vars, d_loss) #TODO more than one g_loss g_optimizer = [self.build_optimizer(config, 'g_', config.g_trainer, self.g_lr, g.variables(), g_loss) for g, l in zip(generator.children, loss.children_losses)] assign_children = [] for p, o in generator.parent_child_tuples: for ov, pv in zip(o.variables(), p.variables()): op=tf.assign(ov, pv) if config.mutation_percent: op += tf.random_normal(self.gan.ops.shape(pv), mean=0, stddev=0.01) * tf.cast(tf.greater(config.mutation_percent, tf.random_uniform(shape=self.gan.ops.shape(pv), minval=0, maxval=1)), tf.float32) assign_children.append(op) self.clone_parent = tf.group(*assign_children) update_parent=[] for p, o in generator.parent_child_tuples: c_to_p = [] for ov, pv in zip(o.variables(), p.variables()): op=tf.assign(pv, ov) c_to_p.append(op) update_parent.append(tf.group(*c_to_p)) self.update_parent = update_parent f_lambda = config.f_lambda or 1 def _squash(grads): return tf.add_n([tf.reshape(gan.ops.squash(g), [1]) for g in grads]) children_grads = [_squash(tf.gradients(l, d_vars)) for l in loss.children_losses] if config.fitness == "g": self.measure_g = [-l for l in loss.children_losses] else: self.measure_g = [-l+f_lambda*(-tf.log(TINY+grad_d - tf.log(TINY+tf.nn.sigmoid(loss.d_loss)) - tf.log(TINY+1-tf.nn.sigmoid(l)))) for l, grad_d in zip(loss.children_losses, children_grads)] loss.metrics['measure_g'] = tf.reduce_mean(self.measure_g) loss.metrics['g_loss'] = loss.g_loss loss.metrics['d_loss'] = loss.d_loss self.g_loss = g_loss self.d_loss = d_loss self.d_optimizer = d_optimizer self.g_optimizer = g_optimizer self.hist = [0 for i in range(len(self.gan.generator.children))] return g_optimizer, d_optimizer
def create_networks_and_training_method(self,state_dim,action_dim): theta_p = networks.theta_p(state_dim,action_dim) theta_q = networks.theta_q(state_dim,action_dim) target_theta_p,target_update_p = self.exponential_moving_averages(theta_p,TAU) target_theta_q,target_update_q = self.exponential_moving_averages(theta_q,TAU) self.state = tf.placeholder(tf.float32,[None,state_dim],'state') self.action_test = networks.policy_network(self.state,theta_p) # Initialize a random process the Ornstein-Uhlenbeck process for action exploration self.exploration = OUNoise(action_dim) noise = self.exploration.noise() self.action_exploration = self.action_test + noise q = networks.q_network(self.state,self.action_test,theta_q) # policy optimization mean_q = tf.reduce_mean(q) weight_decay_p = tf.add_n([L2_POLICY * tf.nn.l2_loss(var) for var in theta_p]) loss_p = -mean_q + weight_decay_p optim_p = tf.train.AdamOptimizer(P_LEARNING_RATE) grads_and_vars_p = optim_p.compute_gradients(loss_p, var_list=theta_p) optimize_p = optim_p.apply_gradients(grads_and_vars_p) with tf.control_dependencies([optimize_p]): self.train_p = tf.group(target_update_p) # q optimization self.action_train = tf.placeholder(tf.float32,[None,action_dim],'action_train') self.reward = tf.placeholder(tf.float32,[None],'reward') self.next_state = tf.placeholder(tf.float32,[None,state_dim],'next_state') self.done = tf.placeholder(tf.bool,[None],'done') q_train = networks.q_network(self.state,self.action_train,theta_q) next_action = networks.policy_network(self.next_state,theta=target_theta_p) next_q = networks.q_network(self.next_state,next_action,theta=target_theta_q) q_target = tf.stop_gradient(tf.select(self.done,self.reward,self.reward + GAMMA * next_q)) # q loss q_error = tf.reduce_mean(tf.square(q_target - q_train)) weight_decay_q = tf.add_n([L2_Q * tf.nn.l2_loss(var) for var in theta_q]) loss_q = q_error + weight_decay_q optim_q = tf.train.AdamOptimizer(Q_LEARNING_RATE) grads_and_vars_q = optim_q.compute_gradients(loss_q, var_list=theta_q) optimize_q = optim_q.apply_gradients(grads_and_vars_q) with tf.control_dependencies([optimize_q]): self.train_q = tf.group(target_update_q) tf.scalar_summary("loss_q",loss_q) tf.scalar_summary("loss_p",loss_p) tf.scalar_summary("q_mean",mean_q) global merged_summary_op merged_summary_op = tf.merge_all_summaries()
def get_target_updates(vars, target_vars, tau):
    logger.info('setting up target updates ...')
    soft_updates = []
    init_updates = []
    assert len(vars) == len(target_vars)
    for var, target_var in zip(vars, target_vars):
        logger.info('  {} <- {}'.format(target_var.name, var.name))
        init_updates.append(tf.assign(target_var, var))
        soft_updates.append(tf.assign(target_var, (1. - tau) * target_var + tau * var))
    assert len(init_updates) == len(vars)
    assert len(soft_updates) == len(vars)
    return tf.group(*init_updates), tf.group(*soft_updates)
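# Hedged sketch of how the helper above is typically used (assuming get_target_updates and its logger
# are in scope, TF 1.x): a hard copy to initialize the target network and a grouped Polyak (soft)
# update run once per training step. The two scalar variables stand in for real network parameters.
import tensorflow as tf

tau = 0.01
online_vars = [tf.Variable(1.0), tf.Variable(2.0)]
target_vars = [tf.Variable(0.0), tf.Variable(0.0)]

init_updates, soft_updates = get_target_updates(online_vars, target_vars, tau)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(init_updates)   # target <- online once at the start
    sess.run(soft_updates)   # target <- (1 - tau) * target + tau * online each step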
def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None): """A normalizer that ensures that observations are approximately distributed according to a standard Normal distribution (i.e. have mean zero and variance one). Args: size (int): the size of the observation to be normalized eps (float): a small constant that avoids underflows default_clip_range (float): normalized observations are clipped to be in [-default_clip_range, default_clip_range] sess (object): the TensorFlow session to be used """ self.size = size self.eps = eps self.default_clip_range = default_clip_range self.sess = sess if sess is not None else tf.get_default_session() self.local_sum = np.zeros(self.size, np.float32) self.local_sumsq = np.zeros(self.size, np.float32) self.local_count = np.zeros(1, np.float32) self.sum_tf = tf.get_variable( initializer=tf.zeros_initializer(), shape=self.local_sum.shape, name='sum', trainable=False, dtype=tf.float32) self.sumsq_tf = tf.get_variable( initializer=tf.zeros_initializer(), shape=self.local_sumsq.shape, name='sumsq', trainable=False, dtype=tf.float32) self.count_tf = tf.get_variable( initializer=tf.ones_initializer(), shape=self.local_count.shape, name='count', trainable=False, dtype=tf.float32) self.mean = tf.get_variable( initializer=tf.zeros_initializer(), shape=(self.size,), name='mean', trainable=False, dtype=tf.float32) self.std = tf.get_variable( initializer=tf.ones_initializer(), shape=(self.size,), name='std', trainable=False, dtype=tf.float32) self.count_pl = tf.placeholder(name='count_pl', shape=(1,), dtype=tf.float32) self.sum_pl = tf.placeholder(name='sum_pl', shape=(self.size,), dtype=tf.float32) self.sumsq_pl = tf.placeholder(name='sumsq_pl', shape=(self.size,), dtype=tf.float32) self.update_op = tf.group( self.count_tf.assign_add(self.count_pl), self.sum_tf.assign_add(self.sum_pl), self.sumsq_tf.assign_add(self.sumsq_pl) ) self.recompute_op = tf.group( tf.assign(self.mean, self.sum_tf / self.count_tf), tf.assign(self.std, tf.sqrt(tf.maximum( tf.square(self.eps), self.sumsq_tf / self.count_tf - tf.square(self.sum_tf / self.count_tf) ))), ) self.lock = threading.Lock()
def parameterized_vs_naive(shape, num_iters):
    np.random.seed(1618)  # Make it reproducible.

    # No CSE/CF.
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    config = tf.ConfigProto(graph_options=tf.GraphOptions(optimizer_options=optimizer_options))

    with tf.Session(config=config) as sess:
        param_op = tf.group(random_ops.parameterized_truncated_normal(shape))
        naive_op = tf.group(random_ops.truncated_normal(shape))

        param_dt = timeit.timeit(lambda: sess.run(param_op), number=num_iters)
        naive_dt = timeit.timeit(lambda: sess.run(naive_op), number=num_iters)
        return param_dt, naive_dt
def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    var_list = [v for _, v in grads_and_vars]
    d_vars = []
    g_vars = []
    for grad, var in grads_and_vars:
        if var in self.gan.d_vars():
            d_vars += [var]
        elif var in self.gan.g_vars():
            g_vars += [var]
        else:
            raise ValueError("Couldn't find var in g_vars or d_vars")

    with ops.init_scope():
        v1 = [self._zeros_slot(v, "v1", self._name) for _, v in grads_and_vars]
        if self.config.include_slots:
            for name in self.optimizer.get_slot_names():
                for var in self.optimizer.variables():
                    self._zeros_slot(var, "pm", "pm")
    self._prepare()

    v1 = [self.get_slot(v, "v1") for _, v in grads_and_vars]
    slots_list = []
    slots_vars = []
    if self.config.include_slots:
        for name in self.optimizer.get_slot_names():
            for var in self.optimizer.variables():
                slots_vars += [var]
                slots_list.append(self._zeros_slot(var, "pm", "pm"))

    current_vars = var_list + slots_vars
    tmp_vars = v1 + slots_list
    all_grads = [g for g, _ in grads_and_vars]

    op1 = tf.group(*[tf.assign(w, v) for w, v in zip(tmp_vars, current_vars)])  # store variables

    with tf.get_default_graph().control_dependencies([op1]):
        # store g2
        #op3 = tf.group(*[tf.assign_sub(v, self._lr_t*grad) for grad,v in grads_and_vars])
        op3 = self.optimizer.apply_gradients(grads_and_vars.copy(), global_step=global_step, name=name)
        with tf.get_default_graph().control_dependencies([op3]):

            def pmcombine(_v1, _v2):
                return _v2 + (_v2 - _v1)

            combined = [pmcombine(_v1, _v2) for _v1, _v2 in zip(tmp_vars, current_vars)]

            # restore v1, slots
            op5 = tf.group(*[tf.assign(w, v) for w, v in zip(current_vars, combined)])
            with tf.get_default_graph().control_dependencies([op5]):
                return tf.no_op()
def __init__(self, queues, enqueue_ops):
    close_op = tf.group(*[q.close() for q in queues])
    cancel_op = tf.group(*[q.close(cancel_pending_enqueues=True) for q in queues])
    queue_closed_exception_types = (errors.OutOfRangeError,)
    enqueue_op = tf.group(*enqueue_ops, name="multi_enqueue")
    super(MultiQueueRunner, self).__init__(
        queues[0],
        enqueue_ops=[enqueue_op],
        close_op=close_op,
        cancel_op=cancel_op,
        queue_closed_exception_types=queue_closed_exception_types)
def calculate_depth(grads_and_vars_k, k=0):
    if k == 0:
        return tf.group(*[tf.assign(v, nv) for v, nv in zip(depth_vars, depth_slots)])

    op2 = self.optimizer.apply_gradients(grads_and_vars_k, global_step=global_step, name=name)
    with tf.get_default_graph().control_dependencies([op2]):
        w_k_combined = [self._decay * w_k_1 + (1. - self._decay) * w_hat
                        for w_hat, w_k_1 in zip(depth_slots, depth_vars)]
        op3 = tf.group(*[tf.assign(w, v) for w, v in zip(depth_slots, w_k_combined)])  # store variables
        with tf.get_default_graph().control_dependencies([op3]):
            d_loss, g_loss = self.gan.loss.sample
            d_grads = tf.gradients(d_loss, d_vars)
            g_grads = tf.gradients(g_loss, g_vars)
            grads_k_1 = d_grads + g_grads
            grads_and_vars_k_1 = list(zip(grads_k_1, depth_vars)).copy()
            return calculate_depth(grads_and_vars_k_1, k - 1)
def testSummariesAreFlushedToDisk(self): output_dir = os.path.join(self.get_temp_dir(), 'flush_test') if tf.gfile.Exists(output_dir): # For running on jenkins. tf.gfile.DeleteRecursively(output_dir) accuracy0, update_op0 = tf.contrib.metrics.streaming_accuracy( self._predictions, self._labels) accuracy1, update_op1 = tf.contrib.metrics.streaming_accuracy( self._predictions+1, self._labels) names_to_metrics = { 'Accuracy': accuracy0, 'Another accuracy': accuracy1, } for k in names_to_metrics: v = names_to_metrics[k] tf.scalar_summary(k, v) summary_writer = tf.train.SummaryWriter(output_dir) init_op = tf.group(tf.initialize_all_variables(), tf.initialize_local_variables()) eval_op = tf.group(update_op0, update_op1) with self.test_session() as sess: slim.evaluation.evaluation( sess, init_op=init_op, eval_op=eval_op, summary_op=tf.merge_all_summaries(), summary_writer=summary_writer, global_step=self._global_step) # Check that the results were saved. The events file may have additional # entries, e.g. the event version stamp, so have to parse things a bit. output_filepath = glob.glob(os.path.join(output_dir, '*')) self.assertEqual(len(output_filepath), 1) events = tf.train.summary_iterator(output_filepath[0]) summaries = [e.summary for e in events if e.summary.value] values = [] for summary in summaries: for value in summary.value: values.append(value) saved_results = {v.tag: v.simple_value for v in values} for name in names_to_metrics: self.assertAlmostEqual(names_to_metrics[name].eval(), saved_results[name])
def evaluate(): """Eval ocr for a number of steps.""" with tf.Graph().as_default() as g: images, labels, seq_lengths = ocr.inputs() logits, timesteps = ocr.inference(images, FLAGS.eval_batch_size, train=True) ler = ocr.create_label_error_rate(logits, labels, timesteps) init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) config = tf.ConfigProto( device_count={'GPU': 0} ) sess = tf.Session(config=config) sess.run(init_op) saver = tf.train.Saver() summary_op = tf.summary.merge_all() summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g) while True: eval_once(saver, summary_writer, ler, summary_op) if FLAGS.run_once: break # print("Waiting for next evaluation for " + str(FLAGS.eval_interval_secs) + " sec") time.sleep(FLAGS.eval_interval_secs)
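# Small sketch (TF 1.x assumed) of the combined initializer used above: streaming metrics and similar
# helpers create local variables, so grouping the global and local initializers covers both with a
# single run call. The two variables here are illustrative.
import tensorflow as tf

counter = tf.Variable(0, name="counter")                     # lives in GLOBAL_VARIABLES
local_total = tf.get_variable("local_total", shape=(), trainable=False,
                              initializer=tf.zeros_initializer(),
                              collections=[tf.GraphKeys.LOCAL_VARIABLES])

init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())

with tf.Session() as sess:
    sess.run(init_op)
    print(sess.run([counter, local_total]))   # both variables are initialized by the grouped op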
def moving_average(value, window):
    value = tf.to_float(value)
    shape = value.get_shape()

    queue_init = tf.zeros(tf.TensorShape(window).concatenate(shape))
    total_init = tf.zeros(shape)
    num_init = tf.constant(0, dtype=tf.float32)

    queue = tf.FIFOQueue(window, [tf.float32], shapes=[shape])
    total = tf.Variable(total_init, trainable=False)
    num = tf.Variable(num_init, trainable=False)

    init = tf.cond(
        tf.equal(queue.size(), 0),
        lambda: tf.group(
            queue.enqueue_many(queue_init),
            total.assign(total_init),
            num.assign(num_init)),
        lambda: tf.no_op())

    with tf.control_dependencies([init]):
        total_ = total + value - queue.dequeue()
        num_ = num + 1
        value_averaged = total_ / (tf.minimum(num_, window) + EPSILON)

    with tf.control_dependencies([queue.enqueue([value]), total.assign(total_), num.assign(num_)]):
        return tf.identity(value_averaged)
def initialize_variables(sess, saver, logdir, checkpoint=None, resume=None): """Initialize or restore variables from a checkpoint if available. Args: sess: Session to initialize variables in. saver: Saver to restore variables. logdir: Directory to search for checkpoints. checkpoint: Specify what checkpoint name to use; defaults to most recent. resume: Whether to expect recovering a checkpoint or starting a new run. Raises: ValueError: If resume expected but no log directory specified. RuntimeError: If no resume expected but a checkpoint was found. """ sess.run(tf.group( tf.local_variables_initializer(), tf.global_variables_initializer())) if resume and not (logdir or checkpoint): raise ValueError('Need to specify logdir to resume a checkpoint.') if logdir: state = tf.train.get_checkpoint_state(logdir) if checkpoint: checkpoint = os.path.join(logdir, checkpoint) if not checkpoint and state and state.model_checkpoint_path: checkpoint = state.model_checkpoint_path if checkpoint and resume is False: message = 'Found unexpected checkpoint when starting a new run.' raise RuntimeError(message) if checkpoint: saver.restore(sess, checkpoint)
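# Possible usage of initialize_variables above (assuming it is in scope). Variables are always
# initialized first; if a checkpoint exists and resuming is allowed, it then overwrites them.
# The logdir path is illustrative.
import tensorflow as tf

w = tf.Variable(0.0, name="w")
saver = tf.train.Saver()
with tf.Session() as sess:
    # Starts fresh if /tmp/example_run has no checkpoint, restores the latest one otherwise.
    initialize_variables(sess, saver, logdir='/tmp/example_run', resume=None)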
def _outputs_with_release(self, handle, inputs, outputs): """Ensures ComputeSession is released before outputs are returned. Args: handle: Handle to ComputeSession on which all computation until now has depended. It will be released and assigned to the output 'run'. inputs: list of nodes we want to pass through without any dependencies. outputs: list of nodes whose access should ensure the ComputeSession is safely released. Returns: A dictionary of both input and output nodes. """ with tf.control_dependencies(outputs.values()): with tf.name_scope('ComputeSession'): release_op = dragnn_ops.release_session(handle) run_op = tf.group(release_op, name='run') for output in outputs: with tf.control_dependencies([release_op]): outputs[output] = tf.identity(outputs[output], name=output) all_nodes = inputs.copy() all_nodes.update(outputs) # Add an alias for simply running without collecting outputs. # Common, for instance, with training. all_nodes['run'] = run_op return all_nodes
def _finish(self, update_ops, name_scope):
    # Append the counter op itself (not wrapped in a list) so tf.group only receives ops.
    update_ops.append(self._counter.assign_add(1))
    return tf.group(*update_ops, name=name_scope)
def train(logits, represent_feature_tensor, images_tensor, expand_images_tensor, labels_tensor, is_training_tensor, save_model_path=None, step_width=100, record_loss=False): cross_id = 1 patches_dir = '/home/give/Documents/dataset/MICCAI2018/Patches/crossvalidation' roi_dir = '/home/give/Documents/dataset/MICCAI2018/Slices/crossvalidation' pre_load = True train_dataset = DataSet(os.path.join(patches_dir, str(cross_id), 'train'), 'train', pre_load=pre_load, rescale=True, divied_liver=False, expand_is_roi=True, full_roi_path=os.path.join(roi_dir, str(cross_id), 'train')) val_dataset = DataSet(os.path.join(patches_dir, str(cross_id), 'test'), 'test', pre_load=pre_load, rescale=True, divied_liver=False, expand_is_roi=True, full_roi_path=os.path.join(roi_dir, str(cross_id), 'test')) train_batchdata = train_dataset.get_next_batch(net_config.BATCH_SIZE) val_batchdata = val_dataset.get_next_batch(net_config.BATCH_SIZE) global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) val_step = tf.get_variable('val_step', [], initializer=tf.constant_initializer(0), trainable=False) # inter loss loss_last = loss(logits, labels_tensor) loss_inter = loss_last # intra loss if has_centerloss: represent_feature_tensor_shape = represent_feature_tensor.get_shape().as_list() print 'represent_feature_tensor_shape is ', represent_feature_tensor_shape centers_value = np.zeros([category_num, represent_feature_tensor_shape[1]], dtype=np.float32) print 'centers_value shape is ', np.shape(centers_value) centers_saved_tensor = tf.get_variable('center_tensor', shape=[category_num, represent_feature_tensor_shape[1]], initializer=tf.truncated_normal_initializer(stddev=CONV_WEIGHT_STDDEV), dtype=tf.float32, trainable=False) centers_tensor = tf.placeholder(dtype=tf.float32, shape=[category_num, represent_feature_tensor_shape[1]]) print 'center_tensor shape is ', tf.shape(centers_tensor) center_loss = calculate_centerloss(represent_feature_tensor, labels_tensor, centers_tensor=centers_tensor) owner_step = tf.py_func(update_centers, [centers_tensor, represent_feature_tensor, labels_tensor, category_num], tf.float32) loss_ = loss_inter + _lambda * center_loss else: loss_ = loss_inter predictions = tf.nn.softmax(logits) print 'predictions shape is ', predictions print 'label is ', labels_tensor top1_error = top_k_error(predictions, labels_tensor, 1) labels_onehot = tf.one_hot(labels_tensor, logits.get_shape().as_list()[-1]) print 'output node is ', logits.get_shape().as_list()[-1] accuracy_tensor = calculate_accuracy(predictions, labels_onehot) # loss_avg ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step) tf.add_to_collection(UPDATE_OPS_COLLECTION, ema.apply([loss_])) tf.summary.scalar('loss_avg', ema.average(loss_)) # validation stats ema = tf.train.ExponentialMovingAverage(0.9, val_step) val_op = tf.group(val_step.assign_add(1), ema.apply([top1_error])) top1_error_avg = ema.average(top1_error) tf.summary.scalar('val_top1_error_avg', top1_error_avg) tf.summary.scalar('learning_rate', FLAGS.learning_rate) opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, MOMENTUM) grads = opt.compute_gradients(loss_) for grad, var in grads: if grad is not None and not FLAGS.minimal_summaries: tf.summary.histogram(var.op.name + '/gradients', grad) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) if not FLAGS.minimal_summaries: # Display the training images in the visualizer. 
tf.summary.image('images', images_tensor) for var in tf.trainable_variables(): tf.summary.image(var.op.name, var) batchnorm_updates = tf.get_collection(UPDATE_OPS_COLLECTION) batchnorm_updates_op = tf.group(*batchnorm_updates) if has_centerloss: with tf.control_dependencies([apply_gradient_op, batchnorm_updates_op, owner_step]): train_op = tf.no_op('train') else: train_op = tf.group(apply_gradient_op, batchnorm_updates_op) saver = tf.train.Saver(tf.all_variables()) summary_op = tf.summary.merge_all() init = tf.initialize_all_variables() sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) sess.run(init) tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph) val_summary_writer = tf.summary.FileWriter(FLAGS.log_val_dir, sess.graph) if FLAGS.resume: latest = tf.train.latest_checkpoint(FLAGS.load_model_path) if not latest: print "No checkpoint to continue from in", FLAGS.train_dir sys.exit(1) print "resume", latest saver.restore(sess, latest) centers_value = sess.run(centers_saved_tensor) for x in xrange(FLAGS.max_steps + 1): start_time = time.time() step = sess.run(global_step) if has_centerloss: i = [train_op, loss_, owner_step] else: i = [train_op, loss_] write_summary = step % 100 and step > 1 if write_summary: i.append(summary_op) train_roi_batch_images, train_expand_roi_batch_images, train_labels = train_batchdata.next() o = sess.run(i, feed_dict={ images_tensor: train_roi_batch_images, expand_images_tensor: train_expand_roi_batch_images, labels_tensor: train_labels, centers_tensor: centers_value, is_training_tensor: True }) if has_centerloss: centers_value = o[2] centers_saved_tensor = tf.convert_to_tensor(np.asarray(centers_value, np.float32), np.float32) loss_value = o[1] duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if (step - 1) % step_width == 0: accuracy_value, inter_loss_value, center_loss_value, labels_values, predictions_values = sess.run( [accuracy_tensor, loss_inter, center_loss, labels_tensor, predictions], feed_dict={ images_tensor: train_roi_batch_images, expand_images_tensor: train_expand_roi_batch_images, labels_tensor: train_labels, centers_tensor: centers_value, is_training_tensor: True }) examples_per_sec = FLAGS.batch_size / float(duration) # accuracy = eval_accuracy(predictions_values, labels_values) format_str = ('step %d, loss = %.2f, inter_loss = %.5f, center_loss =%.5f, accuracy value = %g (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (step, loss_value, inter_loss_value, center_loss_value, accuracy_value, examples_per_sec, duration)) if write_summary: if has_centerloss: summary_str = o[3] else: summary_str = o[2] summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. 
if step > 1 and step % step_width == 0: checkpoint_path = os.path.join(save_model_path, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=global_step) save_dir = os.path.join(save_model_path, str(step)) if not os.path.exists(save_dir): os.mkdir(save_dir) filenames = glob(os.path.join(save_model_path, '*-'+str(int(step + 1))+'.*')) for filename in filenames: shutil.copy( filename, os.path.join(save_dir, os.path.basename(filename)) ) # Run validation periodically if step > 1 and step % step_width == 0: val_roi_batch_images, val_expand_roi_batch_images, val_labels = val_batchdata.next() _, top1_error_value, summary_value, accuracy_value, labels_values, predictions_values = sess.run( [val_op, top1_error, summary_op, accuracy_tensor, labels_tensor, predictions], { images_tensor: val_roi_batch_images, expand_images_tensor: val_expand_roi_batch_images, centers_tensor: centers_value, labels_tensor: val_labels, is_training_tensor: False }) predictions_values = np.argmax(predictions_values, axis=1) # accuracy = eval_accuracy(predictions_values, labels_values) calculate_acc_error( logits=predictions_values, label=labels_values, show=True ) print('Validation top1 error %.2f, accuracy value %f' % (top1_error_value, accuracy_value)) val_summary_writer.add_summary(summary_value, step)
def train(dataset): """Train on dataset for a number of steps.""" with tf.Graph().as_default(), tf.device('/cpu:0'): # Create a variable to count the number of train() calls. This equals the # number of batches processed * FLAGS.num_gpus. global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) # Calculate the learning rate schedule. num_batches_per_epoch = (dataset.num_examples_per_epoch() / FLAGS.batch_size) decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay) # Decay the learning rate exponentially based on the number of steps. lr = tf.train.exponential_decay(FLAGS.initial_learning_rate, global_step, decay_steps, FLAGS.learning_rate_decay_factor, staircase=True) # Create an optimizer that performs gradient descent. opt = tf.train.RMSPropOptimizer(lr, RMSPROP_DECAY, momentum=RMSPROP_MOMENTUM, epsilon=RMSPROP_EPSILON) # Get images and labels for ImageNet and split the batch across GPUs. assert FLAGS.batch_size % FLAGS.num_gpus == 0, ( 'Batch size must be divisible by number of GPUs') split_batch_size = int(FLAGS.batch_size / FLAGS.num_gpus) # Override the number of preprocessing threads to account for the increased # number of GPU towers. num_preprocess_threads = FLAGS.num_preprocess_threads * FLAGS.num_gpus images, labels = image_processing.distorted_inputs( dataset, num_preprocess_threads=num_preprocess_threads) input_summaries = copy.copy(tf.get_collection(tf.GraphKeys.SUMMARIES)) # Number of classes in the Dataset label. num_classes = dataset.num_classes() # Split the batch of images and labels for towers. images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images) labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels) # Calculate the gradients for each model tower. tower_grads = [] reuse_variables = None for i in range(FLAGS.num_gpus): with tf.device('/gpu:%d' % i): with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope: # Force all Variables to reside on the CPU. with slim.arg_scope([slim.variables.variable], device='/cpu:0'): # Calculate the loss for one tower of the ImageNet model. This # function constructs the entire ImageNet model but shares the # variables across all towers. loss = _tower_loss(images_splits[i], labels_splits[i], num_classes, scope, reuse_variables) # Reuse variables for the next tower. reuse_variables = True # Retain the summaries from the final tower. summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) # Retain the Batch Normalization updates operations only from the # final tower. Ideally, we should grab the updates from all towers # but these stats accumulate extremely fast so we can ignore the # other stats from the other towers without significant detriment. batchnorm_updates = tf.get_collection( slim.ops.UPDATE_OPS_COLLECTION, scope) # Calculate the gradients for the batch of data on this ImageNet # tower. grads = opt.compute_gradients(loss) # Keep track of the gradients across all towers. tower_grads.append(grads) # We must calculate the mean of each gradient. Note that this is the # synchronization point across all towers. grads = _average_gradients(tower_grads) # Add a summaries for the input processing and global_step. summaries.extend(input_summaries) # Add a summary to track the learning rate. summaries.append(tf.summary.scalar('learning_rate', lr)) # Add histograms for gradients. 
for grad, var in grads: if grad is not None: summaries.append( tf.summary.histogram(var.op.name + '/gradients', grad)) # Apply the gradients to adjust the shared variables. apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) # Add histograms for trainable variables. for var in tf.trainable_variables(): summaries.append(tf.summary.histogram(var.op.name, var)) # Track the moving averages of all trainable variables. # Note that we maintain a "double-average" of the BatchNormalization # global statistics. This is more complicated then need be but we employ # this for backward-compatibility with our previous models. variable_averages = tf.train.ExponentialMovingAverage( inception.MOVING_AVERAGE_DECAY, global_step) # Another possibility is to use tf.slim.get_variables(). variables_to_average = (tf.trainable_variables() + tf.moving_average_variables()) variables_averages_op = variable_averages.apply(variables_to_average) # Group all updates to into a single train op. batchnorm_updates_op = tf.group(*batchnorm_updates) train_op = tf.group(apply_gradient_op, variables_averages_op, batchnorm_updates_op) # Create a saver. saver = tf.train.Saver(tf.global_variables()) # Build the summary operation from the last tower summaries. summary_op = tf.summary.merge(summaries) # Build an initialization operation to run below. init = tf.global_variables_initializer() # Start running operations on the Graph. allow_soft_placement must be set to # True to build towers on GPU, as some of the ops do not have GPU # implementations. sess = tf.Session(config=tf.ConfigProto( allow_soft_placement=True, log_device_placement=FLAGS.log_device_placement)) sess.run(init) if FLAGS.pretrained_model_checkpoint_path: assert tf.gfile.Exists(FLAGS.pretrained_model_checkpoint_path) variables_to_restore = tf.get_collection( slim.variables.VARIABLES_TO_RESTORE) restorer = tf.train.Saver(variables_to_restore) restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path) print('%s: Pre-trained model restored from %s' % (datetime.now(), FLAGS.pretrained_model_checkpoint_path)) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph) for step in range(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: examples_per_sec = FLAGS.batch_size / float(duration) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print(format_str % (datetime.now(), step, loss_value, examples_per_sec, duration)) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 5000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
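# Condensed, hedged version (TF 1.x assumed) of the final grouping above: one fetched op applies the
# gradients, updates the variable moving averages, and runs the batch-norm statistic updates. The toy
# loss and the hand-rolled bn_stat update are illustrative stand-ins for the Inception tower.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 2])
w = tf.Variable(tf.ones([2, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))

opt = tf.train.GradientDescentOptimizer(0.05)
apply_gradient_op = opt.apply_gradients(opt.compute_gradients(loss))

variable_averages = tf.train.ExponentialMovingAverage(0.999)
variables_averages_op = variable_averages.apply(tf.trainable_variables())

bn_stat = tf.Variable(0.0, trainable=False)
batchnorm_updates_op = tf.group(tf.assign(bn_stat, 0.9 * bn_stat + 0.1 * tf.reduce_mean(x)))

train_op = tf.group(apply_gradient_op, variables_averages_op, batchnorm_updates_op)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op, feed_dict={x: np.ones((4, 2), np.float32)})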
def build_graph(checkpoint_file): with tf.Graph().as_default() as graph: tf.logging.set_verbosity(tf.logging.INFO) # Get the dataset first and load one batch of validation images and labels tensors. Set is_training as False so as to use the evaluation preprocessing dataset = get_split('validation', FLAGS.dataset_dir) images, raw_images, labels = load_batch(dataset, batch_size=FLAGS.batch_size, is_training=False) # Create some information about the training steps num_batches_per_epoch = dataset.num_samples / FLAGS.batch_size num_steps_per_epoch = num_batches_per_epoch # Now create the inference model but set is_training=False with slim.arg_scope(inception_resnet_v2_arg_scope()): logits, end_points = inception_resnet_v2( images, num_classes=dataset.num_classes, is_training=False) logits_op = end_points['Logits'] pred_op = end_points['Predictions'] # logging.info("The logits output from the model is: %s, The prediction of the model is: %s" % (end_points['Logits'], end_points['Predictions'])) # #get all the variables to restore from the checkpoint file and create the saver function to restore variables_to_restore = slim.get_variables_to_restore() saver = tf.train.Saver(variables_to_restore) def restore_fn(sess): return saver.restore(sess, checkpoint_file) # Just define the metrics to track without the loss or whatsoever predictions = tf.argmax(end_points['Predictions'], 1) accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy( predictions, labels) ## decleartion? acc_mine = tf.reduce_mean( tf.cast(tf.equal(predictions, labels), tf.float32)) metrics_op = tf.group(accuracy_update) def get_pred_and_label(sess): pred = sess.run([pred_op]) label = sess.run([labels]) label = label[0] # logging.info('--------visulizing the pred: %s' % pred) # logging.info('--------get the shape of pred: %s' % pred[0][0][1]) pred_pos = np.empty(FLAGS.batch_size) for i in range(len(pred)): pos_list = pred[0][i] pred_pos[i] = pos_list[1] label[i] = float(label[i]) # logging.info('--------visulizing the pred: %s' % type(pred_pos)) logging.info('--------visulizing the label: %s' % label) # logging.info('--------visulizing the label: %s' % type(label)) return pred_pos, label # Create the global step and an increment op for monitoring global_step = get_or_create_global_step() global_step_op = tf.assign( global_step, global_step + 1 ) # no apply_gradient method so manually increasing the global_step # Create a evaluation step function def eval_step(sess, metrics_op): ''' Simply takes in a session, runs the metrics op and some logging information. ''' start_time = time.time() _, global_step_count, accuracy_value, step_logits, step_prediction, step_acc = sess.run( [ metrics_op, global_step_op, accuracy, logits_op, pred_op, acc_mine ]) time_elapsed = time.time() - start_time # Log some information # logging.info('Global Step %s: Streaming Accuracy: %.4f (%.2f sec/step)', global_step_count, accuracy_value, time_elapsed) logging.info( 'The averange accuracy of this batch(total 36 samples) is: %s' % step_acc) # for i in range(len(step_prediction)): # # pred = 'True' if predictions[i] == labels[i] else 'False' # logging.info("The prediction of %s th image is : %s" % ((i, max(step_prediction[i])))) return accuracy_value # Define some scalar quantities to monitor tf.summary.scalar('Validation_Accuracy', accuracy) my_summary_op = tf.summary.merge_all()
def train(): inputs, gt_alphas = datasets.get_dataset() model = network.MnasUnet(inputs, is_training=True) total_loss = losses.compute_loss(model.end_points, gt_alphas, mode=FLAGS.mode) """ set the update operations for training """ update_ops = [] variables_to_train = tf.trainable_variables() global_step = tf.Variable(0, name='global_step', trainable=False) lr = _get_learning_rate(FLAGS.num_images, global_step) optimizer = tf.train.GradientDescentOptimizer(lr) update_opt = optimizer.minimize(total_loss, global_step, variables_to_train) update_ops.append(update_opt) update_bns = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if len(update_bns): update_bn = tf.group(*update_bns) update_ops.append(update_bn) update_op = tf.group(*update_ops) """ set Summary and log info """ tf.summary.scalar('learning_rate', lr) tf.summary.scalar('total_loss', total_loss) tf.summary.scalar('model_loss', model.end_points['model_loss']) tf.summary.scalar('regular_loss', model.end_points['regular_loss']) summary_op = tf.summary.merge_all() logdir = os.path.join(FLAGS.summaries_dir, strftime('%Y%m%d%H%M%S', gmtime())) if not os.path.exists(logdir): os.makedirs(logdir) summary_writer = tf.summary.FileWriter(logdir, graph=tf.Session().graph) """ set saver for saving final model and backbone model for restore """ saver = tf.train.Saver(max_to_keep=3) """ Set Gpu Env """ init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) gpu_opt = tf.GPUOptions(per_process_gpu_memory_fraction=0.8, allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_opt)) as sess: sess.run(init_op) ckpt = tf.train.get_checkpoint_state(FLAGS.training_checkpoint_model) """ resotre checkpoint of Backbone network """ if ckpt is not None: lastest_ckpt = tf.train.latest_checkpoint( FLAGS.training_checkpoint_model) print('lastest', lastest_ckpt) re_saver = tf.train.Saver(var_list=tf.global_variables()) re_saver.restore(sess, lastest_ckpt) else: restore_vars = _get_restore_vars("MnasNet") re_saver = tf.train.Saver(var_list=restore_vars) re_saver.restore(sess, "data/pretrained_models/MnasNet_224_final.ckpt") """ Generate threads """ coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: while not coord.should_stop(): s_time = time.time() _, loss, current_step = sess.run( [update_op, total_loss, global_step]) duration_time = time.time() - s_time print("""iter %d: time:%.3f(sec), total-loss %.4f""" % (current_step, duration_time, loss)) if np.isnan(loss) or np.isinf(loss): print('isnan or isinf', loss) raise if current_step % 10 == 0: # write summary summary = sess.run(summary_op) summary_writer.add_summary(summary, current_step) summary_writer.flush() if current_step % 50 == 0: # Save a checkpoint save_path = 'output/training/MnasUnet_Matting.ckpt' saver.save(sess, save_path, global_step=current_step) if current_step + 1 == FLAGS.max_iters: print('max iter : %d, current_step : %d' % (FLAGS.max_iters, current_step)) break except tf.errors.OutOfRangeError: print('Error occured') finally: saver.save(sess, './output/models/MnasUnet_Matting_final.ckpt', write_meta_graph=False) coord.request_stop() coord.join(threads) sess.close()
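# A sketch of the resume-or-initialize logic used above: restore the full training state from the
# newest checkpoint when one exists, otherwise restore only the backbone variables from a
# pretrained file. The directory, file name, and scope below are placeholders, not the real paths.
import tensorflow as tf

def restore_or_init(sess, ckpt_dir='/tmp/train_ckpts', backbone_ckpt='/tmp/backbone.ckpt', scope='MnasNet'):
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt is not None:
        # resume every variable (optimizer state included) from the latest checkpoint
        tf.train.Saver(var_list=tf.global_variables()).restore(
            sess, tf.train.latest_checkpoint(ckpt_dir))
    else:
        # cold start: restore only the backbone weights from the pretrained model
        backbone_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
        tf.train.Saver(var_list=backbone_vars).restore(sess, backbone_ckpt)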
def run_training(fold_num, train_tfrecord_path, test_tfrecord_path, train_batch_size=60, test_batch_size=30): with tf.Graph().as_default(): # with tf.device('/gpu:'+GPU_NUM): images, label = read_and_decode(train_tfrecord_path) # use shuffle_batch to randomly shuffle the training inputs images_batch, label_batch = tf.train.shuffle_batch( [images, label], batch_size=train_batch_size, capacity=1000, min_after_dequeue=800) images_test, label_test = read_and_decode_4_test(test_tfrecord_path) # use tf.train.batch for the test inputs (no shuffling) images_batch_test, label_batch_test = tf.train.batch( [images_test, label_test], batch_size=test_batch_size, capacity=1000) # Generate placeholders for the images and labels. images_placeholder, labels_placeholder, keep_prob, is_train = placeholder_inputs( ) # Build a Graph that computes predictions from the inference model. fe_logits = model.inference(images_placeholder, keep_prob, is_train) # Add to the Graph the Ops for loss calculation. loss = model.loss(fe_logits, labels_placeholder) # Add to the Graph the Ops that calculate and apply gradients. global_step = tf.Variable(0, trainable=False) train_op = model.training(loss, flags.learning_rate, global_step) # Add the Op to compare the logits to the labels during evaluation. eval_correct = model.evaluation(fe_logits, labels_placeholder) # Build the summary Tensor based on the TF collection of Summaries. summary = tf.summary.merge_all() # Add the variable initializer Op. init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) # Create a saver for writing training checkpoints. # saver = tf.train.Saver() # Create a session for running Ops on the Graph. gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.48) with tf.Session( config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True, gpu_options=gpu_options)) as sess: # Instantiate a SummaryWriter to output summaries and the Graph. train_writer = tf.summary.FileWriter( './summaries_new/summaries_graph_0420/' + str(fold_num) + '/train', sess.graph) test_writer = tf.summary.FileWriter( './summaries_new/summaries_graph_0420/' + str(fold_num) + '/test', sess.graph) # And then after everything is built: # Run the Op to initialize the variables. sess.run(init) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) img_test, l_test = sess.run([images_batch_test, label_batch_test]) test_feed_dict = fill_feed_dict(img_test, l_test, 1.0, False, images_placeholder, labels_placeholder, keep_prob, is_train) # Start the training loop. last_train_correct = [] last_test_correct = [] for step in range(flags.max_steps): start_time = time.time() # Fill a feed dictionary with the actual set of images and labels # for this particular training step. img, l = sess.run([images_batch, label_batch]) feed_dict = fill_feed_dict(img, l, 0.9, True, images_placeholder, labels_placeholder, keep_prob, is_train) # Run one step of the model. The return values are the activations # from the `train_op` (which is discarded) and the `loss` Op. To # inspect the values of your Ops or variables, you may include them # in the list passed to sess.run() and the value tensors will be # returned in the tuple from the call. _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict) duration = time.time() - start_time # Write the summaries and print an overview fairly often. if step % 100 == 0 or (step + 1) == flags.max_steps: print('fold_num:{}'.format(fold_num)) print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration)) # Update the events file.
train_summary_str = sess.run(summary, feed_dict=feed_dict) test_summary_str = sess.run(summary, feed_dict=test_feed_dict) train_writer.add_summary(train_summary_str, step) test_writer.add_summary(test_summary_str, step) # summary_writer.flush() print('Training Data Eval:') train_correct = sess.run(eval_correct, feed_dict=feed_dict) print('train_correct:{}'.format(train_correct)) print('Test Data Eval:') test_correct = sess.run(eval_correct, feed_dict=test_feed_dict) print('test_correct:{}\n\n'.format(test_correct)) # if (step + 1) == flags.max_steps: if step > flags.max_steps - 10 * 50: last_train_correct.append(train_correct) last_test_correct.append(test_correct) if (step + 1) == flags.max_steps: # fe_logits_last_values = sess.run(fe_logits, feed_dict=test_feed_dict) # np.savetxt('./summaries/summaries_graph_1219/' + str(fold_num) + '/logit.txt', # fe_logits_last_values) # np.savetxt('./summaries/summaries_graph_1219/' + str(fold_num) + '/test_l.txt', # l_test) print(last_train_correct) print(last_test_correct) print(np.array(last_train_correct).mean()) print(np.array(last_test_correct).mean()) # saver_path = saver.save(sess, "/home/duheran/facial_expresssion/save/dtgn.ckpt") # save the model to the save/model.ckpt file # print("Model saved in file:", saver_path) coord.request_stop() coord.join(threads) return last_train_correct, last_test_correct
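# A compact sketch of the dual-writer pattern above: the same merged summary op is evaluated with
# a training feed and a test feed, and each result goes to its own FileWriter so TensorBoard shows
# both curves. The scalar and the log directories are placeholders.
import tensorflow as tf

loss = tf.placeholder(tf.float32, [])
tf.summary.scalar('loss', loss)
summary = tf.summary.merge_all()

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter('/tmp/logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('/tmp/logs/test')
    for step in range(3):
        train_writer.add_summary(sess.run(summary, {loss: 1.0 / (step + 1)}), step)
        test_writer.add_summary(sess.run(summary, {loss: 2.0 / (step + 1)}), step)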
def __init__(self, conf, tasksconf, dataconf, modelconf, evaluatorconf, expdir, init_filename, server, task_index): ''' NnetTrainer constructor, creates the training graph Args: conf: the trainer config taskconf: the config file for each task dataconf: the data configuration as a ConfigParser modelconf: the neural net model configuration evaluatorconf: the evaluator configuration for evaluating if None no evaluation will be done expdir: directory where the summaries will be written init_filename: filename of the network that should be used to initialize the model. Put to None if no network is available/wanted. server: optional server to be used for distributed training task_index: optional index of the worker task in the cluster ''' self.expdir = expdir self.server = server self.conf = conf self.tasksconf = tasksconf self.task_index = task_index self.init_filename = init_filename self.batch_size = int(conf['batch_size']) cluster = tf.train.ClusterSpec(server.server_def.cluster) #create the graph self.graph = tf.Graph() if 'local' in cluster.as_dict(): num_replicas = 1 device = tf.DeviceSpec(job='local') else: #distributed training num_replicas = len(cluster.as_dict()['worker']) num_servers = len(cluster.as_dict()['ps']) ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy( num_tasks=num_servers, load_fn=tf.contrib.training.byte_size_load_fn) device = tf.train.replica_device_setter(ps_tasks=num_servers, ps_strategy=ps_strategy) chief_ps = tf.DeviceSpec(job='ps', task=0) self.is_chief = task_index == 0 #create the model modelfile = os.path.join(expdir, 'model', 'model.pkl') with open(modelfile, 'wb') as fid: self.model = model_factory.factory( modelconf.get('model', 'architecture'))(conf=modelconf) pickle.dump(self.model, fid) evaltype = evaluatorconf.get('evaluator', 'evaluator') #define the placeholders in the graph with self.graph.as_default(): #create a local num_steps variable self.num_steps = tf.get_variable( name='num_steps', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) #a variable to hold the amount of steps already taken self.global_step = tf.get_variable( name='global_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) should_terminate = tf.get_variable( name='should_terminate', shape=[], dtype=tf.bool, initializer=tf.constant_initializer(False), trainable=False) self.terminate = should_terminate.assign(True).op #create a check if training should continue self.should_stop = tf.logical_or( tf.greater_equal(self.global_step, self.num_steps), should_terminate) with tf.variable_scope('train') as train_scope: tasks_losses = [] if evaltype != 'None': with tf.variable_scope('validate') as val_scope: tasks_val_losses = [] #3 model types for multi task: single one to one; single one to many; multiple one to one #single one to one: the whole model is shared for all tasks, only loss function can be different #single one to many: each task has a separate output so only part of the network is shared, eg evrything but the output layer #multiple one to one: each task has its own network. 
Possibly the outputs are combined in a loss function for task in self.conf['tasks'].split(' '): taskconf = self.tasksconf[task] #get the database configurations input_names = modelconf.get('io', 'inputs').split(' ') if input_names == ['']: input_names = [] input_sections = [taskconf[i].split(' ') for i in input_names] input_dataconfs = [] for sectionset in input_sections: input_dataconfs.append([]) for section in sectionset: input_dataconfs[-1].append(dict(dataconf.items(section))) output_names = taskconf['targets'].split(' ') if output_names == ['']: output_names = [] target_sections = [taskconf[o].split(' ') for o in output_names] target_dataconfs = [] for sectionset in target_sections: target_dataconfs.append([]) for section in sectionset: target_dataconfs[-1].append(dict(dataconf.items(section))) #create the loss computer loss_computer = loss_computer_factory.factory( taskconf['loss_type'])(self.batch_size) #create the evaluator if evaltype != 'None': evaluator = evaluator_factory.factory(evaltype)( conf=evaluatorconf, dataconf=dataconf, model=self.model, task=task) with self.graph.as_default(): #check if running in distributed model if 'local' in cluster.as_dict(): #get the filenames data_queue_elements, _ = input_pipeline.get_filenames( input_dataconfs + target_dataconfs) #create the data queue and queue runners (inputs get shuffled! I already did this so set to False) data_queue = tf.train.string_input_producer( string_tensor=data_queue_elements, shuffle=False, seed=None, capacity=self.batch_size * 2, shared_name='data_queue') #compute the number of steps if int(conf['numbatches_to_aggregate']) == 0: num_steps = (int(conf['num_epochs']) * len(data_queue_elements) / self.batch_size) else: num_steps = (int(conf['num_epochs']) * len(data_queue_elements) / (self.batch_size * int(conf['numbatches_to_aggregate']))) #set the number of steps self.set_num_steps = self.num_steps.assign(num_steps).op self.done = tf.no_op() else: with tf.device(chief_ps): #get the data queue data_queue = tf.FIFOQueue(capacity=self.batch_size * (num_replicas + 1), shared_name='data_queue', name='data_queue', dtypes=[tf.string], shapes=[[]]) #get the number of steps from the parameter server num_steps_queue = tf.FIFOQueue( capacity=num_replicas, dtypes=[tf.int32], shared_name='num_steps_queue', name='num_steps_queue', shapes=[[]]) #set the number of steps self.set_num_steps = self.num_steps.assign( num_steps_queue.dequeue()).op #get the done queues done_ops = [] for i in range(num_servers): with tf.device('job:ps/task:%d' % i): done_queue = tf.FIFOQueue( capacity=num_replicas, dtypes=[tf.bool], shapes=[[]], shared_name='done_queue%d' % i, name='done_queue%d' % i) done_ops.append(done_queue.enqueue(True)) self.done = tf.group(*done_ops) #training part with tf.variable_scope(train_scope): with tf.variable_scope(task): #create the input pipeline data, seq_length = input_pipeline.input_pipeline( data_queue=data_queue, batch_size=self.batch_size, numbuckets=int(conf['numbuckets']), dataconfs=input_dataconfs + target_dataconfs) inputs = { input_names[i]: d for i, d in enumerate(data[:len(input_sections)]) } seq_length = { input_names[i]: d for i, d in enumerate( seq_length[:len(input_sections)]) } targets = { output_names[i]: d for i, d in enumerate(data[len(input_sections):]) } #target_seq_length = { #output_names[i]: d #for i, d in enumerate(seq_length[len(input_sections):])} #compute the training outputs of the model logits = self.model(inputs=inputs, input_seq_length=seq_length, is_training=True) #compute the loss 
task_loss = loss_computer(targets, logits, seq_length) tasks_losses.append(task_loss) #validation part if evaltype != 'None': with tf.variable_scope(val_scope): with tf.variable_scope(task): task_val_batch_loss, self.valbatches, _, _ = evaluator.evaluate( ) tasks_val_losses.append(task_val_batch_loss) with self.graph.as_default(): with tf.variable_scope(train_scope): #a variable to scale the learning rate (used to reduce the #learning rate in case validation performance drops) learning_rate_fact = tf.get_variable( name='learning_rate_fact', shape=[], initializer=tf.constant_initializer(1.0), trainable=False) #compute the learning rate with exponential decay and scale #with the learning rate factor self.learning_rate = (tf.train.exponential_decay( learning_rate=float(conf['initial_learning_rate']), global_step=self.global_step, decay_steps=self.num_steps, decay_rate=float(conf['learning_rate_decay'])) * learning_rate_fact) #create the optimizer optimizer = tf.train.AdamOptimizer(self.learning_rate) #TODO: The proper way to exploit data paralellism is via the #SyncReplicasOptimizer defined below. However for some reason it hangs #and I have not yet found a solution for it. For the moment the gradients #are accumulated in a way that does not allow data paralellism and there # is no advantage on having multiple workers. (We also accumulate the loss) #create an optimizer that aggregates gradients #if int(conf['numbatches_to_aggregate']) > 0: #optimizer = tf.train.SyncReplicasOptimizer( #opt=optimizer, #replicas_to_aggregate=int( #conf['numbatches_to_aggregate'])#, ##total_num_replicas=num_replicas #) loss = tf.reduce_mean(tasks_losses) self.total_loss = tf.get_variable( name='total_loss', shape=[], dtype=tf.float32, initializer=tf.constant_initializer(0), trainable=False) self.reset_loss = self.total_loss.assign(0.0) self.acc_loss = self.total_loss.assign_add(loss) ##compute the gradients #grads_and_vars = optimizer.compute_gradients(self.loss) #with tf.variable_scope('clip'): #clip_value = float(conf['clip_grad_value']) ##clip the gradients #grads_and_vars = [(tf.clip_by_value(grad, -clip_value, clip_value), var) #for grad, var in grads_and_vars] self.params = tf.trainable_variables() grads = [ tf.get_variable(param.op.name, param.get_shape().as_list(), initializer=tf.constant_initializer(0), trainable=False) for param in self.params ] self.reset_grad = tf.variables_initializer(grads) #compute the gradients minibatch_grads_and_vars = optimizer.compute_gradients(loss) with tf.variable_scope('clip'): clip_value = float(conf['clip_grad_value']) #clip the gradients minibatch_grads_and_vars = [ (tf.clip_by_value(grad, -clip_value, clip_value), var) for grad, var in minibatch_grads_and_vars ] (minibatchgrads, minibatchvars) = zip(*minibatch_grads_and_vars) #update gradients by accumulating them self.update_gradients = [ grad.assign_add(batchgrad) for batchgrad, grad in zip(minibatchgrads, grads) ] #opperation to apply the gradients grads_and_vars = list(zip(grads, minibatchvars)) apply_gradients_op = optimizer.apply_gradients( grads_and_vars=grads_and_vars, global_step=self.global_step, name='apply_gradients') #all remaining operations with the UPDATE_OPS GraphKeys update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) #create an operation to update the gradients, the batch_loss #and do all other update ops self.update_op = tf.group(*([apply_gradients_op] + update_ops), name='update') with self.graph.as_default(): if evaltype != 'None': #validation part with tf.variable_scope(val_scope): #create a 
variable to hold the validation loss self.validation_loss = tf.get_variable( name='validation_loss', shape=[], dtype=tf.float32, initializer=tf.constant_initializer(0), trainable=False) #create a variable to save the last step where the model #was validated validated_step = tf.get_variable( name='validated_step', shape=[], dtype=tf.int32, initializer=tf.constant_initializer( -int(conf['valid_frequency'])), trainable=False) #a check if validation is due self.should_validate = tf.greater_equal( self.global_step - validated_step, int(conf['valid_frequency'])) #compute the loss val_batch_loss = tf.reduce_mean(tasks_val_losses) self.update_loss = self.validation_loss.assign( self.validation_loss + val_batch_loss #/self.valbatches ).op #update the learning rate factor self.half_lr = learning_rate_fact.assign( learning_rate_fact / 2).op #create an operation to updated the validated step self.update_validated_step = validated_step.assign( self.global_step).op #variable to hold the best validation loss so far self.best_validation = tf.get_variable( name='best_validation', shape=[], dtype=tf.float32, initializer=tf.constant_initializer(1.79e+308), trainable=False) #op to update the best velidation loss self.update_best = self.best_validation.assign( self.validation_loss).op #a variable that holds the amount of workers at the #validation point waiting_workers = tf.get_variable( name='waiting_workers', shape=[], dtype=tf.int32, initializer=tf.constant_initializer(0), trainable=False) #an operation to signal a waiting worker self.waiting = waiting_workers.assign_add(1).op #an operation to set the waiting workers to zero self.reset_waiting = waiting_workers.initializer #an operation to check if all workers are waiting self.all_waiting = tf.equal(waiting_workers, num_replicas - 1) tf.summary.scalar('validation loss', self.validation_loss) else: self.update_loss = None tf.summary.scalar('learning rate', self.learning_rate) #create a histogram for all trainable parameters for param in tf.trainable_variables(): tf.summary.histogram(param.name, param) #create the scaffold self.scaffold = tf.train.Scaffold()
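# A stripped-down sketch of the gradient-accumulation scheme above: one non-trainable accumulator
# variable per parameter, an op that adds the current minibatch gradients into the accumulators,
# and an apply op that feeds the accumulated values to the optimizer. The toy variable, loss and
# learning rate are assumptions for illustration.
import tensorflow as tf

w = tf.get_variable('w', shape=[3], initializer=tf.zeros_initializer())
loss = tf.reduce_sum(tf.square(w - 1.0))
optimizer = tf.train.AdamOptimizer(1e-3)

params = tf.trainable_variables()
accum_grads = [tf.get_variable(p.op.name + '/accum', p.get_shape().as_list(),
                               initializer=tf.constant_initializer(0), trainable=False)
               for p in params]
reset_grad = tf.variables_initializer(accum_grads)

minibatch_grads_and_vars = optimizer.compute_gradients(loss, var_list=params)
update_gradients = tf.group(*[acc.assign_add(grad)
                              for (grad, _), acc in zip(minibatch_grads_and_vars, accum_grads)])
apply_gradients_op = optimizer.apply_gradients(list(zip(accum_grads, params)))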
def setup_model(self): with SetVerbosity(self.verbose): assert issubclass(self.policy, ActorCriticPolicy), "Error: the input policy for the ACER model must be " \ "an instance of common.policies.ActorCriticPolicy." if isinstance(self.action_space, Discrete): self.n_act = self.action_space.n continuous = False elif isinstance(self.action_space, Box): # self.n_act = self.action_space.shape[-1] # continuous = True raise NotImplementedError("WIP: Acer does not support Continuous actions yet.") else: raise ValueError("Error: ACER does not work with {} actions space.".format(self.action_space)) self.n_batch = self.n_envs * self.n_steps self.graph = tf.Graph() with self.graph.as_default(): self.sess = tf_util.make_session(num_cpu=self.num_procs, graph=self.graph) n_batch_step = None if issubclass(self.policy, LstmPolicy): n_batch_step = self.n_envs n_batch_train = self.n_envs * (self.n_steps + 1) step_model = self.policy(self.sess, self.observation_space, self.action_space, self.n_envs, 1, n_batch_step, reuse=False, **self.policy_kwargs) self.params = find_trainable_variables("model") with tf.variable_scope("train_model", reuse=True, custom_getter=tf_util.outer_scope_getter("train_model")): train_model = self.policy(self.sess, self.observation_space, self.action_space, self.n_envs, self.n_steps + 1, n_batch_train, reuse=True, **self.policy_kwargs) with tf.variable_scope("moving_average"): # create averaged model ema = tf.train.ExponentialMovingAverage(self.alpha) ema_apply_op = ema.apply(self.params) def custom_getter(getter, name, *args, **kwargs): name = name.replace("polyak_model/", "") val = ema.average(getter(name, *args, **kwargs)) return val with tf.variable_scope("polyak_model", reuse=True, custom_getter=custom_getter): self.polyak_model = polyak_model = self.policy(self.sess, self.observation_space, self.action_space, self.n_envs, self.n_steps + 1, self.n_envs * (self.n_steps + 1), reuse=True, **self.policy_kwargs) with tf.variable_scope("loss", reuse=False): self.done_ph = tf.placeholder(tf.float32, [self.n_batch]) # dones self.reward_ph = tf.placeholder(tf.float32, [self.n_batch]) # rewards, not returns self.mu_ph = tf.placeholder(tf.float32, [self.n_batch, self.n_act]) # mu's self.action_ph = train_model.pdtype.sample_placeholder([self.n_batch]) self.learning_rate_ph = tf.placeholder(tf.float32, []) eps = 1e-6 # Notation: (var) = batch variable, (var)s = sequence variable, # (var)_i = variable index by action at step i # shape is [n_envs * (n_steps + 1)] if continuous: value = train_model.value_fn[:, 0] else: value = tf.reduce_sum(train_model.policy_proba * train_model.q_value, axis=-1) rho, rho_i_ = None, None if continuous: action_ = strip(train_model.proba_distribution.sample(), self.n_envs, self.n_steps) distribution_f = tf.contrib.distributions.MultivariateNormalDiag( loc=strip(train_model.proba_distribution.mean, self.n_envs, self.n_steps), scale_diag=strip(train_model.proba_distribution.logstd, self.n_envs, self.n_steps)) f_polyak = tf.contrib.distributions.MultivariateNormalDiag( loc=strip(polyak_model.proba_distribution.mean, self.n_envs, self.n_steps), scale_diag=strip(polyak_model.proba_distribution.logstd, self.n_envs, self.n_steps)) f_i = distribution_f.prob(self.action_ph) f_i_ = distribution_f.prob(action_) f_polyak_i = f_polyak.prob(self.action_ph) phi_i = strip(train_model.proba_distribution.mean, self.n_envs, self.n_steps) q_value = strip(train_model.value_fn, self.n_envs, self.n_steps) q_i = q_value[:, 0] rho_i = tf.reshape(f_i, [-1, 1]) / (self.mu_ph + eps) rho_i_ 
= tf.reshape(f_i_, [-1, 1]) / (self.mu_ph + eps) qret = q_retrace(self.reward_ph, self.done_ph, q_i, value, tf.pow(rho_i, 1/self.n_act), self.n_envs, self.n_steps, self.gamma) else: # strip off last step # f is a distribution, chosen to be Gaussian distributions # with fixed diagonal covariance and mean \phi(x) # in the paper distribution_f, f_polyak, q_value = \ map(lambda variables: strip(variables, self.n_envs, self.n_steps), [train_model.policy_proba, polyak_model.policy_proba, train_model.q_value]) # Get pi and q values for actions taken f_i = get_by_index(distribution_f, self.action_ph) f_i_ = distribution_f phi_i = distribution_f f_polyak_i = f_polyak q_i = get_by_index(q_value, self.action_ph) # Compute ratios for importance truncation rho = distribution_f / (self.mu_ph + eps) rho_i = get_by_index(rho, self.action_ph) # Calculate Q_retrace targets qret = q_retrace(self.reward_ph, self.done_ph, q_i, value, rho_i, self.n_envs, self.n_steps, self.gamma) # Calculate losses # Entropy entropy = tf.reduce_sum(train_model.proba_distribution.entropy()) # Policy Gradient loss, with truncated importance sampling & bias correction value = strip(value, self.n_envs, self.n_steps, True) # check_shape([qret, value, rho_i, f_i], [[self.n_envs * self.n_steps]] * 4) # check_shape([rho, distribution_f, q_value], [[self.n_envs * self.n_steps, self.n_act]] * 2) # Truncated importance sampling adv = qret - value log_f = tf.log(f_i + eps) # [n_envs * n_steps] gain_f = log_f * tf.stop_gradient(adv * tf.minimum(self.correction_term, rho_i)) loss_f = -tf.reduce_mean(gain_f) # Bias correction for the truncation adv_bc = (q_value - tf.reshape(value, [self.n_envs * self.n_steps, 1])) # [n_envs * n_steps, n_act] # check_shape([adv_bc, log_f_bc], [[self.n_envs * self.n_steps, self.n_act]] * 2) if continuous: gain_bc = tf.stop_gradient(adv_bc * tf.nn.relu(1.0 - (self.correction_term / (rho_i_ + eps))) * f_i_) else: log_f_bc = tf.log(f_i_ + eps) # / (f_old + eps) gain_bc = tf.reduce_sum(log_f_bc * tf.stop_gradient( adv_bc * tf.nn.relu(1.0 - (self.correction_term / (rho + eps))) * f_i_), axis=1) # IMP: This is sum, as expectation wrt f loss_bc = -tf.reduce_mean(gain_bc) loss_policy = loss_f + loss_bc # Value/Q function loss, and explained variance check_shape([qret, q_i], [[self.n_envs * self.n_steps]] * 2) explained_variance = q_explained_variance(tf.reshape(q_i, [self.n_envs, self.n_steps]), tf.reshape(qret, [self.n_envs, self.n_steps])) loss_q = tf.reduce_mean(tf.square(tf.stop_gradient(qret) - q_i) * 0.5) # Net loss check_shape([loss_policy, loss_q, entropy], [[]] * 3) loss = loss_policy + self.q_coef * loss_q - self.ent_coef * entropy tf.summary.scalar('entropy_loss', entropy) tf.summary.scalar('policy_gradient_loss', loss_policy) tf.summary.scalar('value_function_loss', loss_q) tf.summary.scalar('loss', loss) norm_grads_q, norm_grads_policy, avg_norm_grads_f = None, None, None avg_norm_k, avg_norm_g, avg_norm_k_dot_g, avg_norm_adj = None, None, None, None if self.trust_region: # [n_envs * n_steps, n_act] grad = tf.gradients(- (loss_policy - self.ent_coef * entropy) * self.n_steps * self.n_envs, phi_i) # [n_envs * n_steps, n_act] # Directly computed gradient of KL divergence wrt f kl_grad = - f_polyak_i / (f_i_ + eps) k_dot_g = tf.reduce_sum(kl_grad * grad, axis=-1) adj = tf.maximum(0.0, (tf.reduce_sum(kl_grad * grad, axis=-1) - self.delta) / ( tf.reduce_sum(tf.square(kl_grad), axis=-1) + eps)) # [n_envs * n_steps] # Calculate stats (before doing adjustment) for logging. 
avg_norm_k = avg_norm(kl_grad) avg_norm_g = avg_norm(grad) avg_norm_k_dot_g = tf.reduce_mean(tf.abs(k_dot_g)) avg_norm_adj = tf.reduce_mean(tf.abs(adj)) grad = grad - tf.reshape(adj, [self.n_envs * self.n_steps, 1]) * kl_grad # These are turst region adjusted gradients wrt f ie statistics of policy pi grads_f = -grad / (self.n_envs * self.n_steps) grads_policy = tf.gradients(f_i_, self.params, grads_f) grads_q = tf.gradients(loss_q * self.q_coef, self.params) grads = [gradient_add(g1, g2, param, verbose=self.verbose) for (g1, g2, param) in zip(grads_policy, grads_q, self.params)] avg_norm_grads_f = avg_norm(grads_f) * (self.n_steps * self.n_envs) norm_grads_q = tf.global_norm(grads_q) norm_grads_policy = tf.global_norm(grads_policy) else: grads = tf.gradients(loss, self.params) norm_grads = None if self.max_grad_norm is not None: grads, norm_grads = tf.clip_by_global_norm(grads, self.max_grad_norm) grads = list(zip(grads, self.params)) with tf.variable_scope("input_info", reuse=False): tf.summary.scalar('rewards', tf.reduce_mean(self.reward_ph)) tf.summary.scalar('learning_rate', tf.reduce_mean(self.learning_rate)) tf.summary.scalar('advantage', tf.reduce_mean(adv)) tf.summary.scalar('action_probabilty', tf.reduce_mean(self.mu_ph)) if self.full_tensorboard_log: tf.summary.histogram('rewards', self.reward_ph) tf.summary.histogram('learning_rate', self.learning_rate) tf.summary.histogram('advantage', adv) tf.summary.histogram('action_probabilty', self.mu_ph) if tf_util.is_image(self.observation_space): tf.summary.image('observation', train_model.obs_ph) else: tf.summary.histogram('observation', train_model.obs_ph) trainer = tf.train.RMSPropOptimizer(learning_rate=self.learning_rate_ph, decay=self.rprop_alpha, epsilon=self.rprop_epsilon) _opt_op = trainer.apply_gradients(grads) # so when you call _train, you first do the gradient step, then you apply ema with tf.control_dependencies([_opt_op]): _train = tf.group(ema_apply_op) # Ops/Summaries to run, and their names for logging assert norm_grads is not None run_ops = [_train, loss, loss_q, entropy, loss_policy, loss_f, loss_bc, explained_variance, norm_grads] names_ops = ['loss', 'loss_q', 'entropy', 'loss_policy', 'loss_f', 'loss_bc', 'explained_variance', 'norm_grads'] if self.trust_region: self.run_ops = run_ops + [norm_grads_q, norm_grads_policy, avg_norm_grads_f, avg_norm_k, avg_norm_g, avg_norm_k_dot_g, avg_norm_adj] self.names_ops = names_ops + ['norm_grads_q', 'norm_grads_policy', 'avg_norm_grads_f', 'avg_norm_k', 'avg_norm_g', 'avg_norm_k_dot_g', 'avg_norm_adj'] self.train_model = train_model self.step_model = step_model self.step = step_model.step self.proba_step = step_model.proba_step self.initial_state = step_model.initial_state tf.global_variables_initializer().run(session=self.sess) self.summary = tf.summary.merge_all()
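# A short sketch of the gradient handling at the end of the model setup above: clip the gradient
# list by global norm (note that tf.clip_by_global_norm returns both the clipped list and the
# pre-clip norm), apply them with RMSProp, and only then run the moving-average update. The toy
# loss and hyperparameters are placeholders.
import tensorflow as tf

w = tf.get_variable('w', shape=[10])
loss = tf.reduce_sum(tf.square(w))
params = tf.trainable_variables()

grads = tf.gradients(loss, params)
grads, norm_grads = tf.clip_by_global_norm(grads, clip_norm=10.0)

trainer = tf.train.RMSPropOptimizer(learning_rate=7e-4, decay=0.99, epsilon=1e-5)
_opt_op = trainer.apply_gradients(list(zip(grads, params)))

ema = tf.train.ExponentialMovingAverage(0.99)
with tf.control_dependencies([_opt_op]):
    _train = tf.group(ema.apply(params))  # polyak averaging runs after the gradient step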
def __init__(self, env): self.env = env tf.reset_default_graph() self.sess = tf.Session() # A few starter hyperparameters # hyperparameters self.gamma = 0.99 self.h1 = 64 self.h2 = 64 self.h3 = 64 self.l2_reg = 1e-6 self.max_episode_step = 1000 self.update_slow_target_every = 100 self.batch_size = 1024 self.eps_start = 1.0 self.epsilon_end = 0.05 self.epsilon_decay_length = 1e5 self.epsilon_decay_exp = 0.97 self.num_episodes = 0 self.num_steps = 0 self.epsilon_linear_step = ( self.eps_start - self.epsilon_end) / self.epsilon_decay_length # memory self.replay_memory = ReplayMemory(1e6) # Perhaps you want to have some samples in the memory before starting to train? self.min_replay_size = 2000 # define yours training operations here... self.observation_input = tf.placeholder( tf.float32, shape=[None] + list(self.env.observation_space.shape)) self.target_input = tf.placeholder( dtype=tf.float32, shape=[None] + list(self.env.observation_space.shape) ) # input to slow target network with tf.variable_scope('q_network') as scope: self.q_values = self.build_model(self.observation_input) with tf.variable_scope('target_network') as scope: self.target_q_values = self.build_model(self.observation_input, False) self.q_network_vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='q_network') self.q_target_network_vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='target_network') # update values for slowly-changing target network to match current critic network update_slow_target_ops = [] for i, slow_target_var in enumerate(self.q_target_network_vars): update_slow_target_op = slow_target_var.assign( self.q_network_vars[i]) update_slow_target_ops.append(update_slow_target_op) self.update_slow_target_op = tf.group(*update_slow_target_ops, name='update_slow_target') # define your update operations here... self.saver = tf.train.Saver(tf.trainable_variables()) self.target = tf.placeholder(tf.float32, shape=[None]) self.actions = tf.placeholder(shape=[None], dtype=tf.int32) #Calculating the action q value is taken from https://github.com/dennybritz/reinforcement-learning/tree/master/DQN gather_indices = tf.range(self.batch_size) * tf.shape( self.q_values)[1] + self.actions self.action_predictions = tf.gather(tf.reshape(self.q_values, [-1]), gather_indices) self.loss = tf.losses.huber_loss( self.target, self.action_predictions ) #tf.squared_difference(self.target, self.action_predictions) #Adding a regularization term for the weights for var in self.q_network_vars: if not 'bias' in var.name: self.loss += self.l2_reg * 0.5 * tf.nn.l2_loss(var) #self.loss = (self.target-self.action_predictions)**2 #self.losses = tf.reduce_mean(self.loss) self.minimizer = tf.train.AdamOptimizer(learning_rate=1e-6).minimize( self.loss ) #tf.train.GradientDescentOptimizer(1e-5).minimize(self.losses) self.sess.run(tf.global_variables_initializer()) self.writer = tf.summary.FileWriter(LOGDIR) self.writer.add_graph(self.sess.graph) self.count = 0 # Summaries for Tensorboard tf.summary.scalar("loss", self.loss) #tf.summary.scalar("loss_hist", self.losses), tf.summary.histogram("q_values_hist", self.q_values), tf.summary.scalar("max_q_value", tf.reduce_max(self.q_values)) self.summ = tf.summary.merge_all()
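# A tiny sketch of the gather trick above for picking out Q(s, a) of the actions that were taken;
# the one-hot reduction at the end is an equivalent, batch-size-agnostic alternative. Shapes and
# values are toy assumptions.
import tensorflow as tf

batch_size, num_actions = 4, 3
q_values = tf.placeholder(tf.float32, [None, num_actions])
actions = tf.placeholder(tf.int32, [None])

# flatten [batch, num_actions] row-major and index row i at column actions[i]
gather_indices = tf.range(batch_size) * tf.shape(q_values)[1] + actions
action_predictions = tf.gather(tf.reshape(q_values, [-1]), gather_indices)

# equivalent formulation that does not need a fixed batch size
action_predictions_alt = tf.reduce_sum(q_values * tf.one_hot(actions, num_actions), axis=1)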
def _build_graph(self): self.training = tf.placeholder_with_default(False, shape=(), name='is_training') self.input_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name='input_ids') self.input_mask = tf.placeholder(shape=[None, None], dtype=tf.int32, name="input_mask") self.segment_ids = tf.placeholder(shape=[None, None], dtype=tf.int32, name="segment_ids") self.y = tf.placeholder(tf.int32, [None]) self.bert_embedding = BertEmbedding(self.bert_dir) _, output_layer = self.bert_embedding(input_ids=self.input_ids, input_mask=self.input_mask, segment_ids=self.segment_ids, is_training=self.training, return_pool_output=True, use_fp16=self.use_fp16) hidden_size = output_layer.shape[-1].value # output_weights = tf.get_variable( # "output_weights", [self.num_class, hidden_size], # initializer=tf.truncated_normal_initializer(stddev=0.02)) # # output_bias = tf.get_variable( # "output_bias", [self.num_class], initializer=tf.zeros_initializer()) dropout = Dropout(0.9) output_layer = dropout(output_layer, self.training) #add cnn layer pooled = [] for idx, kernel_size in enumerate(self.filter_sizes1): con1d = tf.layers.conv1d(output_layer, self.filter_nums1[idx], kernel_size, padding='same', activation=tf.nn.relu, name='conv1d-%d' % (idx)) pooled_conv = tf.reduce_max(con1d, axis=1) pooled.append(pooled_conv) merge = tf.concat(pooled, axis=1) merge = dropout(merge, self.training) merge = tf.layers.dense(merge, 128, activation=tf.nn.tanh, name='dense1') # merge=tf.layers.batch_normalization(inputs=merge) merge = dropout(merge, self.training) logits = tf.layers.dense(merge, self.num_class, activation=None, use_bias=False) # if is_training: # # I.e., 0.1 dropout # output_layer = tf.nn.dropout(output_layer, keep_prob=0.9,) # logits = tf.matmul(output_layer, output_weights, transpose_b=True) # logits = tf.nn.bias_add(logits, output_bias) probabilities = tf.nn.softmax(logits, axis=-1, name="probs") log_probs = tf.nn.log_softmax(logits, axis=-1) one_hot_labels = tf.one_hot(self.y, depth=self.num_class, dtype=tf.float32) per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) self.loss = tf.reduce_mean(per_example_loss) self.probs = probabilities self.input_placeholder_dict = OrderedDict({ "input_ids": self.input_ids, "segment_ids": self.segment_ids, "labels": self.y, "input_mask": self.input_mask, "training": self.training }) self.output_variable_dict = OrderedDict({ "predict": tf.argmax(self.probs, axis=1), "probabilities": probabilities }) # 8. Metrics and summary with tf.variable_scope("train_metrics"): self.train_metrics = {'loss': tf.metrics.mean(self.loss)} self.train_update_metrics = tf.group( *[op for _, op in self.train_metrics.values()]) metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="train_metrics") self.train_metric_init_op = tf.variables_initializer(metric_variables) with tf.variable_scope("eval_metrics"): self.eval_metrics = {'loss': tf.metrics.mean(self.loss)} self.eval_update_metrics = tf.group( *[op for _, op in self.eval_metrics.values()]) metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope="eval_metrics") self.eval_metric_init_op = tf.variables_initializer(metric_variables) tf.summary.scalar('loss', self.loss) self.summary_op = tf.summary.merge_all()
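# A sketch of the resettable streaming metric pattern above: tf.metrics.mean accumulates in local
# variables scoped under "train_metrics", and re-running their initializer resets the running mean
# at the start of each epoch. The scalar loss is a placeholder stand-in.
import tensorflow as tf

loss = tf.placeholder(tf.float32, [])
with tf.variable_scope('train_metrics'):
    mean_loss, update_mean_loss = tf.metrics.mean(loss)

metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES, scope='train_metrics')
metric_init_op = tf.variables_initializer(metric_variables)

with tf.Session() as sess:
    sess.run(metric_init_op)                       # reset at the start of the epoch
    for value in [1.0, 2.0, 3.0]:
        sess.run(update_mean_loss, {loss: value})
    print(sess.run(mean_loss))                     # 2.0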
def get_tfrecord_path(self, mode): return os.path.join(self.data_path, mode + ".tfr") if __name__ == '__main__': pars_wsj = ParseWSJ('../../data/wsj') pars_wsj.build_tfrecords("train") pars_wsj.build_tfrecords("dev") pars_wsj.build_tfrecords("test") batch_size = 10 dataset = tf.data.TFRecordDataset(pars_wsj.get_tfrecord_path(mode="train")) dataset = dataset.map(pars_wsj.parse_examples) dataset = dataset.padded_batch(batch_size, padded_shapes=pars_wsj.get_padded_shapes()) iterator = dataset.make_initializable_iterator() example = iterator.get_next() inputs, targets, inputs_length, targets_length = example global_step = tf.train.get_or_create_global_step() scaffold = tf.train.Scaffold(local_init_op=tf.group( tf.local_variables_initializer(), iterator.initializer)) with tf.train.MonitoredTrainingSession(checkpoint_dir='logs/tests', scaffold=scaffold) as sess: inp, targ, tag_len = \ sess.run([inputs, targets, targets_length]) print(pars_wsj.decode(inp[0], pars_wsj.input_id2word)) print(pars_wsj.decode(targ[0], pars_wsj.target_id2word)) print(tag_len)
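# A minimal sketch of the scaffold trick above: MonitoredTrainingSession runs the scaffold's
# local_init_op once, so grouping the iterator initializer into it makes an initializable
# tf.data iterator usable without an explicit sess.run(iterator.initializer). The dataset here
# is synthetic and the checkpoint directory is a placeholder.
import tensorflow as tf

dataset = tf.data.Dataset.from_tensor_slices(tf.range(10)).batch(2)
iterator = dataset.make_initializable_iterator()
batch = iterator.get_next()

tf.train.get_or_create_global_step()  # MonitoredTrainingSession expects a global step
scaffold = tf.train.Scaffold(local_init_op=tf.group(
    tf.local_variables_initializer(), iterator.initializer))

with tf.train.MonitoredTrainingSession(checkpoint_dir='/tmp/scaffold_demo',
                                       scaffold=scaffold) as sess:
    print(sess.run(batch))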
def build_train_imitation(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0, double_q=False, scope="deepq", reuse=None, param_noise=False, param_noise_filter_func=None): """Creates the train function: Parameters ---------- make_obs_ph: str -> tf.placeholder or TfInput a function that takes a name and creates a placeholder of input with that name q_func: (tf.Variable, int, str, bool) -> tf.Variable the model that takes the following inputs: observation_in: object the output of observation placeholder num_actions: int number of actions scope: str reuse: bool should be passed to outer variable scope and returns a tensor of shape (batch_size, num_actions) with values of every action. num_actions: int number of actions reuse: bool whether or not to reuse the graph variables optimizer: tf.train.Optimizer optimizer to use for the Q-learning objective. grad_norm_clipping: float or None clip gradient norms to this value. If None no clipping is performed. gamma: float discount rate. double_q: bool if true will use Double Q Learning (https://arxiv.org/abs/1509.06461). In general it is a good idea to keep it enabled. scope: str or VariableScope optional scope for variable_scope. reuse: bool or None whether or not the variables should be reused. To be able to reuse the scope must be given. param_noise: bool whether or not to use parameter space noise (https://arxiv.org/abs/1706.01905) param_noise_filter_func: tf.Variable -> bool function that decides whether or not a variable should be perturbed. Only applicable if param_noise is True. If set to None, default_param_noise_filter is used by default. Returns ------- act: (tf.Variable, bool, float) -> tf.Variable function to select and action given observation. ` See the top of the file for details. train: (object, np.array, np.array, object, np.array, np.array) -> np.array``` optimize the error in Bellman's equation. ` See the top of the file for details. update_target: () -> () copy the parameters from optimized Q function to the target Q function. ` See the top of the file for details. debug: {str: function} a bunch of functions to print debug data like q_values. """ if param_noise: act_f = build_act_with_param_noise( make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse, param_noise_filter_func=param_noise_filter_func) else: act_f = build_act_imitation(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse) with tf.variable_scope(scope, reuse=reuse): # set up placeholders obs_t_input = make_obs_ph("obs_t") act_t_ph = tf.placeholder(tf.int32, [None], name="action") rew_t_ph = tf.placeholder(tf.float32, [None], name="reward") obs_tp1_input = make_obs_ph("obs_tp1") done_mask_ph = tf.placeholder(tf.float32, [None], name="done") importance_weights_ph = tf.placeholder(tf.float32, [None], name="weight") # q network evaluation q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True) # reuse parameters from act q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/q_func") # target q network evalution q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func") target_q_func_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/target_q_func") # q scores for actions which we know were selected in the given state. 
q_t_selected = tf.reduce_sum(q_t * tf.one_hot(act_t_ph, num_actions), 1) # Q(s,a;θi) # compute estimate of best possible value starting from state at t + 1 if double_q: q_tp1_using_online_net = q_func(obs_tp1_input.get(), num_actions, scope="q_func", reuse=True) q_tp1_best_using_online_net = tf.argmax(q_tp1_using_online_net, 1) q_tp1_best = tf.reduce_sum( q_tp1 * tf.one_hot(q_tp1_best_using_online_net, num_actions), 1) else: q_tp1_best = tf.reduce_max(q_tp1, 1) q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best # maxQ(s',a';θi-) # compute RHS of bellman equation q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked # compute the error (potentially clipped) td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) errors = U.huber_loss(td_error) weighted_error = tf.reduce_mean(importance_weights_ph * errors) # compute optimization op (potentially with gradient clipping) if grad_norm_clipping is not None: gradients = optimizer.compute_gradients(weighted_error, var_list=q_func_vars) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) optimize_expr = optimizer.apply_gradients(gradients) else: optimize_expr = optimizer.minimize(weighted_error, var_list=q_func_vars) # -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-! OBSERVER !-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- # TED's set up placeholders ment_obs_t_input = make_obs_ph("ment_obs_t") ment_act_t_ph = tf.placeholder(tf.int32, [None], name="ment_action") ment_obs_tp1_input = make_obs_ph("ment_obs_tp1") old_error_ph = tf.placeholder(tf.float32, shape=[None], name="old_error") old_imp_weights_ph = tf.placeholder(tf.float32, [None], name="old_imp_weights") # TED's q network evaluation aug_q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True) # reuse parameters from act aug_q_func_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/q_func") # TED's target q network evalution aug_q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func", reuse=True) aug_target_q_func_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/target_q_func") # TED's q scores for actions which we know were selected in the given state. 
aug_q_t_selected = tf.reduce_sum( aug_q_t * tf.one_hot(act_t_ph, num_actions), 1) # Q(s,a;θi) aug_q_tp1_selected = tf.reduce_sum( q_tp1 * tf.one_hot(ment_act_t_ph, num_actions), 1) # Q(s',am;θi) aug_q_tp1_selected_masked = (1.0 - done_mask_ph) * aug_q_tp1_selected # TED's compute estimate of best possible value starting from state at t + 1 if double_q: aug_q_tp1_using_online_net = q_func(obs_tp1_input.get(), num_actions, scope="q_func", reuse=True) aug_q_tp1_best_using_online_net = tf.argmax( aug_q_tp1_using_online_net, 1) aug_q_tp1_best = tf.reduce_sum( aug_q_tp1 * tf.one_hot(aug_q_tp1_best_using_online_net, num_actions), 1) else: aug_q_tp1_best = tf.reduce_max(aug_q_tp1, 1) aug_q_tp1_best_masked = ( 1.0 - done_mask_ph) * aug_q_tp1_best # maxQ(s',a';θi-) # TED's compute RHS of bellman equation aug_q_t_selected_target = rew_t_ph + gamma * tf.maximum( aug_q_tp1_best_masked, aug_q_tp1_selected_masked) # aug_q_t_selected_target = rew_t_ph + gamma * aug_q_tp1_best_masked # TED's compute the error (potentially clipped) aug_td_error = aug_q_t_selected - tf.stop_gradient( aug_q_t_selected_target) aug_errors = U.huber_loss(aug_td_error) aug_weighted_error = tf.reduce_mean(importance_weights_ph * aug_errors) # aug_weighted_error = tf.Print(aug_weighted_error, [tf.shape(importance_weights_ph)], "AGENT WEIGHTED ERROR: ") # TED's compute optimization op (potentially with gradient clipping) if grad_norm_clipping is not None: gradients = optimizer.compute_gradients(aug_weighted_error, var_list=aug_q_func_vars) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) aug_optimize_expr = optimizer.apply_gradients(gradients) else: aug_optimize_expr = optimizer.minimize(aug_weighted_error, var_list=aug_q_func_vars) # -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-! OBSERVER !-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- # -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- MENTOR -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- # TED's mentor's q network evaluation ment_q_t = q_func(ment_obs_t_input.get(), num_actions, scope="q_func", reuse=True) # reuse parameters from act ment_q_func_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/q_func") # TED's mentor's target q network evalution ment_q_tp1 = q_func(ment_obs_tp1_input.get(), num_actions, scope="target_q_func", reuse=True) ment_target_q_func_vars = tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/target_q_func") # TED's mentor's q scores for action am which we know was selected in the given state. 
ment_q_t_selected = tf.reduce_sum( ment_q_t * tf.one_hot(ment_act_t_ph, num_actions), 1) # Q(sm,am;θi) ment_q_tp1_selected = tf.reduce_sum( ment_q_tp1 * tf.one_hot(ment_act_t_ph, num_actions), 1) # Q(sm',am;θi-) ment_q_tp1_selected_masked = (1.0 - done_mask_ph) * ment_q_tp1_selected # TED's compute estimate of best possible value starting from state at t + 1 if double_q: ment_q_tp1_using_online_net = q_func(ment_obs_tp1_input.get(), num_actions, scope="q_func", reuse=True) ment_q_tp1_best_using_online_net = tf.argmax( ment_q_tp1_using_online_net, 1) ment_q_tp1_best = tf.reduce_sum( ment_q_tp1 * tf.one_hot(ment_q_tp1_best_using_online_net, num_actions), 1) else: ment_q_tp1_best = tf.reduce_max(ment_q_tp1, 1) ment_q_tp1_best_masked = ( 1.0 - done_mask_ph) * ment_q_tp1_best # maxQ(sm',a';θi-) # TED's compute RHS of bellman equation ment_q_t_selected_target = rew_t_ph + gamma * tf.maximum( ment_q_tp1_best_masked, ment_q_tp1_selected_masked) # TED's compute the error (potentially clipped) ment_td_error = ment_q_t_selected - tf.stop_gradient( ment_q_t_selected_target) ment_errors = U.huber_loss(ment_td_error) ment_weighted_error = tf.reduce_mean(importance_weights_ph * ment_errors) # ment_weighted_error = tf.Print(ment_weighted_error, [tf.shape(importance_weights_ph)], "MENTOR WEIGHTED ERROR: ") # TED's compute optimization op (potentially with gradient clipping) if grad_norm_clipping is not None: gradients = optimizer.compute_gradients(ment_weighted_error, var_list=ment_q_func_vars) for i, (grad, var) in enumerate(gradients): if grad is not None: gradients[i] = (tf.clip_by_norm(grad, grad_norm_clipping), var) ment_optimize_expr = optimizer.apply_gradients(gradients) else: ment_optimize_expr = optimizer.minimize(ment_weighted_error, var_list=ment_q_func_vars) # -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- MENTOR -!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!-!- def temp_func1(): return aug_td_error, aug_optimize_expr # return td_error, optimize_expr def temp_func2(): return ment_td_error, ment_optimize_expr old_errors = U.huber_loss(old_error_ph) old_weighted_error = tf.reduce_mean(old_imp_weights_ph * old_errors) final_td_error, final_optimize_expr = tf.cond( tf.greater((ment_weighted_error - old_weighted_error)**2, (aug_weighted_error - old_weighted_error)**2), temp_func1, temp_func2) # update_target_fn will be called periodically to copy Q network to target Q network update_target_expr = [] for var, var_target in zip( sorted(q_func_vars, key=lambda v: v.name), sorted(target_q_func_vars, key=lambda v: v.name)): update_target_expr.append(var_target.assign(var)) update_target_expr = tf.group(*update_target_expr) # Create callable functions train = U.function(inputs=[ obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph, importance_weights_ph ], outputs=td_error, updates=[optimize_expr]) update_target = U.function([], [], updates=[update_target_expr]) q_values = U.function([obs_t_input], q_t) # TED's create callable functions trainAugmented = U.function(inputs=[ obs_t_input, act_t_ph, rew_t_ph, obs_tp1_input, done_mask_ph, importance_weights_ph, ment_obs_t_input, ment_obs_tp1_input, ment_act_t_ph, old_error_ph, old_imp_weights_ph ], outputs=final_td_error, updates=[final_optimize_expr]) return act_f, train, trainAugmented, update_target, { 'q_values': q_values }
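# A condensed sketch of the Double-Q target computed above: the online network picks the argmax
# action at s', the target network supplies its value, terminal transitions are masked out, and
# the Bellman target is reward + gamma * that value. All tensors below are toy placeholders.
import tensorflow as tf

num_actions, gamma = 4, 0.99
rew_t_ph = tf.placeholder(tf.float32, [None])
done_mask_ph = tf.placeholder(tf.float32, [None])
q_tp1 = tf.placeholder(tf.float32, [None, num_actions])         # target network Q(s', .)
q_tp1_online = tf.placeholder(tf.float32, [None, num_actions])  # online network Q(s', .)

best_actions = tf.argmax(q_tp1_online, axis=1)
q_tp1_best = tf.reduce_sum(q_tp1 * tf.one_hot(best_actions, num_actions), axis=1)
q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best
q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked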