def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    logits = self._model(self.x_train, is_training=True)
    # CIFAR10-to-chess modification: the original sparse softmax
    # cross-entropy loss is replaced with an MSE regression loss.
    # log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
    #     logits=logits, labels=self.y_train)
    # log_probs = tf.keras.backend.categorical_crossentropy(
    #     target=logits, output=self.y_train, axis=1)
    log_probs = tf.keras.losses.MSE(logits, self.y_train)
    self.loss = tf.reduce_mean(log_probs)

    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.cast(self.train_preds, tf.float32)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.cast(self.train_acc, tf.int32)
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
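
# A minimal sketch (assumption: TF 1.x with tf.keras available; the demo
# function below is hypothetical, not part of this class) showing why the
# tf.reduce_mean above is needed: tf.keras.losses.MSE averages over the last
# axis only, yielding one value per example, not a scalar.
def demo_mse_reduction():
    import numpy as np
    y_true = tf.constant(np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float32))
    y_pred = tf.constant(np.array([[0.1, 0.9], [0.8, 0.2]], dtype=np.float32))
    per_example = tf.keras.losses.MSE(y_true, y_pred)  # shape [2]: [0.01, 0.04]
    batch_loss = tf.reduce_mean(per_example)           # scalar: 0.025
    with tf.Session() as sess:
        print(sess.run([per_example, batch_loss]))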
def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    logits = self._model(self.x_train, is_training=True)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)
    # self._weight_transfer_loss()

    if self.use_aux_heads:
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.aux_logits, labels=self.y_train)
        self.aux_loss = tf.reduce_mean(log_probs)
        train_loss = self.loss + 0.4 * self.aux_loss
    else:
        train_loss = self.loss

    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
        var for var in tf.trainable_variables()
        if (var.name.startswith(self.name) and "aux_head" not in var.name)
    ]
    for var in tf_variables:
        print(var)
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        train_loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
def main(_):
    print("-" * 80)
    if not os.path.isdir(FLAGS.output_dir):
        print("Path {0} does not exist. Creating.".format(FLAGS.output_dir))
        os.makedirs(FLAGS.output_dir)
    elif FLAGS.reset_output_dir:
        print("Path {0} exists. Remove and remake.".format(FLAGS.output_dir))
        shutil.rmtree(FLAGS.output_dir)
        os.makedirs(FLAGS.output_dir)

    print_user_flags()
    hparams = Hparams()
    images, labels = read_data(FLAGS.data_path)

    g = tf.Graph()
    with g.as_default():
        ops = get_ops(images, labels)

        # count model variables
        tf_variables = tf.trainable_variables()
        num_params = count_model_params(tf_variables)

        print("-" * 80)
        print("Starting session")
        config = tf.ConfigProto(allow_soft_placement=True)
        with tf.train.SingularMonitoredSession(
                config=config, checkpoint_dir=FLAGS.output_dir) as sess:

            # training loop
            print("-" * 80)
            print("Starting training")
            for step in range(1, hparams.train_steps + 1):
                sess.run(ops["train_op"])
                if step % FLAGS.log_every == 0:
                    global_step, train_loss, valid_acc = sess.run([
                        ops["global_step"],
                        ops["train_loss"],
                        ops["valid_acc"],
                    ])
                    log_string = ""
                    log_string += "step={0:<6d}".format(step)
                    log_string += " loss={0:<5.2f}".format(train_loss)
                    log_string += " val_acc={0:<3d}/{1:<3d}".format(
                        valid_acc, hparams.eval_batch_size)
                    print(log_string)
                    sys.stdout.flush()

            # final test
            print("-" * 80)
            print("Training done. Eval on TEST set")
            num_corrects = 0
            for _ in range(10000 // hparams.eval_batch_size):
                num_corrects += sess.run(ops["test_acc"])
            print("test_accuracy: {0:>5d}/10000".format(num_corrects))
def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    self.output, self.layers = output, layers = self._model(
        self.x_train, is_training=True)

    # update loss to SSE
    label_onehot = tf.cast(tf.one_hot(self.y_train, 10), tf.float32)
    with tf.name_scope('loss'):
        # TODO: change to reduce_mean?
        self.loss = 0.5 * tf.reduce_sum(tf.square(label_onehot - output))

    self.train_preds = tf.argmax(output, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    self.train_op, self.lr, self.grad_norm, self.grads, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
        bitsW=self.bitsW,
        bitsG=self.bitsG,
        is_child=True)
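
# Re the TODO above: with reduce_sum the SSE loss (and hence the gradient
# magnitude) grows with batch size, while averaging per example keeps it
# batch-size invariant. A sketch of the mean variant (an assumption, not
# this repo's code):
def sse_loss_mean(label_onehot, output):
    per_example = tf.reduce_sum(tf.square(label_onehot - output), axis=1)
    return 0.5 * tf.reduce_mean(per_example)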
def _build_train(self):
    print("Build train graph")
    all_h, self.train_reset = self._model(self.x_train, True, False)
    log_probs = self._get_log_probs(
        all_h, self.y_train, batch_size=self.batch_size, is_training=True)
    self.loss = tf.reduce_sum(log_probs) / tf.to_float(self.batch_size)
    self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("-" * 80)
    print("Model has {} parameters".format(self.num_vars))

    loss = self.loss
    if self.rnn_l2_reg is not None:
        loss += (self.rnn_l2_reg * tf.reduce_sum(all_h ** 2) /
                 tf.to_float(self.batch_size))
    if self.rnn_slowness_reg is not None:
        loss += (self.rnn_slowness_reg * self.all_h_diff /
                 tf.to_float(self.batch_size))
    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    (self.train_op,
     self.lr,
     self.grad_norm,
     self.optimizer,
     self.grad_norms) = get_train_ops(
         loss,
         tf_variables,
         self.global_step,
         clip_mode=self.clip_mode,
         grad_bound=self.grad_bound,
         l2_reg=self.l2_reg,
         lr_warmup_val=self.lr_warmup_val,
         lr_warmup_steps=self.lr_warmup_steps,
         lr_init=self.lr_init,
         lr_dec_start=self.lr_dec_start,
         lr_dec_every=self.lr_dec_every,
         lr_dec_rate=self.lr_dec_rate,
         lr_dec_min=self.lr_dec_min,
         optim_algo=self.optim_algo,
         moving_average=self.optim_moving_average,
         sync_replicas=self.sync_replicas,
         num_aggregate=self.num_aggregate,
         num_replicas=self.num_replicas,
         get_grad_norms=True,
     )
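
# Sketch of the perplexity identity used above: train_ppl is
# exp(mean per-token cross-entropy). Numpy-only demo; the token losses
# below are hypothetical values, not model outputs.
def demo_perplexity():
    import numpy as np
    token_xent = np.array([4.2, 3.9, 4.5])  # -log p(token) per position
    return np.exp(token_xent.mean())        # exp(4.2) ~= 66.7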
def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    output = self._model(self.x_train, is_training=True)

    target = (self.y_train - 127) / 127
    self.loss = tf.reduce_mean(
        tf.losses.absolute_difference(target, output))
    train_loss = self.loss
    self.train_psnr = psnr(self.y_train, output)
    tf.summary.scalar('loss', self.loss)

    output = output * 127 + 127
    output = tf.clip_by_value(output, 0, 255)
    input_img = self.x_train * 127 + 127
    bicubic_img = tf.image.resize_bicubic(input_img, [128, 128])
    tf.summary.image("output", tf.cast(output, tf.uint8))
    tf.summary.image("target", tf.cast(self.y_train, tf.uint8))
    tf.summary.image("input", tf.cast(input_img, tf.uint8))
    tf.summary.image("bicubic", tf.cast(bicubic_img, tf.uint8))

    tf_variables = [
        var for var in tf.trainable_variables()
        if (var.name.startswith(self.name) and "aux_head" not in var.name)]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        train_loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
    tf.summary.scalar('lr', self.lr)
    self.summaries = tf.summary.merge_all()
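
# A common PSNR definition that the `psnr` helper above is assumed to follow
# (numpy sketch for 8-bit images; the repo's actual implementation may differ):
def psnr_np(a, b, max_val=255.0):
    import numpy as np
    mse = np.mean((np.asarray(a, np.float64) - np.asarray(b, np.float64)) ** 2)
    return 10.0 * np.log10(max_val ** 2 / mse)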
def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    logits = self._model(self.x_train, is_training=True)
    log_probs = tf.nn.sigmoid_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)

    outs = tf.nn.sigmoid(logits)
    self.train_preds = tf.greater_equal(outs, tf.constant(0.5))
    self.train_preds = tf.to_int32(self.train_preds)
    self.y_train = tf.to_int32(self.y_train)
    self.soft_acc_count = tf.count_nonzero(
        tf.equal(self.train_preds, self.y_train), axis=1)
    self.train_acc = tf.to_int32(tf.equal(self.soft_acc_count, 6))
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {} params".format(self.num_vars))

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
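
# Numpy sketch of the exact-match accuracy above: a sample only counts as
# correct when all 6 binary labels agree with the thresholded predictions
# (the values below are hypothetical):
def demo_exact_match():
    import numpy as np
    preds = np.array([[1, 0, 1, 1, 0, 0], [1, 1, 0, 0, 0, 1]])
    labels = np.array([[1, 0, 1, 1, 0, 0], [1, 1, 0, 0, 1, 1]])
    matches = np.count_nonzero(preds == labels, axis=1)  # [6, 5]
    return np.sum(matches == 6)                          # 1 correct sample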
def _build_train(self):
    print("Build train graph")
    if self.use_model == "SRCNN":
        self.train_preds = self._model_srcnn(self.x_train, True)
    elif self.use_model == "RDN":
        self.train_preds = self._model_RDN(self.x_train, True)
    else:
        self.train_preds = self._model(self.x_train, True)
    self.loss = tf.losses.mean_squared_error(
        labels=self.y_train, predictions=self.train_preds)

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("-" * 80)
    for var in tf_variables:
        print(var)

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_warmup_steps=self.lr_warmup_steps,
        lr_warmup_val=self.lr_warmup_val,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo
        # sync_replicas=self.sync_replicas,
        # num_aggregate=self.num_aggregate,
        # num_replicas=self.num_replicas,
    )
def _build_train(self):
    print("Build train graph")
    logits = self._model(self.x_train, True)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)

    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
        var for var in tf.trainable_variables()
        if var.name.startswith(self.name)
    ]
    self.num_vars = count_model_params(tf_variables)
    print("-" * 80)
    for var in tf_variables:
        print(var)

    self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                                   name="global_step")
    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
        self.loss,
        tf_variables,
        self.global_step,
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas)
def _model(self, images, is_training, reuse=False):
    """Compute the logits given the images."""

    if self.fixed_arc is None:
        is_training = True

    with tf.variable_scope(self.name, reuse=reuse):
        # the first two inputs
        with tf.variable_scope("stem_conv"):
            w = create_weight("w", [3, 3, 3, self.out_filters * 3])
            x = tf.nn.conv2d(
                images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
            x = batch_norm(x, is_training, data_format=self.data_format)
        if self.data_format == "NHWC":
            split_axis = 3
        elif self.data_format == "NCHW":
            split_axis = 1
        else:
            raise ValueError("Unknown data_format '{0}'".format(
                self.data_format))
        layers = [x, x]

        # building layers in the micro space
        out_filters = self.out_filters
        for layer_id in range(self.num_layers + 2):
            with tf.variable_scope("layer_{0}".format(layer_id)):
                if layer_id not in self.pool_layers:
                    if self.fixed_arc is None:
                        x = self._enas_layer(
                            layer_id, layers, self.normal_arc, out_filters)
                    else:
                        x = self._fixed_layer(
                            layer_id, layers, self.normal_arc, out_filters, 1,
                            is_training, normal_or_reduction_cell="normal")
                else:
                    out_filters *= 2
                    if self.fixed_arc is None:
                        x = self._factorized_reduction(
                            x, out_filters, 2, is_training)
                        layers = [layers[0], x]
                        x = self._enas_layer(
                            layer_id, layers, self.reduce_arc, out_filters)
                    else:
                        x = self._fixed_layer(
                            layer_id, layers, self.reduce_arc, out_filters, 2,
                            is_training, normal_or_reduction_cell="reduction")
                print("Layer {0:>2d}: {1}".format(layer_id, x))
                layers = [layers[-1], x]

            # auxiliary heads
            self.num_aux_vars = 0
            if (self.use_aux_heads and
                    layer_id in self.aux_head_indices and
                    is_training):
                print("Using aux_head at layer {0}".format(layer_id))
                with tf.variable_scope("aux_head"):
                    aux_logits = tf.nn.relu(x)
                    aux_logits = tf.layers.average_pooling2d(
                        aux_logits, [5, 5], [3, 3], "VALID",
                        data_format=self.actual_data_format)
                    with tf.variable_scope("proj"):
                        inp_c = self._get_C(aux_logits)
                        w = create_weight("w", [1, 1, inp_c, 128])
                        aux_logits = tf.nn.conv2d(
                            aux_logits, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
                        aux_logits = batch_norm(
                            aux_logits, is_training=True,
                            data_format=self.data_format)
                        aux_logits = tf.nn.relu(aux_logits)

                    with tf.variable_scope("avg_pool"):
                        inp_c = self._get_C(aux_logits)
                        hw = self._get_HW(aux_logits)
                        w = create_weight("w", [hw, hw, inp_c, 768])
                        aux_logits = tf.nn.conv2d(
                            aux_logits, w, [1, 1, 1, 1], "SAME",
                            data_format=self.data_format)
                        aux_logits = batch_norm(
                            aux_logits, is_training=True,
                            data_format=self.data_format)
                        aux_logits = tf.nn.relu(aux_logits)

                    with tf.variable_scope("fc"):
                        aux_logits = global_avg_pool(
                            aux_logits, data_format=self.data_format)
                        inp_c = aux_logits.get_shape()[1].value
                        w = create_weight("w", [inp_c, 10])
                        aux_logits = tf.matmul(aux_logits, w)
                        self.aux_logits = aux_logits

                aux_head_variables = [
                    var for var in tf.trainable_variables()
                    if (var.name.startswith(self.name) and
                        "aux_head" in var.name)
                ]
                self.num_aux_vars = count_model_params(aux_head_variables)
                print("Aux head uses {0} params".format(self.num_aux_vars))

        x = tf.nn.relu(x)
        x = global_avg_pool(x, data_format=self.data_format)
        if is_training and self.keep_prob is not None and self.keep_prob < 1.0:
            x = tf.nn.dropout(x, self.keep_prob)
        with tf.variable_scope("fc"):
            inp_c = self._get_C(x)
            w = create_weight("w", [inp_c, 10])
            x = tf.matmul(x, w)
    return x
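
# Minimal sketch of what `global_avg_pool` is assumed to do above: average
# each channel over the spatial dimensions, collapsing [N, H, W, C] (or
# [N, C, H, W]) to [N, C]. This is a hypothetical stand-in; the repo's
# helper may differ.
def global_avg_pool_sketch(x, data_format="NHWC"):
    if data_format == "NHWC":
        return tf.reduce_mean(x, axis=[1, 2])
    elif data_format == "NCHW":
        return tf.reduce_mean(x, axis=[2, 3])
    raise ValueError("Unknown data_format '{0}'".format(data_format))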