def __init__(self, batch_loader, devices, optimizer, config):
    self.dic = {}
    self.loss = 0
    self.config = config
    outputs = []
    tower_grads = []
    n_gpus = len(devices)

    # Split the mini-batch evenly across the available devices.
    x, y = batch_loader.get_batch()
    inputs = tf.split(axis=0, num_or_size_splits=n_gpus, value=x)
    labels = tf.split(axis=0, num_or_size_splits=n_gpus, value=y)

    for i in range(n_gpus):
        with tf.device(devices[i]):
            with tf.variable_scope('UNet'):
                print(devices[i])
                try:
                    outputs.append(self.build_tower(inputs[i]))
                except ValueError:
                    # Variables already exist (tower > 0): reuse them.
                    tf.get_variable_scope().reuse_variables()
                    outputs.append(self.build_tower(inputs[i]))
                loss = tf.reduce_mean(tf.square(outputs[-1] - labels[i]))
                if config.is_training:
                    tower_grads.append(optimizer.compute_gradients(loss))
                self.loss += loss / n_gpus
                tf.get_variable_scope().reuse_variables()

    self.output = tf.concat(outputs, axis=0)
    if config.is_training:
        # Average the per-tower gradients and apply them once.
        self.train_step = optimizer.apply_gradients(
            ops.average_gradients(tower_grads))
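# Every snippet in this section relies on an average_gradients helper
# (referenced as ops.average_gradients or plain average_gradients) that is not
# shown in this excerpt. A minimal sketch is given below, following the
# standard multi-tower pattern (as in TensorFlow's multi-GPU CIFAR-10
# example); it is an assumption about the helper, not the repository's actual
# implementation.
import tensorflow as tf


def average_gradients(tower_grads):
    """Average gradients over towers.

    Args:
        tower_grads: a list with one entry per tower, each a list of
            (gradient, variable) pairs as returned by
            optimizer.compute_gradients().

    Returns:
        A single list of (gradient, variable) pairs with each gradient
        averaged over all towers.
    """
    average_grads = []
    # zip(*tower_grads) groups the (grad, var) pairs belonging to the same
    # variable across towers.
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        if not grads:
            continue
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # The variable is shared across towers, so take it from the first one.
        var = grad_and_vars[0][1]
        average_grads.append((grad, var))
    return average_grads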
def _prepare_training(self):
    """Prepare training.

    Build the TensorFlow graph. To support multiple GPUs, the mini-batch is
    divided across devices. Training can also be resumed: if a checkpoint
    file exists in FLAGS.gan_dir/log, it is loaded and training restarts
    from there.
    """
    assert FLAGS.batch_size >= FLAGS.style_ids_n, \
        'batch_size must be greater than or equal to style_ids_n'
    self.gpu_n = len(FLAGS.gpu_ids.split(','))
    self.embedding_chars = set_chars_type(FLAGS.chars_type)
    assert self.embedding_chars != [], 'embedding_chars is empty'
    self.char_embedding_n = len(self.embedding_chars)
    self.z_size = FLAGS.style_z_size + self.char_embedding_n

    with tf.device('/cpu:0'):
        # Initialize style embeddings from a uniform distribution
        style_embedding_np = np.random.uniform(
            -1, 1, (FLAGS.style_ids_n, FLAGS.style_z_size)).astype(np.float32)
        with tf.variable_scope('embeddings'):
            self.style_embedding = tf.Variable(style_embedding_np,
                                               name='style_embedding')
        self.style_ids = tf.placeholder(tf.int32, (FLAGS.batch_size,),
                                        name='style_ids')
        self.char_ids = tf.placeholder(tf.int32, (FLAGS.batch_size,),
                                       name='char_ids')
        self.is_train = tf.placeholder(tf.bool, name='is_train')
        self.real_imgs = tf.placeholder(tf.float32,
                                        (FLAGS.batch_size, FLAGS.img_width,
                                         FLAGS.img_height, FLAGS.img_dim),
                                        name='real_imgs')
        self.labels = tf.placeholder(tf.float32,
                                     (FLAGS.batch_size, self.char_embedding_n),
                                     name='labels')
        d_opt = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0., beta2=0.9)
        g_opt = tf.train.AdamOptimizer(learning_rate=0.0001, beta1=0., beta2=0.9)

        # Initialize per-GPU lists
        fake_imgs = [0] * self.gpu_n
        d_loss = [0] * self.gpu_n
        g_loss = [0] * self.gpu_n
        d_grads = [0] * self.gpu_n
        g_grads = [0] * self.gpu_n

        divided_batch_size = FLAGS.batch_size // self.gpu_n
        is_not_first = False

    # Build one tower per GPU
    for i in range(self.gpu_n):
        batch_start = i * divided_batch_size
        batch_end = (i + 1) * divided_batch_size
        with tf.device('/gpu:{}'.format(i)):
            if FLAGS.arch == 'DCGAN':
                generator = GeneratorDCGAN(
                    img_size=(FLAGS.img_width, FLAGS.img_height),
                    img_dim=FLAGS.img_dim,
                    z_size=self.z_size,
                    layer_n=4,
                    k_size=3,
                    smallest_hidden_unit_n=64,
                    is_bn=False)
                discriminator = DiscriminatorDCGAN(
                    img_size=(FLAGS.img_width, FLAGS.img_height),
                    img_dim=FLAGS.img_dim,
                    layer_n=4,
                    k_size=3,
                    smallest_hidden_unit_n=64,
                    is_bn=False)
            elif FLAGS.arch == 'ResNet':
                generator = GeneratorResNet(k_size=3, smallest_unit_n=64)
                discriminator = DiscriminatorResNet(k_size=3, smallest_unit_n=64)

            # If the sum of style_ids is negative, style z is sampled from a
            # uniform distribution instead of the embedding table.
            style_z = tf.cond(
                tf.less(tf.reduce_sum(self.style_ids[batch_start:batch_end]), 0),
                lambda: tf.random_uniform((divided_batch_size, FLAGS.style_z_size), -1, 1),
                lambda: tf.nn.embedding_lookup(self.style_embedding,
                                               self.style_ids[batch_start:batch_end]))
            char_z = tf.one_hot(self.char_ids[batch_start:batch_end],
                                self.char_embedding_n)
            z = tf.concat([style_z, char_z], axis=1)

            # Generate fake images
            fake_imgs[i] = generator(z, is_reuse=is_not_first,
                                     is_train=self.is_train)

            # Calculate WGAN losses
            d_real = discriminator(self.real_imgs[batch_start:batch_end],
                                   is_reuse=is_not_first, is_train=self.is_train)
            d_fake = discriminator(fake_imgs[i], is_reuse=True,
                                   is_train=self.is_train)
            d_loss[i] = -(tf.reduce_mean(d_real) - tf.reduce_mean(d_fake))
            g_loss[i] = -tf.reduce_mean(d_fake)

            # Calculate gradient penalty
            epsilon = tf.random_uniform((divided_batch_size, 1, 1, 1),
                                        minval=0., maxval=1.)
            interp = self.real_imgs[batch_start:batch_end] + epsilon * (
                fake_imgs[i] - self.real_imgs[batch_start:batch_end])
            d_interp = discriminator(interp, is_reuse=True,
                                     is_train=self.is_train)
            grads = tf.gradients(d_interp, [interp])[0]
            slopes = tf.sqrt(
                tf.reduce_sum(tf.square(grads), reduction_indices=[-1]))
            grad_penalty = tf.reduce_mean((slopes - 1.) ** 2)
            d_loss[i] += 10 * grad_penalty

            # Get trainable variables
            d_vars = [var for var in tf.trainable_variables()
                      if 'discriminator' in var.name]
            g_vars = [var for var in tf.trainable_variables()
                      if 'generator' in var.name]
            d_grads[i] = d_opt.compute_gradients(d_loss[i], var_list=d_vars)
            g_grads[i] = g_opt.compute_gradients(g_loss[i], var_list=g_vars)
            is_not_first = True

    with tf.device('/cpu:0'):
        self.fake_imgs = tf.concat(fake_imgs, axis=0)
        avg_d_grads = average_gradients(d_grads)
        avg_g_grads = average_gradients(g_grads)
        self.d_train = d_opt.apply_gradients(avg_d_grads)
        self.g_train = g_opt.apply_gradients(avg_g_grads)

        # Summaries for TensorBoard
        tf.summary.scalar('d_loss', -(sum(d_loss) / len(d_loss)))
        tf.summary.scalar('g_loss', -(sum(g_loss) / len(g_loss)))
        self.summary = tf.summary.merge_all()

    # Set up the session
    sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        visible_device_list=FLAGS.gpu_ids))
    self.sess = tf.Session(config=sess_config)
    self.saver = tf.train.Saver(max_to_keep=5)

    # If a checkpoint is found, resume training from it
    checkpoint = tf.train.get_checkpoint_state(self.dst_log)
    if checkpoint:
        saver_resume = tf.train.Saver()
        saver_resume.restore(self.sess, checkpoint.model_checkpoint_path)
        self.epoch_start = int(checkpoint.model_checkpoint_path.split('-')[-1])
        print('restore ckpt')
    else:
        self.sess.run(tf.global_variables_initializer())
        self.epoch_start = 0

    # Writer for TensorBoard
    self.writer = tf.summary.FileWriter(self.dst_log)
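# Illustrative only: one training iteration feeding the placeholders built in
# _prepare_training() above. The batch_* arrays, the critic-step count, and
# the global_step counter are assumptions about code outside this excerpt.
feed = {
    self.real_imgs: batch_real_imgs,   # (batch_size, W, H, C) float32, assumed
    self.style_ids: batch_style_ids,   # (batch_size,) int32, assumed
    self.char_ids: batch_char_ids,     # (batch_size,) int32, assumed
    self.is_train: True,
}
for _ in range(5):                     # critic updates per generator update (assumed)
    self.sess.run(self.d_train, feed_dict=feed)
_, summary = self.sess.run([self.g_train, self.summary], feed_dict=feed)
self.writer.add_summary(summary, global_step)   # global_step assumed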
def model(self, inp_x, inp_y, time_x, time_y):
    # Goal: generate [inp_x, time_y] -> inp_y
    G_opt_grads = []
    D_Y_opt_grads = []
    F_opt_grads = []
    D_X_opt_grads = []

    # Split every input tensor across the available GPUs.
    fake_x_list = tf.split(self.fake_x, len(self.use_gpu))
    fake_y_list = tf.split(self.fake_y, len(self.use_gpu))
    inp_x_list = tf.split(inp_x, len(self.use_gpu))
    inp_y_list = tf.split(inp_y, len(self.use_gpu))
    time_x_list = tf.split(time_x, len(self.use_gpu))
    time_y_list = tf.split(time_y, len(self.use_gpu))

    tot_G_loss = 0.
    tot_F_loss = 0.
    tot_D_Y_loss = 0.
    tot_D_X_loss = 0.
    fake_xs = []
    fake_ys = []

    with tf.variable_scope(tf.get_variable_scope()):
        for i, gpu_id in enumerate(self.use_gpu):
            print('Initializing graph on gpu %i' % gpu_id)
            with tf.device('/gpu:%d' % gpu_id):
                pooled_fake_x = fake_x_list[i]  # already has the date appended
                pooled_fake_y = fake_y_list[i]  # already has the date appended
                inp_x = inp_x_list[i]
                inp_y = inp_y_list[i]
                time_x = time_x_list[i]
                time_y = time_y_list[i]

                # Broadcast the time vectors to image-shaped tensors so they
                # can be concatenated along the channel axis.
                inp_shapes = tf.shape(inp_x)
                tiled_time_x = tf.expand_dims(
                    tf.expand_dims(time_x, axis=1), axis=1) * tf.ones(
                        (inp_shapes[0], inp_shapes[1], inp_shapes[2], 25))
                tiled_time_y = tf.expand_dims(
                    tf.expand_dims(time_y, axis=1), axis=1) * tf.ones(
                        (inp_shapes[0], inp_shapes[1], inp_shapes[2], 25))

                # Real examples
                inp_x_time_x = tf.concat([inp_x, tiled_time_x], axis=-1)
                inp_y_time_y = tf.concat([inp_y, tiled_time_y], axis=-1)

                # X -> Y
                fake_y = self.G(inp_x, tiled_time_y)
                fake_y_time_y = tf.concat([fake_y, tiled_time_y], axis=-1)
                fake_ys.append(fake_y_time_y)
                G_gan_loss = self.generator_loss(self.D_Y, fake_y_time_y,
                                                 use_lsgan=self.use_lsgan)
                D_Y_loss = self.discriminator_loss(self.D_Y, inp_y_time_y,
                                                   pooled_fake_y,
                                                   use_lsgan=self.use_lsgan)

                # Y -> X
                fake_x = self.F(inp_y, tiled_time_x)
                fake_x_time_x = tf.concat([fake_x, tiled_time_x], axis=-1)
                fake_xs.append(fake_x_time_x)
                F_gan_loss = self.generator_loss(self.D_X, fake_x_time_x,
                                                 use_lsgan=self.use_lsgan)
                D_X_loss = self.discriminator_loss(self.D_X, inp_x_time_x,
                                                   pooled_fake_x,
                                                   use_lsgan=self.use_lsgan)

                cycle_loss = self.cycle_consistency_loss(
                    self.G, self.F, inp_x, inp_y, fake_x, fake_y,
                    tiled_time_x, tiled_time_y)
                F_loss = F_gan_loss + cycle_loss
                G_loss = G_gan_loss + cycle_loss

                tot_F_loss += F_loss
                tot_D_X_loss += D_X_loss
                tot_G_loss += G_loss
                tot_D_Y_loss += D_Y_loss

                # Share variables with the remaining towers.
                tf.get_variable_scope().reuse_variables()

                G_opt_grads.append(
                    self.opt.compute_gradients(G_loss, var_list=self.G.variables))
                F_opt_grads.append(
                    self.opt.compute_gradients(F_loss, var_list=self.F.variables))
                D_Y_opt_grads.append(
                    self.opt.compute_gradients(D_Y_loss, var_list=self.D_Y.variables))
                D_X_opt_grads.append(
                    self.opt.compute_gradients(D_X_loss, var_list=self.D_X.variables))

    # Average the per-tower gradients and build the update ops.
    G_grads = ops.average_gradients(G_opt_grads)
    F_grads = ops.average_gradients(F_opt_grads)
    D_Y_grads = ops.average_gradients(D_Y_opt_grads)
    D_X_grads = ops.average_gradients(D_X_opt_grads)
    G_ts = self.opt.apply_gradients(G_grads, global_step=self.global_step)
    F_ts = self.opt.apply_gradients(F_grads, global_step=self.global_step)
    D_Y_ts = self.opt.apply_gradients(D_Y_grads, global_step=self.global_step)
    D_X_ts = self.opt.apply_gradients(D_X_grads, global_step=self.global_step)

    with tf.control_dependencies([G_ts, D_Y_ts, F_ts, D_X_ts]):
        ts = tf.no_op(name='optimizers')
    with tf.control_dependencies([G_ts, F_ts]):
        gen_ts = tf.no_op(name='gen_optimizers')

    fake_x = tf.concat(fake_xs, axis=0)
    fake_y = tf.concat(fake_ys, axis=0)
    num_gpu = max(1., float(len(self.use_gpu)))
    return (tot_G_loss / num_gpu, tot_D_Y_loss / num_gpu,
            tot_F_loss / num_gpu, tot_D_X_loss / num_gpu,
            fake_y, fake_x, ts, gen_ts)
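# cycle_consistency_loss() is called in model() above but not defined in this
# excerpt. Below is a hedged sketch following the standard CycleGAN L1
# formulation, adapted to the time-conditioned generators used here; the
# lambda weights are assumptions, not values from the source.
def cycle_consistency_loss(self, G, F, inp_x, inp_y, fake_x, fake_y,
                           tiled_time_x, tiled_time_y,
                           lambda1=10.0, lambda2=10.0):
    # x -> G(x, t_y) -> F(G(x, t_y), t_x) should reconstruct x.
    forward_loss = tf.reduce_mean(tf.abs(F(fake_y, tiled_time_x) - inp_x))
    # y -> F(y, t_x) -> G(F(y, t_x), t_y) should reconstruct y.
    backward_loss = tf.reduce_mean(tf.abs(G(fake_x, tiled_time_y) - inp_y))
    return lambda1 * forward_loss + lambda2 * backward_loss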
def build(self):
    """Builds a multi-tower model."""
    with tf.device('/cpu:0'):
        assert self.batch_size % self.num_gpus == 0, (
            'Batch size must be divisible by number of GPUs')

        # Split every input tensor into one slice per GPU.
        with tf.name_scope('Input_splits'):
            tower_inputs = [[] for i in range(self.num_gpus)]
            for inp in self.Inputs:
                splits = tf.split(inp, self.num_gpus, name=inp.name[:-2])
                for i, s in enumerate(splits):
                    tower_inputs[i].append(s)

        tower_outputs = []
        tower_losses = []
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(self.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                        # Calculate the loss for one tower of the model. This
                        # function constructs the entire model but shares the
                        # variables across all towers.
                        outputs, losses, grads = self._build_train_tower(
                            tower_inputs[i],
                            int(self.batch_size / self.num_gpus),
                            reuse=i > 0 or self.model_built)
                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()
                        # Save summaries from tower_0 only.
                        if i == 0:
                            summaries = tf.get_collection(
                                tf.GraphKeys.SUMMARIES, scope)
                        tower_outputs.append(outputs)
                        tower_losses.append(losses)
                        tower_grads.append(grads)

        with tf.name_scope('Concat_outputs'):
            outputs = [[] for _ in tower_outputs[0]]
            for t_outputs in tower_outputs:
                for i, output in enumerate(t_outputs):
                    outputs[i].append(output)
            self.outputs = []
            for outs in outputs:
                self.outputs.append(tf.concat(outs, 0))

        with tf.name_scope('Concat_losses'):
            losses = [[] for _ in range(len(tower_losses[0]))]
            for t_losses in tower_losses:
                for i, loss in enumerate(t_losses):
                    losses[i].append(loss)

        with tf.name_scope('Average_grads'):
            var_grads = [[] for _ in range(len(tower_grads[0]))]
            for t_grads in tower_grads:
                for i, grad in enumerate(t_grads):
                    var_grads[i].append(grad)
            avg_grads = []
            for v_grads in var_grads:
                avg_grads.append(ops.average_gradients(v_grads))

        if self.grad_summ:
            # Add histograms for gradients.
            with tf.name_scope('Grad_summary'):
                grads_summ = []
                for var_grads in avg_grads:
                    for grad, var in var_grads:
                        if grad is not None:
                            grads_summ.append(
                                tf.summary.histogram(
                                    self._remove_tower_name_prefix(var) + '/Grads',
                                    grad))
                summaries.append(tf.summary.merge(grads_summ))

        if self.var_summ:
            # Add histograms for trainable variables.
            t_vars = tf.trainable_variables()
            with tf.name_scope('Var_summary'):
                vars_summ = []
                for var in t_vars:
                    vars_summ.append(
                        tf.summary.histogram(
                            self._remove_tower_name_prefix(var), var))
                summaries.append(tf.summary.merge(vars_summ))

        summaries += self.additional_summaries()

        self._tower_outputs(self.outputs)
        self._build_train_ops(losses, avg_grads)
        self.summary_op = tf.summary.merge(summaries, name='summary_op')
        self.saver = tf.train.Saver()
        self.model_built = True
        utils.count_params()
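# _remove_tower_name_prefix() is used above to name gradient and variable
# summaries but is not shown in this excerpt. A minimal sketch follows; the
# 'tower_<i>/' prefix format is an assumption based on the name_scope used in
# build(), not the repository's actual implementation.
import re


def _remove_tower_name_prefix(self, var):
    """Strip a leading 'tower_<i>/' scope from a variable's op name."""
    name = var.op.name                     # e.g. 'tower_0/conv1/weights'
    return re.sub(r'^tower_[0-9]+/', '', name)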
def model(self, preemptive_reuse=False):
    """Initialize the model to train. Supports multiple GPUs."""
    with tf.variable_scope(tf.get_variable_scope()):
        # Split the data into equal batches and distribute them across GPUs.
        im_a_list = tf.split(self.im_a, len(self.use_gpu))
        im_b_list = tf.split(self.im_b, len(self.use_gpu))
        labels_list = tf.split(self.label, len(self.use_gpu))

        # Initialize the optimizer
        self._opt = tf.train.AdamOptimizer(learning_rate=self.learning_rate)

        # Used to average across towers
        all_grads = []
        all_out = []
        all_loss = []
        batchnorm_updates = []

        for i, gpu_id in enumerate(self.use_gpu):
            print('Initializing graph on gpu(cpu) %i' % gpu_id)
            with tf.device('/gpu:%d' % gpu_id):
                with tf.name_scope('tower_%d' % gpu_id):
                    if preemptive_reuse:
                        tf.get_variable_scope().reuse_variables()

                    im_a, im_b = im_a_list[i], im_b_list[i]
                    labels = labels_list[i]

                    with tf.name_scope('extract_feature_a') as scope:
                        im_a_feat = self.extract_features_resnet50(
                            im_a, scope_name='feature_cnn')
                        self.im_a_feat = im_a_feat
                        # Retain the batch-norm update ops from one branch of
                        # the first tower only: the variables (mean, variance,
                        # offset, scale) are shared across branches and towers.
                        if i == 0:
                            batchnorm_updates = tf.get_collection(
                                tf.GraphKeys.UPDATE_OPS, scope=scope)

                    with tf.name_scope('extract_feature_b'):
                        im_b_feat = self.extract_features_resnet50(
                            im_b, scope_name='feature_cnn', reuse=True)
                        self.im_b_feat = im_b_feat

                    with tf.name_scope('predict_same'):
                        feat_ab = tf.concat([self.im_a_feat, self.im_b_feat],
                                            axis=-1)
                        out_ab = self.predict(feat_ab, name='predict')
                        all_out.append(out_ab)

                    with tf.name_scope('classification_loss'):
                        loss = tf.nn.sigmoid_cross_entropy_with_logits(
                            labels=labels, logits=out_ab)
                        all_loss.append(loss)

                    # After this call all variables are reused; passing
                    # reuse=False later has no effect.
                    tf.get_variable_scope().reuse_variables()

                    grad = self._opt.compute_gradients(
                        loss, var_list=self.get_variables())
                    all_grads.append(grad)  # list of lists of (gradient, variable) tuples

    # Average the gradients across towers and apply them.
    avg_grads = ops.average_gradients(all_grads)
    self.all_loss = all_loss
    self.avg_grads = avg_grads
    self.loss = tf.reduce_mean(all_loss)

    # Trains all variables for now
    apply_grad_op = self._opt.apply_gradients(self.avg_grads,
                                              global_step=self.global_iter)
    if len(batchnorm_updates) != 0:
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        self.opt = tf.group(apply_grad_op, batchnorm_updates_op)
    else:
        self.opt = apply_grad_op

    # For logging results
    self.logits = tf.concat(all_out, axis=0)
    self.pred = tf.nn.sigmoid(self.logits)
    self.cls = tf.round(self.pred)
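# Illustrative usage of the ops created in model() above, assuming that
# self.im_a, self.im_b, and self.label are placeholders created elsewhere in
# the class, that sess is an initialized tf.Session, and that the batch_*
# arrays come from a data pipeline (all of these names are assumptions).
_, loss_val, pred = sess.run(
    [self.opt, self.loss, self.pred],
    feed_dict={self.im_a: batch_a,
               self.im_b: batch_b,
               self.label: batch_labels})
print('loss: %.4f' % loss_val)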
def _prepare_training(self):
    """Prepare training.

    Build the TensorFlow graph. To support multiple GPUs, the mini-batch is
    divided across devices. Training can also be resumed: if a checkpoint
    file exists in FLAGS.classifier_dir/log, it is loaded and training
    restarts from there.
    """
    self.gpu_n = len(FLAGS.gpu_ids.split(','))

    with tf.device('/cpu:0'):
        self.imgs = tf.placeholder(tf.float32,
                                   (FLAGS.batch_size, FLAGS.img_width,
                                    FLAGS.img_height, FLAGS.img_dim),
                                   name='imgs')
        self.labels = tf.placeholder(tf.float32, (FLAGS.batch_size, 26),
                                     name='labels')
        self.is_train = tf.placeholder(tf.bool, name='is_train')
        c_opt = tf.train.AdamOptimizer(learning_rate=0.001)

        # Initialize per-GPU lists
        c_loss = [0] * self.gpu_n
        c_acc = [0] * self.gpu_n
        c_grads = [0] * self.gpu_n
        label_corrects = [0] * self.gpu_n
        label_ns = [0] * self.gpu_n

    is_not_first = False
    for i in range(self.gpu_n):
        with tf.device('/gpu:{}'.format(i)):
            classifier = Classifier(img_size=(FLAGS.img_width, FLAGS.img_height),
                                    img_dim=FLAGS.img_dim,
                                    k_size=3,
                                    class_n=26,
                                    smallest_unit_n=64)
            classified = classifier(self.imgs, is_train=self.is_train,
                                    is_reuse=is_not_first)
            c_loss[i] = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.labels,
                                                        logits=classified))
            label_indices = tf.argmax(self.labels, 1)
            correct_pred = tf.equal(tf.argmax(classified, 1), label_indices)
            c_acc[i] = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            if FLAGS.labelacc:
                # Per-label accuracy: count correct predictions per class.
                masked_label_indices = tf.boolean_mask(label_indices, correct_pred)
                label_corrects[i] = tf.reduce_sum(
                    tf.one_hot(masked_label_indices, 26), axis=0)
                label_ns[i] = tf.reduce_sum(tf.one_hot(label_indices, 26), axis=0)
            c_vars = [var for var in tf.trainable_variables()
                      if 'classifier' in var.name]
            c_grads[i] = c_opt.compute_gradients(c_loss[i], var_list=c_vars)
            is_not_first = True

    with tf.device('/cpu:0'):
        self.c_loss = sum(c_loss) / len(c_loss)
        self.c_acc = sum(c_acc) / len(c_acc)
        if FLAGS.labelacc:
            self.c_acc_by_labels = sum(label_corrects) / sum(label_ns)
        avg_c_grads = average_gradients(c_grads)
        self.c_train = c_opt.apply_gradients(avg_c_grads)

    sess_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(visible_device_list=FLAGS.gpu_ids),
        allow_soft_placement=FLAGS.labelacc,
        log_device_placement=FLAGS.labelacc)
    self.sess = tf.Session(config=sess_config)
    self.saver = tf.train.Saver()

    # If a checkpoint is found, resume training from it
    checkpoint = tf.train.get_checkpoint_state(self.dst_log)
    if checkpoint:
        saver_resume = tf.train.Saver()
        saver_resume.restore(self.sess, checkpoint.model_checkpoint_path)
        self.epoch_start = int(checkpoint.model_checkpoint_path.split('-')[-1])
        print('restore ckpt')
    else:
        self.sess.run(tf.global_variables_initializer())
        self.epoch_start = 0
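# Illustrative epoch loop for the classifier graph built above, resuming from
# self.epoch_start. next_batch(), FLAGS.epoch_n, and the checkpoint file name
# are assumptions, not part of this excerpt.
import os

for epoch in range(self.epoch_start, FLAGS.epoch_n):
    batch_imgs, batch_labels = next_batch(FLAGS.batch_size)   # assumed iterator
    _, loss_val, acc_val = self.sess.run(
        [self.c_train, self.c_loss, self.c_acc],
        feed_dict={self.imgs: batch_imgs,
                   self.labels: batch_labels,   # one-hot, shape (batch, 26)
                   self.is_train: True})
    # Saving with global_step=epoch + 1 matches the resume logic above, which
    # parses the epoch number from the checkpoint path.
    self.saver.save(self.sess, os.path.join(self.dst_log, 'result.ckpt'),
                    global_step=epoch + 1)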