def build_model(self):
    with tf.name_scope("batch_size"):
        # Get batch_size from the first dimension of self.images
        self.batch_size = tf.shape(self.images)[0]
    with tf.variable_scope("cnn"):
        image_emb = slim.fully_connected(self.fc7, self.input_encoding_size,
                                         activation_fn=None, scope='encode_image')
    with tf.variable_scope("rnnlm"):
        # Replicate self.seq_per_img times for each image embedding
        image_emb = tf.reshape(tf.tile(tf.expand_dims(image_emb, 1), [1, self.seq_per_img, 1]),
                               [self.batch_size * self.seq_per_img, self.input_encoding_size])

        # rnn_inputs is a list of inputs, one per time step;
        # time step 0 is the image embedding
        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                              value=tf.nn.embedding_lookup(self.Wemb, self.labels[:, :self.seq_length + 1]))
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]
        rnn_inputs = [image_emb] + rnn_inputs

        # The initial state is zero
        initial_state = self.cell.zero_state(self.batch_size * self.seq_per_img, tf.float32)

        outputs, last_state = tf.contrib.legacy_seq2seq.rnn_decoder(rnn_inputs, initial_state,
                                                                    self.cell, loop_function=None)
        outputs = tf.concat(axis=0, values=outputs[1:])
        self.logits = slim.fully_connected(outputs, self.vocab_size + 1,
                                           activation_fn=None, scope='logit')
        self.logits = tf.split(axis=0, num_or_size_splits=len(rnn_inputs) - 1, value=self.logits)

    with tf.variable_scope("loss"):
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            self.logits,
            [tf.squeeze(label, [1]) for label in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.labels[:, 1:])],  # self.labels[:, 1:] is the target
            [tf.squeeze(mask, [1]) for mask in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.masks[:, 1:])])
        self.cost = tf.reduce_mean(loss)

    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    self.cnn_lr = tf.Variable(0.0, trainable=False)

    # Collect the rnn variables, and create the optimizer of rnn
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
    grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                -self.opt.grad_clip, self.opt.grad_clip)
    #grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
    #                                  self.opt.grad_clip)
    optimizer = utils.get_optimizer(self.opt, self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Collect the cnn variables, and create the optimizer of cnn
    cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
    cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
    #cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
    #                                      self.opt.grad_clip)
    cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
    self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

    tf.summary.scalar('training loss', self.cost)
    tf.summary.scalar('learning rate', self.lr)
    tf.summary.scalar('cnn learning rate', self.cnn_lr)
    self.summaries = tf.summary.merge_all()
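# utils.clip_by_value, used above, is not defined in this excerpt. A minimal
# sketch, assuming it clips each gradient tensor element-wise (unlike the
# commented-out tf.clip_by_global_norm alternative, which rescales the whole
# gradient list by its joint norm):
import tensorflow as tf

def clip_by_value(grads, min_value, max_value):
    """Element-wise clipping of a list of gradients; None gradients pass through."""
    return [tf.clip_by_value(g, min_value, max_value) if g is not None else g
            for g in grads]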
def main(hparams):
    model_info = hparams.model_info
    train_info = hparams.trainloader_info
    val_info = hparams.valloader_info
    test_info = hparams.testloader_info
    optimizer_info = hparams.optimizer_info
    main_info = hparams.main_info

    # initialize model and dataloaders
    model = MMNIST_ConvLSTM(model_info)
    model = model.cuda()
    for name, param in model.named_parameters():
        print(name, param.size())

    num_epochs = main_info['num_epochs']
    # learning rate scheduler
    halve_every = main_info['halve_every']

    train_loader = get_dataloader(train_info)
    val_loader = get_dataloader(val_info)
    test_loader = get_dataloader(test_info)

    optimizer = get_optimizer(optimizer_info, model.parameters())
    criterion = cross_entropy

    for epoch in range(num_epochs):
        if train_loader is not None:
            adjust_learning_rate(optimizer, epoch, halve_every)
            traininfo = {
                'epoch': epoch,
                'num_epochs': num_epochs,
                'clip': main_info['clip']
            }
            train(train_loader, model, optimizer, criterion, traininfo)

        if val_loader is not None:
            valinfo = {'epoch': epoch, 'num_epochs': num_epochs}
            loss = val(val_loader, model, valinfo, criterion)
            res = saver.save(model, optimizer, loss, epoch)
            if res:
                logger.info('\033[96m' + '[Best model]' + '\033[0m: on validation set!')

        if test_loader is not None:
            # TODO
            pass
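# adjust_learning_rate is referenced above but not shown in this excerpt. A
# minimal sketch of the scheduler, assuming "halve_every" means the learning
# rate is halved every `halve_every` epochs from each param group's initial value:
def adjust_learning_rate(optimizer, epoch, halve_every):
    for param_group in optimizer.param_groups:
        # remember the starting rate the first time we touch this group
        param_group.setdefault('initial_lr', param_group['lr'])
        param_group['lr'] = param_group['initial_lr'] * (0.5 ** (epoch // halve_every))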
def build_model(self):
    with tf.name_scope("batch_size"):
        # Get batch_size from the first dimension of self.images
        self.batch_size = tf.shape(self.images)[0]
    with tf.variable_scope("rnnlm"):
        # Flatten the context
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])

        # Initialize the first hidden state from the fc7 feature
        initial_state = utils.get_initial_state(self.fc7, self.cell.state_size)

        # Replicate self.seq_per_img times for each state and image embedding
        self.initial_state = initial_state = utils.expand_feat(initial_state, self.seq_per_img)
        self.flattened_ctx = flattened_ctx = tf.reshape(
            tf.tile(tf.expand_dims(flattened_ctx, 1), [1, self.seq_per_img, 1, 1]),
            [self.batch_size * self.seq_per_img, 196, 512])

        # Project the context; this is used in the attention module.
        # Do it outside the loop to avoid redundant computation.
        # with tf.variable_scope("attention"):
        if self.att_hid_size == 0:
            pctx = slim.fully_connected(self.flattened_ctx, 1, activation_fn=None,
                                        scope='ctx_att')  # (batch * seq_per_img) * 196 * 1
        else:
            pctx = slim.fully_connected(self.flattened_ctx, self.att_hid_size, activation_fn=None,
                                        scope='ctx_att')  # (batch * seq_per_img) * 196 * att_hid_size

        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                              value=tf.nn.embedding_lookup(self.Wemb, self.labels[:, :self.seq_length + 1]))
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

        prev_h = utils.last_hidden_vec(initial_state)

        self.alphas = []
        self.logits = []
        outputs = []
        state = initial_state
        for ind in range(self.seq_length + 1):
            if ind > 0:
                # Reuse the variables after the first timestep.
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("attention"):
                alpha = self.get_alpha(prev_h, pctx)
                self.alphas.append(alpha)
                weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)

            output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_inputs[ind]]), state)
            # Save the current output for the next time step's attention
            prev_h = output
            # Get the score of each word in the vocabulary; 0 is the end token.
            self.logits.append(slim.fully_connected(output, self.vocab_size + 1,
                                                    activation_fn=None, scope='logit'))

    with tf.variable_scope("loss"):
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            self.logits,
            [tf.squeeze(label, [1]) for label in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.labels[:, 1:])],  # self.labels[:, 1:] is the target; ignore the first start token
            [tf.squeeze(mask, [1]) for mask in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.masks[:, 1:])])
        self.cost = tf.reduce_mean(loss)

    self.final_state = state
    self.lr = tf.Variable(0.0, trainable=False)
    self.cnn_lr = tf.Variable(0.0, trainable=False)

    # Collect the rnn variables, and create the optimizer of rnn
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
    grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                -self.opt.grad_clip, self.opt.grad_clip)
    optimizer = utils.get_optimizer(self.opt, self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Collect the cnn variables, and create the optimizer of cnn
    cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
    cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
    cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
    self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

    tf.summary.scalar('training loss', self.cost)
    tf.summary.scalar('learning rate', self.lr)
    tf.summary.scalar('cnn learning rate', self.cnn_lr)
    self.summaries = tf.summary.merge_all()
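# self.get_alpha, called in the loop above, is not shown in this excerpt. A
# minimal sketch of a compatible implementation, assuming standard additive
# (soft) attention over the 196 spatial locations; the scope names 'h_att' and
# 'alpha' are hypothetical, and shapes follow the code above:
import tensorflow as tf
import tensorflow.contrib.slim as slim

def get_alpha(self, prev_h, pctx):
    if self.att_hid_size == 0:
        # pctx: (batch * seq_per_img) x 196 x 1; add a projection of the hidden state
        ph = slim.fully_connected(prev_h, 1, activation_fn=None, scope='h_att')
        score = tf.squeeze(pctx, [2]) + ph  # broadcast over the 196 locations
    else:
        ph = slim.fully_connected(prev_h, self.att_hid_size, activation_fn=None, scope='h_att')
        score = tf.nn.tanh(pctx + tf.expand_dims(ph, 1))  # (batch*seq_per_img) x 196 x att_hid_size
        score = tf.squeeze(slim.fully_connected(score, 1, activation_fn=None, scope='alpha'), [2])
    return tf.nn.softmax(score)  # attention weights over the 196 locations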
def build_model(self):
    with tf.name_scope("batch_size"):
        # Get batch_size from the first dimension of self.images
        self.batch_size = tf.shape(self.images)[0]
    with tf.variable_scope("rnnlm"):
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
        ctx_mean = tf.reduce_mean(flattened_ctx, 1)

        # Initialize the first hidden state with the mean context
        initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)

        # Replicate self.seq_per_img times for each state and image embedding
        self.initial_state = initial_state = utils.expand_feat(initial_state, self.seq_per_img)
        self.flattened_ctx = flattened_ctx = tf.reshape(
            tf.tile(tf.expand_dims(flattened_ctx, 1), [1, self.seq_per_img, 1, 1]),
            [self.batch_size * self.seq_per_img, 196, 512])

        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                              value=tf.nn.embedding_lookup(self.Wemb, self.labels[:, :self.seq_length + 1]))
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

        outputs, last_state = tf.contrib.legacy_seq2seq.attention_decoder(
            rnn_inputs, initial_state, flattened_ctx, self.cell, loop_function=None)
        outputs = tf.concat(axis=0, values=outputs)
        self.logits = slim.fully_connected(outputs, self.vocab_size + 1,
                                           activation_fn=None, scope='logit')
        self.logits = tf.split(axis=0, num_or_size_splits=len(rnn_inputs), value=self.logits)

    with tf.variable_scope("loss"):
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            self.logits,
            [tf.squeeze(label, [1]) for label in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.labels[:, 1:])],  # self.labels[:, 1:] is the target
            [tf.squeeze(mask, [1]) for mask in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.masks[:, 1:])])
        self.cost = tf.reduce_mean(loss)

    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False)
    self.cnn_lr = tf.Variable(0.0, trainable=False)

    # Collect the rnn variables, and create the optimizer of rnn
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
    grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                -self.opt.grad_clip, self.opt.grad_clip)
    #grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
    #                                  self.opt.grad_clip)
    optimizer = utils.get_optimizer(self.opt, self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Collect the cnn variables, and create the optimizer of cnn
    cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
    cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
    #cnn_grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, cnn_tvars),
    #                                      self.opt.grad_clip)
    cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
    self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

    tf.summary.scalar('training loss', self.cost)
    tf.summary.scalar('learning rate', self.lr)
    tf.summary.scalar('cnn learning rate', self.cnn_lr)
    self.summaries = tf.summary.merge_all()
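# utils.get_initial_state, used above, is not defined in this excerpt. A minimal
# sketch, assuming the initial state is predicted from the mean context by a
# learned tanh layer per state component (as in Show, Attend and Tell); the
# scope naming scheme is hypothetical:
import tensorflow as tf
import tensorflow.contrib.slim as slim

def get_initial_state(feat, state_size, scope='init_state'):
    # state_size may be an int, an LSTMStateTuple of ints, or a nested tuple
    # of them (MultiRNNCell); recurse with a unique scope per component.
    if isinstance(state_size, tuple):
        return type(state_size)(*[get_initial_state(feat, s, '%s_%d' % (scope, i))
                                  for i, s in enumerate(state_size)])
    return slim.fully_connected(feat, state_size, activation_fn=tf.nn.tanh, scope=scope)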
def build_model(self):
    with tf.name_scope("batch_size"):
        # Get batch_size from the first dimension of self.images
        self.batch_size = tf.shape(self.images)[0]
    with tf.variable_scope("rnnlm"):
        # Flatten the context
        flattened_ctx = tf.reshape(self.context, [self.batch_size, 196, 512])
        ctx_mean = tf.reduce_mean(flattened_ctx, 1)

        # Initialize the first hidden state with the mean context
        initial_state = utils.get_initial_state(ctx_mean, self.cell.state_size)

        # Replicate self.seq_per_img times for each state and image embedding
        self.initial_state = initial_state = utils.expand_feat(initial_state, self.seq_per_img)
        self.flattened_ctx = flattened_ctx = tf.reshape(
            tf.tile(tf.expand_dims(flattened_ctx, 1), [1, self.seq_per_img, 1, 1]),
            [self.batch_size * self.seq_per_img, 196, 512])

        # Project the context; this is used in the attention module.
        # Do it outside the loop to avoid redundant computation.
        # with tf.variable_scope("attention"):
        if self.att_hid_size == 0:
            pctx = slim.fully_connected(self.flattened_ctx, 1, activation_fn=None,
                                        scope='ctx_att')  # (batch * seq_per_img) * 196 * 1
        else:
            pctx = slim.fully_connected(self.flattened_ctx, self.att_hid_size, activation_fn=None,
                                        scope='ctx_att')  # (batch * seq_per_img) * 196 * att_hid_size

        rnn_inputs = tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                              value=tf.nn.embedding_lookup(self.Wemb, self.labels[:, :self.seq_length + 1]))
        rnn_inputs = [tf.squeeze(input_, [1]) for input_ in rnn_inputs]

        prev_h = utils.last_hidden_vec(initial_state)

        self.alphas = []
        self.logits = []
        outputs = []
        state = initial_state
        for ind in range(self.seq_length + 1):
            if ind > 0:
                # Reuse the variables after the first timestep.
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("attention"):
                alpha = self.get_alpha(prev_h, pctx)
                self.alphas.append(alpha)
                weighted_context = tf.reduce_sum(flattened_ctx * tf.expand_dims(alpha, 2), 1)

            output, state = self.cell(tf.concat(axis=1, values=[weighted_context, rnn_inputs[ind]]), state)
            # Save the current output for the next time step's attention
            prev_h = output
            # Get the score of each word in the vocabulary; 0 is the end token.
            self.logits.append(slim.fully_connected(output, self.vocab_size + 1,
                                                    activation_fn=None, scope='logit'))

    with tf.variable_scope("loss"):
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            self.logits,
            [tf.squeeze(label, [1]) for label in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.labels[:, 1:])],  # self.labels[:, 1:] is the target; ignore the first start token
            [tf.squeeze(mask, [1]) for mask in
             tf.split(axis=1, num_or_size_splits=self.seq_length + 1,
                      value=self.masks[:, 1:])])
        self.cost = tf.reduce_mean(loss)

    self.final_state = state
    self.lr = tf.Variable(0.0, trainable=False)
    self.cnn_lr = tf.Variable(0.0, trainable=False)

    # Collect the rnn variables, and create the optimizer of rnn
    tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='rnnlm')
    grads = utils.clip_by_value(tf.gradients(self.cost, tvars),
                                -self.opt.grad_clip, self.opt.grad_clip)
    optimizer = utils.get_optimizer(self.opt, self.lr)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Collect the cnn variables, and create the optimizer of cnn
    cnn_tvars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='cnn')
    cnn_grads = utils.clip_by_value(tf.gradients(self.cost, cnn_tvars),
                                    -self.opt.grad_clip, self.opt.grad_clip)
    cnn_optimizer = utils.get_cnn_optimizer(self.opt, self.cnn_lr)
    self.cnn_train_op = cnn_optimizer.apply_gradients(zip(cnn_grads, cnn_tvars))

    tf.summary.scalar('training loss', self.cost)
    tf.summary.scalar('learning rate', self.lr)
    tf.summary.scalar('cnn learning rate', self.cnn_lr)
    self.summaries = tf.summary.merge_all()
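# utils.expand_feat and utils.last_hidden_vec, used above, are not shown in
# this excerpt. Minimal sketches under the assumption that states are tensors,
# LSTMStateTuples, or tuples of LSTMStateTuples: expand_feat replicates a state
# seq_per_img times along the batch axis; last_hidden_vec extracts the h vector
# of the (possibly multi-layer) state:
import tensorflow as tf

def expand_feat(state, n):
    # Recurse through tuples (LSTMStateTuple is a namedtuple, so type(state)(*...)
    # rebuilds the same structure).
    if isinstance(state, tuple):
        return type(state)(*[expand_feat(s, n) for s in state])
    dim = state.get_shape().as_list()[-1]
    return tf.reshape(tf.tile(tf.expand_dims(state, 1), [1, n, 1]), [-1, dim])

def last_hidden_vec(state):
    # For MultiRNNCell states take the top layer; for an LSTMStateTuple take h.
    if isinstance(state, tuple) and not hasattr(state, 'h'):
        state = state[-1]
    return state.h if hasattr(state, 'h') else state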
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train generator, classifier and critic jointly."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()
    critic.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")
    optimizer_g = get_optimizer(generator, "Adam")
    optimizer_d = get_optimizer(critic, "Adam")

    # wrap the source and target data loaders in infinite iterators
    data_iter_src = get_inf_iterator(src_data_loader)
    data_iter_tgt = get_inf_iterator(tgt_data_loader)

    # counter
    g_step = 0

    # positive and negative labels
    pos_labels = make_variable(torch.FloatTensor([1]))
    neg_labels = make_variable(torch.FloatTensor([-1]))

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs):
        ###########################
        # 2.1 train discriminator #
        ###########################
        # enable gradient computation for D
        for p in critic.parameters():
            p.requires_grad = True

        # set steps for discriminator
        if g_step < 25 or g_step % 500 == 0:
            # this helps to start with the critic at optimum
            # even in the first iterations.
            critic_iters = 100
        else:
            critic_iters = params.d_steps

        # loop for optimizing discriminator
        for d_step in range(critic_iters):
            # convert images into torch.Variable
            images_src, labels_src = next(data_iter_src)
            images_tgt, _ = next(data_iter_tgt)
            images_src = make_variable(images_src)
            labels_src = make_variable(labels_src.squeeze_())
            images_tgt = make_variable(images_tgt)
            if images_src.size(0) != params.batch_size or \
               images_tgt.size(0) != params.batch_size:
                continue

            # zero gradients for optimizer
            optimizer_d.zero_grad()

            # compute source data loss for discriminator
            feat_src = generator(images_src)
            d_loss_src = critic(feat_src.detach())
            d_loss_src = d_loss_src.mean()
            d_loss_src.backward(neg_labels)

            # compute target data loss for discriminator
            feat_tgt = generator(images_tgt)
            d_loss_tgt = critic(feat_tgt.detach())
            d_loss_tgt = d_loss_tgt.mean()
            d_loss_tgt.backward(pos_labels)

            # compute gradient penalty
            gradient_penalty = calc_gradient_penalty(critic, feat_src.data, feat_tgt.data)
            gradient_penalty.backward()

            # optimize weights of discriminator (d_loss is recorded for logging
            # only; gradients were already accumulated by the backward calls above)
            d_loss = -d_loss_src + d_loss_tgt + gradient_penalty
            optimizer_d.step()

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # compute loss for classifier
        preds_c = classifier(generator(images_src).detach())
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()

        #######################
        # 2.3 train generator #
        #######################
        # avoid computing gradients for D
        for p in critic.parameters():
            p.requires_grad = False

        # zero grad for optimizer of generator
        optimizer_g.zero_grad()

        # compute source data classification loss for generator
        feat_src = generator(images_src)
        preds_c = classifier(feat_src)
        g_loss_cls = criterion(preds_c, labels_src)
        g_loss_cls.backward()

        # compute source data discrimination loss for generator
        feat_src = generator(images_src)
        g_loss_src = critic(feat_src).mean()
        g_loss_src.backward(pos_labels)

        # compute target data discrimination loss for generator
        feat_tgt = generator(images_tgt)
        g_loss_tgt = critic(feat_tgt).mean()
        g_loss_tgt.backward(neg_labels)

        # compute loss for generator (for logging; gradients already accumulated)
        g_loss = g_loss_src - g_loss_tgt + g_loss_cls

        # optimize weights of generator
        optimizer_g.step()

        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        if ((epoch + 1) % params.log_step == 0):
            print("Epoch [{}/{}]:"
                  "d_loss={:.5f} c_loss={:.5f} g_loss={:.5f} "
                  "D(x)={:.5f} D(G(z))={:.5f} GP={:.5f}".format(
                      epoch + 1,
                      params.num_epochs,
                      d_loss.item(),
                      c_loss.item(),
                      g_loss.item(),
                      d_loss_src.item(),
                      d_loss_tgt.item(),
                      gradient_penalty.item()))

        #############################
        # 2.5 save model parameters #
        #############################
        if ((epoch + 1) % params.save_step == 0):
            save_model(critic, "WGAN-GP_critic-{}.pt".format(epoch + 1))
            save_model(classifier, "WGAN-GP_classifier-{}.pt".format(epoch + 1))
            save_model(generator, "WGAN-GP_generator-{}.pt".format(epoch + 1))

    return classifier, generator
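# calc_gradient_penalty, used in the critic loop above, is not defined in this
# excerpt. A minimal sketch of the standard WGAN-GP penalty (Gulrajani et al.),
# assuming the features are 2-D (batch, dim) tensors; the coefficient
# lambda_gp=10 is the paper's default, not a value confirmed by this code:
import torch

def calc_gradient_penalty(critic, real_data, fake_data, lambda_gp=10.0):
    # interpolate between real and fake features
    alpha = torch.rand(real_data.size(0), 1).expand(real_data.size())
    alpha = alpha.to(real_data.device)
    interpolates = (alpha * real_data + (1 - alpha) * fake_data).requires_grad_(True)

    # penalize deviation of the critic's gradient norm from 1
    critic_out = critic(interpolates)
    gradients = torch.autograd.grad(outputs=critic_out, inputs=interpolates,
                                    grad_outputs=torch.ones_like(critic_out),
                                    create_graph=True, retain_graph=True)[0]
    return lambda_gp * ((gradients.norm(2, dim=1) - 1) ** 2).mean()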
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt,
                 pseudo_labels, plot):
    """Perform Domain Adaptation between source and target domains."""
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()

    if 0:
        optimType = "Adam"
        cfg.learning_rate = 1.0E-4
    else:
        optimType = "sgd"
        cfg.learning_rate = 1.0E-4

    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    # get labelled target dataset
    print('pseudo_labels = %s' % str(pseudo_labels))
    target_dataset_labelled = get_dummy(target_dataset, excerpt, pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])
    print('target_dataset_labelled = %d' % len(target_dataset_labelled))

    # start training
    plt.figure()
    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        losses = []

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = make_data_loader(target_dataset_labelled)
        target_dataloader_labelled_iter = get_inf_iterator(target_dataloader_labelled)

        if 0:
            plt.figure()
            atr.showDataSet(target_dataloader_labelled)
            plt.waitforbuttonpress()

        if 0:
            # There's a bug here: the labels are not the same data type. Print them out!
            source_dataloader_iter = get_inf_iterator(make_data_loader(source_dataset))
            a, b = next(source_dataloader_iter)
            c, d = next(target_dataloader_labelled_iter)
            print('source labels = {}'.format(b))
            print('target labels = {}'.format(d))
            sys.exit(0)

        for epoch in range(cfg.num_epochs_adapt):
            if optimType == 'sgd':
                adjustLearningRate(optimizer_F, cfg.learning_rate, epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_1, cfg.learning_rate, epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_2, cfg.learning_rate, epoch, cfg.num_epochs_adapt)
                adjustLearningRate(optimizer_F_t, cfg.learning_rate, epoch, cfg.num_epochs_adapt)

            for step, rez in enumerate(merged_dataloader):
                #!!print('rez = %s' % rez)
                images, labels = rez
                if images.shape[0] < cfg.batch_size:
                    print('WARNING: batch of size %d smaller than desired %d: skipping' %
                          (images.shape[0], cfg.batch_size))
                    continue

                # sample from T_l
                images_tgt, labels_tgt = next(target_dataloader_labelled_iter)
                while images_tgt.shape[0] < cfg.batch_size:
                    print('WARNING: target batch of size %d smaller than desired %d' %
                          (images_tgt.shape[0], cfg.batch_size))
                    images_tgt, labels_tgt = next(target_dataloader_labelled_iter)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                #print('images shape = {}'.format(images.shape))#!!
                out_F = F(images)
                #print('out_F = {}'.format(out_F.shape))#!!
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # compute labelling loss
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + 0.03 * loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                losses.append(loss_F_t.item())

                # print step info
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item()))

            #!!print('end of loop')

            if plot:
                plt.clf()
                plt.plot(losses)
                plt.grid(1)
                plt.title('Loss for domain adaptation, k = {}/{}, epoch = {}/{}'
                          .format(k, cfg.num_epochs_k, epoch, cfg.num_epochs_adapt))
                plt.waitforbuttonpress(0.0001)

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        excerpt, pseudo_labels = generate_labels(F, F_1, F_2, target_dataset,
                                                 num_target, useWeightedSampling=True)
        print(">>> Generate pseudo labels [{}] num_target = {}".format(
            len(target_dataset_labelled), num_target))
        print('sizes = {}, {}, excerpt = {}, \npseudo_labels = {}'.format(
            len(excerpt), len(pseudo_labels), excerpt, pseudo_labels))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset, excerpt, pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
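# get_inf_iterator, used throughout these training loops, is not shown in this
# excerpt. A minimal sketch, assuming it wraps a DataLoader so that next()
# never raises StopIteration:
def get_inf_iterator(data_loader):
    """Yield batches from data_loader forever, restarting at each epoch end."""
    while True:
        for batch in data_loader:
            yield batch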
def pre_train(F, F_1, F_2, F_t, source_data, plot):
    """Pre-train models on source domain dataset."""
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    if 0:
        optimType = "Adam"
        cfg.learning_rate = 1.0E-4
    else:
        optimType = "sgd"
        cfg.learning_rate = 1.0E-3
    optimizer_F = get_optimizer(F, optimType)
    optimizer_F_1 = get_optimizer(F_1, optimType)
    optimizer_F_2 = get_optimizer(F_2, optimType)
    optimizer_F_t = get_optimizer(F_t, optimType)

    losses = []
    if plot:
        plt.figure()

    # start training
    for epoch in range(cfg.num_epochs_pre):
        if optimType == 'sgd':
            adjustLearningRate(optimizer_F, cfg.learning_rate, epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_1, cfg.learning_rate, epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_2, cfg.learning_rate, epoch, cfg.num_epochs_pre)
            adjustLearningRate(optimizer_F_t, cfg.learning_rate, epoch, cfg.num_epochs_pre)

        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)
            #!! out_F = torch.flatten(out_F, 1)
            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + 0.03 * loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            losses.append(loss_F.item())

            # print step info
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})".format(
                          epoch + 1,
                          cfg.num_epochs_pre,
                          step + 1,
                          len(source_data),
                          loss_F.item(),
                          loss_F_1.item(),
                          loss_F_2.item(),
                          loss_F_t.item(),
                          loss_similiar.item()))

        if plot:
            plt.clf()
            plt.plot(losses)
            plt.grid(1)
            plt.title('Loss for pre-training')
            plt.waitforbuttonpress(0.0001)

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
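# calc_similiar_penalty (sic), used above, is not defined in this excerpt. A
# minimal sketch of the weight-similarity penalty from asymmetric tri-training
# (Saito et al.), which pushes the two labellers F_1 and F_2 to rely on
# different features. The `first_linear` helper and the assumption that each
# labeller begins with a linear layer are hypothetical:
import torch
import torch.nn as nn

def calc_similiar_penalty(F_1, F_2):
    """|W_1^T W_2| summed over the first linear layers of the two labellers."""
    def first_linear(net):
        return next(m for m in net.modules() if isinstance(m, nn.Linear))
    W_1 = first_linear(F_1).weight
    W_2 = first_linear(F_2).weight
    return torch.sum(torch.abs(torch.mm(W_1.transpose(0, 1), W_2)))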
def domain_adapt(F, F_1, F_2, F_t, source_dataset, target_dataset, excerpt, pseudo_labels):
    """Perform Domain Adaptation between source and target domains."""
    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # get labelled target dataset
    target_dataset_labelled = get_dummy(target_dataset, excerpt, pseudo_labels,
                                        get_dataset=True)

    # merge source data and target data
    merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

    # start training
    for k in range(cfg.num_epochs_k):
        # set train state for Dropout and BN layers
        F.train()
        F_1.train()
        F_2.train()
        F_t.train()

        merged_dataloader = make_data_loader(merged_dataset)
        target_dataloader_labelled = get_inf_iterator(
            make_data_loader(target_dataset_labelled))

        for epoch in range(cfg.num_epochs_adapt):
            for step, (images, labels) in enumerate(merged_dataloader):
                # sample from T_l
                images_tgt, labels_tgt = next(target_dataloader_labelled)

                # convert into torch.autograd.Variable
                images = make_variable(images)
                labels = make_variable(labels)
                images_tgt = make_variable(images_tgt)
                labels_tgt = make_variable(labels_tgt)

                # zero-grad optimizer
                optimizer_F.zero_grad()
                optimizer_F_1.zero_grad()
                optimizer_F_2.zero_grad()
                optimizer_F_t.zero_grad()

                # forward networks
                out_F = F(images)
                out_F_1 = F_1(out_F)
                out_F_2 = F_2(out_F)
                out_F_t = F_t(F(images_tgt))

                # compute labelling loss
                loss_similiar = calc_similiar_penalty(F_1, F_2)
                loss_F_1 = criterion(out_F_1, labels)
                loss_F_2 = criterion(out_F_2, labels)
                loss_labelling = loss_F_1 + loss_F_2 + loss_similiar
                loss_labelling.backward()

                # compute target specific loss
                loss_F_t = criterion(out_F_t, labels_tgt)
                loss_F_t.backward()

                # optimize
                optimizer_F.step()
                optimizer_F_1.step()
                optimizer_F_2.step()
                optimizer_F_t.step()

                # print step info
                if ((step + 1) % cfg.log_step == 0):
                    print("K[{}/{}] Epoch [{}/{}] Step[{}/{}] Loss("
                          "labelling={:.5f} target={:.5f})".format(
                              k + 1,
                              cfg.num_epochs_k,
                              epoch + 1,
                              cfg.num_epochs_adapt,
                              step + 1,
                              len(merged_dataloader),
                              loss_labelling.item(),
                              loss_F_t.item()))

        # re-compute the number of selected target data
        num_target = (k + 2) * len(source_dataset) // 20
        num_target = min(num_target, cfg.num_target_max)
        print(">>> Set num of sampled target data: {}".format(num_target))

        # re-generate pseudo labels
        excerpt, pseudo_labels = genarate_labels(F, F_1, F_2, target_dataset, num_target)
        print(">>> Generate pseudo labels [{}]".format(len(target_dataset_labelled)))

        # get labelled target dataset
        target_dataset_labelled = get_dummy(target_dataset, excerpt, pseudo_labels,
                                            get_dataset=True)

        # re-merge source data and target data
        merged_dataset = ConcatDataset([source_dataset, target_dataset_labelled])

        # save model
        if ((k + 1) % cfg.save_step == 0):
            save_model(F, "adapt-F-{}.pt".format(k + 1))
            save_model(F_1, "adapt-F_1-{}.pt".format(k + 1))
            save_model(F_2, "adapt-F_2-{}.pt".format(k + 1))
            save_model(F_t, "adapt-F_t-{}.pt".format(k + 1))

    # save final model
    save_model(F, "adapt-F-final.pt")
    save_model(F_1, "adapt-F_1-final.pt")
    save_model(F_2, "adapt-F_2-final.pt")
    save_model(F_t, "adapt-F_t-final.pt")
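# genarate_labels (sic), called above, is not defined in this excerpt. A minimal
# sketch of one common pseudo-labelling rule from asymmetric tri-training: keep
# target samples on which F_1 and F_2 agree with high confidence, up to
# num_target. The threshold and agreement rule are assumptions, not the
# author's confirmed method; indices are only meaningful if make_data_loader
# preserves dataset order (no shuffling).
def genarate_labels(F, F_1, F_2, target_dataset, num_target, threshold=0.9):
    F.eval(); F_1.eval(); F_2.eval()
    excerpt, pseudo_labels = [], []
    offset = 0
    with torch.no_grad():
        for images, _ in make_data_loader(target_dataset):
            feats = F(make_variable(images))
            prob_1 = torch.softmax(F_1(feats), dim=1)
            prob_2 = torch.softmax(F_2(feats), dim=1)
            conf_1, pred_1 = prob_1.max(dim=1)
            conf_2, pred_2 = prob_2.max(dim=1)
            agree = (pred_1 == pred_2) & (torch.min(conf_1, conf_2) > threshold)
            for i in agree.nonzero().flatten().tolist():
                if len(excerpt) < num_target:
                    excerpt.append(offset + i)
                    pseudo_labels.append(pred_1[i].item())
            offset += images.size(0)
    return excerpt, pseudo_labels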
def pre_train(F, F_1, F_2, F_t, source_data):
    """Pre-train models on source domain dataset."""
    # set train state for Dropout and BN layers
    F.train()
    F_1.train()
    F_2.train()
    F_t.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_F = get_optimizer(F, "Adam")
    optimizer_F_1 = get_optimizer(F_1, "Adam")
    optimizer_F_2 = get_optimizer(F_2, "Adam")
    optimizer_F_t = get_optimizer(F_t, "Adam")

    # start training
    for epoch in range(cfg.num_epochs_pre):
        for step, (images, labels) in enumerate(source_data):
            # convert into torch.autograd.Variable
            images = make_variable(images)
            labels = make_variable(labels)

            # zero-grad optimizer
            optimizer_F.zero_grad()
            optimizer_F_1.zero_grad()
            optimizer_F_2.zero_grad()
            optimizer_F_t.zero_grad()

            # forward networks
            out_F = F(images)
            out_F_1 = F_1(out_F)
            out_F_2 = F_2(out_F)
            out_F_t = F_t(out_F)

            # compute loss
            loss_similiar = calc_similiar_penalty(F_1, F_2)
            loss_F_1 = criterion(out_F_1, labels)
            loss_F_2 = criterion(out_F_2, labels)
            loss_F_t = criterion(out_F_t, labels)
            loss_F = loss_F_1 + loss_F_2 + loss_F_t + loss_similiar
            loss_F.backward()

            # optimize
            optimizer_F.step()
            optimizer_F_1.step()
            optimizer_F_2.step()
            optimizer_F_t.step()

            # print step info
            if ((step + 1) % cfg.log_step == 0):
                print("Epoch [{}/{}] Step[{}/{}] Loss("
                      "Total={:.5f} F_1={:.5f} F_2={:.5f} "
                      "F_t={:.5f} sim={:.5f})".format(
                          epoch + 1,
                          cfg.num_epochs_pre,
                          step + 1,
                          len(source_data),
                          loss_F.item(),
                          loss_F_1.item(),
                          loss_F_2.item(),
                          loss_F_t.item(),
                          loss_similiar.item()))

        # save model
        if ((epoch + 1) % cfg.save_step == 0):
            save_model(F, "pretrain-F-{}.pt".format(epoch + 1))
            save_model(F_1, "pretrain-F_1-{}.pt".format(epoch + 1))
            save_model(F_2, "pretrain-F_2-{}.pt".format(epoch + 1))
            save_model(F_t, "pretrain-F_t-{}.pt".format(epoch + 1))

    # save final model
    save_model(F, "pretrain-F-final.pt")
    save_model(F_1, "pretrain-F_1-final.pt")
    save_model(F_2, "pretrain-F_2-final.pt")
    save_model(F_t, "pretrain-F_t-final.pt")
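# make_variable, used throughout, is not defined in this excerpt. A minimal
# sketch: torch.autograd.Variable is a no-op wrapper in modern PyTorch, so the
# helper presumably just moves tensors to the GPU when one is available:
import torch

def make_variable(tensor):
    return tensor.cuda() if torch.cuda.is_available() else tensor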
def train(classifier, generator, critic, src_data_loader, tgt_data_loader):
    """Train generator, classifier and critic jointly."""
    ####################
    # 1. setup network #
    ####################

    # set train state for Dropout and BN layers
    classifier.train()
    generator.train()

    # set criterion for classifier and optimizers
    criterion = nn.CrossEntropyLoss()
    optimizer_c = get_optimizer(classifier, "Adam")

    # wrap the source data loader in an infinite iterator
    data_iter_src = get_inf_iterator(src_data_loader)

    # counter
    g_step = 0

    ####################
    # 2. train network #
    ####################
    for epoch in range(params.num_epochs):
        ###########################
        # 2.1 train discriminator #
        ###########################
        # enable gradient computation for D
        for p in critic.parameters():
            p.requires_grad = True

        # set steps for discriminator
        if g_step < 25 or g_step % 500 == 0:
            # this helps to start with the critic at optimum
            # even in the first iterations.
            critic_iters = 100
        else:
            critic_iters = params.d_steps
        critic_iters = 0  # the discriminator loop below is disabled

        # loop for optimizing discriminator
        #for d_step in range(critic_iters):

        # convert images into torch.Variable
        images_src, labels_src = next(data_iter_src)
        images_src = make_variable(images_src).cuda()
        labels_src = make_variable(labels_src.squeeze_()).cuda()
        # print(type(images_src))

        ########################
        # 2.2 train classifier #
        ########################

        # zero gradients for optimizer
        optimizer_c.zero_grad()

        # compute loss for classifier
        preds_c = classifier(generator(images_src))
        c_loss = criterion(preds_c, labels_src)

        # optimize source classifier
        c_loss.backward()
        optimizer_c.step()

        g_step += 1

        ##################
        # 2.4 print info #
        ##################
        if ((epoch + 1) % 500 == 0):
            # print("Epoch [{}/{}]: c_loss={:.5f}"
            #       .format(epoch + 1, params.num_epochs, c_loss.item()))
            test(classifier, generator, src_data_loader, params.src_dataset)

        if ((epoch + 1) % 500 == 0):
            save_model(generator, "Mnist-generator-{}.pt".format(epoch + 1))
            save_model(classifier, "Mnist-classifier-{}.pt".format(epoch + 1))
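# get_optimizer(net, "Adam"), used above, is not shown in this excerpt. A
# minimal sketch, assuming hyperparameters live in the shared `params` module;
# the attribute names (params.learning_rate, params.beta1, params.beta2) are
# hypothetical:
import torch.optim as optim

def get_optimizer(net, name="Adam"):
    if name.lower() == "adam":
        return optim.Adam(net.parameters(), lr=params.learning_rate,
                          betas=(params.beta1, params.beta2))
    if name.lower() == "sgd":
        return optim.SGD(net.parameters(), lr=params.learning_rate)
    raise ValueError("unknown optimizer: %s" % name)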