def __init__(self, vocab_size, batch_size, pre_gen_epochs, pre_dis_epochs, gan_epochs,
             generate_sum, sequence_len, lr, real_file, fake_file, eval_file, update_rate):
    super(Solver, self).__init__()
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.pre_gen_epochs = pre_gen_epochs
    self.pre_dis_epochs = pre_dis_epochs
    self.gan_epochs = gan_epochs
    self.generate_sum = generate_sum
    self.sequence_len = sequence_len
    self.lr = lr
    self.real_file = real_file
    self.fake_file = fake_file
    self.eval_file = eval_file
    self.update_rate = update_rate
    self.discriminator = discriminator.Discriminator(
        sequence_len, vocab_size, DisParams.emb_dim,
        DisParams.filter_sizes, DisParams.num_filters, DisParams.dropout)
    self.generator = generator.Generator(
        vocab_size, GenParams.emb_dim, GenParams.hidden_dim, GenParams.num_layers)
    self.target_lstm = target_lstm.TargetLSTM(
        vocab_size, GenParams.emb_dim, GenParams.hidden_dim, GenParams.num_layers)
    self.discriminator = util.to_cuda(self.discriminator)
    self.generator = util.to_cuda(self.generator)
    self.target_lstm = util.to_cuda(self.target_lstm)
def train_gan(self, backend):
    rollout = Rollout(self.generator, self.discriminator, self.update_rate)
    print('\nStart Adversarial Training......')
    gen_optim = torch.optim.Adam(self.generator.parameters(), self.lr)
    dis_optim = torch.optim.Adam(self.discriminator.parameters(), self.lr)
    dis_criterion = util.to_cuda(nn.BCEWithLogitsLoss(size_average=False))
    gen_criterion = util.to_cuda(nn.CrossEntropyLoss(size_average=False, reduce=True))
    for epoch in range(self.gan_epochs):
        start = time.time()
        for _ in range(1):
            samples = self.generator.sample(self.batch_size, self.sequence_len)  # (batch_size, sequence_len)
            zeros = util.to_var(torch.zeros(self.batch_size, 1).long())  # (batch_size, 1), start tokens
            # prepend the start token and drop the last step so inputs are shifted right
            inputs = torch.cat([zeros, samples], dim=1)[:, :-1]  # (batch_size, sequence_len)
            rewards = rollout.reward(samples, 16)  # (batch_size, sequence_len)
            rewards = util.to_var(torch.from_numpy(rewards))
            logits = self.generator(inputs)  # (batch_size, vocab_size, sequence_len)
            pg_loss = self.pg_loss(logits, samples, rewards)
            gen_optim.zero_grad()
            pg_loss.backward()
            gen_optim.step()
            print('generator updated via policy gradient......')
        if epoch % 10 == 0:
            util.generate_samples(self.generator, self.batch_size, self.sequence_len,
                                  self.generate_sum, self.eval_file)
            eval_data = GenData(self.eval_file)
            eval_data_loader = DataLoader(eval_data, batch_size=self.batch_size,
                                          shuffle=True, num_workers=8)
            loss = self.eval_epoch(self.target_lstm, eval_data_loader, gen_criterion)
            print('epoch: [{0:d}], true loss: [{1:.4f}]'.format(epoch, loss))
        for _ in range(1):
            util.generate_samples(self.generator, self.batch_size, self.sequence_len,
                                  self.generate_sum, self.fake_file)
            dis_data = DisData(self.real_file, self.fake_file)
            dis_data_loader = DataLoader(dis_data, batch_size=self.batch_size,
                                         shuffle=True, num_workers=8)
            for _ in range(1):
                loss = self.train_epoch(self.discriminator, dis_data_loader,
                                        dis_criterion, dis_optim)
                print('discriminator updated via gan loss......')
        rollout.update_params()
        end = time.time()
        print('time: [{:.3f}s/epoch] in {}'.format(end - start, backend))
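# The policy-gradient loss `self.pg_loss` is called in `train_gan` above but not defined
# in this section. The standalone sketch below shows one common SeqGAN-style formulation,
# assuming `logits` is shaped (batch_size, vocab_size, sequence_len), `samples` holds the
# sampled token ids, and `rewards` holds per-token rollout rewards. The name and exact
# signature are assumptions, not the repository's confirmed implementation.
import torch
import torch.nn.functional as F

def pg_loss_sketch(logits, samples, rewards):
    # log-probabilities over the vocabulary at each timestep: (batch, vocab, seq_len)
    log_probs = F.log_softmax(logits, dim=1)
    # pick the log-prob of each sampled token: (batch, seq_len)
    picked = log_probs.gather(1, samples.unsqueeze(1)).squeeze(1)
    # REINFORCE objective: maximize reward-weighted log-likelihood, i.e. minimize its negative
    return -(picked * rewards).sum()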
def __init__(self, window_size, num_cnn_layers, cnn_hidden_dim, num_mlp_layers,
             mlp_hidden_dim, num_classes, embeddings, pooling=max_pool_seq, gpu=False):
    super(PooledCnnClassifier, self).__init__()
    self.window_size = window_size
    self.hidden_dim = cnn_hidden_dim
    self.num_cnn_layers = num_cnn_layers
    self.num_mlp_layers = num_mlp_layers
    self.num_classes = num_classes
    self.embeddings = embeddings
    self.pooling = pooling
    self.cnn = \
        Cnn(len(embeddings[0]), cnn_hidden_dim, num_cnn_layers,
            cnn_hidden_dim, window_size, gpu=gpu)
    self.mlp = \
        MLP(cnn_hidden_dim, mlp_hidden_dim, num_mlp_layers, num_classes)
    self.to_cuda = to_cuda(gpu)
    print("# params:", sum(p.nelement() for p in self.parameters()))
def pretrain_gen(self):
    """ pretrain the generator """
    util.generate_samples(self.target_lstm, self.batch_size, self.sequence_len,
                          self.generate_sum, self.real_file)
    gen_data = GenData(self.real_file)
    gen_data_loader = DataLoader(gen_data, batch_size=self.batch_size,
                                 shuffle=True, num_workers=8)
    gen_criterion = util.to_cuda(
        nn.CrossEntropyLoss(size_average=False, reduce=True))
    gen_optim = torch.optim.Adam(self.generator.parameters(), self.lr)
    print('\nPretrain generator......')
    for epoch in range(self.pre_gen_epochs):
        train_loss = self.train_epoch(self.generator, gen_data_loader,
                                      gen_criterion, gen_optim)
        util.generate_samples(self.generator, self.batch_size, self.sequence_len,
                              self.generate_sum, self.eval_file)
        eval_data = GenData(self.eval_file)
        eval_data_loader = DataLoader(eval_data, batch_size=self.batch_size,
                                      shuffle=True, num_workers=8)
        eval_loss = self.eval_epoch(self.target_lstm, eval_data_loader, gen_criterion)
        print(
            'epoch: {:4d}, train_loss: {:6.4f}, eval_loss: {:6.4f}'.format(
                epoch, train_loss, eval_loss))
def evaluate_accuracy(model, data, batch_size, gpu, debug=0):
    n = float(len(data))
    correct = 0
    num_1s = 0
    correct_1 = 0
    false_negative = 0
    for batch in chunked_sorted(data, batch_size):
        batch_obj = Batch([x for x, y in batch], model.embeddings, to_cuda(gpu))
        gold = [y for x, y in batch]
        predicted = model.predict(batch_obj, debug)
        num_1s += predicted.count(1)
        correct += sum(1 for pred, gold in zip(predicted, gold) if pred == gold)
        correct_1 += sum(1 for pred, gold in zip(predicted, gold)
                         if pred == gold and gold == 1)
        false_negative += sum(1 for pred, gold in zip(predicted, gold)
                              if pred != gold and gold == 1)
    precision = correct_1 / max(num_1s, 1)
    # guard the denominator so datasets with no gold positives don't divide by zero
    recall = correct_1 / max(false_negative + correct_1, 1)
    print("num predicted 1s:", num_1s)
    print("num gold 1s:     ", sum(gold == 1 for _, gold in data))
    print("precision", precision)
    print("recall", recall)
    print("f1 score", 2 * precision * recall / max(precision + recall, 1))
    return correct / n
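# A quick sanity check of the precision/recall bookkeeping above, using hypothetical
# predictions and gold labels (the numbers are illustrative only, not from the project):
#   predicted = [1, 1, 0, 0], gold = [1, 0, 1, 0]
#   num_1s = 2 (predicted positives), correct_1 = 1 (true positives), false_negative = 1,
#   so precision = 1/2 = 0.5, recall = 1/2 = 0.5, and F1 = 2 * 0.5 * 0.5 / 1.0 = 0.5.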
def pretrain_dis(self):
    dis_criterion = util.to_cuda(nn.BCEWithLogitsLoss(size_average=False))
    dis_optim = torch.optim.Adam(self.discriminator.parameters(), self.lr)
    print('\nPretrain discriminator......')
    for epoch in range(self.pre_dis_epochs):
        util.generate_samples(self.generator, self.batch_size, self.sequence_len,
                              self.generate_sum, self.fake_file)
        dis_data = DisData(self.real_file, self.fake_file)
        dis_data_loader = DataLoader(dis_data, batch_size=self.batch_size,
                                     shuffle=True, num_workers=8)
        loss = self.train_epoch(self.discriminator, dis_data_loader,
                                dis_criterion, dis_optim)
        print('epoch: [{0:d}], loss: [{1:.4f}]'.format(epoch, loss))
def compute_loss(model, batch, num_classes, gold_output, loss_function, gpu,
                 debug=0, dropout=None):
    time1 = monotonic()
    output = model.forward(batch, debug, dropout)
    if debug:
        time2 = monotonic()
        print("Forward total in loss: {}".format(round(time2 - time1, 3)))
    return loss_function(
        log_softmax(output).view(batch.size(), num_classes),
        to_cuda(gpu)(fixed_var(LongTensor(gold_output)))
    )
def __init__(self, mlp_hidden_dim, num_mlp_layers, num_classes, embeddings, gpu=False):
    super(DanClassifier, self).__init__()
    self.to_cuda = to_cuda(gpu)
    self.embeddings = embeddings
    self.word_dim = len(embeddings[0])
    self.mlp = MLP(self.word_dim, mlp_hidden_dim, num_mlp_layers, num_classes)
    print("# params:", sum(p.nelement() for p in self.parameters()))
def evaluate_accuracy(model, data, batch_size, gpu, debug=0):
    n = float(len(data))
    correct = 0
    num_1s = 0
    for batch in chunked_sorted(data, batch_size):
        batch_obj = Batch([x for x, y in batch], model.embeddings, to_cuda(gpu))
        gold = [y for x, y in batch]
        predicted = model.predict(batch_obj, debug)
        num_1s += predicted.count(1)
        correct += sum(1 for pred, gold in zip(predicted, gold) if pred == gold)
    print("num predicted 1s:", num_1s)
    print("num gold 1s:     ", sum(gold == 1 for _, gold in data))
    return correct / n
def pretrain_gen(self):
    util.generate_samples(self.target_lstm, self.batch_size, self.sequence_len,
                          self.generate_sum, self.real_file)
    gen_data = GenData(self.real_file)
    gen_data_loader = DataLoader(gen_data, batch_size=self.batch_size,
                                 shuffle=True, num_workers=8)
    gen_criterion = util.to_cuda(nn.CrossEntropyLoss(size_average=False, reduce=True))
    gen_optim = torch.optim.Adam(self.generator.parameters(), self.lr)
    print('\nPretrain generator......')
    for epoch in range(self.pre_gen_epochs):
        loss = self.train_epoch(self.generator, gen_data_loader, gen_criterion, gen_optim)
        print('epoch: [{0:d}], model loss: [{1:.4f}]'.format(epoch, loss))
        util.generate_samples(self.generator, self.batch_size, self.sequence_len,
                              self.generate_sum, self.eval_file)
        eval_data = GenData(self.eval_file)
        eval_data_loader = DataLoader(eval_data, batch_size=self.batch_size,
                                      shuffle=True, num_workers=8)
        loss = self.eval_epoch(self.target_lstm, eval_data_loader, gen_criterion)
        print('epoch: [{0:d}], true loss: [{1:.4f}]'.format(epoch, loss))
def __init__(self, input_dim, hidden_dim, cell_type=LSTM, gpu=False):
    super(Rnn, self).__init__()
    self.hidden_dim = hidden_dim
    self.to_cuda = to_cuda(gpu)
    self.input_dim = input_dim
    self.cell_type = cell_type
    self.rnn = self.cell_type(input_size=self.input_dim,
                              hidden_size=hidden_dim,
                              num_layers=1,
                              bidirectional=True)
    self.num_directions = 2  # We're a *bi*LSTM
    self.start_hidden_state = \
        Parameter(self.to_cuda(
            torch.randn(self.num_directions, 1, self.hidden_dim)
        ))
    self.start_cell_state = \
        Parameter(self.to_cuda(
            torch.randn(self.num_directions, 1, self.hidden_dim)
        ))
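# The learned start states above are shaped (num_directions, 1, hidden_dim), i.e. with a
# singleton batch axis. The forward pass is not shown in this section; the helper below is
# only an assumed illustration of how such states are typically broadcast to the batch
# before being handed to the underlying LSTM/GRU, not the project's actual code.
def expand_start_states_sketch(rnn_module, batch_size):
    # repeat the single learned start state across the batch dimension
    h0 = rnn_module.start_hidden_state.expand(
        rnn_module.num_directions, batch_size, rnn_module.hidden_dim).contiguous()
    c0 = rnn_module.start_cell_state.expand(
        rnn_module.num_directions, batch_size, rnn_module.hidden_dim).contiguous()
    return h0, c0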
def test_same_forward_for_diff_batches(self):
    """ Test that different batch sizes yield the same `forward` results """
    # for each batch size, chunk data into batches, run model.forward,
    # then flatten results into a list (one NUM_CLASSES x 1 vec per doc).
    for model in self.models:
        forward_results = [
            [
                fwd
                for chunk in chunked_sorted(self.data, batch_size)
                for fwd in model.forward(Batch([x for x, y in chunk],
                                               self.embeddings,
                                               to_cuda(GPU))).data
            ]
            for batch_size in self.batch_sizes
        ]
        # transpose, so doc_forwards are all the diff batch sizes for a given doc
        for doc_forwards in zip(*forward_results):
            # make sure adjacent batch sizes predict the same probs
            for batch_size_a, batch_size_b in zip(doc_forwards, doc_forwards[1:]):
                for y in range(NUM_CLASSES):
                    self.assertAlmostEqual(batch_size_a[y], batch_size_b[y], places=4)
def __init__(self, hidden_dim, mlp_hidden_dim, num_mlp_layers, num_classes,
             embeddings, cell_type=LSTM, gpu=False):
    super(AveragingRnnClassifier, self).__init__()
    self.embeddings = embeddings
    self.rnn = \
        Rnn(len(embeddings[0]), hidden_dim, cell_type=cell_type, gpu=gpu)
    self.mlp = \
        MLP(self.rnn.num_directions * self.rnn.hidden_dim,
            mlp_hidden_dim, num_mlp_layers, num_classes)
    self.to_cuda = to_cuda(gpu)
    print("# params:", sum(p.nelement() for p in self.parameters()))
def train(train_data, dev_data, model, num_classes, model_save_dir, num_iterations,
          model_file_prefix, learning_rate, batch_size, run_scheduler=False, gpu=False,
          clip=None, max_len=-1, debug=0, dropout=0, word_dropout=0, patience=1000):
    """ Train a model on all the given docs """
    optimizer = Adam(model.parameters(), lr=learning_rate)
    loss_function = NLLLoss(None, False)

    enable_gradient_clipping(model, clip)

    if dropout:
        dropout = torch.nn.Dropout(dropout)
    else:
        dropout = None

    debug_print = int(100 / batch_size) + 1

    writer = None
    if model_save_dir is not None:
        writer = SummaryWriter(os.path.join(model_save_dir, "logs"))

    if run_scheduler:
        scheduler = ReduceLROnPlateau(optimizer, 'min', 0.1, 10, True)

    best_dev_loss = 100000000
    best_dev_loss_index = -1
    best_dev_acc = -1
    start_time = monotonic()

    for it in range(num_iterations):
        np.random.shuffle(train_data)

        loss = 0.0
        i = 0
        for batch in shuffled_chunked_sorted(train_data, batch_size):
            batch_obj = Batch([x[0] for x in batch], model.embeddings,
                              to_cuda(gpu), word_dropout, max_len)
            gold = [x[1] for x in batch]
            loss += torch.sum(
                train_batch(model, batch_obj, num_classes, gold, optimizer,
                            loss_function, gpu, debug, dropout))

            if i % debug_print == (debug_print - 1):
                print(".", end="", flush=True)
            i += 1

        if writer is not None:
            for name, param in model.named_parameters():
                writer.add_scalar("parameter_mean/" + name, param.data.mean(), it)
                writer.add_scalar("parameter_std/" + name, param.data.std(), it)
                if param.grad is not None:
                    writer.add_scalar("gradient_mean/" + name, param.grad.data.mean(), it)
                    writer.add_scalar("gradient_std/" + name, param.grad.data.std(), it)
            writer.add_scalar("loss/loss_train", loss, it)

        dev_loss = 0.0
        i = 0
        for batch in chunked_sorted(dev_data, batch_size):
            batch_obj = Batch([x[0] for x in batch], model.embeddings, to_cuda(gpu))
            gold = [x[1] for x in batch]
            dev_loss += torch.sum(
                compute_loss(model, batch_obj, num_classes, gold,
                             loss_function, gpu, debug).data)

            if i % debug_print == (debug_print - 1):
                print(".", end="", flush=True)
            i += 1

        if writer is not None:
            writer.add_scalar("loss/loss_dev", dev_loss, it)
        print("\n")

        finish_iter_time = monotonic()
        train_acc = evaluate_accuracy(model, train_data[:1000], batch_size, gpu)
        dev_acc = evaluate_accuracy(model, dev_data, batch_size, gpu)

        print(
            "iteration: {:>7,} train time: {:>9,.3f}m, eval time: {:>9,.3f}m "
            "train loss: {:>12,.3f} train_acc: {:>8,.3f}% "
            "dev loss: {:>12,.3f} dev_acc: {:>8,.3f}%".format(
                it,
                (finish_iter_time - start_time) / 60,
                (monotonic() - finish_iter_time) / 60,
                loss / len(train_data),
                train_acc * 100,
                dev_loss / len(dev_data),
                dev_acc * 100))

        if dev_loss < best_dev_loss:
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                print("New best acc!")
            print("New best dev!")
            best_dev_loss = dev_loss
            best_dev_loss_index = 0
            if model_save_dir is not None:
                model_save_file = os.path.join(
                    model_save_dir, "{}_{}.pth".format(model_file_prefix, it))
                print("saving model to", model_save_file)
                torch.save(model.state_dict(), model_save_file)
        else:
            best_dev_loss_index += 1
            if best_dev_loss_index == patience:
                print("Reached", patience,
                      "iterations without improving dev loss. Breaking")
                break

        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            print("New best acc!")
            if model_save_dir is not None:
                model_save_file = os.path.join(
                    model_save_dir, "{}_{}.pth".format(model_file_prefix, it))
                print("saving model to", model_save_file)
                torch.save(model.state_dict(), model_save_file)

        if run_scheduler:
            scheduler.step(dev_loss)

    return model
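# `enable_gradient_clipping` is called at the top of `train` but is not shown in this
# section. Below is a minimal sketch of the usual hook-based approach; the real helper
# may differ, so treat the body (and the element-wise clamp range) as an assumption
# rather than the project's confirmed implementation.
def enable_gradient_clipping_sketch(model, clip):
    if clip is not None and clip > 0:
        def clip_grad(grad):
            # clamp each gradient element into [-clip, clip] as it flows backward
            return grad.clamp(-clip, clip)
        for parameter in model.parameters():
            if parameter.requires_grad:
                parameter.register_hook(clip_grad)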
def __init__(self, pattern_specs, mlp_hidden_dim, num_mlp_layers, num_classes, embeddings,
             vocab, semiring, bias_scale_param, gpu=False, rnn=None,
             pre_computed_patterns=None, no_sl=False, shared_sl=False, no_eps=False,
             eps_scale=None, self_loop_scale=None):
    super(SoftPatternClassifier, self).__init__()
    self.semiring = semiring
    self.vocab = vocab
    self.embeddings = embeddings

    self.to_cuda = to_cuda(gpu)

    self.total_num_patterns = sum(pattern_specs.values())
    print(self.total_num_patterns, pattern_specs)
    self.rnn = rnn
    self.mlp = MLP(self.total_num_patterns, mlp_hidden_dim, num_mlp_layers, num_classes)

    if self.rnn is None:
        self.word_dim = len(embeddings[0])
    else:
        self.word_dim = self.rnn.num_directions * self.rnn.hidden_dim
    self.num_diags = 1  # self-loops and single-forward-steps
    self.no_sl = no_sl
    self.shared_sl = shared_sl

    self.pattern_specs = pattern_specs
    self.max_pattern_length = max(list(pattern_specs.keys()))

    self.no_eps = no_eps
    self.bias_scale_param = bias_scale_param

    # Shared parameters between main path and self loop.
    # 1 -- one parameter per state per pattern
    # 2 -- a single global parameter
    if self.shared_sl > 0:
        if self.shared_sl == SHARED_SL_PARAM_PER_STATE_PER_PATTERN:
            shared_sl_data = randn(self.total_num_patterns, self.max_pattern_length)
        elif self.shared_sl == SHARED_SL_SINGLE_PARAM:
            shared_sl_data = randn(1)
        self.self_loop_scale = Parameter(shared_sl_data)
    elif not self.no_sl:
        if self_loop_scale is not None:
            self.self_loop_scale = self.semiring.from_float(
                self.to_cuda(fixed_var(FloatTensor([self_loop_scale]))))
        else:
            self.self_loop_scale = self.to_cuda(fixed_var(semiring.one(1)))
        self.num_diags = 2

    # end state index for each pattern
    end_states = [
        [end]
        for pattern_len, num_patterns in self.pattern_specs.items()
        for end in num_patterns * [pattern_len - 1]
    ]

    self.end_states = self.to_cuda(fixed_var(LongTensor(end_states)))

    diag_data_size = self.total_num_patterns * self.num_diags * self.max_pattern_length
    diag_data = randn(diag_data_size, self.word_dim)
    bias_data = randn(diag_data_size, 1)

    normalize(diag_data)

    if pre_computed_patterns is not None:
        diag_data, bias_data = self.load_pre_computed_patterns(
            pre_computed_patterns, diag_data, bias_data, pattern_specs)

    self.diags = Parameter(diag_data)

    # Bias term
    self.bias = Parameter(bias_data)

    if not self.no_eps:
        self.epsilon = Parameter(
            randn(self.total_num_patterns, self.max_pattern_length - 1))

    # TODO: learned? hyperparameter?
    # since these are currently fixed to `semiring.one`, they are not doing anything.
    if eps_scale is not None:
        self.epsilon_scale = self.semiring.from_float(
            self.to_cuda(fixed_var(FloatTensor([eps_scale]))))
    else:
        self.epsilon_scale = self.to_cuda(fixed_var(semiring.one(1)))

    print("# params:", sum(p.nelement() for p in self.parameters()))
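# Illustration of the `pattern_specs` structure the constructor above relies on: a mapping
# from pattern length to the number of patterns of that length (inferred from how its
# keys, values, and items are used; the concrete numbers below are made up).
#   pattern_specs = OrderedDict([(2, 10), (3, 10), (4, 10)])
#   -> total_num_patterns = 30, max_pattern_length = 4,
#      end_states = [[1]] * 10 + [[2]] * 10 + [[3]] * 10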
def get_all_precisions(self, mapped_src_emb):
    # Normalize the embeddings
    mapped_src_emb = mapped_src_emb / mapped_src_emb.norm(2, 1)[:, None]
    tgt_emb = self.tgt_emb / self.tgt_emb.norm(2, 1)[:, None]

    # Calculate r_target
    if 'csls' in self.methods:
        print("Calculating r_target...")
        start_time = time.time()
        self.r_target = common_csls_step(self.csls_k, mapped_src_emb, tgt_emb)
        print("Time taken for making r_target: ", time.time() - start_time)

    adv_mapped_src_emb = mapped_src_emb

    if 'procrustes' in self.models:
        procrustes_mapped_src_emb = self.get_procrustes_mapping()

    if 'with-ref' in self.refine:
        print("Performing refinement...")
        start_time = time.time()
        for _ in range(self.num_refine):
            mapped_src_emb = self.get_refined_mapping(mapped_src_emb, tgt_emb)
            mapped_src_emb = mapped_src_emb / mapped_src_emb.norm(2, 1)[:, None]
            self.r_target = common_csls_step(self.csls_k, mapped_src_emb, tgt_emb)
        refined_mapped_src_emb = mapped_src_emb
        print("Time taken for refinement: ", time.time() - start_time)

    start_time = time.time()

    all_precisions = {}
    buckets = None
    save = False

    for it, v in enumerate(self.valid):
        v['valid_src_word_ids'] = util.to_cuda(v['valid_src_word_ids'], self.use_cuda)
        if it == 0:
            key = 'validation'
        else:
            key = 'validation-new'
        all_precisions[key] = {}

        for mod in self.models:
            if mod == 'procrustes':
                mapped_src_emb = procrustes_mapped_src_emb.clone()
            elif mod == 'adv':
                mapped_src_emb = adv_mapped_src_emb.clone()
            else:
                raise NotImplementedError('Model not implemented: %s' % mod)

            all_precisions[key][mod] = {}

            for r in self.refine:
                if r == 'with-ref':
                    if mod == 'procrustes':
                        continue
                    else:
                        mapped_src_emb = refined_mapped_src_emb.clone()

                mapped_src_emb = mapped_src_emb / mapped_src_emb.norm(2, 1)[:, None]

                if 'csls' in self.methods:
                    self.r_source = common_csls_step(
                        self.csls_k, tgt_emb, mapped_src_emb[v['valid_src_word_ids']])
                    start_time = time.time()
                    self.r_target = common_csls_step(self.csls_k, mapped_src_emb, tgt_emb)

                all_precisions[key][mod][r] = {}

                for m in self.methods:
                    all_precisions[key][mod][r][m] = {}

                    for k in self.ks:
                        if key == 'validation-new' and mod == 'adv' and r == 'with-ref' \
                                and m == 'csls' and k == 1:
                            buckets = 5
                            save = True

                        p = self.get_precision_k(k, tgt_emb, mapped_src_emb, v,
                                                 method=m, buckets=buckets, save=save)
                        if not save:
                            print("key: %s, model: %s, refine: %s, method: %s, k: %d, prec: %f"
                                  % (key, mod, r, m, k, p))
                        else:
                            print("key: %s, model: %s, refine: %s, method: %s, k: %d"
                                  % (key, mod, r, m, k))
                            print("precision: ", p)

                        all_precisions[key][mod][r][m][k] = p

                        buckets = None
                        save = False

    print("Time taken to run main loop: ", time.time() - start_time)
    print(json.dumps(all_precisions, indent=2))
    return all_precisions
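# `common_csls_step` is used above to compute the CSLS neighborhood terms (r_source,
# r_target) but is not shown in this section. The sketch below is one plausible
# implementation, assuming both embedding matrices are already L2-normalized and that the
# helper returns, for each row of `emb`, the mean cosine similarity to its `csls_k`
# nearest neighbors in `other_emb`; the exact signature and any chunking behaviour of the
# project's helper may differ.
import torch

def common_csls_step_sketch(csls_k, other_emb, emb):
    # cosine similarities between every row of `emb` and every row of `other_emb`
    sims = emb.mm(other_emb.t())              # (n_emb, n_other)
    top_k_sims, _ = sims.topk(csls_k, dim=1)  # k most similar neighbors per row
    return top_k_sims.mean(dim=1)             # average neighborhood similarity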
D_losses, G_losses = [], []
max_cl = 1.58

for epoch in range(opt.n_epochs):
    for i, (coords, labels) in enumerate(dataloader):

        batch_size = coords.shape[0]

        # Adversarial ground truths
        valid = Variable(FloatTensor(batch_size, 1).fill_(1.0), requires_grad=False)
        fake = Variable(FloatTensor(batch_size, 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(coords.type(FloatTensor))
        labels = to_cuda(
            Variable(torch.reshape(labels.float(), (batch_size, opt.n_classes))))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise and labels as generator input
        z = Variable(
            FloatTensor(np.random.normal(0, 1, (batch_size, opt.latent_dim))))
        gen_labels = Variable(
            FloatTensor(
                max_cl *