def sample_data(self, data_num=100):
    z = Variable(FloatTensor(np.random.normal(0, 1, (data_num, 3))))
    labels = 1.558 * np.random.random_sample(size=(data_num, 1))
    labels = Variable(FloatTensor(labels))
    gen_coords = to_cpu(self.G(z, labels)).detach().numpy()
    labels = to_cpu(labels).detach().numpy()
    np.savez("wgan_gp/results/final", labels, self.rev_standardize(gen_coords))
def predict(self, doc, batchsize=None):
    """Prediction method for use at test time. Returns a lower-triangular score matrix."""
    t_phi_a = self.factory.to_device(
        self._adjust_features(doc.anaphoricity_features.long(), self.eps_model))
    t_phi_a_offsets = self.factory.to_device(doc.anaphoricity_offsets.long())
    t_phi_p = self.factory.to_device(
        self._adjust_features(doc.pairwise_features.long(), self.ana_model))
    t_phi_p_offsets = self.factory.to_device(doc.pairwise_offsets.long())

    phi_a = Variable(t_phi_a, volatile=True)
    phi_a_offsets = Variable(t_phi_a_offsets, volatile=True)
    phi_p = Variable(t_phi_p, volatile=True)
    phi_p_offsets = Variable(t_phi_p_offsets, volatile=True)

    eps_scores, h_a = self.eps_model(phi_a, phi_a_offsets, batchsize=batchsize)
    ana_scores = self.ana_model(h_a, phi_p, phi_p_offsets, batchsize=batchsize)

    scores = self._create_score_matrix(eps_scores.data, ana_scores.data)

    return to_cpu(scores)
def create_successive_coords(self):
    """Generate airfoil shapes for 151 target lift coefficients C_L^c from 0.00 to 1.50."""
    cl_r = []
    cl_c = []
    gen_coords = []
    for cl in range(151):
        cl /= 100
        cl_c.append(cl)
        labels = Variable(torch.reshape(FloatTensor([cl]), (1, 1)))
        calc_num = 0
        while True:
            calc_num += 1
            z = Variable(
                FloatTensor(np.random.normal(0, 1, (1, self.latent_dim))))
            gen_coord = self.rev_standardize(
                to_cpu(self.G(z, labels)).detach().numpy())
            clr = get_cl(gen_coord)
            if not np.isnan(clr):
                print(cl)
                cl_r.append(clr)
                gen_coords.append(gen_coord)
                break
            if calc_num == 5:
                # Give up after five attempts for this label.
                print('not calculated {0}'.format(cl))
                cl_r.append(-1)
                gen_coords.append(gen_coord)
                break
    np.savez("normal/results/successive_label", cl_c, cl_r, gen_coords)
def sample_image(epoch=None, data_num=12):
    # Sample noise
    z = Variable(
        FloatTensor(np.random.normal(0, 1, (data_num, opt.latent_dim))))
    labels = max_cl * np.random.random_sample(size=(data_num, opt.n_classes))
    labels = Variable(FloatTensor(labels))
    gen_coords = to_cpu(generator(z, labels)).detach().numpy()
    labels = to_cpu(labels).detach().numpy()
    if epoch is not None:
        save_coords(gen_coords * coord_std + coord_mean, labels,
                    "wgan_gp/coords/epoch_{0}".format(str(epoch).zfill(3)))
    else:
        np.savez("wgan_gp/results/final",
                 labels, gen_coords * coord_std + coord_mean)
        save_coords(gen_coords * coord_std + coord_mean, labels,
                    "wgan_gp/coords/final.png")
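# Usage sketch (not part of the original file): np.savez called with positional arrays
# stores them under the default keys 'arr_0', 'arr_1' and appends the .npz extension,
# so the file written above can be read back like this.
import numpy as np

data = np.load("wgan_gp/results/final.npz")
labels, gen_coords = data["arr_0"], data["arr_1"]
print(labels.shape, gen_coords.shape)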
def create_coords_by_cl(self, cl_c, data_num=20):
    z = Variable(
        FloatTensor(np.random.normal(0, 1, (data_num, self.latent_dim))))
    labels = np.array([cl_c] * data_num)
    labels = Variable(torch.reshape(FloatTensor([labels]), (data_num, 1)))
    gen_coords = self.rev_standardize(
        to_cpu(self.G(z, labels)).detach().numpy())
    return gen_coords
def save_params(self, file_name=None):
    if file_name is None:
        file_name = self.__class__.__name__ + '.pkl'

    params = [p.astype(np.float16) for p in self.params]
    if GPU:
        params = [to_cpu(p) for p in params]

    with open(file_name, 'wb') as f:
        pickle.dump(params, f)
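# Hedged counterpart sketch (assumption, not shown in this file): a load_params that
# undoes the float16 cast above. The to_gpu helper and the layout of self.params are
# assumptions mirroring the save path.
def load_params(self, file_name=None):
    if file_name is None:
        file_name = self.__class__.__name__ + '.pkl'
    with open(file_name, 'rb') as f:
        params = pickle.load(f)
    params = [p.astype('f') for p in params]  # cast back to float32
    if GPU:
        params = [to_gpu(p) for p in params]  # assumed counterpart of to_cpu
    for i, p in enumerate(params):
        self.params[i][...] = p  # assumes self.params holds numpy arrays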
def forward(self, phi_p, solutions, sizes):
    h_p = self.hp_model(phi_p)
    ana_scores = to_cpu(self.ana_scoring_model(h_p))
    loss = Variable(torch.zeros(1))
    idx = 0
    for sol, sz in zip(solutions, sizes):
        m_scores = ana_scores[idx:(idx + sz)]
        idx = idx + sz
        best_score, best_idx = torch.max(m_scores, 0)
        if not sol[best_idx].data[0]:
            # Hinge-style margin: penalize when the best-scoring candidate
            # is not one of the correct antecedents.
            best_correct = torch.max(m_scores[sol])
            loss += 1.0 + best_correct - best_score
    return loss
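# Minimal sketch of the margin term used above, on plain tensors (modern PyTorch API
# instead of Variable): if the best-scoring candidate is not a correct antecedent,
# add 1 + best_correct_score - best_overall_score. The toy values are made up.
import torch

m_scores = torch.tensor([0.2, 1.5, 0.7])  # candidate scores for one mention
sol = torch.tensor([True, False, True])   # which candidates are correct antecedents
best_score, best_idx = torch.max(m_scores, 0)
loss = torch.zeros(1)
if not sol[best_idx]:
    best_correct = torch.max(m_scores[sol])
    loss = loss + 1.0 + best_correct - best_score
print(loss)  # tensor([0.2000])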
def run(self, schedule, epochs):
    self.running = True
    lossMeter = LossMeter()
    for cb in schedule.callbacks:
        cb.on_train_begin(self)
    for epoch in tqdm(range(epochs), desc="Epochs"):
        if not self.running:
            break
        for cb in schedule.callbacks:
            cb.on_epoch_begin(self)
        running_loss = 0
        for input, label, *_ in tqdm(schedule.data, desc="Steps", leave=False):
            if not self.running:
                break
            for cb in schedule.callbacks:
                cb.on_batch_begin(self)
            step_loss, outputs = self.step(input, label)
            if self.log:
                lossMeter.update(util.to_cpu(step_loss), input.shape[0])
            for cb in schedule.callbacks:
                cb.on_batch_end(self, lossMeter, outputs, label)
        for cb in schedule.callbacks:
            cb.on_epoch_end(self, lossMeter)
    for cb in schedule.callbacks:
        cb.on_train_end(self)
def train(model, train_config, training_set, dev_set, checkpoint=None, cuda=False):
    """Main training loop."""
    epoch_size = len(training_set)
    dot_interval = max(epoch_size // 80, 1)
    logging.info('%d documents per epoch' % epoch_size)

    if cuda:
        model = model.cuda()

    embedding_lr, deep_lr = train_config['learning_rate']
    embedding_layers = []
    deep_layers = []
    logging.info('Learning rates:')
    for name, p in model.named_parameters():
        if name.startswith('eps_model.ha_model.') or name.startswith('ana_model.hp_model.'):
            logging.info('%g %s (embedding)' % (embedding_lr, name))
            embedding_layers.append(p)
        else:
            logging.info('%g %s' % (deep_lr, name))
            deep_layers.append(p)

    opt_params = [{
        'params': embedding_layers,
        'lr': embedding_lr
    }, {
        'params': deep_layers,
        'lr': deep_lr
    }]
    opt = torch.optim.Adagrad(params=opt_params)

    # training_set, truncated = training_set.truncate_docs(train_config['maxsize_gpu'])
    # logging.info('Truncated %d/%d documents.' % (truncated, len(training_set)))

    model.set_error_costs(train_config['error_costs'])

    logging.info('Starting training...')
    for epoch in range(train_config['nepochs']):
        model.train()
        train_loss_reg = 0.0
        train_loss_unreg = 0.0
        for i, idx in enumerate(numpy.random.permutation(epoch_size)):
            if (i + 1) % dot_interval == 0:
                print('.', end='', flush=True)

            if training_set[idx].nmentions == 1:
                logging.info('Skipping document with only one mention.')
                continue

            opt.zero_grad()

            model_loss = model.compute_loss(
                training_set[idx], batchsize=train_config['batchsize'])
            reg_loss = to_cpu(sum(p.abs().sum() for p in model.parameters()))
            loss = model_loss + train_config['l1reg'] * reg_loss

            train_loss_unreg += model_loss.data[0] / training_set[idx].nmentions
            train_loss_reg += loss.data[0] / training_set[idx].nmentions

            loss.backward()
            opt.step()

            del loss
            del model_loss
            del reg_loss

        print(flush=True)

        if checkpoint:
            logging.info('Saving checkpoint...')
            with h5py.File('%s-%03d' % (checkpoint, epoch), 'w') as h5:
                util.save_model(h5, model)

        logging.info('Computing devset performance...')
        model.eval()
        dev_loss = 0.0
        dev_correct = 0
        dev_total = 0
        for doc in dev_set:
            loss, ncorrect = model.compute_dev_scores(
                doc, batchsize=train_config['batchsize'])
            dev_loss += loss
            dev_correct += ncorrect
            dev_total += doc.nmentions
        dev_acc = dev_correct / dev_total

        logging.info(
            'Epoch %d: train_loss_reg %g / train_loss_unreg %g / dev_loss %g / dev_acc %g'
            % (epoch, train_loss_reg, train_loss_unreg, dev_loss, dev_acc))
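# Hedged example of the train_config dictionary consumed by train(). The keys are the
# ones read above; the values are placeholders and the error_costs format is an
# assumption (it is only passed through to model.set_error_costs here).
train_config = {
    'learning_rate': (0.08, 0.01),  # (embedding_lr, deep_lr)
    'nepochs': 100,
    'batchsize': 30000,
    'l1reg': 0.001,
    'error_costs': (0.5, 1.2, 1.0),  # placeholder; actual structure defined by the model
    'maxsize_gpu': 350,              # only used by the commented-out truncation step
}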
def compute_loss(self, doc, batchsize=None):
    """Compute the training loss.

    The loss is computed in a two-step procedure that exploits the structure of the
    objective function, whose value only ever depends on two scores per mention
    (the highest-scoring predicted and the highest-scoring correct). In the first step,
    we run the whole network without computing gradients to identify the scores
    contributing to the loss function. In the second step, we recompute the scores for
    those items only and do backpropagation."""
    t_phi_a = self.factory.to_device(
        self._adjust_features(doc.anaphoricity_features.long(), self.eps_model))
    t_phi_a_offsets = self.factory.to_device(doc.anaphoricity_offsets.long())
    t_phi_p = self.factory.to_device(
        self._adjust_features(doc.pairwise_features.long(), self.ana_model))
    t_phi_p_offsets = self.factory.to_device(doc.pairwise_offsets.long())
    solution_mask = self.factory.to_device(doc.solution_mask)

    # First do the full computation without gradients
    phi_a = Variable(t_phi_a, volatile=True)
    phi_a_offsets = Variable(t_phi_a_offsets, volatile=True)
    phi_p = Variable(t_phi_p, volatile=True)
    phi_p_offsets = Variable(t_phi_p_offsets, volatile=True)

    all_eps_scores, h_a = self.eps_model(phi_a, phi_a_offsets, batchsize=batchsize)
    all_ana_scores = self.ana_model(h_a, phi_p, phi_p_offsets, batchsize=batchsize)

    margin_info = self._find_margin(all_eps_scores.data, all_ana_scores.data,
                                    solution_mask)
    best_correct_idx = margin_info['best_correct_idx']
    loss_idx = margin_info['loss_idx']
    cost_values = margin_info['cost_values']
    loss_per_example = margin_info['loss_per_example']

    # Then turn on gradients and run on loss-contributing elements only
    loss_contributing = torch.gt(loss_per_example, 0.0).unsqueeze(1)
    if torch.sum(loss_contributing) == 0:
        return Variable(torch.zeros(1), requires_grad=False)
    loss_contributing_idx = loss_contributing.nonzero()[:, 0]
    n_loss_contributing = loss_contributing_idx.size()[0]

    # In the second run, we just compute the scores for the two elements per example
    # that contribute to the margin loss. At most one of them can be an epsilon score.
    # The scores will be put in an nmentions x 2 matrix. The following code determines
    # which of the entries in this matrix come from the eps and the ana scorer,
    # respectively, and which examples must be fed to each of the scorers.
    cand_idx = torch.stack([best_correct_idx, loss_idx], dim=1)
    example_no = self.factory.long_arange(0, doc.nmentions).unsqueeze(1).expand_as(cand_idx)
    is_epsilon = torch.eq(cand_idx, example_no)
    sub_is_epsilon = is_epsilon[loss_contributing_idx]
    cand_mask = (1 - is_epsilon) * loss_contributing.expand_as(is_epsilon)
    sub_cand_mask = cand_mask[loss_contributing_idx]
    cand_subset = Variable(
        example_no[:sub_cand_mask.size()[0], :].masked_select(sub_cand_mask))
    example_offsets = torch.cumsum(
        torch.cat([
            self.factory.long_zeros(1, 2),
            example_no[:(doc.nmentions - 1), :]
        ]), 0)
    cand_idx_in_doc = cand_idx + example_offsets
    relevant_cands = cand_idx_in_doc[cand_mask]

    # Next, we compute the required scores.
    phi_a = Variable(t_phi_a, volatile=False, requires_grad=False)
    phi_a_offsets = Variable(t_phi_a_offsets, volatile=False, requires_grad=False)
    phi_p = Variable(t_phi_p, volatile=False, requires_grad=False)
    phi_p_offsets = Variable(t_phi_p_offsets, volatile=False, requires_grad=False)

    sub_phi_a, sub_phi_a_offsets = self._select_features(phi_a, phi_a_offsets,
                                                         loss_contributing_idx)
    sub_phi_p, sub_phi_p_offsets = self._select_features(phi_p, phi_p_offsets,
                                                         relevant_cands)

    sub_eps_scores, sub_h_a = self.eps_model(sub_phi_a, sub_phi_a_offsets,
                                             batchsize=batchsize)
    sub_ana_scores = self.ana_model(sub_h_a, sub_phi_p, sub_phi_p_offsets,
                                    cand_subset=cand_subset, batchsize=batchsize)

    # Then we store them in the right components of the scores matrix.
    scores = Variable(self.factory.zeros(n_loss_contributing, 2))
    scores[sub_cand_mask] = sub_ana_scores
    needs_eps = torch.gt(torch.sum(sub_is_epsilon, dim=1), 0)
    if self.factory.get_single(torch.sum(needs_eps)) > 0:
        eps_idx = Variable(
            example_no[:sub_cand_mask.size()[0], :].masked_select(1 - sub_cand_mask))
        scores[1 - sub_cand_mask] = sub_eps_scores[eps_idx]

    # The applicable rescaling weights can be taken from the first run.
    # We now compute the scores.
    var_cost_values = Variable(cost_values, requires_grad=False)
    sub_loss_per_example = var_cost_values[loss_contributing_idx].squeeze() * \
        (1.0 - scores[:, 0] + scores[:, 1])
    model_loss = to_cpu(torch.sum(sub_loss_per_example))

    # The loss values computed in the first and the second run should be equal, since the
    # second run only serves to obtain the gradients. In rare cases, there seems to be a
    # discrepancy between the scores. This needs more investigation.
    # The warning is silenced for nets with dropout until we've implemented consistent
    # dropout masks in the two-stage scoring process.
    score_diff = abs(
        self.factory.get_single(model_loss) -
        self.factory.get_single(margin_info['loss']))
    if score_diff > 1e-4 and self.net_config['dropout_h_comb'] is None:
        logging.warning('Unexpected score difference: %g' % score_diff)

    return model_loss
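# Schematic sketch of the two-pass pattern described in the docstring above, on a toy
# scorer: pass 1 finds the loss-contributing examples without building an autograd graph,
# pass 2 recomputes only those scores with gradients enabled. It uses torch.no_grad
# rather than the volatile flag used above; scorer, x and gold are made-up stand-ins.
import torch

scorer = torch.nn.Linear(4, 1)
x = torch.randn(8, 4)
gold = torch.randn(8, 1)

with torch.no_grad():  # pass 1: no autograd graph
    margin = 1.0 + scorer(x) - gold
    contributing = (margin.squeeze(1) > 0).nonzero(as_tuple=True)[0]

# pass 2: recompute the contributing scores only, this time with gradients
loss = (1.0 + scorer(x[contributing]) - gold[contributing]).sum()
loss.backward()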
def update(self, outputs, labels):
    preds = torch.clamp(torch.round(util.to_cpu(outputs).data), 0, 1).numpy().astype(int)
    labels = util.to_cpu(labels).data.numpy().astype(int)
    self.update_from_numpy(preds, labels)
def pretrain_hp(model, train_config, training_set, dev_set, checkpoint=None, cuda=False):
    epoch_size = len(training_set)
    dot_interval = max(epoch_size // 80, 1)
    logging.info('%d documents per epoch' % epoch_size)

    opt = torch.optim.Adagrad(params=model.parameters(),
                              lr=train_config['learning_rate'][1])

    logging.info('Filtering corpora for pretraining...')
    train_features, train_sizes, train_solutions = filter_for_pretrain_hp(training_set)
    dev_features, dev_sizes, dev_solutions = filter_for_pretrain_hp(dev_set)

    logging.info('Starting training...')
    for epoch in range(train_config['nepochs']):
        train_loss_reg = 0.0
        train_loss_unreg = 0.0
        for i, idx in enumerate(numpy.random.permutation(epoch_size)):
            if (i + 1) % dot_interval == 0:
                print('.', end='', flush=True)

            if len(train_sizes[idx]) == 0:
                # no anaphoric mentions in document
                continue

            opt.zero_grad()

            if cuda:
                phi_p = Variable(train_features[idx].pin_memory()).cuda(async=True)
            else:
                phi_p = Variable(train_features[idx])
            solutions = [Variable(sol) for sol in train_solutions[idx]]

            model_loss = model(phi_p, solutions, train_sizes[idx])
            reg_loss = to_cpu(sum(p.abs().sum() for p in model.parameters()))
            loss = model_loss + train_config['l1reg'] * reg_loss

            train_loss_unreg += model_loss.data[0] / len(train_sizes[idx])
            train_loss_reg += loss.data[0] / len(train_sizes[idx])

            loss.backward()
            opt.step()

            del loss
            del model_loss
            del reg_loss

        print(flush=True)

        if cuda:
            cpu_model = copy.deepcopy(model).cpu()
        else:
            cpu_model = model

        if checkpoint:
            logging.info('Saving checkpoint...')
            with open('%s-%03d' % (checkpoint, epoch), 'wb') as f:
                torch.save(cpu_model.state_dict(), f)

        logging.info('Computing devset performance...')
        dev_loss = 0.0
        for docft, docsz, docsol in zip(dev_features, dev_sizes, dev_solutions):
            if cuda:
                phi_p = Variable(docft.pin_memory(), volatile=True).cuda(async=True)
            else:
                phi_p = Variable(docft, volatile=True)
            solutions = [Variable(sol, volatile=True) for sol in docsol]
            dev_loss += model(phi_p, solutions, docsz).data[0]

        logging.info(
            'Epoch %d: train_loss_reg %g / train_loss_unreg %g / dev_loss %g'
            % (epoch, train_loss_reg, train_loss_unreg, dev_loss))
def update(self, output, label):
    _, preds = torch.max(output, 1)
    batch_correct = util.to_cpu(torch.sum(preds == label).data)
    self.num_correct += batch_correct
    self.count += label.shape[0]
    return (batch_correct.double() / label.shape[0]).item()