def valid_process(global_t, model, valid_loader, valid_config, unlabeled_epoch,
                  tb_writer, logger, cur_best_score):
    valid_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}
    while True:
        batch = valid_loader.next_sentiment_batch()
        if batch is None:  # end of epoch
            break
        title, context, target, target_lens, sentiment_label = batch
        title, context, target, target_lens, sentiment_label = \
            to_tensor(title), to_tensor(context), to_tensor(target), \
            to_tensor(target_lens), to_tensor(sentiment_label)
        valid_loss = model.valid_AE(global_t, title, context, target, target_lens, sentiment_label)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            if loss_name == 'min_valid_loss' and loss_value < cur_best_score['min_valid_loss']:
                cur_best_score['min_valid_loss'] = loss_value
                cur_best_score['min_epoch'] = unlabeled_epoch
                cur_best_score['min_step'] = global_t
            v.append(loss_value)
            loss_records[loss_name] = v

    log = ""
    for loss_name, loss_values in loss_records.items():
        log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_t)
    logger.info(log)
def align(self, valid_loader):
    self.seq_encoder.eval()
    self.decoder.eval()
    choice_statistic = [0.0 for _ in range(self.n_components)]
    while True:
        # `batch` is a dict keyed by sentiment
        batch = valid_loader.next_sentiment_batch()
        if batch is None:
            break
        title, context, target, target_lens, sentiments = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)
        title_last_hidden, _ = self.seq_encoder(title)
        context_last_hidden, _ = self.seq_encoder(context)
        c = torch.cat((title_last_hidden, context_last_hidden), 1)  # (batch, 2 * hidden_size * 2)
        current_statistic = self.sample_code_prior_sentiment(c, True)
        choice_statistic = [
            choice_statistic[i] + current_statistic[i]
            for i in range(self.n_components)
        ]
    print("%s distribution: %s" % (valid_loader.name, str(choice_statistic)[1:-1]))
def train_process(global_t, model, train_loader):
    model.train()
    loss_records = []
    finish_train = False
    sent_names = ['pos', 'neu', 'neg']
    for sent_name in sent_names:
        batch = train_loader[sent_name].next_sentiment_batch()
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t
        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)
        loss_AE, global_t = model.train_AE(global_t, title, context, target, target_lens,
                                           sent_name=sent_name)
        loss_records.extend(loss_AE)
    return model, finish_train, loss_records, global_t
def pre_valid_process(global_t, model, valid_loader, valid_config, tb_writer, logger):
    sent_names = ['pos', 'neu', 'neg']
    for sent_name in sent_names:
        valid_loader[sent_name].epoch_init(valid_config.batch_size, shuffle=False)
        model.eval()
        loss_records = {}
        while True:
            batch = valid_loader[sent_name].next_batch()
            if batch is None:  # end of epoch
                break
            target, target_lens = batch
            target, target_lens = to_tensor(target), to_tensor(target_lens)
            valid_loss, global_t = model.valid_VAE(global_t, target, target_lens, sent_name=sent_name)
            for loss_name, loss_value in valid_loss:
                v = loss_records.get(loss_name, [])
                v.append(loss_value)
                loss_records[loss_name] = v

        log = 'Valid: iter {} sentiment {} Validation\n'.format(global_t, sent_name)
        for loss_name, loss_values in loss_records.items():
            log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
            if tb_writer:
                tb_writer.add_scalar(loss_name, np.mean(loss_values), global_t)
        logger.info(log)
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles
    test_loader.epoch_init(test_config.batch_size, shuffle=True)
    poem_count = 0
    predict_results = {0: [], 1: [], 2: []}
    while True:
        model.eval()
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        batch_size = batch.shape[0]
        poem_count += 1
        if poem_count % 10 == 0:
            print("Predicted {} poems".format(poem_count))
        title_list = batch  # batch size is 1: one batch generates one poem
        title_tensor = to_tensor(title_list)
        # model.test decodes the poem for the current batch; each decoded line is fed back
        # as the context for the next line
        for i in range(3):
            sentiment_label = np.zeros(batch_size, dtype=np.int64)
            for idx in range(batch_size):
                sentiment_label[idx] = int(i)
            sentiment_label = to_tensor(sentiment_label)
            output_poem, output_tokens = model.test(title_tensor, title_list,
                                                    sentiment_label=sentiment_label)
            if poem_count % 80 == 0:
                logger.info("Sentiment {} Poem {}\n".format(i, output_poem))
            predict_results[i] += (np.array(output_tokens)[:, :7].tolist())

    # Predict sentiment using the sorting net
    from collections import defaultdict
    neg = defaultdict(int)
    neu = defaultdict(int)
    pos = defaultdict(int)
    total = defaultdict(int)
    for i in range(3):
        _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i])
        total[i] = neg[i] + neu[i] + pos[i]
    for i in range(3):
        logger.info("%d%%\t%d%%\t%d%%" % (neg[i] * 100 / total[i],
                                          neu[i] * 100 / total[i],
                                          pos[i] * 100 / total[i]))
    print("Done testing")
def train_process(global_t, model, train_loader, config, sentiment_data=False):
    model.train()
    loss_records = []
    sentiment_label = None
    if sentiment_data:
        batch = train_loader.next_sentiment_batch()
        finish_train = False
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t
        title, context, target, target_lens, sentiment_label = batch
        title, context, target, target_lens, sentiment_label = \
            to_tensor(title), to_tensor(context), to_tensor(target), \
            to_tensor(target_lens), to_tensor(sentiment_label)
    else:
        batch = train_loader.next_batch()
        finish_train = False
        if batch is None:  # end of epoch
            finish_train = True
            return model, finish_train, None, global_t
        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)

    # Inputs: topic, previous line, current line and its length
    loss_AE, global_t = model.train_AE(global_t, title, context, target, target_lens, sentiment_label)
    loss_records.extend(loss_AE)
    return model, finish_train, loss_records, global_t
def train_D(self, title, context, target, target_lens):
    self.seq_encoder.eval()
    self.discriminator.train()
    self.optimizer_D.zero_grad()

    batch_size = context.size(0)
    title_last_hidden, _ = self.seq_encoder(title)
    context_last_hidden, _ = self.seq_encoder(context)
    c = torch.cat((title_last_hidden, context_last_hidden), 1)  # (batch, 2 * hidden_size * 2)
    x, _ = self.seq_encoder(target[:, 1:], target_lens - 1)

    post_z = self.sample_code_post(x, c)
    errD_post = torch.mean(
        self.discriminator(torch.cat((post_z.detach(), c.detach()), 1))) * self.n_d_loss
    errD_post.backward(one)

    prior_z = self.sample_code_prior(c)
    errD_prior = torch.mean(
        self.discriminator(torch.cat((prior_z.detach(), c.detach()), 1))) * self.n_d_loss
    errD_prior.backward(minus_one)

    # Gradient penalty on random interpolates between prior and posterior samples (WGAN-GP style)
    alpha = to_tensor(torch.rand(batch_size, 1))
    alpha = alpha.expand(prior_z.size())
    interpolates = alpha * prior_z.data + ((1 - alpha) * post_z.data)
    interpolates = Variable(interpolates, requires_grad=True)
    d_input = torch.cat((interpolates, c.detach()), 1)
    disc_interpolates = torch.mean(self.discriminator(d_input))
    gradients = torch.autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=to_tensor(torch.ones(disc_interpolates.size())),
        create_graph=True,
        retain_graph=True,
        only_inputs=True)[0]
    gradient_penalty = ((gradients.contiguous().view(gradients.size(0), -1).norm(2, dim=1) - 1) ** 2
                        ).mean() * self.lambda_gp
    gradient_penalty.backward()

    self.optimizer_D.step()
    costD = -(errD_prior - errD_post) + gradient_penalty
    return [('train_loss_D', costD.item())]
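# The interpolation-and-gradient step in train_D above is the WGAN-GP gradient penalty.
# Below is a minimal, self-contained sketch of that penalty for reference; `critic`, `real`,
# `fake` and `lambda_gp` are illustrative names and defaults, not this repository's API.
import torch

def gradient_penalty_sketch(critic, real, fake, lambda_gp=10.0):
    """Penalise the critic so its gradient norm on random interpolates stays close to 1."""
    real, fake = real.detach(), fake.detach()
    alpha = torch.rand(real.size(0), 1, device=real.device).expand_as(real)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    score = critic(interp).mean()
    grads = torch.autograd.grad(score, interp, create_graph=True)[0]
    return lambda_gp * ((grads.reshape(grads.size(0), -1).norm(2, dim=1) - 1) ** 2).mean()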
def forward(self, inputs, input_lens=None, noise=False):
    # if self.embedding is not None:
    inputs = self.embedding(inputs)  # embedding lookup
    batch_size, seq_len, emb_size = inputs.size()  # (batch, len, emb_size); len is 12, the max title length
    # inputs = F.dropout(inputs, 0.5, self.training)  # no dropout on the embeddings for now
    if input_lens is not None:
        input_lens_sorted, indices = input_lens.sort(descending=True)
        inputs_sorted = inputs.index_select(0, indices)
        inputs = pack_padded_sequence(inputs_sorted, input_lens_sorted.data.tolist(), batch_first=True)

    # inputs: (batch, len, emb_dim)
    # init_hidden: (2, batch, n_hidden)
    init_hidden = to_tensor(
        torch.zeros(self.n_layers * (1 + self.bidirectional), batch_size, self.hidden_size))
    # hids: (batch, len, 2 * n_hidden)
    # h_n: (2, batch, n_hidden)
    hids, h_n = self.rnn(inputs, init_hidden)

    if input_lens is not None:
        _, inv_indices = indices.sort()
        hids, lens = pad_packed_sequence(hids, batch_first=True)
        hids = hids.index_select(0, inv_indices)
        h_n = h_n.index_select(1, inv_indices)

    # h_n: (n_layers, 2, batch, n_hidden), grouped by layer
    h_n = h_n.view(self.n_layers, (1 + self.bidirectional), batch_size, self.hidden_size)
    # keep only the last layer: (2, batch, n_hidden)
    h_n = h_n[-1]
    # flatten both directions into (batch, 1 * 2 * hidden_size) and return it as the encoder's last hidden
    enc = h_n.transpose(1, 0).contiguous().view(batch_size, -1)
    if noise and self.noise_radius > 0:
        gauss_noise = to_tensor(torch.normal(torch.zeros(enc.size()), self.noise_radius))
        enc = enc + gauss_noise
    return enc, hids
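# The encoder's sort -> pack -> run RNN -> unpack -> unsort dance above is the standard recipe
# for variable-length batches. A minimal, self-contained sketch of the same pattern
# (toy sizes and names, not this repository's modules):
import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

def packed_gru_sketch():
    rnn = torch.nn.GRU(input_size=8, hidden_size=16, batch_first=True, bidirectional=True)
    x = torch.randn(4, 12, 8)              # (batch, max_len, emb)
    lens = torch.tensor([12, 9, 7, 3])     # true length of each sequence

    lens_sorted, idx = lens.sort(descending=True)   # packing expects descending lengths
    packed = pack_padded_sequence(x[idx], lens_sorted.tolist(), batch_first=True)
    out_packed, h_n = rnn(packed)
    out, _ = pad_packed_sequence(out_packed, batch_first=True)

    _, inv = idx.sort()                    # undo the sort so rows match the original batch order
    return out[inv], h_n[:, inv]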
def testing(self, init_hidden, maxlen, go_id, mode="greedy"): batch_size = init_hidden.size(0) assert batch_size == 1 decoder_input = to_tensor(torch.LongTensor([[go_id]]).view(1, 1)) # (batch, 1) # input: (batch=1, len=1, emb_size) decoder_input = self.embedding(decoder_input) # (batch, 1, emb_dim) # hidden: (batch=1, 2, hidden_size * 2) decoder_hidden = init_hidden.unsqueeze(0) # (1, batch, 4*hidden+z_size) pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64) for di in range(maxlen - 1): # decode要的是从<s>后一位开始,因此总长度是max_len-1 # 输入decoder decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden) # (1, 1, hidden) decoder_output = self.out(decoder_output.contiguous().view(-1, self.hidden_size)) # (1, vocab_size) # import pdb # pdb.set_trace() if mode == "greedy": topi = decoder_output.max(1, keepdim=True)[1] else: topi = decoder_output.max(1, keepdim=True)[1] topi = torch.multinomial(F.softmax(decoder_output[:, -1], dim=1), 1) # 拿到pred_outs以返回 ni = topi.squeeze().cpu().numpy() pred_outs[:, di] = ni # 为下一次decode准备输入字 decoder_input = self.embedding(topi) # 结束for完成一句诗的token预测 return pred_outs
def testing(self, init_hidden, encoder_output, maxlen, go_id, mode="greedy"):
    batch_size = init_hidden.size(0)
    assert batch_size == 1
    decoder_input = to_tensor(torch.LongTensor([go_id]))  # (batch,)
    decoder_hidden = init_hidden
    pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)

    for di in range(maxlen - 1):  # decode from the first token up to </s>, maxlen-1 steps in total
        embedded = self.embedder(decoder_input).unsqueeze(1)  # (batch, 1, emb_dim)
        # embedded = self.drop_out(embedded)
        mutual_info = torch.cat((decoder_hidden, embedded), dim=2)  # (batch, 1, emb_dim + n_hidden)
        attn_weight = self.attn(mutual_info)  # (batch, 1, 10)
        attn_weight = F.softmax(attn_weight, dim=2)  # (batch, 1, 10)
        attn_applied = torch.bmm(attn_weight, encoder_output)  # (batch, 1, n_hidden)
        rnn_input = torch.cat((attn_applied, embedded), dim=2)  # (batch, 1, n_hidden + emb_dim)
        rnn_input = self.attn_combine(rnn_input)  # (batch, 1, n_hidden)
        rnn_input = F.relu(rnn_input)  # (batch, 1, n_hidden)
        decoder_output, decoder_hidden = self.rnn(rnn_input, decoder_hidden.contiguous())
        decoder_output = self.soft(self.out(decoder_output.contiguous().squeeze(1)))
        if mode == "greedy":
            topi = decoder_output.max(1, keepdim=True)[1]  # [0] is the probability, [1] the index
        else:
            topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)
        ni = topi.squeeze().cpu().numpy()
        pred_outs[:, di] = ni
        decoder_input = topi[0]
    return pred_outs
def sampling(self, init_hidden, maxlen, go_id, eos_id, mode='greedy'):
    batch_size = init_hidden.size(0)  # batch_size equals the repeat count used by the caller
    sample_lens = np.zeros(batch_size, dtype=np.int64)  # generated length of each sample in the batch
    decoder_input = to_tensor(torch.LongTensor([[go_id] * batch_size]).view(batch_size, 1))
    decoder_input = self.embedding(decoder_input)
    decoder_hidden = init_hidden.unsqueeze(0)
    pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)

    for di in range(maxlen - 1):
        decoder_output, decoder_hidden = self.rnn(decoder_input, decoder_hidden)
        decoder_output = self.out(decoder_output.contiguous().view(-1, self.hidden_size))  # (batch, vocab_size)
        if mode == 'greedy':
            topi = decoder_output.max(1, keepdim=True)[1]
        elif mode == 'sample':
            topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)
        ni = topi.squeeze().cpu().numpy()
        pred_outs[:, di] = ni
        decoder_input = self.embedding(topi)

    # one line of the poem has been generated; count the tokens before the first </s>
    for i in range(batch_size):
        for word in pred_outs[i]:
            if word == eos_id:
                break
            sample_lens[i] = sample_lens[i] + 1
    return pred_outs, sample_lens
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles
    test_loader.epoch_init(test_config.batch_size, shuffle=False)
    poem_count = 0
    import random
    sent_labels = []
    for _ in range(4):
        sent_labels.append(random.sample(['0', '1', '2'], k=1)[0])
    logger.info("Random choose sentiment {}".format(" ".join(sent_labels)))

    while True:
        model.eval()
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        poem_count += 1
        if poem_count % 10 == 0:
            print("Predicted {} poems".format(poem_count))
        title_list = batch  # batch size is 1: one batch generates one poem
        title_tensor = to_tensor(title_list)
        # model.test decodes the poem for the current batch; each decoded line is fed back
        # as the context for the next line
        output_poem, output_tokens = model.test(title_tensor, title_list, sent_labels=sent_labels)
        logger.info(output_poem)
    print("Done testing")
def test(self, title_tensor, title_words, sent_labels):
    self.eval()
    name_dict = {'0': 'neg', '1': 'neu', '2': 'pos'}
    batch_size = title_tensor.size(0)
    assert batch_size == 1
    tem = [[2, 3] + [0] * (self.maxlen - 2)]
    pred_poems = []
    # Strip <s>, </s> and padding 0 from the title; only used for printing
    title_tokens = [self.vocab[e] for e in title_words[0].tolist()
                    if e not in [0, self.eos_id, self.go_id]]
    pred_poems.append(title_tokens)
    gen_words = ""
    gen_tokens = []
    for i in range(4):
        tem = to_tensor(np.array(tem))
        context = tem
        if i == 0:
            context_last_hidden, _ = self.layers["seq_encoder"](title_tensor)
        else:
            context_last_hidden, _ = self.layers["seq_encoder"](context)
        title_last_hidden, _ = self.layers["seq_encoder"](title_tensor)

        z = to_tensor(torch.randn([batch_size, self.z_size]))
        final_info = torch.cat([title_last_hidden, context_last_hidden, z], dim=1)
        pred_tokens = self.layers["vae_decoder_{}".format(name_dict[sent_labels[i]])].testing(
            init_hidden=self.layers["init_decoder"](final_info),
            maxlen=self.maxlen,
            go_id=self.go_id,
            mode="greedy")
        pred_tokens = pred_tokens[0].tolist()
        if len(pred_tokens) >= self.maxlen:
            tem = [pred_tokens[0:self.maxlen]]
        else:
            tem = [[0] * (self.maxlen - len(pred_tokens)) + pred_tokens]

        pred_words = [self.vocab[e] for e in pred_tokens[:-1]
                      if e != self.eos_id and e != 0 and e != self.go_id]
        pred_poems.append(pred_words)
        gen_tokens.append(pred_tokens)

    for i in range(5):
        if i == 0:
            cur_line = " ".join(pred_poems[i])
        else:
            cur_line = " ".join(pred_poems[i]) + sent_labels[i - 1]
        gen_words = gen_words + cur_line + '\n'
    return gen_words, gen_tokens
def forward(self, target):
    batch_size = target.size(0)
    target = self.fc(target)
    mu = self.target_to_mu(target)
    logsigma = self.target_to_logsigma(target)
    std = torch.exp(0.5 * logsigma)
    epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
    z = epsilon * std + mu
    return mu, logsigma, z
def test(self, title_tensor, title_words, sentiment_label):
    self.seq_encoder.eval()
    self.decoder.eval()
    assert title_tensor.size(0) == 1
    tem = [[2, 3] + [0] * (self.maxlen - 2)]
    pred_poems = []
    # Strip <s>, </s> and padding 0 from the title; only used for printing
    title_tokens = [
        self.vocab[e] for e in title_words[0].tolist()
        if e not in [0, self.eos_id, self.go_id]
    ]
    pred_poems.append(title_tokens)
    gen_words = "\n"
    gen_tokens = []
    for i in range(4):
        tem = to_tensor(np.array(tem))
        context = tem
        if i == 0:
            context_last_hidden, _ = self.seq_encoder(title_tensor)
        else:
            context_last_hidden, _ = self.seq_encoder(context)
        title_last_hidden, _ = self.seq_encoder(title_tensor)
        sentiment = self.sent_embedder(sentiment_label)

        condition_prior = torch.cat((title_last_hidden, context_last_hidden), dim=1)
        z_prior, prior_mu, prior_logvar = self.sample_code_prior(condition_prior, sentiment_label)
        final_info = torch.cat((z_prior, condition_prior, sentiment), 1)

        pred_tokens = self.decoder.testing(
            init_hidden=self.init_decoder_hidden(final_info),
            maxlen=self.maxlen,
            go_id=self.go_id,
            mode="greedy")
        pred_tokens = pred_tokens[0].tolist()
        if len(pred_tokens) >= self.maxlen:
            tem = [pred_tokens[0:self.maxlen]]
        else:
            tem = [[0] * (self.maxlen - len(pred_tokens)) + pred_tokens]

        pred_words = [
            self.vocab[e] for e in pred_tokens[:-1]
            if e != self.eos_id and e != 0 and e != self.go_id
        ]
        pred_poems.append(pred_words)
        gen_tokens.append(pred_tokens)

    for line in pred_poems:
        cur_line = " ".join(line)
        gen_words = gen_words + cur_line + '\n'
    return gen_words, gen_tokens
def forward(self, context):
    batch_size, _ = context.size()
    # prior: (batch, 4 * hidden)
    context = self.fc(context)
    mu = self.context_to_mu(context)
    logsigma = self.context_to_logsigma(context)
    std = torch.exp(0.5 * logsigma)
    epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
    z = epsilon * std + mu
    return z, mu, logsigma
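# The two lines `std = exp(0.5 * logsigma)` and `z = epsilon * std + mu` above are the standard
# reparameterization trick; training such a prior/recognition pair usually also needs a Gaussian
# KL term. A minimal sketch of both under the usual diagonal-Gaussian assumption
# (helper names are illustrative, not this repository's):
import torch

def reparameterize(mu, logvar):
    """Sample z ~ N(mu, sigma^2) while keeping gradients w.r.t. mu and logvar."""
    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    return mu + eps * std

def gaussian_kl(mu_q, logvar_q, mu_p, logvar_p):
    """KL( N(mu_q, var_q) || N(mu_p, var_p) ), summed over latent dims, averaged over the batch."""
    kl = 0.5 * (logvar_p - logvar_q
                + (logvar_q.exp() + (mu_q - mu_p) ** 2) / logvar_p.exp()
                - 1)
    return kl.sum(dim=1).mean()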
def apply_gaussian_blur(adv_img, kernel_size=5, sigma=0):
    # If sigma == 0, cv2.GaussianBlur calculates sigma from the kernel size.
    N, ch, H, W = adv_img.shape
    for i in range(N):
        img = helper.to_numpy(adv_img[i])
        img = cv2.GaussianBlur(img, (kernel_size, kernel_size), sigmaX=sigma)
        adv_img[i] = helper.to_tensor(img)
    return adv_img
def test_VAE(self, sent_name, batch_size=1):
    z = to_tensor(torch.randn([batch_size, self.z_size]))
    pred_tokens = self.layers["vae_decoder_{}".format(sent_name)].testing(
        init_hidden=self.layers["init_decoder_hidden"](z),
        maxlen=self.maxlen,
        go_id=self.go_id,
        mode="greedy")
    pred_words = []
    for b_id in range(pred_tokens.shape[0]):
        pred_words.append([self.vocab[e] for e in pred_tokens[b_id][:-1]
                           if e != self.eos_id and e != 0 and e != self.go_id])
    return pred_words
def valid_process_sentiment(model, valid_poem_loader, valid_config, global_iter, num,
                            tb_writer, logger, cur_best_score_labeled):
    valid_poem_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}
    while True:
        batch = valid_poem_loader.next_sentiment_batch()
        if batch is None:  # end of epoch
            break
        title, context, target, target_lens, sentiment_mask = batch
        title, context, target, target_lens, sentiment_mask = \
            to_tensor(title), to_tensor(context), to_tensor(target), \
            to_tensor(target_lens), to_tensor(sentiment_mask)
        valid_loss = model.valid(title, context, target, target_lens, sentiment_mask)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            v.append(loss_value)
            loss_records[loss_name] = v

    log = 'Valid: Global iter {} Validation\n'.format(global_iter)
    for loss_name, loss_values in loss_records.items():
        if loss_name == 'valid_loss_AE' and np.mean(loss_values) < cur_best_score_labeled['min_valid_loss_label']:
            log += "\nFOUND a new best valid loss in global %d, num %d\n" % (global_iter, num)
            cur_best_score_labeled['min_valid_loss_label'] = np.mean(loss_values)
            cur_best_score_labeled['min_global_itr_label'] = global_iter
            cur_best_score_labeled['min_num_label'] = num
        log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_iter)
    logger.info(log)
def sample_from_specific_latent_area(self, sentiment_label):
    batch_size = sentiment_label.size(0)
    z_origin = to_tensor(torch.randn([batch_size, self.z_size]))
    # Use sentiment_label to mask out every sentence that does not carry the current sentiment;
    # combining the three masked tensors afterwards recovers all of the samples.
    mask_pos = sentiment_label.gt(1).view(-1, 1).expand(batch_size, self.z_size)
    mask_neu = sentiment_label.eq(1).view(-1, 1).expand(batch_size, self.z_size)
    mask_neg = sentiment_label.lt(1).view(-1, 1).expand(batch_size, self.z_size)
    z_pos = z_origin.mul(mask_pos)
    z_neu = z_origin.mul(mask_neu)
    z_neg = z_origin.mul(mask_neg)
    return {'pos': z_pos, 'neu': z_neu, 'neg': z_neg}
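# A toy check of the masking above (hypothetical values, not repository code): labels {0, 1, 2}
# map to neg/neu/pos via lt/eq/gt, so the three masks partition the batch and the masked samples
# add back up to the original draw.
import torch

def latent_mask_sketch():
    labels = torch.tensor([0, 1, 2])                  # neg, neu, pos
    z = torch.randn(3, 4)                             # (batch, z_size)
    mask_pos = labels.gt(1).view(-1, 1).expand_as(z)  # only row 2 is True
    mask_neu = labels.eq(1).view(-1, 1).expand_as(z)  # only row 1 is True
    mask_neg = labels.lt(1).view(-1, 1).expand_as(z)  # only row 0 is True
    assert torch.allclose(z * mask_pos + z * mask_neu + z * mask_neg, z)
    return z * mask_pos, z * mask_neu, z * mask_neg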
def valid_process(model, valid_loader, valid_config, global_iter, unlabeled_epoch,
                  batch_idx, tb_writer, logger, cur_best_score):
    valid_loader.epoch_init(valid_config.batch_size, shuffle=False)
    model.eval()
    loss_records = {}
    while True:
        batch = valid_loader.next_batch()
        if batch is None:  # end of epoch
            break
        title, context, target, target_lens = batch
        title, context, target, target_lens = \
            to_tensor(title), to_tensor(context), to_tensor(target), to_tensor(target_lens)
        valid_loss = model.valid(title, context, target, target_lens)
        for loss_name, loss_value in valid_loss:
            v = loss_records.get(loss_name, [])
            if loss_name == 'min_valid_loss' and loss_value < cur_best_score['min_valid_loss']:
                cur_best_score['min_valid_loss'] = loss_value
                cur_best_score['min_global_itr'] = global_iter
                cur_best_score['min_epoch'] = unlabeled_epoch
                cur_best_score['min_itr'] = batch_idx
            v.append(loss_value)
            loss_records[loss_name] = v

    log = 'Global iter {} Validation:'.format(global_iter)
    for loss_name, loss_values in loss_records.items():
        log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
        if args.visual:
            tb_writer.add_scalar(loss_name, np.mean(loss_values), global_iter)
    logger.info(log)
def forward(self, context, sent_label):
    sentiment = self.sent_embedder(sent_label)
    batch_size, _ = context.size()
    # prior: (batch, 4 * hidden)
    cond = torch.cat([context, sentiment], dim=1)
    context = self.fc(cond)
    mu = self.context_to_mu(context)
    logsigma = self.context_to_logsigma(context)
    std = torch.exp(0.5 * logsigma)
    epsilon = to_tensor(torch.randn([batch_size, self.z_size]))
    z = epsilon * std + mu
    return z, mu, logsigma
def test(self, title, title_list, batch_size):
    self.encoder.eval()
    self.decoder.eval()
    assert title.size(0) == 1
    tem = title[0][0:self.maxlen].unsqueeze(0)
    pred_poems = []
    title_tokens = [self.vocab[e] for e in title_list[0].tolist()
                    if e not in [0, self.eos_id, self.go_id]]
    pred_poems.append(title_tokens)
    for sent_id in range(4):
        context = tem
        if type(context) is list:
            vec_context = np.zeros((batch_size, self.maxlen), dtype=np.int64)
            for b_id in range(batch_size):
                vec_context[b_id, :] = np.array(context[b_id])
            context = to_tensor(vec_context)
        encoder_last_hidden, encoder_output = self.encoder(context)
        batch_size = encoder_last_hidden.size(0)
        hidden_size = encoder_last_hidden.size(1) // 2
        # (1, 1, n_hidden)
        last_hidden = encoder_last_hidden.view(batch_size, 2, -1)[:, -1, :].unsqueeze(0)
        # (batch, len, n_hidden)
        encoder_output = encoder_output.view(batch_size, -1, 2, hidden_size)[:, :, -1]
        # decode_words is one complete line of the poem
        decode_words = self.decoder.testing(init_hidden=last_hidden,
                                            encoder_output=encoder_output,
                                            maxlen=self.maxlen,
                                            go_id=self.go_id,
                                            mode="greedy")
        decode_words = decode_words[0].tolist()
        if len(decode_words) > self.maxlen:
            tem = [decode_words[0:self.maxlen]]
        else:
            tem = [[0] * (self.maxlen - len(decode_words)) + decode_words]
        pred_tokens = [self.vocab[e] for e in decode_words[:-1]
                       if e != self.eos_id and e != 0]
        pred_poems.append(pred_tokens)

    gen = ''
    for line in pred_poems:
        true_str = " ".join(line)
        gen = gen + true_str + '\n'
    return gen
def sampling(self, init_hidden, encoder_output, maxlen, go_id, eos_id, mode='greedy'):
    batch_size = init_hidden.size(1)
    decoder_input = to_tensor(torch.LongTensor(batch_size * [go_id]))  # (batch,)
    decoder_hidden = init_hidden  # (1, batch, hidden)
    pred_outs = np.zeros((batch_size, maxlen), dtype=np.int64)
    sample_lens = np.zeros(batch_size, dtype=np.int64)

    for di in range(maxlen - 1):  # decode from the first token up to </s>, maxlen-1 steps in total
        embedded = self.embedder(decoder_input).unsqueeze(1)  # (batch, 1, emb_dim)
        # embedded = self.drop_out(embedded)
        mutual_info = torch.cat((decoder_hidden.squeeze(0).unsqueeze(1), embedded), dim=2)  # (batch, 1, emb_dim + n_hidden)
        attn_weight = self.attn(mutual_info)  # (batch, 1, 10)
        attn_weight = F.softmax(attn_weight, dim=2)  # (batch, 1, 10)
        attn_applied = torch.bmm(attn_weight, encoder_output)  # (batch, 1, n_hidden)
        rnn_input = torch.cat((attn_applied, embedded), dim=2)  # (batch, 1, n_hidden + emb_dim)
        rnn_input = self.attn_combine(rnn_input)  # (batch, 1, n_hidden)
        rnn_input = F.relu(rnn_input)  # (batch, 1, n_hidden)
        decoder_output, decoder_hidden = self.rnn(rnn_input, decoder_hidden.contiguous())
        decoder_output = self.soft(self.out(decoder_output.contiguous().squeeze(1)))
        if mode == "greedy":
            topi = decoder_output.max(1, keepdim=True)[1]  # [0] is the probability, [1] the index
        else:
            topi = torch.multinomial(F.softmax(decoder_output, dim=1), 1)
        ni = topi.squeeze().cpu().numpy()
        pred_outs[:, di] = ni
        decoder_input = topi.squeeze(1)

    # count the tokens generated before the first </s>
    for i in range(batch_size):
        for word in pred_outs[i]:
            if word == eos_id:
                break
            sample_lens[i] = sample_lens[i] + 1
    return pred_outs, sample_lens
def forward(self, context, sent_label):
    sentiment = self.sent_embedder(sent_label)
    batch_size, _ = context.size()
    # prior: (batch, 4 * hidden)
    cond = torch.cat([context, sentiment], dim=1)
    context = self.fc(cond)
    # From here on there is no need to pick one specific Gaussian; the prior is treated as a
    # single multivariate Gaussian. The sentiment is specified by the original input instead:
    # embedding it as part of the condition gives the most direct control, rather than
    # splitting the prior into several separate distributions.
    mus = self.context_to_mus(context)
    logsigmas = self.context_to_logsigmas(context)
    # sample from the resulting Gaussian
    stds = torch.exp(0.5 * logsigmas)  # (batch, 5 * z_size)
    epsilons = to_tensor(torch.randn([batch_size, self.z_size]))
    z = epsilons * stds + mus  # (batch, 5, z_size)
    return z, mus, logsigmas
def test_process(model, test_loader, test_config, logger):
    # After each training epoch, generate poems from the test-set titles
    # mask_types = ['negative', 'positive', 'neutral']
    model.eval()
    output_poems = ""
    test_loader.epoch_init(test_config.batch_size, shuffle=False)
    while True:
        model.eval()  # eval() mainly affects BatchNorm, dropout, etc.
        batch = test_loader.next_batch_test()  # test data uses its own batch format
        if batch is None:
            break
        title_list, headers = batch  # batch size is 1: one batch generates one poem
        title_tensor = to_tensor(title_list)
        # model.test decodes the poem for the current batch; each decoded line is fed back
        # as the context for the next line
        # output_poem = 'Global iter: {}\n'.format(global_iter)
        output_poem = model.test(title_tensor=title_tensor, title_words=title_list, headers=headers)
        output_poems += output_poem
    logger.info(output_poems)
    print("Done testing")
def test(self, title_tensor, title_words, headers):
    self.seq_encoder.eval()
    self.discriminator.eval()
    self.decoder.eval()

    # tem is initialised to [2, 3, 0, 0, 0, 0, 0, 0, 0]
    tem = [[2, 3] + [0] * (self.maxlen - 2)]
    pred_poems = []
    title_tokens = [
        self.vocab[e] for e in title_words[0].tolist()
        if e not in [0, self.eos_id, self.go_id]
    ]
    pred_poems.append(title_tokens)
    for sent_id in range(4):
        tem = to_tensor(np.array(tem))
        context = tem
        title_last_hidden, _ = self.seq_encoder(title_tensor)  # (batch=1, 2*hidden)
        if sent_id == 0:
            context_last_hidden, _ = self.seq_encoder(title_tensor)  # (batch=1, 2*hidden)
        else:
            context_last_hidden, _ = self.seq_encoder(context)  # (batch=1, 2*hidden)
        c = torch.cat((title_last_hidden, context_last_hidden), 1)  # (batch, 4*hidden_size)
        # only one poem at a time (batch_size = 1), so no repeat is needed
        prior_z = self.sample_code_prior(c)

        # decode_words is one complete line of the poem
        decode_words = self.decoder.testing(
            init_hidden=self.init_decoder_hidden(torch.cat((prior_z, c), 1)),
            maxlen=self.maxlen,
            go_id=self.go_id,
            mode="greedy",
            header=headers[sent_id])
        decode_words = decode_words[0].tolist()
        if len(decode_words) > self.maxlen:
            tem = [decode_words[0:self.maxlen]]
        else:
            tem = [[0] * (self.maxlen - len(decode_words)) + decode_words]
        pred_tokens = [
            self.vocab[e] for e in decode_words[:-1]
            if e != self.eos_id and e != 0
        ]
        pred_poems.append(pred_tokens)

    gen = ''
    for line in pred_poems:
        true_str = " ".join(line)
        gen = gen + true_str + '\n'
    return gen
def main(): # config for training config = Config() print("Normal train config:") pp(config) valid_config = Config() valid_config.dropout = 0 valid_config.batch_size = 20 # config for test test_config = Config() test_config.dropout = 0 test_config.batch_size = 1 with_sentiment = config.with_sentiment ############################################################################### # Load data ############################################################################### # sentiment data path: ../ final_data / poem_with_sentiment.txt # 该path必须命令行显示输入LoadPoem,因为defaultNonehjk # 处理pretrain数据和完整诗歌数据 # api = LoadPoem(args.train_data_dir, args.test_data_dir, args.max_vocab_size) api = LoadPoem(corpus_path=args.train_data_dir, test_path=args.test_data_dir, max_vocab_cnt=config.max_vocab_cnt, with_sentiment=with_sentiment) # 交替训练,准备大数据集 poem_corpus = api.get_tokenized_poem_corpus( type=1 + int(with_sentiment)) # corpus for training and validation test_data = api.get_tokenized_test_corpus() # 测试数据 # 三个list,每个list中的每一个元素都是 [topic, last_sentence, current_sentence] train_poem, valid_poem, test_poem = poem_corpus["train"], poem_corpus[ "valid"], test_data["test"] train_loader = SWDADataLoader("Train", train_poem, config) valid_loader = SWDADataLoader("Valid", valid_poem, config) test_loader = SWDADataLoader("Test", test_poem, config) print("Finish Poem data loading, not pretraining or alignment test") if not args.forward_only: # LOG # log_start_time = str(datetime.now().strftime('%Y%m%d%H%M')) if not os.path.isdir('./output'): os.makedirs('./output') if not os.path.isdir('./output/{}'.format(args.expname)): os.makedirs('./output/{}'.format(args.expname)) if not os.path.isdir('./output/{}/{}'.format(args.expname, log_start_time)): os.makedirs('./output/{}/{}'.format(args.expname, log_start_time)) # save arguments json.dump( vars(args), open( './output/{}/{}/args.json'.format(args.expname, log_start_time), 'w')) logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG, format="%(message)s") fh = logging.FileHandler("./output/{}/{}/logs.txt".format( args.expname, log_start_time)) # add the handlers to the logger logger.addHandler(fh) logger.info(vars(args)) tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format( args.expname, log_start_time)) if args.visual else None if config.reload_model: model = load_model(config.model_name) else: if args.model == "mCVAE": model = CVAE_GMP(config=config, api=api) elif args.model == 'CVAE': model = CVAE(config=config, api=api) else: model = Seq2Seq(config=config, api=api) if use_cuda: model = model.cuda() # if corpus.word2vec is not None and args.reload_from<0: # print("Loaded word2vec") # model.embedder.weight.data.copy_(torch.from_numpy(corpus.word2vec)) # model.embedder.weight.data[0].fill_(0) ############################################################################### # Start training ############################################################################### # model依然是PoemWAE_GMP保持不变,只不过,用这部分数据强制训练其中一个高斯先验分布 # pretrain = True cur_best_score = { 'min_valid_loss': 100, 'min_global_itr': 0, 'min_epoch': 0, 'min_itr': 0 } train_loader.epoch_init(config.batch_size, shuffle=True) # model = load_model(3, 3) epoch_id = 0 global_t = 0 while epoch_id < config.epochs: while True: # loop through all batches in training data # train一个batch model, finish_train, loss_records, global_t = \ train_process(global_t=global_t, model=model, train_loader=train_loader, config=config, sentiment_data=with_sentiment) if finish_train: test_process(model=model, 
test_loader=test_loader, test_config=test_config, logger=logger) # evaluate_process(model=model, valid_loader=valid_loader, log_start_time=log_start_time, global_t=global_t, epoch=epoch_id, logger=logger, tb_writer=tb_writer, api=api) # save model after each epoch save_model(model=model, epoch=epoch_id, global_t=global_t, log_start_time=log_start_time) logger.info( 'Finish epoch %d, current min valid loss: %.4f \ correspond epoch: %d itr: %d \n\n' % (cur_best_score['min_valid_loss'], cur_best_score['min_global_itr'], cur_best_score['min_epoch'], cur_best_score['min_itr'])) # 初始化下一个unlabeled data epoch的训练 # unlabeled_epoch += 1 epoch_id += 1 train_loader.epoch_init(config.batch_size, shuffle=True) break # elif batch_idx >= start_batch + config.n_batch_every_iter: # print("Finish unlabel epoch %d batch %d to %d" % # (unlabeled_epoch, start_batch, start_batch + config.n_batch_every_iter)) # start_batch += config.n_batch_every_iter # break # 写一下log if global_t % config.log_every == 0: log = 'Epoch id %d: step: %d/%d: ' \ % (epoch_id, global_t % train_loader.num_batch, train_loader.num_batch) for loss_name, loss_value in loss_records: if loss_name == 'avg_lead_loss': continue log = log + loss_name + ':%.4f ' % loss_value if args.visual: tb_writer.add_scalar(loss_name, loss_value, global_t) logger.info(log) # valid if global_t % config.valid_every == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) valid_process( global_t=global_t, model=model, valid_loader=valid_loader, valid_config=valid_config, unlabeled_epoch= epoch_id, # 如果sample_rate_unlabeled不是1,这里要在最后加一个1 tb_writer=tb_writer, logger=logger, cur_best_score=cur_best_score) # if batch_idx % (train_loader.num_batch // 3) == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) if global_t % config.test_every == 0: test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) # forward_only 测试 else: expname = 'sentInput' time = '202101191105' model = load_model( './output/{}/{}/model_global_t_13596_epoch3.pckl'.format( expname, time)) test_loader.epoch_init(test_config.batch_size, shuffle=False) if not os.path.exists('./output/{}/{}/test/'.format(expname, time)): os.mkdir('./output/{}/{}/test/'.format(expname, time)) output_file = [ open('./output/{}/{}/test/output_0.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_1.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w') ] poem_count = 0 predict_results = {0: [], 1: [], 2: []} titles = {0: [], 1: [], 2: []} sentiment_result = {0: [], 1: [], 2: []} # Get all poem predictions while True: model.eval() batch = test_loader.next_batch_test() # test data使用专门的batch poem_count += 1 if poem_count % 10 == 0: print("Predicted {} poems".format(poem_count)) if batch is None: break title_list = batch # batch size是1,一个batch写一首诗 title_tensor = to_tensor(title_list) # test函数将当前batch对应的这首诗decode出来,记住每次decode的输入context是上一次的结果 for i in range(3): sentiment_label = np.zeros(1, dtype=np.int64) sentiment_label[0] = int(i) sentiment_label = to_tensor(sentiment_label) output_poem, output_tokens = model.test( title_tensor, title_list, sentiment_label=sentiment_label) titles[i].append(output_poem.strip().split('\n')[0]) predict_results[i] += (np.array(output_tokens)[:, :7].tolist()) # Predict sentiment use the sort net from collections import defaultdict neg = defaultdict(int) neu = defaultdict(int) pos = defaultdict(int) total = 
defaultdict(int) for i in range(3): _, neg[i], neu[i], pos[i] = test_sentiment(predict_results[i]) total[i] = neg[i] + neu[i] + pos[i] for i in range(3): print("%d%%\t%d%%\t%d%%" % (neg * 100 / total, neu * 100 / total, pos * 100 / total)) for i in range(3): write_predict_result_to_file(titles[i], predict_results[i], sentiment_result[i], output_file[i]) output_file[i].close() print("Done testing")
def main(): # config for training config = Config() print("Normal train config:") pp(config) valid_config = Config() valid_config.dropout = 0 valid_config.batch_size = 20 # config for test test_config = Config() test_config.dropout = 0 test_config.batch_size = 1 with_sentiment = config.with_sentiment pretrain = False ############################################################################### # Logs ############################################################################### log_start_time = str(datetime.now().strftime('%Y%m%d%H%M')) if not os.path.isdir('./output'): os.makedirs('./output') if not os.path.isdir('./output/{}'.format(args.expname)): os.makedirs('./output/{}'.format(args.expname)) if not os.path.isdir('./output/{}/{}'.format(args.expname, log_start_time)): os.makedirs('./output/{}/{}'.format(args.expname, log_start_time)) # save arguments json.dump( vars(args), open('./output/{}/{}/args.json'.format(args.expname, log_start_time), 'w')) logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG, format="%(message)s") fh = logging.FileHandler("./output/{}/{}/logs.txt".format( args.expname, log_start_time)) # add the handlers to the logger logger.addHandler(fh) logger.info(vars(args)) tb_writer = SummaryWriter("./output/{}/{}/tb_logs".format( args.expname, log_start_time)) if args.visual else None ############################################################################### # Model ############################################################################### # vocab and rev_vocab with open(args.vocab_path) as vocab_file: vocab = vocab_file.read().strip().split('\n') rev_vocab = {vocab[idx]: idx for idx in range(len(vocab))} if not pretrain: pass # assert config.reload_model # model = load_model(config.model_name) else: if args.model == "multiVAE": model = multiVAE(config=config, vocab=vocab, rev_vocab=rev_vocab) else: model = CVAE(config=config, vocab=vocab, rev_vocab=rev_vocab) if use_cuda: model = model.cuda() ############################################################################### # Load data ############################################################################### if pretrain: from collections import defaultdict api = LoadPretrainPoem(corpus_path=args.pretrain_data_dir, vocab_path="data/vocab.txt") train_corpus, valid_corpus = defaultdict(list), defaultdict(list) divide = 50000 train_corpus['pos'], valid_corpus['pos'] = api.data[ 'pos'][:divide], api.data['pos'][divide:] train_corpus['neu'], valid_corpus['neu'] = api.data[ 'neu'][:divide], api.data['neu'][divide:] train_corpus['neg'], valid_corpus['neg'] = api.data[ 'neg'][:divide], api.data['neg'][divide:] token_corpus = defaultdict(dict) token_corpus['pos'], token_corpus['neu'], token_corpus['neg'] = \ api.get_tokenized_poem_corpus(train_corpus['pos'], valid_corpus['pos']), \ api.get_tokenized_poem_corpus(train_corpus['neu'], valid_corpus['neu']), \ api.get_tokenized_poem_corpus(train_corpus['neg'], valid_corpus['neg']), # train_loader_dict = {'pos': } train_loader = { 'pos': SWDADataLoader("Train", token_corpus['pos']['train'], config), 'neu': SWDADataLoader("Train", token_corpus['neu']['train'], config), 'neg': SWDADataLoader("Train", token_corpus['neg']['train'], config) } valid_loader = { 'pos': SWDADataLoader("Train", token_corpus['pos']['valid'], config), 'neu': SWDADataLoader("Train", token_corpus['neu']['valid'], config), 'neg': SWDADataLoader("Train", token_corpus['neg']['valid'], config) } ############################################################################### # 
Pretrain three VAEs ############################################################################### epoch_id = 0 global_t = 0 init_train_loaders(train_loader, config) while epoch_id < config.epochs: while True: # loop through all batches in training data # train一个batch model, finish_train, loss_records, global_t = \ pre_train_process(global_t=global_t, model=model, train_loader=train_loader) if finish_train: if epoch_id > 5: save_model(model=model, epoch=epoch_id, global_t=global_t, log_start_time=log_start_time) epoch_id += 1 init_train_loaders(train_loader, config) break # 写一下log if global_t % config.log_every == 0: pre_log_process(epoch_id=epoch_id, global_t=global_t, train_loader=train_loader, loss_records=loss_records, logger=logger, tb_writer=tb_writer) # valid if global_t % config.valid_every == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) pre_valid_process(global_t=global_t, model=model, valid_loader=valid_loader, valid_config=valid_config, tb_writer=tb_writer, logger=logger) if global_t % config.test_every == 0: pre_test_process(model=model, logger=logger) ############################################################################### # Train the big model ############################################################################### api = LoadPoem(corpus_path=args.train_data_dir, vocab_path="data/vocab.txt", test_path=args.test_data_dir, max_vocab_cnt=config.max_vocab_cnt, with_sentiment=with_sentiment) from collections import defaultdict token_corpus = defaultdict(dict) token_corpus['pos'], token_corpus['neu'], token_corpus['neg'] = \ api.get_tokenized_poem_corpus(api.train_corpus['pos'], api.valid_corpus['pos']), \ api.get_tokenized_poem_corpus(api.train_corpus['neu'], api.valid_corpus['neu']), \ api.get_tokenized_poem_corpus(api.train_corpus['neg'], api.valid_corpus['neg']), train_loader = { 'pos': SWDADataLoader("Train", token_corpus['pos']['train'], config), 'neu': SWDADataLoader("Train", token_corpus['neu']['train'], config), 'neg': SWDADataLoader("Train", token_corpus['neg']['train'], config) } valid_loader = { 'pos': SWDADataLoader("Train", token_corpus['pos']['valid'], config), 'neu': SWDADataLoader("Train", token_corpus['neu']['valid'], config), 'neg': SWDADataLoader("Train", token_corpus['neg']['valid'], config) } test_poem = api.get_tokenized_test_corpus()['test'] # 测试数据 test_loader = SWDADataLoader("Test", test_poem, config) print("Finish Poem data loading, not pretraining or alignment test") if not args.forward_only: # model依然是PoemWAE_GMP保持不变,只不过,用这部分数据强制训练其中一个高斯先验分布 # pretrain = True cur_best_score = { 'min_valid_loss': 100, 'min_global_itr': 0, 'min_epoch': 0, 'min_itr': 0 } # model = load_model(3, 3) epoch_id = 0 global_t = 0 init_train_loaders(train_loader, config) while epoch_id < config.epochs: while True: # loop through all batches in training data # train一个batch model, finish_train, loss_records, global_t = \ train_process(global_t=global_t, model=model, train_loader=train_loader) if finish_train: if epoch_id > 5: save_model(model=model, epoch=epoch_id, global_t=global_t, log_start_time=log_start_time) epoch_id += 1 init_train_loaders(train_loader, config) break # 写一下log if global_t % config.log_every == 0: pre_log_process(epoch_id=epoch_id, global_t=global_t, train_loader=train_loader, loss_records=loss_records, logger=logger, tb_writer=tb_writer) # valid if global_t % config.valid_every == 0: valid_process(global_t=global_t, model=model, valid_loader=valid_loader, valid_config=valid_config, 
tb_writer=tb_writer, logger=logger) # if batch_idx % (train_loader.num_batch // 3) == 0: # test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) if global_t % config.test_every == 0: test_process(model=model, test_loader=test_loader, test_config=test_config, logger=logger) # forward_only 测试 else: expname = 'trainVAE' time = '202101231631' model = load_model( './output/{}/{}/model_global_t_26250_epoch9.pckl'.format( expname, time)) test_loader.epoch_init(test_config.batch_size, shuffle=False) if not os.path.exists('./output/{}/{}/test/'.format(expname, time)): os.mkdir('./output/{}/{}/test/'.format(expname, time)) output_file = [ open('./output/{}/{}/test/output_0.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_1.txt'.format(expname, time), 'w'), open('./output/{}/{}/test/output_2.txt'.format(expname, time), 'w') ] poem_count = 0 predict_results = {0: [], 1: [], 2: []} titles = {0: [], 1: [], 2: []} sentiment_result = {0: [], 1: [], 2: []} # sent_dict = {0: ['0', '1', '1', '0'], 1: ['2', '1', '2', '2'], 2: ['1', '0', '1', '2']} sent_dict = { 0: ['0', '0', '0', '0'], 1: ['1', '1', '1', '1'], 2: ['2', '2', '2', '2'] } # Get all poem predictions while True: model.eval() batch = test_loader.next_batch_test() # test data使用专门的batch poem_count += 1 if poem_count % 10 == 0: print("Predicted {} poems".format(poem_count)) if batch is None: break title_list = batch # batch size是1,一个batch写一首诗 title_tensor = to_tensor(title_list) # test函数将当前batch对应的这首诗decode出来,记住每次decode的输入context是上一次的结果 for i in range(3): sent_labels = sent_dict[i] for _ in range(4): sent_labels.append(str(i)) output_poem, output_tokens = model.test( title_tensor, title_list, sent_labels=sent_labels) titles[i].append(output_poem.strip().split('\n')[0]) predict_results[i] += (np.array(output_tokens)[:, :7].tolist()) # Predict sentiment use the sort net from collections import defaultdict neg = defaultdict(int) neu = defaultdict(int) pos = defaultdict(int) total = defaultdict(int) for i in range(3): cur_sent_result, neg[i], neu[i], pos[i] = test_sentiment( predict_results[i]) sentiment_result[i] = cur_sent_result total[i] = neg[i] + neu[i] + pos[i] for i in range(3): print("%d%%\t%d%%\t%d%%" % (neg[i] * 100 / total[i], neu[i] * 100 / total[i], pos[i] * 100 / total[i])) for i in range(3): write_predict_result_to_file(titles[i], predict_results[i], sentiment_result[i], output_file[i]) output_file[i].close() print("Done testing")
def forward(self, context, sentiment_mask=None, mask_type=None):
    batch_size, _ = context.size()
    context = self.fc(context)
    mus = self.context_to_mus(context)
    logsigmas = self.context_to_logsigmas(context)
    stds = torch.exp(0.5 * logsigmas)  # (batch, 5 * z_size)
    # epsilons: (batch, 5 * z_size)
    epsilons = to_tensor(torch.randn([batch_size, self.n_components * self.z_size]))
    zi = (epsilons * stds + mus).view(batch_size, self.n_components, self.z_size)  # (batch, 5, z_size)

    pi = None
    pi_final = None
    if sentiment_mask is None:
        if mask_type is not None:
            # force the mixture choice onto a single component
            pi = torch.zeros(batch_size, 3)
            if mask_type == "0":
                pi[:, 0] = 1
            elif mask_type == "1":
                pi[:, 1] = 1
            elif mask_type == "2":
                pi[:, 2] = 1
            else:
                print("Mask type invalid")
            pi_final = pi.cuda()
        else:
            pi = self.pi_net(context)  # (batch, 5)
            pi_hard = F.gumbel_softmax(pi, tau=self.gumbel_temp, hard=True, eps=1e-10)
            pi_soft = F.gumbel_softmax(pi, tau=self.gumbel_temp, hard=False, eps=1e-10)
            # straight-through estimator: hard one-hot in the forward pass, soft gradients backward
            pi_final = pi_hard - pi_soft.detach() + pi_soft
        pi_final = pi_final.unsqueeze(1)  # (batch, 1, 5)
        z = torch.bmm(pi_final, zi).squeeze(1)  # (batch, 1, z_size) --> (batch, z_size)
        mu = torch.bmm(pi_final, mus.view(batch_size, self.n_components, self.z_size))  # (batch, z_size)
        logsigma = torch.bmm(pi_final,
                             logsigmas.view(batch_size, self.n_components, self.z_size))  # (batch, z_size)
    else:
        # a one-hot sentiment_mask selects the mixture component directly
        sentiment_mask = sentiment_mask.unsqueeze(1)
        z = torch.bmm(sentiment_mask.float(), zi).squeeze(1)
        mu = torch.bmm(sentiment_mask.float(),
                       mus.view(batch_size, self.n_components, self.z_size))
        logsigma = torch.bmm(sentiment_mask.float(),
                             logsigmas.view(batch_size, self.n_components, self.z_size))  # (batch, z_size)
    return z, mu, logsigma, pi, pi_final
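# `pi_hard - pi_soft.detach() + pi_soft` above is the straight-through Gumbel-Softmax trick:
# the forward value equals the hard one-hot sample while gradients flow through the soft
# relaxation. A minimal illustration with toy logits (not repository code; note that, like the
# code above, the hard and soft samples here are drawn with independent Gumbel noise):
import torch
import torch.nn.functional as F

def straight_through_gumbel_sketch():
    logits = torch.randn(2, 3, requires_grad=True)         # (batch, n_components)
    hard = F.gumbel_softmax(logits, tau=1.0, hard=True)    # one-hot in the forward pass
    soft = F.gumbel_softmax(logits, tau=1.0, hard=False)   # differentiable relaxation
    pi = hard - soft.detach() + soft                       # value == hard, gradient path == soft
    pi.sum().backward()
    return pi, logits.grad                                 # pi rows are one-hot; grad is non-zero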