def evaluate_switch(model, linear_layer, data, my_vocab):
    """Evaluate sentence-switch detection accuracy on `data`."""
    all_paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in data]
    all_paragraph_lengths = [len(this_sample) for this_sample in data]
    # Build the candidate pool used to corrupt documents; the original code
    # draws sentences from the first 2000 paragraphs, so guard against
    # smaller evaluation sets.
    sentence_cands = []
    for i in range(min(2000, len(all_paragraphs))):
        sentence_cands += all_paragraphs[i][0]
    total_corrects = 0
    total_samples = 0
    with torch.no_grad():
        for current_batch in range((len(data) - 1) // batch_size + 1):
            paragraphs = all_paragraphs[current_batch * batch_size:
                                        (current_batch + 1) * batch_size]
            paragraph_lengths = all_paragraph_lengths[current_batch * batch_size:
                                                      (current_batch + 1) * batch_size]
            # Corrupt each document by switching some sentences with candidates;
            # `masks` marks which positions were switched.
            masked_paragraphs, masks = switch_sentence(paragraphs, sentence_cands)
            embeds = model(masked_paragraphs)
            this_batch_size, doc_size, embed_dim = embeds.size()
            embeds = embeds.view(-1, embed_dim)
            sigmoid = nn.Sigmoid()
            scores = sigmoid(linear_layer(embeds).view(this_batch_size, doc_size))
            labels = torch.cat(masks).long().to(device)
            # Drop padded sentence positions before thresholding.
            scores = filter_output(scores.view(-1), paragraph_lengths)
            preds = scores.ge(0.5).long().to(device)
            total_corrects += torch.sum(labels == preds).item()
            total_samples += len(preds)
    return float(total_corrects) / total_samples
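
# NOTE (assumption): `filter_output`, `switch_sentence`, and `build_paragraph`
# are defined elsewhere in this module. The sketch below illustrates what
# `filter_output` is assumed to do, inferred from its call sites above: drop
# the padded tail of each document row in a flattened (batch, doc_size)
# tensor. The name `_filter_output_reference` is hypothetical and not part of
# the original code.
def _filter_output_reference(flat_values, paragraph_lengths):
    """Keep only the first `length` positions of each document row and
    concatenate the result into a single 1-D tensor."""
    doc_size = flat_values.size(0) // len(paragraph_lengths)
    kept = [flat_values[i * doc_size:i * doc_size + length]
            for i, length in enumerate(paragraph_lengths)]
    return torch.cat(kept)
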
def train_replace(model, classification_layer, paragraphs, paragraph_lengths,
                  sentence_cands):
    """Compute the sentence-replacement detection loss for one batch."""
    loss_function = nn.MSELoss()
    # Corrupt each document by replacing some sentences with candidates;
    # `masks` marks which positions were replaced.
    masked_paragraphs, masks = replace_sentence(paragraphs, sentence_cands)
    embeds = model(masked_paragraphs)
    # Use a local name so the module-level `batch_size` is not shadowed.
    this_batch_size, doc_size, embed_dim = embeds.size()
    embeds = embeds.view(-1, embed_dim)
    sigmoid = nn.Sigmoid()
    scores = sigmoid(classification_layer(embeds).view(this_batch_size, doc_size))
    labels = torch.cat(masks).float().to(device)
    # Drop padded sentence positions so the loss covers real sentences only.
    scores = filter_output(scores.view(-1), paragraph_lengths)
    loss = loss_function(scores, labels)
    return loss
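
# A minimal usage sketch for `train_replace` (assumptions: `replace_sentence`,
# `build_paragraph`, `filter_output`, and the globals `batch_size` and `device`
# are defined elsewhere in this module, as the functions above already require;
# the helper name `pretrain_replace_epoch` is hypothetical).
def pretrain_replace_epoch(model, classification_layer, data, my_vocab,
                           sentence_cands, optimizer):
    """Run one epoch of the sentence-replacement pretraining objective."""
    num_batches = (len(data) - 1) // batch_size + 1
    total_loss = 0.0
    for current_batch in range(num_batches):
        batch_data = data[current_batch * batch_size:
                          (current_batch + 1) * batch_size]
        paragraphs = [build_paragraph(this_sample, my_vocab)
                      for this_sample in batch_data]
        paragraph_lengths = [len(this_sample) for this_sample in batch_data]
        optimizer.zero_grad()
        loss = train_replace(model, classification_layer, paragraphs,
                             paragraph_lengths, sentence_cands)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / num_batches
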
def train(train_data, dev_data, my_vocab, train_target, dev_target,
          dev_target_txt):
    """Train the extractive summarizer and evaluate it after every epoch."""
    embed_model = MyModel(my_vocab)
    # Optionally warm-start the document encoder from a pretrained checkpoint.
    if summarizer_embed_model_path is not None:
        embed_model = torch.load(summarizer_embed_model_path)
    model = SummarizeModel(embed_model, hidden_dim * 2)
    model = model.to(device)
    criteria = torch.nn.MSELoss()
    model_optim = optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learning_rate)
    writer = SummaryWriter(exp_name)
    train_idx = list(range(len(train_data)))
    for epoch_i in range(num_epoch):
        total_loss = 0
        total_batch = 0
        # Apply the permutation produced by the previous epoch's shuffle;
        # epoch 0 therefore runs in the original data order.
        train_data = [train_data[i] for i in train_idx]
        train_target = [train_target[i] for i in train_idx]
        random.shuffle(train_idx)
        for current_batch in range((len(train_data) - 1) // batch_size + 1):
            if current_batch % 100 == 0:
                print(current_batch)
            model_optim.zero_grad()
            batch_data = train_data[current_batch * batch_size:
                                    (current_batch + 1) * batch_size]
            targets = train_target[current_batch * batch_size:
                                   (current_batch + 1) * batch_size]
            paragraphs = [build_paragraph(this_sample, my_vocab)
                          for this_sample in batch_data]
            paragraph_lengths = [len(this_sample) for this_sample in batch_data]
            scores = model(paragraphs)
            num_doc, doc_size = scores.size()
            # Build 0/1 labels: 1 at the indices of summary sentences.
            labels = torch.zeros(num_doc, doc_size).to(device)
            for i, this_target in enumerate(targets):
                if len(this_target) > 0:
                    labels[i][this_target] = 1
            # Drop padded sentence positions before computing the loss.
            labels = filter_output(labels.view(-1), paragraph_lengths)
            scores = filter_output(scores.view(-1), paragraph_lengths)
            loss = criteria(scores, labels)
            total_loss += loss.item()
            total_batch += 1
            loss.backward()
            model_optim.step()
        # Evaluate on the dev set, dump scores, log ROUGE, and checkpoint
        # after each epoch.
        acc, recall, scores = evaluate_summarizer(model, dev_data, dev_target,
                                                  my_vocab, dev_target_txt)
        with open(score_path + '_' + str(epoch_i) + '_score', 'w') as f_out:
            json.dump(scores, f_out)
        scores = [scores['rouge_1_f_score'],
                  scores['rouge_2_f_score'],
                  scores['rouge_l_f_score']]
        torch.save(model, summarizer_model_path + '_' + str(epoch_i) + '.pt')
        writer.add_scalar('accuracy', acc, epoch_i)
        writer.add_scalar('recall', recall, epoch_i)
        writer.add_scalar('avg_loss', total_loss / total_batch, epoch_i)
        writer.add_scalar('rouge_1', scores[0], epoch_i)
        writer.add_scalar('rouge_2', scores[1], epoch_i)
        writer.add_scalar('rouge_l', scores[2], epoch_i)
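
# The label construction inside the training loop above can be read as the
# stand-alone helper below (the name `build_label_matrix` is hypothetical and
# shown only to illustrate the expected shape of the targets: each entry of
# `targets` is a list of sentence indices belonging to the reference summary).
def build_label_matrix(targets, num_doc, doc_size, device):
    """Return a (num_doc, doc_size) 0/1 tensor with ones at summary sentences."""
    labels = torch.zeros(num_doc, doc_size, device=device)
    for i, this_target in enumerate(targets):
        # e.g. targets = [[0, 3], []] marks sentences 0 and 3 of the first
        # document as summary sentences and leaves the second document all zero.
        if len(this_target) > 0:
            labels[i][this_target] = 1
    return labels
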