def save_submission_file(predictions, path):
    out = list()
    for pred in predictions:
        out.append({
            "id": pred["id"],
            "predicted_label": pred["predicted_label"],
            "predicted_evidence": pred["predicted_evidence"]
        })
    save_jsonl(out, path)
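# save_jsonl (and read_jsonl, used further below) are helpers defined elsewhere
# in the repo. A minimal sketch of what they presumably look like, including the
# skip_if_exists flag that a later call site passes -- this is an illustration
# under those assumptions, not the project's actual implementation:
import json
import os


def read_jsonl(path):
    # one JSON object per line
    with open(path) as f:
        return [json.loads(line) for line in f]


def save_jsonl(dictionaries, path, skip_if_exists=True):
    # by default, silently skip rather than overwrite an existing file
    if skip_if_exists and os.path.exists(path):
        return
    with open(path, "w") as f:
        for d in dictionaries:
            f.write(json.dumps(d) + "\n")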
def save_predictions(instances, preds_list, path, scores_for_all_candidates=True):
    '''Save the prediction results.'''
    store = list()
    assert len(instances) == len(preds_list)
    for instance, preds in zip(instances, preds_list):
        cid = instance['id']
        claim = instance['claim']
        pred_sents = instance['evidence']

        # keep all predicted labels and their scores for each example
        if scores_for_all_candidates:
            pred_labels_list = [[pred.text for pred in preds_instance]
                                for preds_instance in preds]
            scores = [[float(pred.score) for pred in preds_instance]
                      for preds_instance in preds]
        # keep only the top label and score for each example
        else:
            pred_labels = [pred[0].text for pred in preds]
            scores = [float(pred[0].score) for pred in preds]

        # store as a dictionary
        dic = {
            'id': cid,
            'scores': scores,
            'claim': claim,
            'predicted_sentences': pred_sents
        }
        if 'label' in instance:
            dic['label'] = instance['label']

        if scores_for_all_candidates:
            dic['predicted_labels'] = [[
                convert_label(pred_label, inverse=True)
                for pred_label in pred_labels
            ] for pred_labels in pred_labels_list]
        else:
            dic['predicted_labels'] = [
                convert_label(pred_label, inverse=True)
                for pred_label in pred_labels
            ]

        # scores of the IR part
        if 'scored_sentences' in instance:
            dic['ev_scores'] = instance['scored_sentences']
        store.append(dic)
    save_jsonl(store, path)
def save_predictions_preprocessed(instances, all_settings, preds_list, path,
                                  n_sentences=5, scores_for_all_candidates=True):
    store = {}
    print('prepare dictionary...')
    for instance in instances:
        cid = instance["id"]
        claim = instance["claim"]
        pred_sents = instance["evidence"]

        # placeholder scores/labels; each slot is replaced (not mutated) below,
        # so the aliased inner lists created by * are harmless here
        if scores_for_all_candidates:
            scores = [[float(0)] * 3] * len(pred_sents)
            pred_labels_list = [['NOT ENOUGH INFO'] * 3] * len(pred_sents)
        else:
            scores = [float(0)] * len(pred_sents)
            pred_labels_list = ["NOT ENOUGH INFO" for pred in pred_sents]

        dic = {
            "id": cid,
            "scores": scores,
            "claim": claim,
            "predicted_sentences": pred_sents,
            "predicted_labels": pred_labels_list
        }
        if "label" in instance:
            dic["label"] = instance["label"]
        if "scored_sentences" in instance:
            dic["ev_scores"] = instance["scored_sentences"]
        store[cid] = dic

    assert len(all_settings) == len(preds_list)
    print('index entries...')
    for (setting, _), pred in zip(all_settings, preds_list):
        # setting ids have the form "<claim id>-<sentence id>"
        q_id = int(setting.id.split('-')[0])
        s_id = int(setting.id.split('-')[1])
        if s_id >= n_sentences:
            continue
        store[q_id]["scores"][s_id] = [float(p.score) for p in pred]
        if scores_for_all_candidates:
            store[q_id]["predicted_labels"][s_id] = [
                convert_label(p.text, inverse=True) for p in pred
            ]
        else:
            store[q_id]["predicted_labels"][s_id] = convert_label(
                pred[0].text, inverse=True)

    store = [v for k, v in store.items()]
    save_jsonl(store, path)
def save_predictions(instances, preds_list, path, scores_for_all_candidates=True):
    store = list()
    assert len(instances) == len(preds_list)
    for instance, preds in zip(instances, preds_list):
        cid = instance["id"]
        claim = instance["claim"]
        pred_sents = instance["evidence"]  # refer to read_ir_result

        if scores_for_all_candidates:
            pred_labels_list = [[pred.text for pred in preds_instance]
                                for preds_instance in preds]
            scores = [[float(pred.score) for pred in preds_instance]
                      for preds_instance in preds]
        else:
            pred_labels = [pred[0].text for pred in preds]
            scores = [float(pred[0].score) for pred in preds]

        dic = {
            "id": cid,
            "scores": scores,
            "claim": claim,
            "predicted_sentences": pred_sents
        }
        if "label" in instance:
            dic["label"] = instance["label"]

        if scores_for_all_candidates:
            dic["predicted_labels"] = [[
                convert_label(pred_label, inverse=True)
                for pred_label in pred_labels
            ] for pred_labels in pred_labels_list]
        else:
            dic["predicted_labels"] = [
                convert_label(pred_label, inverse=True)
                for pred_label in pred_labels
            ]

        # scores of the IR part
        if "scored_sentences" in instance:
            dic["ev_scores"] = instance["scored_sentences"]
        store.append(dic)
    save_jsonl(store, path)
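# convert_label is used throughout but defined elsewhere. This pipeline maps
# FEVER labels onto SNLI-style labels for the entailment model, and inverse=True
# maps a model prediction back into a FEVER label. A minimal sketch of the
# assumed mapping -- the exact label strings are an assumption:
FEVER_TO_SNLI = {
    "SUPPORTS": "entailment",
    "REFUTES": "contradiction",
    "NOT ENOUGH INFO": "neutral",
}
SNLI_TO_FEVER = {v: k for k, v in FEVER_TO_SNLI.items()}


def convert_label(label, inverse=False):
    # forward: FEVER -> SNLI; inverse: SNLI -> FEVER
    return SNLI_TO_FEVER[label] if inverse else FEVER_TO_SNLI[label]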
def run_aggregator(config):
    train_set = Predicted_Labels_Dataset(config['train_file'],
                                         config['n_sentences'],
                                         sampling=config['sampling'],
                                         use_ev_scores=config['evi_scores'])
    dev_set = Predicted_Labels_Dataset(config['dev_file'],
                                       config['n_sentences'],
                                       use_ev_scores=config['evi_scores'])
    train_dataloader = DataLoader(train_set, batch_size=64, shuffle=True,
                                  num_workers=0)
    dev_dataloader = DataLoader(dev_set, batch_size=64, shuffle=False,
                                num_workers=0)

    model = Net(layers=[int(width) for width in config['layers']])

    # weight each class inversely proportional to its training-set frequency
    class_weights = [1.0, 1.0, 1.0]
    label2freq = Counter(instance['label'] for instance in train_set.instances)
    total = sum(label2freq.values())
    for label in label2freq:
        class_weights[label2idx[label]] = 1.0 / label2freq[label] * total

    criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights))
    optimizer = optim.Adam(model.parameters())

    dev_results = []
    for epoch in range(config['epochs']):
        running_loss = 0.0
        for i, (labels, inputs) in enumerate(train_dataloader):
            optimizer.zero_grad()
            outputs = model(inputs.float())
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 1000 == 999:  # print every 1000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0

        # monitor dev performance after every epoch
        dev_results.append(simple_test(dev_dataloader, model))

    print('Finished Training.')
    performance = max(dev_results)
    print('dev set:', performance)

    train_result = predict(train_dataloader, model)
    dev_results = predict(dev_dataloader, model)
    save_jsonl(train_result, config['train_predicted_labels'])
    save_jsonl(dev_results, config['dev_predicted_labels'])
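# Net and label2idx are referenced above but defined elsewhere in the repo. A
# minimal sketch of the kind of feed-forward aggregator run_aggregator appears
# to expect: config['layers'] gives the layer widths, and the output layer has
# one logit per FEVER class. Both the architecture and the index assignment
# are assumptions for illustration:
import torch.nn as nn

label2idx = {"SUPPORTS": 0, "REFUTES": 1, "NOT ENOUGH INFO": 2}


class Net(nn.Module):
    def __init__(self, layers):
        # layers, e.g. [15, 10, 3]: input width, hidden width(s), 3 classes
        super().__init__()
        modules = []
        for n_in, n_out in zip(layers[:-1], layers[1:]):
            modules.append(nn.Linear(n_in, n_out))
            modules.append(nn.ReLU())
        modules.pop()  # no activation after the output layer
        self.mlp = nn.Sequential(*modules)

    def forward(self, x):
        return self.mlp(x)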
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("src")
    parser.add_argument("tar")
    parser.add_argument("--use_ir_pred", action="store_true")
    parser.add_argument("--prependlinum", action="store_true")
    parser.add_argument("--prependtitle", action="store_true")
    parser.add_argument("--convert_test", action="store_true")
    # parser.add_argument("--testset", help="turn on when you convert test data", action="store_true")
    args = parser.parse_args()
    print(args)

    if args.convert_test:
        # a two-claim FEVER sample used to eyeball the conversion output
        test_in = '''[{"id": 15812, "verifiable": "VERIFIABLE", "label": "REFUTES", "claim": "Peggy Sue Got Married is a Egyptian film released in 1986.", "evidence": [[[31205, 37902, "Peggy_Sue_Got_Married", 0], [31205, 37902, "Francis_Ford_Coppola", 0]], [[31211, 37908, "Peggy_Sue_Got_Married", 0]]], "predicted_pages": ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", "Peggy_Sue_Got_Married_-LRB-song-RRB-", "Peggy_Sue_Got_Married", "Peggy_Sue", "Peggy_Sue_-LRB-band-RRB-"], "predicted_sentences": [["Peggy_Sue_Got_Married", 0], ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", 0], ["Peggy_Sue_Got_Married_-LRB-song-RRB-", 0], ["Peggy_Sue", 0], ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", 2]]}, {"id": 229289, "verifiable": "NOT VERIFIABLE", "label": "NOT ENOUGH INFO", "claim": "Neal Schon was named in 1954.", "evidence": [[[273626, null, null, null]]], "predicted_pages": ["Neal_Schon", "Neal", "Named", "Was_-LRB-Not_Was-RRB-", "Was"], "predicted_sentences": [["Neal_Schon", 0], ["Neal_Schon", 6], ["Neal_Schon", 5], ["Neal_Schon", 1], ["Neal_Schon", 2]]}]'''
        print("input:\n", test_in)
        fever_format = json.loads(test_in)
        snli_format_instances = convert(fever_format,
                                        prependlinum=args.prependlinum,
                                        prependtitle=args.prependtitle,
                                        use_ir_prediction=args.use_ir_pred)
        print("\noutput:\n", json.dumps(snli_format_instances, indent=4))
    else:
        assert not os.path.exists(args.tar), "file {} already exists".format(
            args.tar)
        keyerr_count = 0
        instances = read_jsonl(args.src)
        snli_format_instances = convert(instances,
                                        prependlinum=args.prependlinum,
                                        prependtitle=args.prependtitle,
                                        use_ir_prediction=args.use_ir_pred)
        save_jsonl(snli_format_instances, args.tar)
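# A typical invocation of this conversion script might look like the following;
# the script and file names here are hypothetical placeholders:
#
#   python converter.py dev.jsonl dev.snli.jsonl --prependtitle --use_ir_pred
#
# With --convert_test, src and tar are still required positionally, but the
# script instead round-trips the embedded two-claim sample, which is useful for
# eyeballing the SNLI-format output.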
        # forward + backward + optimize
        outputs = net(inputs.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:  # print every 1000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 1000))
            running_loss = 0.0

    # monitor dev loss throughout training (once per epoch)
    dev_results_throughout_training.append(simple_test(dev_dataloader))

print('Finished Training')
print("dev set:")
performance = simple_test(dev_dataloader)
# hyperparameter2performance[n_sentences] = performance
hyperparameter2performance[n_sentences] = max(
    dev_results_throughout_training)

for k, v in sorted(hyperparameter2performance.items()):
    print(v)

dev_results = predict(dev_dataloader)
test_results = predict(test_dataloader)
save_jsonl(dev_results, args.predicted_labels)
save_jsonl(test_results, args.test_predicted_labels)
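# simple_test and predict are assumed helpers from elsewhere in the repo. A
# rough sketch of what simple_test plausibly computes here (dev-set accuracy),
# assuming net is in the enclosing scope as in the fragment above:
import torch


def simple_test(dataloader):
    correct = total = 0
    with torch.no_grad():
        for labels, inputs in dataloader:
            predicted = net(inputs.float()).argmax(dim=1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
    return correct / total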
pattern = re.compile(r'\w+|[^\w\s]')  # raw string avoids invalid-escape warnings

if args.convert_test:
    test_in = '''[{"id": 15812, "verifiable": "VERIFIABLE", "label": "REFUTES", "claim": "Peggy Sue Got Married is a Egyptian film released in 1986.", "evidence": [[[31205, 37902, "Peggy_Sue_Got_Married", 0], [31205, 37902, "Francis_Ford_Coppola", 0]], [[31211, 37908, "Peggy_Sue_Got_Married", 0]]], "predicted_pages": ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", "Peggy_Sue_Got_Married_-LRB-song-RRB-", "Peggy_Sue_Got_Married", "Peggy_Sue", "Peggy_Sue_-LRB-band-RRB-"], "predicted_sentences": [["Peggy_Sue_Got_Married", 0], ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", 0], ["Peggy_Sue_Got_Married_-LRB-song-RRB-", 0], ["Peggy_Sue", 0], ["Peggy_Sue_Got_Married_-LRB-musical-RRB-", 2]]}, {"id": 229289, "verifiable": "NOT VERIFIABLE", "label": "NOT ENOUGH INFO", "claim": "Neal Schon was named in 1954.", "evidence": [[[273626, null, null, null]]], "predicted_pages": ["Neal_Schon", "Neal", "Named", "Was_-LRB-Not_Was-RRB-", "Was"], "predicted_sentences": [["Neal_Schon", 0], ["Neal_Schon", 6], ["Neal_Schon", 5], ["Neal_Schon", 1], ["Neal_Schon", 2]]}, {"id": 15711, "verifiable": "VERIFIABLE", "label": "SUPPORTS", "claim": "Liverpool F.C. was valued at $1.55 billion at one point.", "evidence": [[[31112, 37788, "Liverpool_F.C.", 11]]], "predicted_pages": ["Liverpool_F.C.", "Liverpool_F.C._-LRB-Montevideo-RRB-", "Liverpool_F.C._-LRB-Superleague_Formula_team-RRB-", "Liverpool_F.C._-LRB-disambiguation-RRB-", "Liverpool"], "predicted_sentences": [["Liverpool_F.C.", 11], ["Liverpool", 0], ["Liverpool", 9], ["Liverpool", 10], ["Liverpool", 8]]}]'''
    print("input:\n", test_in)
    fever_format = json.loads(test_in)
    snli_format_instances = convert(
        fever_format,
        prependlinum=args.prependlinum,
        prependtitle=args.prependtitle,
        use_ir_prediction=args.use_ir_pred,
        n_sentences=args.n_sentences,
        depparse_batch_size=args.depparse_batch_size)
    print("\noutput:\n", json.dumps(snli_format_instances, indent=4))
else:
    if os.path.exists(args.tar):
        print("WARNING: file {} already exists".format(args.tar))
    keyerr_count = 0
    instances = read_jsonl(args.src)
    snli_format_instances = convert(
        instances,
        prependlinum=args.prependlinum,
        prependtitle=args.prependtitle,
        use_ir_prediction=args.use_ir_pred,
        n_sentences=args.n_sentences,
        depparse_batch_size=args.depparse_batch_size)
    save_jsonl(snli_format_instances, args.tar, skip_if_exists=False)
instances = read_ir_result(args.in_file,
                           prependlinum=args.prependlinum,
                           concatev=args.concatev)

# build one QASetting per (claim, evidence sentence) pair
all_settings = list()
for instance in instances:
    evidence_list = instance["evidence"]
    claim = instance["claim"]
    settings = [
        QASetting(question=claim, support=[evidence])
        for evidence in evidence_list
    ]
    all_settings.append(settings)

# pointer runs from 0 up to (but not including) len(all_settings)
# in steps of args.batch_size
preds_list = list()
for pointer in tqdm(range(0, len(all_settings), args.batch_size)):
    batch_settings = all_settings[pointer:pointer + args.batch_size]
    n_settings = [len(settings_) for settings_ in batch_settings]
    # flatten the batch for the reader, then regroup predictions per claim
    preds_list.extend(reshape(dam_reader(flatten(batch_settings)), n_settings))

results = list()
for instance, preds in zip(instances, preds_list):
    prediction, scores, prediction_list = aggregate_preds(preds,
                                                          args.only_use_topev)
    results.append({
        "actual": instance["label"],
        "predicted": convert_label(prediction, inverse=True),
        "scores": scores,
        "prediction_list": [
            convert_label(pred, inverse=True) for pred in prediction_list
        ]
    })
save_jsonl(results, abs_path(args.out_file))
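# flatten and reshape are small helpers assumed above: the per-claim lists of
# QASettings are flattened into one batch for the reader, and the flat list of
# predictions is cut back into per-claim groups of the recorded sizes. A sketch
# of what they presumably do:
def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]


def reshape(flat_list, group_sizes):
    # inverse of flatten: cut flat_list into consecutive chunks of group_sizes
    groups, pos = [], 0
    for size in group_sizes:
        groups.append(flat_list[pos:pos + size])
        pos += size
    return groups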