list(train["passage"].values), 9) logging.info("Initlizing the Tokenizer") tokenizer = BertTokenizer.from_pretrained('bert-base-cased') special_tokens_dict = { 'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]'] } tokenizer.add_special_tokens(special_tokens_dict) #Create the loaders for the datasets, with the respective negative samplers dataloader = dataset.QueryDocumentDataLoader(train_df=train, val_df=valid, test_df=valid, tokenizer=tokenizer, negative_sampler_train=ns_train, negative_sampler_val=ns_val, task_type='classification', train_batch_size=24, val_batch_size=24, max_seq_len=512, sample_data=-1, cache_path="../data") logging.info("Initlizing the DataLoader") train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders() #Use BERT to rank responses model = BertForSequenceClassification.from_pretrained('bert-base-cased') # we added [UTTERANCE_SEP] and [TURN_SEP] to the vocabulary so we need to resize the token embeddings model.resize_token_embeddings(len(dataloader.tokenizer)) #Instantiate trainer that handles fitting. trainer = transformer_trainer.TransformerTrainer(model=model,
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    ## Conversation Response Ranking
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        # Ubuntu data has several utterances from the same user in the context.
        add_turn_separator = (args.task != "ubuntu_dstc8")
        train = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + args.task + "/train.tsv",
            args.sample_data, add_turn_separator)
        valid = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + args.task + "/valid.tsv",
            args.sample_data, add_turn_separator)
        special_tokens_dict = {
            'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']
        }
        tokenizer.add_special_tokens(special_tokens_dict)
    ## Similar Question Retrieval and Passage Retrieval
    elif args.task in ["qqp", "linkso", "trec2020pr"]:
        if args.sample_data == -1:
            args.sample_data = None
        train = pd.read_csv(args.data_folder + args.task + "/train.tsv",
                            sep="\t", nrows=args.sample_data)
        valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv",
                            sep="\t", nrows=args.sample_data)
    elif args.task == "scisumm":
        train, valid = preprocess_scisumm.transform_to_dfs(
            "../data/Training-Set-2019/Task1/From-Training-Set-2018/")

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(
            list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/anserini_train/",
            args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/train_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + "/" + args.task + "/anserini_valid/",
            args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + "/" + args.task + "/valid_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    dataloader = dataset.QueryDocumentDataLoader(
        train, valid, valid, tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + args.task)
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(
        model, train_loader, val_loader, test_loader, args.num_ns_eval,
        "classification", tokenizer, args.validate_every_epochs,
        args.num_validation_instances, args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Predict for test
    logging.info("Predicting")
    preds, labels = trainer.test()
    res = results_analyses_tools.evaluate_and_aggregate(
        preds, labels, ['R_10@1', 'R_10@2', 'R_10@5', 'R_2@1'])
    for metric, v in res.items():
        logging.info("Test {} : {:.4f}".format(metric, v))

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(
        preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv",
                    index=False)

    labels_df = pd.DataFrame(
        labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                     index=False)

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(),
                   args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with dropout.")
        preds, uncertainties, labels, forward_passes_preds = \
            trainer.test_with_dropout(args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.4f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(
            preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv",
                        index=False)
        for i, f_pass_preds in enumerate(forward_passes_preds):
            preds_df = pd.DataFrame(
                f_pass_preds,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id +
                            "/predictions_with_dropout_f_pass_{}.csv".format(i),
                            index=False)

        labels_df = pd.DataFrame(
            labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                         index=False)

        uncertainties_df = pd.DataFrame(
            uncertainties,
            columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv",
                                index=False)

    return trainer.best_ndcg
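# The function above reads a Sacred experiment `ex` and an argparse namespace
# `args`. A minimal sketch of how the example scripts typically wire these up
# (assuming sacred>=0.8; the flag list is abridged to the attributes used here,
# and the project's actual scripts define many more):
import argparse
from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("BERT-ranker experiment")

@ex.main
def sacred_main():
    # run_experiment is the function defined above.
    return run_experiment(args)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, default="msdialog")
    parser.add_argument("--data_folder", type=str, default="../data/")
    parser.add_argument("--output_dir", type=str, default="../output/")
    parser.add_argument("--transformer_model", type=str, default="bert-base-cased")
    parser.add_argument("--sample_data", type=int, default=-1)
    # ... the remaining flags read by run_experiment (num_ns_train, num_ns_eval,
    # batch sizes, max_seq_len, lr, num_epochs, sampler and uncertainty options)
    args = parser.parse_args()
    args.sacred_ex = ex  # the trainer logs metrics to the Sacred run
    ex.observers.append(FileStorageObserver(args.output_dir))  # numeric run ids
    ex.run()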
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)
    # Conversation Response Ranking datasets need special tokens
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        special_tokens_dict = {
            'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']
        }
        tokenizer.add_special_tokens(special_tokens_dict)

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train.tsv",
                        sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv",
                        sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(
            list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/anserini_train/",
            args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + args.task + "/train_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + args.task + "/anserini_valid/",
            args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + args.task + "/valid_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    dataloader = dataset.QueryDocumentDataLoader(
        train, valid, valid, tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + args.task)
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()

    # Instantiate transformer model to be used
    model = pointwise_bert.BertForPointwiseLearning.from_pretrained(
        args.transformer_model,
        loss_function=args.loss_function,
        smoothing=args.smoothing)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(
        model, train_loader, val_loader, test_loader, args.num_ns_eval,
        "classification", tokenizer, args.validate_every_epochs,
        args.num_validation_batches, args.num_epochs, args.lr, args.sacred_ex,
        args.validate_every_steps, validation_metric='R_10@1',
        num_training_instances=args.num_training_instances)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Predict for the validation set
    logging.info("Predicting for the validation set.")
    preds, labels, softmax_logits = trainer.test()
    res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
    for metric, v in res.items():
        logging.info("Test {} : {:.3f}".format(metric, v))
        wandb.log({'step': 0, "dev_" + metric: v})

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(
        preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv",
                    index=False)

    softmax_df = pd.DataFrame(
        softmax_logits,
        columns=["prediction_" + str(i) for i in range(max_preds_column)])
    softmax_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_softmax.csv",
                      index=False)

    labels_df = pd.DataFrame(
        labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                     index=False)

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(),
                   args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with MC dropout for the validation set.")
        preds, labels, softmax_logits, forward_passes_preds, uncertainties = \
            trainer.test_with_dropout(args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.3f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(
            preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv",
                        index=False)

        softmax_df = pd.DataFrame(
            softmax_logits,
            columns=["prediction_" + str(i) for i in range(max_preds_column)])
        softmax_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout_softmax.csv",
                          index=False)

        for i, f_pass_preds in enumerate(forward_passes_preds):
            preds_df = pd.DataFrame(
                f_pass_preds,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id +
                            "/predictions_with_dropout_f_pass_{}.csv".format(i),
                            index=False)

        labels_df = pd.DataFrame(
            labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                         index=False)

        uncertainties_df = pd.DataFrame(
            uncertainties,
            columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv",
                                index=False)

    return trainer.best_eval_metric
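# BertForPointwiseLearning above takes a `loss_function` and a `smoothing`
# factor. A common formulation of label-smoothed cross-entropy, which is
# presumably what the smoothing factor controls, looks like this (an
# illustrative sketch, not the library's exact implementation):
import torch.nn.functional as F

def label_smoothing_cross_entropy(logits, targets, smoothing=0.1):
    # Put (1 - smoothing) of the probability mass on the gold class and
    # spread `smoothing` uniformly over all classes.
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(dim=-1, index=targets.unsqueeze(-1)).squeeze(-1)
    uniform = -log_probs.mean(dim=-1)
    return ((1.0 - smoothing) * nll + smoothing * uniform).mean()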
def main():
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s [%(levelname)s] %(message)s",
                        handlers=[logging.StreamHandler()])

    task = 'qqp'
    data_folder = "../../data/"
    logging.info("Starting downloader for task {}".format(task))
    dataDownloader = downloader.DataDownloader(task, data_folder)
    dataDownloader.download_and_preprocess()

    train = pd.read_csv("{}/{}/train.tsv".format(data_folder, task), sep="\t")
    valid = pd.read_csv("{}/{}/valid.tsv".format(data_folder, task), sep="\t")

    # Random negative samplers
    ns_train = negative_sampling.RandomNegativeSampler(
        list(train["question1"].values), 1)
    ns_val = negative_sampling.RandomNegativeSampler(
        list(valid["question1"].values) + list(train["question1"].values), 1)

    tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')

    # Create the loaders for the datasets, with the respective negative samplers
    dataloader = dataset.QueryDocumentDataLoader(
        train_df=train, val_df=valid, test_df=valid,
        tokenizer=tokenizer, negative_sampler_train=ns_train,
        negative_sampler_val=ns_val, task_type='classification',
        train_batch_size=6, val_batch_size=6, max_seq_len=100,
        sample_data=-1, cache_path="{}/{}".format(data_folder, task))
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()

    model = pointwise_bert.BertForPointwiseLearning.from_pretrained('bert-base-cased')

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(
        model=model, train_loader=train_loader,
        val_loader=val_loader, test_loader=test_loader,
        num_ns_eval=9, task_type="classification", tokenizer=tokenizer,
        validate_every_epochs=1, num_validation_batches=-1,
        num_epochs=1, lr=0.0005, sacred_ex=None,
        validate_every_steps=100)

    # Train the model
    logging.info("Fitting pointwise BERT for {}".format(task))
    trainer.fit()

    # Predict for test (in our example the validation set)
    logging.info("Predicting")
    preds, labels, _ = trainer.test()
    res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['ndcg_cut_10'])
    for metric, v in res.items():
        logging.info("Test {} : {:.4f}".format(metric, v))


if __name__ == "__main__":
    main()
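# Conceptually, RandomNegativeSampler just draws `num_candidates_samples`
# documents from the candidate list for each positive instance. A toy version
# for illustration (hypothetical class and method names, not the library's API):
import random

class ToyRandomNegativeSampler:
    def __init__(self, candidates, num_candidates_samples, seed=42):
        self.candidates = candidates
        self.num_candidates_samples = num_candidates_samples
        self.rng = random.Random(seed)

    def sample(self, relevant_doc):
        # Draw negatives uniformly at random, excluding the relevant document.
        pool = [d for d in self.candidates if d != relevant_doc]
        return self.rng.sample(pool, self.num_candidates_samples)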
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    ## Conversation Response Ranking
    if args.task in ["mantis", "msdialog", "ubuntu_dstc8"]:
        # Ubuntu data has several utterances from the same user in the context.
        add_turn_separator = (args.task != "ubuntu_dstc8")
        train = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + args.task + "/train.tsv",
            args.sample_data, add_turn_separator)
        valid = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + args.task + "/valid.tsv",
            args.sample_data, add_turn_separator)
        special_tokens_dict = {
            'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']
        }
        tokenizer.add_special_tokens(special_tokens_dict)
    ## Similar Question Retrieval and Passage Retrieval
    elif args.task in ["qqp", "linkso", "trec2020pr"]:
        if args.sample_data == -1:
            args.sample_data = None
        train = pd.read_csv(args.data_folder + args.task + "/train.tsv",
                            sep="\t", nrows=args.sample_data)
        valid = pd.read_csv(args.data_folder + args.task + "/valid.tsv",
                            sep="\t", nrows=args.sample_data)
    elif args.task == "scisumm":
        train, valid = preprocess_scisumm.transform_to_dfs(
            "../data/Training-Set-2019/Task1/From-Training-Set-2018/")
    elif args.task == "scisumm_ranked":
        train, valid, test = preprocess_scisumm_ranked.transform_to_dfs(
            args.path_to_ranked_file, args.path_to_ranked_test,
            args.path_to_ranked_dev)

    # Choose the negative candidate sampler
    document_col = train.columns[1]
    ns_train = None
    ns_val = None
    if args.train_negative_sampler == 'random':
        ns_train = negative_sampling.RandomNegativeSampler(
            list(train[document_col].values), args.num_ns_train)
    elif args.train_negative_sampler == 'bm25':
        ns_train = negative_sampling.BM25NegativeSamplerPyserini(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/anserini_train/",
            args.sample_data, args.anserini_folder)
    elif args.train_negative_sampler == 'sentenceBERT':
        ns_train = negative_sampling.SentenceBERTNegativeSampler(
            list(train[document_col].values), args.num_ns_train,
            args.data_folder + "/" + args.task + "/train_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    if args.test_negative_sampler == 'random':
        ns_val = negative_sampling.RandomNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval)
    elif args.test_negative_sampler == 'bm25':
        ns_val = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + "/" + args.task + "/anserini_valid/",
            args.sample_data, args.anserini_folder)
    elif args.test_negative_sampler == 'sentenceBERT':
        ns_val = negative_sampling.SentenceBERTNegativeSampler(
            list(valid[document_col].values) + list(train[document_col].values),
            args.num_ns_eval,
            args.data_folder + "/" + args.task + "/valid_sentenceBERTembeds",
            args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers.
    # Note: `test` is only defined for the scisumm_ranked task, which is the
    # task this script is meant to run.
    dataloader = dataset.QueryDocumentDataLoader(
        train, valid, test, tokenizer, ns_train, ns_val, 'classification',
        args.train_batch_size, args.val_batch_size, args.max_seq_len,
        args.sample_data, args.data_folder + "/" + args.task)
    with_ranked_list = (args.task == "scisumm_ranked")
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders(
        with_ranked_list)

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(
        model, train_loader, val_loader, test_loader, args.num_ns_eval,
        "classification", tokenizer, args.validate_every_epochs,
        args.num_validation_instances, args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Predict for the dev set
    logging.info("Predicting")
    preds, labels, doc_ids, all_queries, preds_without_acc = trainer.validate()
    res = results_analyses_tools.evaluate_and_aggregate(
        preds, labels,
        ['R_10@1', 'R_10@2', 'R_10@5', 'R_2@1',
         'accuracy_0.3', 'accuracy_0.3_upto_1', 'precision_0.3', 'recall_0.3',
         'f_score_0.3',
         'accuracy_0.4', 'accuracy_0.4_upto_1', 'precision_0.4', 'recall_0.4',
         'f_score_0.4',
         'accuracy_0.5', 'accuracy_0.5_upto_1', 'precision_0.5', 'recall_0.5',
         'f_score_0.5'])
    for metric, v in res.items():
        logging.info("Test {} : {:.4f}".format(metric, v))

    # Saving predictions and labels to a file
    max_preds_column = max([len(l) for l in preds])
    preds_df = pd.DataFrame(
        preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
    preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions.csv",
                    index=False)

    labels_df = pd.DataFrame(
        labels, columns=["label_" + str(i) for i in range(max_preds_column)])
    labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                     index=False)

    # Binarize the dev predictions at a 0.3 threshold; for queries where no
    # candidate clears the threshold, keep the single highest-scoring document.
    new_preds = list((np.array(preds_without_acc) > 0.3).astype(int))
    d = {
        'query': all_queries,
        'doc_id': doc_ids,
        'label': new_preds,
        'similarity': preds_without_acc
    }
    df_doc_ids = pd.DataFrame(d)
    df_doc_ids_ones = df_doc_ids[df_doc_ids['label'] == 1]
    df_doc_ids_ones = df_doc_ids_ones.groupby('query').agg(list).reset_index()
    df_doc_ids_non_ones = df_doc_ids.groupby('query').agg(list).reset_index()
    new_df = []
    for i, row in df_doc_ids_non_ones.iterrows():
        if all([v == 0 for v in row['label']]):
            # Sort candidates by similarity, highest first.
            highest_value = [x for _, x in sorted(zip(row['similarity'], row['doc_id']),
                                                  key=lambda pair: pair[0],
                                                  reverse=True)]
            highest_value_sim = sorted(row['similarity'], reverse=True)
            row['label'] = [1]
            row['doc_id'] = [highest_value[0]]
            row['similarity'] = [highest_value_sim[0]]
            new_df.append(row)
    result = pd.concat([df_doc_ids_ones, pd.DataFrame(new_df)])
    result.to_csv(args.output_dir + "/" + args.run_id + "/doc_ids_dev.csv",
                  index=False, sep='\t')

    # Predict on the test set and post-process in the same way
    preds, labels, doc_ids, all_queries, preds_without_acc = trainer.test()
    new_preds = list((np.array(preds_without_acc) > 0.3).astype(int))
    d = {
        'query': all_queries,
        'doc_id': doc_ids,
        'label': new_preds,
        'similarity': preds_without_acc
    }
    df_doc_ids = pd.DataFrame(d)
    df_doc_ids_ones = df_doc_ids[df_doc_ids['label'] == 1]
    df_doc_ids_ones = df_doc_ids_ones.groupby('query').agg(list).reset_index()
    df_doc_ids_non_ones = df_doc_ids.groupby('query').agg(list).reset_index()
    new_df = []
    for i, row in df_doc_ids_non_ones.iterrows():
        if all([v == 0 for v in row['label']]):
            highest_value = [x for _, x in sorted(zip(row['similarity'], row['doc_id']),
                                                  key=lambda pair: pair[0],
                                                  reverse=True)]
            highest_value_sim = sorted(row['similarity'], reverse=True)
            row['label'] = [1]
            row['doc_id'] = [highest_value[0]]
            row['similarity'] = [highest_value_sim[0]]
            new_df.append(row)
    result = pd.concat([df_doc_ids_ones, pd.DataFrame(new_df)])
    result.to_csv(args.output_dir + "/" + args.run_id + "/doc_ids_test.csv",
                  index=False, sep='\t')

    # Saving model to a file
    if args.save_model:
        torch.save(model.state_dict(),
                   args.output_dir + "/" + args.run_id + "/model")

    # In case we want to get uncertainty estimations at prediction time
    if args.predict_with_uncertainty_estimation:
        logging.info("Predicting with dropout.")
        preds, uncertainties, labels, forward_passes_preds = \
            trainer.test_with_dropout(args.num_foward_prediction_passes)
        res = results_analyses_tools.evaluate_and_aggregate(preds, labels, ['R_10@1'])
        for metric, v in res.items():
            logging.info("Test (w. dropout and {} forward passes) {} : {:.4f}".format(
                args.num_foward_prediction_passes, metric, v))

        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(
            preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(args.output_dir + "/" + args.run_id + "/predictions_with_dropout.csv",
                        index=False)
        for i, f_pass_preds in enumerate(forward_passes_preds):
            preds_df = pd.DataFrame(
                f_pass_preds,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            preds_df.to_csv(args.output_dir + "/" + args.run_id +
                            "/predictions_with_dropout_f_pass_{}.csv".format(i),
                            index=False)

        labels_df = pd.DataFrame(
            labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(args.output_dir + "/" + args.run_id + "/labels.csv",
                         index=False)

        uncertainties_df = pd.DataFrame(
            uncertainties,
            columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
        uncertainties_df.to_csv(args.output_dir + "/" + args.run_id + "/uncertainties.csv",
                                index=False)

    return trainer.best_ndcg
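# trainer.test_with_dropout implements MC dropout: dropout layers stay active
# at inference time and predictions are aggregated over several stochastic
# forward passes. A generic PyTorch sketch of the idea (not the project's own
# code; assumes `batch` is a dict of input tensors for a binary classifier):
import torch

def mc_dropout(model, batch, n_passes=10):
    model.train()  # keeps dropout active; BERT has no batch-norm layers
    passes = []
    with torch.no_grad():
        for _ in range(n_passes):
            logits = model(**batch)[0]  # (batch_size, num_labels)
            passes.append(torch.softmax(logits, dim=-1)[:, 1])
    stacked = torch.stack(passes)  # (n_passes, batch_size)
    # Mean over passes is the prediction; the std is an uncertainty estimate.
    return stacked.mean(dim=0), stacked.std(dim=0)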
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    train, valid, test = preprocess_scisumm_ranked.transform_to_dfs(
        args.path_to_ranked_file, args.path_to_ranked_test,
        args.path_to_ranked_dev)

    # No negative candidate samplers are needed here: the candidates come from
    # the ranked lists, and the model is only used for inference.
    ns_train = None
    ns_val = None

    # Create the loaders for the datasets
    dataloader = dataset.QueryDocumentDataLoader(
        train, valid, test, tokenizer, ns_train, ns_val, 'classification',
        args.val_batch_size, args.val_batch_size, 512, 0,
        args.data_folder + "/scisumm_ranked")
    with_ranked_list = True
    train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders(
        with_ranked_list)

    # Instantiate the transformer model and load the fine-tuned weights
    model = BertForSequenceClassification.from_pretrained('bert-base-cased')
    model.resize_token_embeddings(len(dataloader.tokenizer))
    state_dict = torch.load(args.model_dir)
    model.load_state_dict(state_dict)
    model.eval()

    # Instantiate trainer, used here only for prediction.
    trainer = transformer_trainer.TransformerTrainer(
        model, train_loader, val_loader, test_loader, 0, "classification",
        tokenizer, False, 0, 0, 0, 0)

    # Predict for test
    logging.info("Predicting")
    preds, labels, doc_ids, all_queries, preds_without_acc = trainer.test()

    # Binarize the predictions at a 0.4 threshold and group the results per query.
    new_preds = list((np.array(preds_without_acc) > 0.4).astype(int))
    d = {
        'query': all_queries,
        'doc_id': doc_ids,
        'label': new_preds,
        'similarity': preds_without_acc
    }
    df_doc_ids = pd.DataFrame(d)
    df_doc_ids = df_doc_ids.groupby('query').agg(list).reset_index()
    df_doc_ids.to_csv(args.output_dir + "/" + args.run_id + "/doc_ids_test_all_results.csv",
                      index=False, sep='\t')

    return trainer.best_ndcg
def run_experiment(args):
    args.run_id = str(ex.current_run._id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    tokenizer = BertTokenizer.from_pretrained(args.transformer_model)

    # Load datasets
    train = pd.read_csv(args.data_folder + args.task + "/train_test.tsv",
                        sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)
    valid = pd.read_csv(args.data_folder + args.task + "/valid_test.tsv",
                        sep="\t",
                        nrows=args.sample_data if args.sample_data != -1 else None)

    special_tokens_dict = {
        'additional_special_tokens': ['[UTTERANCE_SEP]', '[TURN_SEP]']
    }
    tokenizer.add_special_tokens(special_tokens_dict)

    # Choose the negative candidate samplers: BM25 for training, and all three
    # strategies for evaluation.
    document_col = train.columns[1]
    ns_train = negative_sampling.BM25NegativeSamplerPyserini(
        list(train[document_col].values), args.num_ns_train,
        args.data_folder + args.task + "/anserini_train/", args.sample_data,
        args.anserini_folder)
    ns_val_random = negative_sampling.RandomNegativeSampler(
        list(valid[document_col].values) + list(train[document_col].values),
        args.num_ns_eval)
    ns_val_bm25 = negative_sampling.BM25NegativeSamplerPyserini(
        list(valid[document_col].values) + list(train[document_col].values),
        args.num_ns_eval, args.data_folder + args.task + "/anserini_valid/",
        args.sample_data, args.anserini_folder)
    ns_val_bert_sentence = negative_sampling.SentenceBERTNegativeSampler(
        list(valid[document_col].values) + list(train[document_col].values),
        args.num_ns_eval, args.data_folder + args.task + "/valid_sentenceBERTembeds",
        args.sample_data, args.bert_sentence_model)

    # Create the loaders for the datasets, with the respective negative samplers
    cross_ns_val = {}
    cross_ns_train = {}
    for (ns_name, ns_val) in [("random", ns_val_random),
                              ("bm25", ns_val_bm25),
                              ("sentenceBERT", ns_val_bert_sentence)]:
        dataloader = dataset.QueryDocumentDataLoader(
            train, valid, valid, tokenizer, ns_train, ns_val, 'classification',
            args.train_batch_size, args.val_batch_size, args.max_seq_len,
            args.sample_data, args.data_folder + args.task)
        train_loader, val_loader, test_loader = dataloader.get_pytorch_dataloaders()
        cross_ns_val[ns_name] = val_loader
        cross_ns_train[ns_name] = train_loader

    # Instantiate transformer model to be used
    model = BertForSequenceClassification.from_pretrained(args.transformer_model)
    model.resize_token_embeddings(len(dataloader.tokenizer))

    # Instantiate trainer that handles fitting.
    trainer = transformer_trainer.TransformerTrainer(
        model, cross_ns_train["bm25"], cross_ns_val["bm25"], cross_ns_val["bm25"],
        args.num_ns_eval, "classification", tokenizer,
        args.validate_every_epochs, args.num_validation_batches,
        args.num_epochs, args.lr, args.sacred_ex)

    # Train
    model_name = model.__class__.__name__
    logging.info("Fitting {} for {}{}".format(model_name, args.data_folder, args.task))
    trainer.fit()

    # Cross-NS predictions: evaluate the BM25-trained model against each
    # negative sampling strategy, writing each result set to its own run folder.
    for ns_index, ns_name in enumerate(["random", "bm25", "sentenceBERT"]):
        logging.info("Predicting for NS {}".format(ns_name))
        run_folder = args.output_dir + "/" + str(int(args.run_id) + ns_index)
        os.makedirs(run_folder, exist_ok=True)
        with open(run_folder + "/config.json", "w") as f:
            # vars(args) aliases args.__dict__, hence the guard below: deleting
            # 'sacred_ex' once removes it from args for later iterations too.
            config_w = {'args': vars(args)}
            config_w['args']['test_dataset'] = args.task
            config_w['args']['train_negative_sampler'] = 'bm25'
            config_w['args']['test_negative_sampler'] = ns_name
            if 'sacred_ex' in config_w['args']:
                del config_w['args']['sacred_ex']
            json.dump(config_w, f, indent=4)

        trainer.num_validation_batches = -1  # predict on all batches, no sampling
        preds, labels, softmax_logits = trainer.predict(cross_ns_val[ns_name])

        # Saving predictions and labels to a file
        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(
            preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(run_folder + "/predictions.csv", index=False)

        softmax_df = pd.DataFrame(
            softmax_logits,
            columns=["prediction_" + str(i) for i in range(max_preds_column)])
        softmax_df.to_csv(run_folder + "/predictions_softmax.csv", index=False)

        labels_df = pd.DataFrame(
            labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(run_folder + "/labels.csv", index=False)

        # Saving model to a file
        if args.save_model:
            torch.save(model.state_dict(), run_folder + "/model")

        # In case we want to get uncertainty estimations at prediction time
        if args.predict_with_uncertainty_estimation:
            logging.info("Predicting with dropout.")
            trainer.num_validation_batches = -1  # predict on all batches, no sampling
            preds, labels, softmax_logits, forward_passes_preds, uncertainties = \
                trainer.predict_with_uncertainty(cross_ns_val[ns_name],
                                                 args.num_foward_prediction_passes)

            max_preds_column = max([len(l) for l in preds])
            preds_df = pd.DataFrame(
                preds,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            preds_df.to_csv(run_folder + "/predictions_with_dropout.csv", index=False)

            softmax_df = pd.DataFrame(
                softmax_logits,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            softmax_df.to_csv(run_folder + "/predictions_with_dropout_softmax.csv",
                              index=False)

            for i, f_pass_preds in enumerate(forward_passes_preds):
                preds_df = pd.DataFrame(
                    f_pass_preds,
                    columns=["prediction_" + str(i) for i in range(max_preds_column)])
                preds_df.to_csv(run_folder +
                                "/predictions_with_dropout_f_pass_{}.csv".format(i),
                                index=False)

            labels_df = pd.DataFrame(
                labels, columns=["label_" + str(i) for i in range(max_preds_column)])
            labels_df.to_csv(run_folder + "/labels.csv", index=False)

            uncertainties_df = pd.DataFrame(
                uncertainties,
                columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
            uncertainties_df.to_csv(run_folder + "/uncertainties.csv", index=False)

    # Cross-dataset predictions: evaluate the model trained on args.task on the
    # validation sets of the other conversation response ranking datasets.
    cross_datasets = set(["msdialog", "ubuntu_dstc8", "mantis"]) - set([args.task])
    cross_datasets = sorted(list(cross_datasets))
    cross_data_val_dataloader = {}
    for cross_task in cross_datasets:
        # Ubuntu data has several utterances from the same user in the context
        # (defined here the same way as in the other CRR scripts).
        add_turn_separator = (cross_task != "ubuntu_dstc8")
        train_cross = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + cross_task + "/train.tsv", args.sample_data,
            add_turn_separator)
        valid_cross = preprocess_crr.read_crr_tsv_as_df(
            args.data_folder + cross_task + "/valid.tsv", args.sample_data,
            add_turn_separator)
        ns_train_cross = negative_sampling.BM25NegativeSamplerPyserini(
            list(train_cross[document_col].values), args.num_ns_train,
            args.data_folder + cross_task + "/anserini_train/",
            args.sample_data, args.anserini_folder)
        ns_val_bm25_cross = negative_sampling.BM25NegativeSamplerPyserini(
            list(valid_cross[document_col].values) + list(train_cross[document_col].values),
            args.num_ns_eval, args.data_folder + cross_task + "/anserini_valid/",
            args.sample_data, args.anserini_folder)
        dataloader = dataset.QueryDocumentDataLoader(
            train_cross, valid_cross, valid_cross, tokenizer, ns_train_cross,
            ns_val_bm25_cross, 'classification', args.train_batch_size,
            args.val_batch_size, args.max_seq_len, args.sample_data,
            args.data_folder + cross_task)
        _, val_loader, _ = dataloader.get_pytorch_dataloaders()
        cross_data_val_dataloader[cross_task] = val_loader

    for task_index, cross_task in enumerate(cross_datasets):
        logging.info("Predicting for dataset {}".format(cross_task))
        # Folder ids continue after the cross-NS runs above (ns_index keeps its
        # final value from the previous loop).
        run_folder = args.output_dir + "/" + str(int(args.run_id) + ns_index + task_index + 1)
        os.makedirs(run_folder, exist_ok=True)
        with open(run_folder + "/config.json", "w") as f:
            config_w = {'args': vars(args)}
            config_w['args']['test_dataset'] = cross_task
            config_w['args']['train_negative_sampler'] = 'bm25'
            config_w['args']['test_negative_sampler'] = 'bm25'
            if 'sacred_ex' in config_w['args']:
                del config_w['args']['sacred_ex']
            json.dump(config_w, f, indent=4)

        trainer.num_validation_batches = -1  # predict on all batches, no sampling
        preds, labels, softmax_logits = trainer.predict(
            cross_data_val_dataloader[cross_task])

        # Saving predictions and labels to a file
        max_preds_column = max([len(l) for l in preds])
        preds_df = pd.DataFrame(
            preds, columns=["prediction_" + str(i) for i in range(max_preds_column)])
        preds_df.to_csv(run_folder + "/predictions.csv", index=False)

        softmax_df = pd.DataFrame(
            softmax_logits,
            columns=["prediction_" + str(i) for i in range(max_preds_column)])
        softmax_df.to_csv(run_folder + "/predictions_softmax.csv", index=False)

        labels_df = pd.DataFrame(
            labels, columns=["label_" + str(i) for i in range(max_preds_column)])
        labels_df.to_csv(run_folder + "/labels.csv", index=False)

        # Saving model to a file
        if args.save_model:
            torch.save(model.state_dict(), run_folder + "/model")

        # In case we want to get uncertainty estimations at prediction time
        if args.predict_with_uncertainty_estimation:
            logging.info("Predicting with dropout.")
            preds, labels, softmax_logits, forward_passes_preds, uncertainties = \
                trainer.predict_with_uncertainty(cross_data_val_dataloader[cross_task],
                                                 args.num_foward_prediction_passes)

            max_preds_column = max([len(l) for l in preds])
            preds_df = pd.DataFrame(
                preds,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            preds_df.to_csv(run_folder + "/predictions_with_dropout.csv", index=False)

            softmax_df = pd.DataFrame(
                softmax_logits,
                columns=["prediction_" + str(i) for i in range(max_preds_column)])
            softmax_df.to_csv(run_folder + "/predictions_with_dropout_softmax.csv",
                              index=False)

            for i, f_pass_preds in enumerate(forward_passes_preds):
                preds_df = pd.DataFrame(
                    f_pass_preds,
                    columns=["prediction_" + str(i) for i in range(max_preds_column)])
                preds_df.to_csv(run_folder +
                                "/predictions_with_dropout_f_pass_{}.csv".format(i),
                                index=False)

            labels_df = pd.DataFrame(
                labels, columns=["label_" + str(i) for i in range(max_preds_column)])
            labels_df.to_csv(run_folder + "/labels.csv", index=False)

            uncertainties_df = pd.DataFrame(
                uncertainties,
                columns=["uncertainty_" + str(i) for i in range(max_preds_column)])
            uncertainties_df.to_csv(run_folder + "/uncertainties.csv", index=False)

    return 0.0
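# The run folders written above can be inspected afterwards with pandas, for
# example to relate MC-dropout predictions to their uncertainties (illustrative
# only; the run folder path is hypothetical):
import pandas as pd

run_folder = "../output/42"
preds = pd.read_csv(run_folder + "/predictions_with_dropout.csv")
uncertainties = pd.read_csv(run_folder + "/uncertainties.csv")
# Correlation between the top candidate's score and its uncertainty estimate.
print(preds["prediction_0"].corr(uncertainties["uncertainty_0"]))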