def run_hyperparam_search(questions_to_run, directory, q_docpartitions, bertmodel,
                          sents_embed_dir, question_gpu_map, attention):
    hyperparam_search_dir = create_directory('hyperparam_search', directory)
    hyperparam_model_search_parallel(questions_to_run, q_docpartitions, bertmodel,
                                     sents_embed_dir, hyperparam_search_dir,
                                     question_gpu_map, fdtype=torch.float32,
                                     num_epochs=15, prob_interval_truemax=0.05,
                                     prob_estim=0.95, random_seed=42,
                                     attention=attention)
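
# Illustrative usage sketch (commented out; the variable names below are
# placeholders, not values defined in this module). question_gpu_map is a plain
# dict mapping a question id to the GPU index its search should run on.
#
#   question_gpu_map = {4: 0, 5: 1}
#   run_hyperparam_search(questions_to_run=[4, 5], directory=exp_dir,
#                         q_docpartitions=q_docpartitions, bertmodel=bertmodel,
#                         sents_embed_dir=sents_embed_dir,
#                         question_gpu_map=question_gpu_map, attention=True)
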
def write_sents_embeddings(directory, bertmodel, sents_embed_dir_name, docs_data_tensor):
    # === Generate sentence embeddings ===
    # Define a BertEmbedder over the pre-loaded BertModel (no fine-tuning).
    bert_config = {'bert_train_flag': False, 'bert_all_output': False}
    bertembedder = BertEmbedder(bertmodel, bert_config)
    sents_embed_dir = create_directory(sents_embed_dir_name, directory)
    fdtype = torch.float32
    # Generate and dump the BERT embeddings for the tokens inside the specified
    # embedding directory.
    bert_proc_docs = generate_sents_embeds_from_docs(docs_data_tensor, bertembedder,
                                                     sents_embed_dir, fdtype)
    ReaderWriter.dump_data(bert_proc_docs, os.path.join(sents_embed_dir, 'bert_proc_docs.pkl'))
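
# Minimal sketch of reading the dumped index back. This assumes
# ReaderWriter.dump_data wraps pickle.dump; if the project exposes a matching
# reader (e.g. ReaderWriter.read_data), prefer that instead.
#
#   import pickle
#   with open(os.path.join(sents_embed_dir, 'bert_proc_docs.pkl'), 'rb') as f:
#       bert_proc_docs = pickle.load(f)
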
def run_predict(q_docpartitions, q_fold_config_map, bertmodel, q_state_dict_path_map,
                results_dir, sents_embed_dir, question_fold_map, to_gpu, gpu_index,
                num_epochs=1) -> Dict:
    q_predictions = {}
    for question in q_fold_config_map:
        mconfig, options, __ = q_fold_config_map[question]
        # Override the number of epochs using the user-specified value.
        options['num_epochs'] = num_epochs
        # Update the options' fold num to the current fold.
        options['fold_num'] = question_fold_map[question]
        data_partition = q_docpartitions[question][options['fold_num']]
        results_path = os.path.join(results_dir,
                                    'question_{}'.format(question),
                                    'fold_{}'.format(options['fold_num']))
        results_wrk_dir = create_directory(results_path)
        q_predictions[question] = predict_neural_discern(
            data_partition, bertmodel, mconfig, options, results_wrk_dir,
            sents_embed_dir, state_dict_dir=q_state_dict_path_map[question],
            to_gpu=to_gpu, gpu_index=gpu_index)
    return q_predictions
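
# Hedged wiring example (commented out): pick a trained fold per question and
# point run_predict at the corresponding state dicts. The fold numbers and the
# 'model_statedict' path component are illustrative placeholders.
#
#   question_fold_map = {4: 0, 5: 2}
#   q_state_dict_path_map = {
#       q: os.path.join(train_val_dir, 'question_{}'.format(q),
#                       'fold_{}'.format(f), 'model_statedict')
#       for q, f in question_fold_map.items()
#   }
#   q_predictions = run_predict(q_docpartitions, q_fold_config_map, bertmodel,
#                               q_state_dict_path_map, results_dir, sents_embed_dir,
#                               question_fold_map, to_gpu=True, gpu_index=0)
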
def evaluate_on_test_set(directory, q_docpartitions, q_config_map, bertmodel,
                         train_val_dir, sents_embed_dir, gpu_index):
    test_dir = create_directory('test', directory)
    test_run(q_docpartitions, q_config_map, bertmodel, train_val_dir, test_dir,
             sents_embed_dir, gpu_index, num_epochs=1)
    return test_dir
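
# Illustrative call (placeholder names): test evaluation reads the per-fold
# models written under train_val_dir by run_training, so it must run afterwards;
# num_epochs is pinned to 1 since this is a single inference pass.
#
#   test_dir = evaluate_on_test_set(exp_dir, q_docpartitions, q_config_map,
#                                   bertmodel, train_val_dir, sents_embed_dir,
#                                   gpu_index=0)
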
def run_training(directory, q_docpartitions, q_config_map, bertmodel, sents_embed_dir,
                 question_gpu_map, num_epochs, max_folds):
    train_val_dir = create_directory('train_validation', directory)
    train_val_run(q_docpartitions, q_config_map, bertmodel, train_val_dir,
                  sents_embed_dir, question_gpu_map, num_epochs, max_folds)
    return train_val_dir
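
# A plausible end-to-end ordering of the helpers above (a sketch; exp_dir,
# docs_data_tensor, q_config_map, etc. come from the surrounding script, and it
# is assumed that create_directory nests its first argument under the second):
#
#   write_sents_embeddings(exp_dir, bertmodel, 'sents_embed', docs_data_tensor)
#   sents_embed_dir = os.path.join(exp_dir, 'sents_embed')
#   run_hyperparam_search(questions_to_run, exp_dir, q_docpartitions, bertmodel,
#                         sents_embed_dir, question_gpu_map, attention)
#   train_val_dir = run_training(exp_dir, q_docpartitions, q_config_map, bertmodel,
#                                sents_embed_dir, question_gpu_map, num_epochs,
#                                max_folds)
#   evaluate_on_test_set(exp_dir, q_docpartitions, q_config_map, bertmodel,
#                        train_val_dir, sents_embed_dir, gpu_index)
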
"The pre-built experiment_to_rerun will be used. Hyperparam search will not be run.") # under test mode (for faster debugging), run a smaller set of partitions and epochs, and no hyper param search if config['test_mode']: config['max_folds'] = 1 # max number of data partition folds to run (for faster testing) config['num_epochs'] = 1 config['run_hyper_param_search'] = False time_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') if config['experiment_to_rerun']: if config['copy_exp_dir']: from distutils.dir_util import copy_tree rerun_dir_name = '{}_{}_{}'.format(config['experiment_to_rerun'], 'rerun', time_stamp) orig_exp_dir = os.path.join(config['base_dir'], 'experiments', config['experiment_to_rerun']) exp_dir = os.path.join(config['base_dir'], 'experiments', rerun_dir_name) create_directory(exp_dir) # copy contents of original exp dir so experiment re-run has everything it needs print("copying experiment for re-run in {}...".format(exp_dir)) copy_tree(orig_exp_dir, exp_dir) print("... complete") else: exp_dir = exp_dir = os.path.join(config['base_dir'], 'experiments', config['experiment_to_rerun']) else: if config['test_mode']: exp_dir = os.path.join(config['base_dir'], 'experiments', 'tests', time_stamp) else: exp_dir = os.path.join(config['base_dir'], 'experiments', time_stamp) create_directory(exp_dir) config['exp_dir'] = exp_dir if config['biobert']: