def the_main_function(config_dir='config', update_dict=None): # read config from yaml file cfname = 'config_mlstm.yaml' _n = -1 if args.t: # put a _model.yaml in config dir with smaller network # cfname = '_' + cfname _n = 100 TRAIN_SIZE = _n VALID_SIZE = _n TEST_SIZE = _n # read configurations from config file config_file = os.path.join(config_dir, cfname) with open(config_file) as reader: model_config = yaml.safe_load(reader) # the dataset is basically all the things about squad data # dataset is built by calling the SquadDataset class # it takes the data in tokenized_squad_v1.1.2 directory # then convert them into a single .h5 file dataset = SquadDataset(dataset_h5=model_config['dataset']['h5'], data_path='tokenized_squad_v1.1.2/', ignore_case=True) # divide data into 3 parts train_data, valid_data, test_data = dataset.get_data(train_size=TRAIN_SIZE, valid_size=VALID_SIZE, test_size=TEST_SIZE) print_shape_info(train_data) if False: print('---------------------------------- printing out data shape') print_data_samples(dataset, train_data, 12, 15) exit(0) # Set the random seed manually for reproducibility. torch.manual_seed(model_config['scheduling']['cuda_seed']) if torch.cuda.is_available(): if not model_config['scheduling']['enable_cuda']: print("WARNING: You have a CUDA device, so you should probably run with --cuda") else: torch.cuda.manual_seed(model_config['scheduling']['cuda_seed']) else: print ("WARNING: no CUDA available, enable_cuda is disabled.") model_config['scheduling']['enable_cuda'] = False # init model _model = MatchLSTMModel(model_config=model_config, data_specs=dataset.meta_data) if model_config['scheduling']['enable_cuda']: _model.cuda() criterion = StandardNLL() if model_config['scheduling']['enable_cuda']: criterion = criterion.cuda() # print out a summarization of the model logger.info('finished loading models') logger.info(torch_model_summarize(_model)) # get optimizer / lr init_learning_rate = model_config['optimizer']['learning_rate'] parameters = filter(lambda p: p.requires_grad, _model.parameters()) if model_config['optimizer']['step_rule'] == 'sgd': optimizer = torch.optim.SGD(parameters, lr=init_learning_rate) elif model_config['optimizer']['step_rule'] == 'adam': optimizer = torch.optim.Adam(parameters, lr=init_learning_rate) input_keys = ['input_story', 'input_question', 'input_story_char', 'input_question_char'] output_keys = ['answer_ranges'] batch_size = model_config['scheduling']['batch_size'] valid_batch_size = model_config['scheduling']['valid_batch_size'] _f = h5py.File(dataset.dataset_h5, 'r') word_vocab = _f['words_flatten'][0].split('\n') word_vocab = list(word_vocab) # word2id = dict(zip(word_vocab, range(len(word_vocab)))) char_vocab = _f['words_flatten_char'][0].split('\n') char_vocab = list(char_vocab) char_word2id = {} for i, ch in enumerate(char_vocab): char_word2id[ch] = i # the generator uses yield to give batches of data, similar to what taught in class train_batch_generator = random_generator(data_dict=train_data, batch_size=batch_size, input_keys=input_keys, output_keys=output_keys, trim_function=squad_trim, sort_by='input_story', char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, enable_cuda=model_config['scheduling']['enable_cuda']) # train number_batch = (train_data['input_story'].shape[0] + batch_size - 1) // batch_size data_queue, _ = generator_queue(train_batch_generator, max_q_size=20) learning_rate = init_learning_rate best_val_f1 = None be_patient = 0 # Load the best saved model. with open(model_config['dataset']['model_save_path'], 'rb') as save_f: _model = torch.load(save_f) # Run on test data. logger.info("loading model for evaluation------------------------------------------------------------------\n") test_f1, test_em, test_nll_loss = evaluate(model=_model, data=test_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("------------------------------------------------------------------------------------\n") logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) return
def the_main_function(config_dir='config', update_dict=None): # read config from yaml file cfname = 'config_mlstm.yaml' _n = -1 if args.t: # put a _model.yaml in config dir with smaller network # cfname = '_' + cfname _n = 100 TRAIN_SIZE = _n VALID_SIZE = _n TEST_SIZE = _n config_file = os.path.join(config_dir, cfname) with open(config_file) as reader: model_config = yaml.safe_load(reader) dataset = SquadDataset(dataset_h5=model_config['dataset']['h5'], data_path='tokenized_squad_v1.1.2/', ignore_case=True) train_data, valid_data, test_data = dataset.get_data(train_size=TRAIN_SIZE, valid_size=VALID_SIZE, test_size=TEST_SIZE) print_shape_info(train_data) if False: print('---------------------------------- printing out data shape') print_data_samples(dataset, train_data, 12, 15) exit(0) # Set the random seed manually for reproducibility. torch.manual_seed(model_config['scheduling']['cuda_seed']) if torch.cuda.is_available(): if not model_config['scheduling']['enable_cuda']: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) else: torch.cuda.manual_seed(model_config['scheduling']['cuda_seed']) _model = MatchLSTMModel(model_config=model_config, data_specs=dataset.meta_data) if model_config['scheduling']['enable_cuda']: _model.cuda() criterion = StandardNLL() if model_config['scheduling']['enable_cuda']: criterion = criterion.cuda() logger.info('finished loading models') logger.info(torch_model_summarize(_model)) # get optimizer / lr init_learning_rate = model_config['optimizer']['learning_rate'] parameters = filter(lambda p: p.requires_grad, _model.parameters()) if model_config['optimizer']['step_rule'] == 'sgd': optimizer = torch.optim.SGD(parameters, lr=init_learning_rate) elif model_config['optimizer']['step_rule'] == 'adam': optimizer = torch.optim.Adam(parameters, lr=init_learning_rate) input_keys = [ 'input_story', 'input_question', 'input_story_char', 'input_question_char' ] output_keys = ['answer_ranges'] batch_size = model_config['scheduling']['batch_size'] valid_batch_size = model_config['scheduling']['valid_batch_size'] _f = h5py.File(dataset.dataset_h5, 'r') word_vocab = _f['words_flatten'][0].split('\n') word_vocab = list(word_vocab) # word2id = dict(zip(word_vocab, range(len(word_vocab)))) char_vocab = _f['words_flatten_char'][0].split('\n') char_vocab = list(char_vocab) char_word2id = {} for i, ch in enumerate(char_vocab): char_word2id[ch] = i train_batch_generator = random_generator( data_dict=train_data, batch_size=batch_size, input_keys=input_keys, output_keys=output_keys, trim_function=squad_trim, sort_by='input_story', char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, enable_cuda=model_config['scheduling']['enable_cuda']) # train number_batch = (train_data['input_story'].shape[0] + batch_size - 1) // batch_size data_queue, _ = generator_queue(train_batch_generator, max_q_size=20) learning_rate = init_learning_rate best_val_f1 = None be_patient = 0 try: for epoch in range(model_config['scheduling']['epoch']): _model.train() sum_loss = 0.0 with tqdm(total=number_batch, leave=True, ncols=160, ascii=True) as pbar: for i in range(number_batch): # qgen train one batch generator_output = None while True: if not data_queue.empty(): generator_output = data_queue.get() break else: time.sleep(wait_time) input_story, input_question, input_story_char, input_question_char, answer_ranges = generator_output optimizer.zero_grad() _model.zero_grad() preds = _model.forward( input_story, input_question, input_story_char, input_question_char) # batch x time x 2 # loss loss = criterion(preds, answer_ranges) loss = torch.mean(loss) loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. torch.nn.utils.clip_grad_norm( _model.parameters(), model_config['optimizer']['clip_grad_norm']) optimizer.step() # apply gradients preds = torch.max( preds, 1)[1].cpu().data.numpy().squeeze() # batch x 2 batch_loss = loss.cpu().data.numpy() sum_loss += batch_loss * batch_size pbar.set_description( 'epoch=%d, batch=%d, avg_loss=%.5f, batch_loss=%.5f, lr=%.6f' % (epoch, i, sum_loss / float(batch_size * (i + 1)), batch_loss, learning_rate)) pbar.update(1) # eval on valid set val_f1, val_em, val_nll_loss = evaluate( model=_model, data=valid_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info( "epoch=%d, valid nll loss=%.5f, valid f1=%.5f, valid em=%.5f, lr=%.6f" % (epoch, val_nll_loss, val_f1, val_em, learning_rate)) # Save the model if the validation loss is the best we've seen so far. if not best_val_f1 or val_f1 > best_val_f1: with open(model_config['dataset']['model_save_path'], 'wb') as save_f: torch.save(_model, save_f) best_val_f1 = val_f1 be_patient = 0 else: if epoch >= model_config['optimizer'][ 'learning_rate_decay_from_this_epoch']: if be_patient >= model_config['optimizer'][ 'learning_rate_decay_patience']: if learning_rate * model_config['optimizer'][ 'learning_rate_decay_ratio'] > model_config[ 'optimizer'][ 'learning_rate_cut_lowerbound'] * model_config[ 'optimizer']['learning_rate']: # Anneal the learning rate if no improvement has been seen in the validation dataset. logger.info( 'cutting learning rate from %.5f to %.5f' % (learning_rate, learning_rate * model_config['optimizer'] ['learning_rate_decay_ratio'])) learning_rate *= model_config['optimizer'][ 'learning_rate_decay_ratio'] for param_group in optimizer.param_groups: param_group['lr'] = learning_rate else: logger.info( 'learning rate %.5f reached lower bound' % (learning_rate)) be_patient += 1 test_f1, test_em, test_nll_loss = evaluate( model=_model, data=test_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("test: nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) logger.info( "========================================================================\n" ) # At any point you can hit Ctrl + C to break out of training early. except KeyboardInterrupt: logger.info('--------------------------------------------\n') logger.info('Exiting from training early\n') # Load the best saved model. with open(model_config['dataset']['model_save_path'], 'rb') as save_f: _model = torch.load(save_f) # Run on test data. logger.info( "loading best model------------------------------------------------------------------\n" ) test_f1, test_em, test_nll_loss = evaluate( model=_model, data=test_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info( "------------------------------------------------------------------------------------\n" ) logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) return
def the_main_function(name_of_model,config_dir='config', update_dict=None,data_folder_path=DEFAULT_DATA_FOLDER_PATH,config_filename = DEFAULT_CONFIG_FILENAME,h5filename = DEFAULT_H5_FILENAME): # read config from yaml file print "data folder path: "+data_folder_path cfname = config_filename timestring = time.strftime("%m%d%H%M%S") # read configurations from config file config_file = os.path.join(config_dir, cfname) with open(config_file) as reader: model_config = yaml.safe_load(reader) # _n = - 1 means use all data _n = -1 if args.t: # if doing tiny test # then only use 100 data # also, only run for 2 epoches _n = 100 model_config['scheduling']['epoch'] = 15 if args.forcedatasize>0: _n = args.forcedatasize TRAIN_SIZE = _n VALID_SIZE = _n TEST_SIZE = _n # the model will be saved here, the name of the save file can be specified using -name model_save_path = os.path.join(model_config['dataset']['model_save_folder'],name_of_model)+".pt" # open plotlog file to do some extra logging for easy plotting later # this file records the validation result at end of each epoch plotlog_filename = name_of_model+"_"+timestring+"_plot.tsv" plotlog_path = os.path.join(PLOTLOG_FOLDER,plotlog_filename) if not args.evaluationOnly: # basically if it's evalution only, then don't write training traces (there will be no training traces) plotlog_fpt = open(plotlog_path,'w') plotlog_fpt.write(name_of_model+"\n") plotlog_fpt.write("epoch\tnll_loss\tf1\tem\tlr\ttime\n") # this file is used to log the test set evaluation results testplotlog_filename = name_of_model+"_"+timestring+"_test.tsv" testplotlog_path = os.path.join(PLOTLOG_FOLDER,testplotlog_filename) # the dataset is basically all the things about squad data # dataset is built by calling the SquadDataset class # it takes the data in tokenized_squad_v1.1.2 directory # then convert them into a single .h5 file dataset = SquadDataset(dataset_h5=h5filename, data_path=data_folder_path+"/", ignore_case=True) # divide data into 3 parts train_data, valid_data, test_data = dataset.get_data(train_size=TRAIN_SIZE, valid_size=VALID_SIZE, test_size=TEST_SIZE) print_shape_info(train_data) if False: print('---------------------------------- printing out data shape') print_data_samples(dataset, train_data, 12, 15) exit(0) ########################################################################################################## # after read in the train, valid and dev set # train and valid have settings specified by folder name, dev set for each of 6 models is the original squad dataset # now for the following part, we get 3 extra testsets # add_any_4, add one sent and add best sent testdataset = SquadTestDataset(dataset_h5='squad_testset.1.0.h5', data_path='test_data' + "/", ignore_case=True) # after the model finished training, we will test its performance on these 3 datasets add_any_4_testdata, add_one_sent_testdata, add_best_sent_testdata = testdataset.get_data(train_size=TEST_SIZE,valid_size=TEST_SIZE, test_size=TEST_SIZE) # each is a data dict ########################################################################################################### # Set the random seed manually for reproducibility. torch.manual_seed(model_config['scheduling']['cuda_seed']) if torch.cuda.is_available(): if not model_config['scheduling']['enable_cuda']: print("WARNING: You have a CUDA device, so you should probably run with --cuda") else: torch.cuda.manual_seed(model_config['scheduling']['cuda_seed']) else: print ("WARNING: no CUDA available, enable_cuda is disabled.") model_config['scheduling']['enable_cuda'] = False if args.fng: model_config['scheduling']['enable_cuda'] = False # we have the option of continue training the model # in evaluationOnly mode we also just load the exisitng model if args.evaluationOnly and (not os.path.isfile(model_save_path)): logger.info('##### MODEL NOT FOUND IN EVALUTION ONLY MODEL, QUIT PROGRAM #####') return if (args.r or args.evaluationOnly) and (os.path.isfile(model_save_path)): if args.r: logger.info('##### MODEL EXIST, CONTINUE TRAINING EXISTING MODEL #####') if args.evaluationOnly: logger.info('##### !!!!USING EXISTING MODEL FOR EVALUTION!!!! #####\n') with open(model_save_path, 'rb') as save_f: _model = torch.load(save_f) else: logger.info('##### INIT UNTRAINED MODEL #####') # init untrained model _model = MatchLSTMModel(model_config=model_config, data_specs=dataset.meta_data) if model_config['scheduling']['enable_cuda']: _model.cuda() criterion = StandardNLL() if model_config['scheduling']['enable_cuda']: criterion = criterion.cuda() # print out a summarization of the model logger.info('finished loading models') logger.info(torch_model_summarize(_model)) # get optimizer / lr init_learning_rate = model_config['optimizer']['learning_rate'] parameters = filter(lambda p: p.requires_grad, _model.parameters()) if model_config['optimizer']['step_rule'] == 'sgd': optimizer = torch.optim.SGD(parameters, lr=init_learning_rate) elif model_config['optimizer']['step_rule'] == 'adam': optimizer = torch.optim.Adam(parameters, lr=init_learning_rate) input_keys = ['input_story', 'input_question', 'input_story_char', 'input_question_char'] output_keys = ['answer_ranges'] if args.forcebatchsize > 0: batch_size = args.forcebatchsize valid_batch_size = args.forcebatchsize else: batch_size = model_config['scheduling']['batch_size'] valid_batch_size = model_config['scheduling']['valid_batch_size'] _f = h5py.File(dataset.dataset_h5, 'r') word_vocab = _f['words_flatten'][0].split('\n') word_vocab = list(word_vocab) # word2id = dict(zip(word_vocab, range(len(word_vocab)))) char_vocab = _f['words_flatten_char'][0].split('\n') char_vocab = list(char_vocab) char_word2id = {} for i, ch in enumerate(char_vocab): char_word2id[ch] = i # this part is about things we need for the any4, addsent and addonesent dataset _f_test = h5py.File(testdataset.dataset_h5, 'r') word_vocab_test = _f_test['words_flatten'][0].split('\n') word_vocab_test = list(word_vocab_test) char_vocab_test = _f_test['words_flatten_char'][0].split('\n') char_vocab_test = list(char_vocab_test) char_word2id_test = {} for i, ch in enumerate(char_vocab_test): char_word2id_test[ch] = i # the generator uses yield to give batches of data, similar to what taught in class train_batch_generator = random_generator(data_dict=train_data, batch_size=batch_size, input_keys=input_keys, output_keys=output_keys, trim_function=squad_trim, sort_by='input_story', char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, enable_cuda=model_config['scheduling']['enable_cuda']) # train number_batch = (train_data['input_story'].shape[0] + batch_size - 1) // batch_size data_queue, _ = generator_queue(train_batch_generator, max_q_size=20) learning_rate = init_learning_rate best_val_f1 = None be_patient = 0 starttime = time.time() # this is used to count how much time it takes to run a single epoch startEpoch = args.startEpoch # for example, if first run trained 12 epoches, then in the second run, use "-e 12" to tell the program # that you are running starting from epoch 12. if args.forceepoch>0: number_of_epoch = args.forceepoch else: number_of_epoch = model_config['scheduling']['epoch'] if args.evaluationOnly: # in evalution only mode we don't want any training! so set the epoch to zero number_of_epoch = 0 try: for epoch in range(startEpoch,startEpoch+number_of_epoch): _model.train() sum_loss = 0.0 with tqdm(total=number_batch, leave=True, ncols=160, ascii=True) as pbar: for i in range(number_batch): # qgen train one batch generator_output = None while True: if not data_queue.empty(): generator_output = data_queue.get() break else: time.sleep(wait_time) input_story, input_question, input_story_char, input_question_char, answer_ranges = generator_output optimizer.zero_grad() _model.zero_grad() preds = _model.forward(input_story, input_question, input_story_char, input_question_char) # batch x time x 2 # loss loss = criterion(preds, answer_ranges) loss = torch.mean(loss) loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. torch.nn.utils.clip_grad_norm(_model.parameters(), model_config['optimizer']['clip_grad_norm']) optimizer.step() # apply gradients preds = torch.max(preds, 1)[1].cpu().data.numpy().squeeze() # batch x 2 batch_loss = loss.cpu().data.numpy() sum_loss += batch_loss * batch_size pbar.set_description('epoch=%d, batch=%d, avg_loss=%.5f, batch_loss=%.5f, lr=%.6f' % (epoch, i, sum_loss / float(batch_size * (i + 1)), batch_loss, learning_rate)) pbar.update(1) # eval on valid set val_f1, val_em, val_nll_loss = evaluate(model=_model, data=valid_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("epoch=%d, valid nll loss=%.5f, valid f1=%.5f, valid em=%.5f, lr=%.6f, timespent=%d" % (epoch, val_nll_loss, val_f1, val_em, learning_rate,time.time()-starttime)) # also log to plotlog file plotlog_fpt.write(str(epoch)+"\t"+str(val_nll_loss)+"\t"+str(val_f1)+"\t"+str(val_em)+"\t"+str(learning_rate)+"\t"+str(time.time()-starttime)+"\n") plotlog_fpt.flush() # Save the model if the validation loss is the best we've seen so far. if not best_val_f1 or val_f1 > best_val_f1: with open(model_save_path, 'wb') as save_f: torch.save(_model, save_f) best_val_f1 = val_f1 be_patient = 0 else: if epoch >= model_config['optimizer']['learning_rate_decay_from_this_epoch']: if be_patient >= model_config['optimizer']['learning_rate_decay_patience']: if learning_rate * model_config['optimizer']['learning_rate_decay_ratio'] > model_config['optimizer']['learning_rate_cut_lowerbound'] * model_config['optimizer']['learning_rate']: # Anneal the learning rate if no improvement has been seen in the validation dataset. logger.info('cutting learning rate from %.5f to %.5f' % (learning_rate, learning_rate * model_config['optimizer']['learning_rate_decay_ratio'])) learning_rate *= model_config['optimizer']['learning_rate_decay_ratio'] for param_group in optimizer.param_groups: param_group['lr'] = learning_rate else: logger.info('learning rate %.5f reached lower bound' % (learning_rate)) be_patient += 1 # shouldn't look at test set # commenting out this part of the original implementation # test_f1, test_em, test_nll_loss = evaluate(model=_model, data=test_data, criterion=criterion, # trim_function=squad_trim, char_level_func=add_char_level_stuff, # word_id2word=word_vocab, char_word2id=char_word2id, # batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) # logger.info("test: nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) logger.info("========================================================================\n") # At any point you can hit Ctrl + C to break out of training early. except KeyboardInterrupt: logger.info('--------------------------------------------\n') logger.info('Exiting from training early\n') # Load the best saved model. with open(model_save_path, 'rb') as save_f: _model = torch.load(save_f) # write evalution results to disk testplotlog_fpt = open(testplotlog_path,'w') testplotlog_fpt.write(name_of_model+"\n") testplotlog_fpt.write("testset\tnll_loss\tf1\tem\n") # Run on test data. logger.info("loading best model and evaluate on original squad test (dev) sets------------------------------------------------------------------\n") test_f1, test_em, test_nll_loss = evaluate(model=_model, data=test_data, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab, char_word2id=char_word2id, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("------------------------------------------------------------------------------------\n") logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) testplotlog_fpt.write("OriginalSquad\t"+str(test_nll_loss) + "\t" + str(test_f1) + "\t" + str(test_em) + "\n") testplotlog_fpt.flush() # here we should change the word_vocab etc so that the model knows it's going to do testing on other test sets!!!!!! # otherwise the model gets errors logger.info("evaluate on add any 4 test set------------------------------------------------------------------\n") test_f1, test_em, test_nll_loss = evaluate(model=_model, data=add_any_4_testdata, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab_test, char_word2id=char_word2id_test, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("------------------------------------------------------------------------------------\n") logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) testplotlog_fpt.write("AddAny4\t"+str(test_nll_loss) + "\t" + str(test_f1) + "\t" + str(test_em) + "\n") testplotlog_fpt.flush() logger.info("evaluate on add one sent test set------------------------------------------------------------------\n") test_f1, test_em, test_nll_loss = evaluate(model=_model, data=add_one_sent_testdata, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab_test, char_word2id=char_word2id_test, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("------------------------------------------------------------------------------------\n") logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) testplotlog_fpt.write("AddOneSent\t"+str(test_nll_loss) + "\t" + str(test_f1) + "\t" + str(test_em) + "\n") testplotlog_fpt.flush() logger.info("evaluate on add best sent test set------------------------------------------------------------------\n") test_f1, test_em, test_nll_loss = evaluate(model=_model, data=add_best_sent_testdata, criterion=criterion, trim_function=squad_trim, char_level_func=add_char_level_stuff, word_id2word=word_vocab_test, char_word2id=char_word2id_test, batch_size=valid_batch_size, enable_cuda=model_config['scheduling']['enable_cuda']) logger.info("------------------------------------------------------------------------------------\n") logger.info("nll loss=%.5f, f1=%.5f, em=%.5f" % (test_nll_loss, test_f1, test_em)) testplotlog_fpt.write("AddBestSent\t"+str(test_nll_loss) + "\t" + str(test_f1) + "\t" + str(test_em) + "\n") testplotlog_fpt.flush() return