def main(pre_model_path, tar_model_path):
    """Build the model selected in the global config and run ``transform`` on it.

    Args:
        pre_model_path: forwarded to ``transform`` as the source weight path.
        tar_model_path: forwarded to ``transform`` as the target weight path.

    Raises:
        ValueError: if ``global.model`` in the config names an unknown model.
    """
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']

    # Factories are lazy so only the chosen model (and, for 'base', its extra
    # config file) is actually constructed/read.
    model_factories = {
        'base': lambda: BaseModel(dataset_h5_path,
                                  model_config=read_config('config/base_model.yaml')),
        'match-lstm': lambda: MatchLSTM(dataset_h5_path),
        'match-lstm+': lambda: MatchLSTMPlus(dataset_h5_path),
        'r-net': lambda: RNet(dataset_h5_path),
        'm-reader': lambda: MReader(dataset_h5_path),
    }
    if model_choose not in model_factories:
        raise ValueError('model "%s" in config file not recognized' % model_choose)
    model = model_factories[model_choose]()

    # Use the module logger throughout instead of the root ``logging`` functions.
    logger.info("transforming model from '%s' to '%s'...", pre_model_path, tar_model_path)
    transform(pre_model_path, tar_model_path, model)

    logger.info('finished.')
def main(pre_model_path, tar_model_path):
    """Build the model selected in the global config and run ``transform`` on it.

    Args:
        pre_model_path: forwarded to ``transform`` as the source weight path.
        tar_model_path: forwarded to ``transform`` as the target weight path.

    Raises:
        ValueError: if ``global.model`` in the config names an unknown model.
    """
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']

    # Factories are lazy so only the chosen model (and, for 'base', its extra
    # config file) is actually constructed/read.
    model_factories = {
        'base': lambda: BaseModel(dataset_h5_path,
                                  model_config=read_config('config/base_model.yaml')),
        'match-lstm': lambda: MatchLSTM(dataset_h5_path),
        'match-lstm+': lambda: MatchLSTMPlus(dataset_h5_path),
        'r-net': lambda: RNet(dataset_h5_path),
    }
    if model_choose not in model_factories:
        raise ValueError('model "%s" in config file not recognized' % model_choose)
    model = model_factories[model_choose]()

    # Use the module logger throughout instead of the root ``logging`` functions.
    logger.info("transforming model from '%s' to '%s'...", pre_model_path, tar_model_path)
    transform(pre_model_path, tar_model_path, model)

    logger.info('finished.')
def main():
    """Log and plot passage/answer length statistics of the SQuAD train and dev splits.

    Reads the default config, gathers the length tables from ``SquadDataset``
    and shows four figures (two scatter plots of passage length, two line
    plots of answer length) via ``plt.show()``.
    """
    logger.info('------------Analysis SQuAD dataset--------------')
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    train_context_len_cnt, train_context_len = dataset.gather_context_seq_len('train')
    dev_context_len_cnt, dev_context_len = dataset.gather_context_seq_len('dev')

    train_answer_len = dataset.gather_answer_seq_len('train', max_len=9)
    dev_answer_len = dataset.gather_answer_seq_len('dev', max_len=9)

    # Use the module logger consistently (the original mixed in root-level logging).
    logger.info('train context length cnt: %s', train_context_len_cnt)
    logger.info('dev context length cnt: %s', dev_context_len_cnt)

    sns.set()

    # Passage-length distributions.
    for split_name, context_len in (('train', train_context_len), ('dev', dev_context_len)):
        context_len.plot.scatter('length', 'cnt', title=split_name)
        plt.xlabel('passage length')

    # Answer-length distributions; ticks are relabelled with the 'length' column.
    for split_name, answer_len in (('train', train_answer_len), ('dev', dev_answer_len)):
        answer_len.plot.line('length', 'cnt', marker='o', title=split_name)
        plt.xticks(range(len(answer_len['length'])), answer_len['length'])
        plt.xlabel('answer length')

    plt.show()
def preprocess(config_path):
    """Load the config at ``config_path`` and run ``PreprocessData`` with it.

    Args:
        config_path: path handed to ``read_config``.
    """
    for banner in ('------------Preprocess SQuAD dataset--------------',
                   'loading config file...'):
        logger.info(banner)
    settings = read_config(config_path)

    logger.info('preprocess data...')
    PreprocessData(settings).run()
def main(pre_model_path, tar_model_path):
    """Construct a ``MatchLSTMModel`` from the default config and run ``transform``.

    Args:
        pre_model_path: forwarded to ``transform`` as the source weight path.
        tar_model_path: forwarded to ``transform`` as the target weight path.
    """
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config)

    # Use the module logger throughout instead of the root ``logging`` functions.
    logger.info("transforming model from '%s' to '%s'...", pre_model_path, tar_model_path)
    transform(pre_model_path, tar_model_path, model)

    logger.info('finished.')
def main(config_path, pre_model_path, tar_model_path):
    """Construct a ``MatchLSTMPlus`` model from a config file and run ``transform``.

    Args:
        config_path: path handed to ``read_config``.
        pre_model_path: forwarded to ``transform`` as the source weight path.
        tar_model_path: forwarded to ``transform`` as the target weight path.
    """
    logger.info('loading config file...')
    global_config = read_config(config_path)

    logger.info('constructing model...')
    dataset_h5_path = global_config['data']['dataset_h5']
    model = MatchLSTMPlus(dataset_h5_path)

    # Use the module logger throughout instead of the root ``logging`` functions.
    logger.info("transforming model from '%s' to '%s'...", pre_model_path, tar_model_path)
    transform(pre_model_path, tar_model_path, model)

    logger.info('finished.')
def main():
    """Log and plot passage-length statistics of the SQuAD train and dev splits.

    Reads the default config, gathers the context-length tables from
    ``SquadDataset`` and shows one scatter plot per split via ``plt.show()``.
    """
    logger.info('------------Analysis SQuAD dataset--------------')
    logger.info('loading config file...')
    global_config = read_config()

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    train_context_len_cnt, train_context_len = dataset.gather_context_seq_len('train')
    dev_context_len_cnt, dev_context_len = dataset.gather_context_seq_len('dev')

    # Use the module logger consistently (the original mixed in root-level logging).
    logger.info('train context length cnt: %s', train_context_len_cnt)
    logger.info('dev context length cnt: %s', dev_context_len_cnt)

    train_context_len.plot.scatter('length', 'cnt', title='train')
    dev_context_len.plot.scatter('length', 'cnt', title='dev')
    plt.show()
def test(config_path, out_path):
    """Evaluate a trained model on the dev set, or dump its predictions as JSON.

    Args:
        config_path: path handed to ``read_config``.
        out_path: when ``None`` the EM/F1 scores and loss are computed and
            logged; otherwise the predicted answers, keyed by sample id, are
            written to this path as JSON.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: if the configured weight file does not exist.
    """
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # Inference only: globally disable autograd for everything below.
    torch.set_grad_enabled(False)

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    model.eval()  # evaluation mode, so dropout behaves deterministically

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), "not found model weight file on '%s'" % model_weight_path

    # Deserialise the checkpoint exactly once (the original read it from disk
    # twice whenever CUDA was enabled).
    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')
    batch_size = global_config['test']['batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_dev_data = dataset.get_dataloader_dev(batch_size, num_workers)

    # Either just evaluate the scores or write the answers to a file.
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f",
                    score_em, score_f1, sum_loss)
    else:
        context_right_space = dataset.get_all_ct_right_space_dev()
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       id_to_word_func=dataset.sentence_id2word,
                                       right_space=context_right_space)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logger.info('writing predict answer to file %s', out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
def test(config_path, experiment_info):
    """Run a trained classifier over the test file list and write its predictions.

    Args:
        config_path: path handed to ``read_config``; the file content is also
            logged verbatim.
        experiment_info: string inserted into the result file name
            (``<output_file_path><experiment_info>_result.csv``).

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            weight path does not exist.
    """
    logger.info('------------ flower classification Train --------------')
    logger.info('------------ loading config file ------------')
    global_config = read_config(config_path)
    # Close the config file deterministically instead of leaking the handle.
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info('------------ config file info above ------------')

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # NOTE(review): the CUDA switch is read from the 'train' section even
    # though this is the test entry point -- confirm this is intended.
    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")
    cudnn.benchmark = True

    # Inference only: globally disable autograd for everything below.
    torch.set_grad_enabled(False)

    logger.info('constructing dataset...')
    test_filelist_path = global_config['data']['dataset']['test_path']
    test_dataset = filelist_DataSet(
        test_filelist_path,
        transform=transforms.Compose([
            transforms.Resize([224, 224]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ]))
    assert test_dataset
    test_batch_size = global_config['test']['test_batch_size']
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=4)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    logger.info('Using model is: %s ', model_choose)
    # NOTE(review): the config value selects and calls an arbitrary
    # module-level name; only use trusted config files.
    model = globals()[model_choose]()

    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....', gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)

    model = model.to(device)
    model.eval()  # evaluation mode, so dropout behaves deterministically

    # load model weight
    weight_path = global_config['test']['model_path']
    if not os.path.exists(weight_path):
        raise ValueError("invalid weight path !!!")
    logger.info('loading existing weight............')
    if enable_cuda:
        weight = torch.load(
            weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(weight_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('evaluate forwarding...')
    # The concatenation always yields a string, so the former
    # ``out_path is not None`` guard could never be false and was removed.
    out_path = global_config['test'][
        'output_file_path'] + experiment_info + "_result.csv"
    predict_on_model(model=model,
                     batch_data=test_loader,
                     device=device,
                     out_path=out_path)
    logger.info('finished.')
def train(config_path):
    """Train the configured model on SQuAD, saving the weights whenever the
    dev score improves.

    After every epoch the model is evaluated on the dev set; the mean of EM
    and F1 is compared with the best value so far and ``save_model`` is called
    on improvement.

    Args:
        config_path: path handed to ``read_config``.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model or optimizer name is unknown.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer -- only parameters that require gradients are optimised.
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    # Only SGD receives the configured learning rate; the other optimizers are
    # created with their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' % optimizer_choose)

    # Resume from an existing weight file when there is one.
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # Deserialise the checkpoint exactly once (the original read it from
        # disk twice whenever CUDA was enabled).
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    best_avg = 0.
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # training mode, so dropout is active
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('epoch=%d, sum_loss=%.5f', epoch, sum_loss)

        # evaluate
        with torch.no_grad():
            model.eval()  # evaluation mode, so dropout is disabled
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f",
                    epoch, valid_score_em, valid_score_f1, valid_loss)

        # save model when best avg score
        if valid_avg > best_avg:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d", epoch)
            best_avg = valid_avg

    logger.info('finished.')
def test(config_path, experiment_info):
    """Run a trained MedQA model over the test set and write its predictions.

    Args:
        config_path: path handed to ``read_config``; the file content is also
            logged verbatim.
        experiment_info: string inserted into the result file name
            (``<output_file_path><experiment_info>_result.csv``).

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: if the configured weight file does not exist.
    """
    logger.info('------------MedQA v1.0 Evaluate--------------')
    logger.info('============================loading config file... print config file =========================')
    global_config = read_config(config_path)
    # Close the config file deterministically instead of leaking the handle.
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info('^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^')

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # Inference only: globally disable autograd for everything below.
    torch.set_grad_enabled(False)

    # ---------------------------- obtain the dataset ----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['test']['model']
    logger.info("model choose is: %s", model_choose)
    dataset_h5_path = global_config['test']['dataset_h5']
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    print_network(model)
    logger.info('dataParallel using %d GPU.....', torch.cuda.device_count())
    model = torch.nn.DataParallel(model)
    model = model.to(device)
    model.eval()  # evaluation mode, so dropout behaves deterministically

    # Publish the freshly initialised embedding matrix as a module-level global.
    global init_embedding_weight
    init_embedding_weight = model.state_dict()['module.embedding.embedding_layer.weight']

    # criterion
    # NOTE(review): these criteria are built but never used in this function;
    # kept because their constructors are project code -- confirm and remove.
    task_criterion = CrossEntropyLoss(weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss().to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # testing arguments
    logger.info('get test data loader ...')
    test_batch_size = global_config['test']['test_batch_size']
    batch_test_data = dataset.get_dataloader_test(test_batch_size, shuffle=False)

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), "not found model weight file on '%s'" % model_weight_path

    # Deserialise the checkpoint exactly once (the original read it from disk
    # twice whenever CUDA was enabled).
    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    if not global_config['test']['keep_embedding']:
        # Drop the embedding layer's parameters to avoid a size mismatch.
        del weight['module.embedding.embedding_layer.weight']
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('evaluate forwarding...')
    # The concatenation always yields a string, so the former
    # ``out_path is not None`` guard could never be false and was removed.
    out_path = global_config['test']['output_file_path'] + experiment_info + "_result.csv"
    predict_on_model(model=model, batch_data=batch_test_data, device=device, out_path=out_path)
    logger.info('finished.')
def test_5c(config_path, experiment_info):
    """Run a trained MedQA model over the test set and write its predictions.

    Args:
        config_path: path handed to ``read_config``; the file content is also
            logged verbatim.
        experiment_info: string inserted into the result file name
            (``<output_file_path><experiment_info>_result.csv``).

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: if the configured weight file does not exist.
    """
    logger.info('------------MedQA v1.0 Evaluate--------------')
    logger.info(
        '============================loading config file... print config file ========================='
    )
    global_config = read_config(config_path)
    # Close the config file deterministically instead of leaking the handle.
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info(
        '^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^'
    )

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    # Inference only: globally disable autograd for everything below.
    torch.set_grad_enabled(False)

    # ---------------------------- obtain the dataset ----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['test']['model']
    dataset_h5_path = global_config['test']['dataset_h5']
    logger.info('Using dataset path is : %s', dataset_h5_path)
    logger.info('### Using model is: %s ###', model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'SeaReader_5c':
        model = SeaReader_5c(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v2':
        model = SeaReader_v2(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v3':
        model = SeaReader_v3(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v4':
        model = SeaReader_v4(dataset_h5_path, device)
    elif model_choose == 'No_content_model':
        # Unlike the other models this one is constructed without a device.
        model = No_content_model(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    print_network(model)
    logger.info('dataParallel using %d GPU.....', torch.cuda.device_count())
    model = model.to(device)
    model.eval()  # evaluation mode, so dropout behaves deterministically

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['test']['model_path']
    assert os.path.exists(
        model_weight_path
    ), "not found model weight file on '%s'" % model_weight_path

    if enable_cuda:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # testing arguments
    logger.info('get test data loader ...')
    test_batch_size = global_config['test']['test_batch_size']
    batch_test_data = dataset.get_dataloader_test(test_batch_size,
                                                  shuffle=False)

    # forward
    logger.info('evaluate forwarding...')
    # The concatenation always yields a string, so the former
    # ``out_path is not None`` guard could never be false and was removed.
    out_path = global_config['test'][
        'output_file_path'] + experiment_info + "_result.csv"
    logger.info("result output path is: %s", out_path)
    predict_on_model(model=model,
                     batch_data=batch_test_data,
                     device=device,
                     out_path=out_path)
    logger.info('finished.')
def _run_epochs(model, criterion, optimizer, batch_train_data, batch_dev_data,
                num_epochs, clip_grad_max, device, global_config, best_avg,
                log_prefix=''):
    """Train and validate for ``num_epochs`` epochs; return the updated best score.

    After each epoch the mean of the dev EM and F1 scores is compared with
    ``best_avg``; on improvement ``save_model`` is called with the paths from
    ``global_config['data']``. ``log_prefix`` is prepended to the per-epoch
    log messages so the individual training stages can be told apart.
    """
    for epoch in range(num_epochs):
        # train
        model.train()  # training mode, so dropout is active
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('%sepoch=%d, sum_loss=%.5f', log_prefix, epoch, sum_loss)

        # evaluate
        with torch.no_grad():
            model.eval()  # evaluation mode, so dropout is disabled
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(
                model=model,
                criterion=criterion,
                batch_data=batch_dev_data,
                epoch=epoch,
                device=device)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info(
            "%sepoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f",
            log_prefix, epoch, valid_score_em, valid_score_f1, valid_loss)

        # save model when best avg score
        if valid_avg > best_avg:
            save_model(
                model,
                epoch=epoch,
                model_weight_path=global_config['data']['model_path'],
                checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d", epoch)
            best_avg = valid_avg

    return best_avg


def train(config_path):
    """Train the configured model on SQuAD, optionally followed by up to two
    fine-tuning stages on alternative data loaders.

    The best dev score (mean of EM and F1) is carried across all stages, so a
    fine-tuning stage only overwrites the saved weights when it beats every
    earlier epoch.

    Args:
        config_path: path handed to ``read_config``.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model or optimizer name is unknown.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path,
                              global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer -- only parameters that require gradients are optimised.
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    # Only SGD receives the configured learning rate; the other optimizers are
    # created with their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' %
                         optimizer_choose)

    # Resume from an existing weight file when there is one.
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # Deserialise the checkpoint exactly once (the original read it from
        # disk twice whenever CUDA was enabled).
        if enable_cuda:
            weight = torch.load(
                weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    # Stage 1: main training.
    best_avg = _run_epochs(model, criterion, optimizer,
                           batch_train_data, batch_dev_data,
                           global_config['train']['epoch'],
                           clip_grad_max, device, global_config,
                           best_avg=0.)
    logger.info('pretraining finished.')

    # Stage 2: optional fine-tuning on the second pair of data loaders.
    if global_config['global']['finetune']:
        batch_train_data = dataset.get_dataloader_train2(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev2(valid_batch_size,
                                                     num_workers)
        best_avg = _run_epochs(model, criterion, optimizer,
                               batch_train_data, batch_dev_data,
                               global_config['train']['finetune_epoch'],
                               clip_grad_max, device, global_config,
                               best_avg=best_avg,
                               log_prefix='finetune ')

    # Stage 3: optional second fine-tuning on the third pair of data loaders.
    if global_config['global']['finetune2']:
        batch_train_data = dataset.get_dataloader_train3(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev3(valid_batch_size,
                                                     num_workers)
        best_avg = _run_epochs(model, criterion, optimizer,
                               batch_train_data, batch_dev_data,
                               global_config['train']['finetune_epoch2'],
                               clip_grad_max, device, global_config,
                               best_avg=best_avg,
                               log_prefix='finetune2 ')

    logger.info('finished.')
def main():
    """Answer one hand-written question with a trained ``MatchLSTMModel`` and
    save heatmaps of the tensors the model returns in ``vis_param``.

    The passage and questions are hard-coded; edit the "change here" lines to
    pick another question. Heatmap images are written to ``data/test-*.png``.

    Raises:
        AssertionError: if the configured weight file does not exist.
    """
    logger.info('------------Match-LSTM TEST INPUT--------------')
    logger.info('loading config file...')
    global_config = read_config()

    # set random seed
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    # A bare ``torch.no_grad()`` call only builds a context manager and throws
    # it away, so it never disabled autograd; switch it off globally instead.
    torch.set_grad_enabled(False)

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config)
    model.eval()  # evaluation mode, so dropout behaves deterministically
    logger.info('model parameters count: %d', count_parameters(model))

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    is_exist_model_weight = os.path.exists(model_weight_path)
    assert is_exist_model_weight, "not found model weight file on '%s'" % model_weight_path

    weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # manual input qa
    context = (
        "In 1870, Tesla moved to Karlovac, to attend school at the Higher Real Gymnasium, where he was "
        "profoundly influenced by a math teacher Martin Sekuli\u0107.:32 The classes were held in German, "
        "as it was a school within the Austro-Hungarian Military Frontier. Tesla was able to perform integral "
        "calculus in his head, which prompted his teachers to believe that he was cheating. He finished a "
        "four-year term in three years, graduating in 1873.:33 "
    )
    question1 = "What language were classes held in at Tesla's school?"
    answer1 = ["German"]

    question2 = "Who was Tesla influenced by while in school?"
    answer2 = ["Martin Sekuli\u0107"]

    question3 = "Why did Tesla go to Karlovac?"
    answer3 = [
        "attend school at the Higher Real Gymnasium", 'to attend school'
    ]

    # change here to select questions
    question = question1
    answer = answer1[0]

    # preprocess
    context_token = nltk.word_tokenize(context)
    question_token = nltk.word_tokenize(question)

    context_id = dataset.sentence_word2id(context_token)
    question_id = dataset.sentence_word2id(question_token)

    context_id_char = dataset.sentence_char2id(context_token)
    question_id_char = dataset.sentence_char2id(question_token)

    # Add a leading axis of size 1 to every input (one sample per call).
    context_var, question_var, context_var_char, question_var_char = [
        to_long_tensor(x).unsqueeze(0)
        for x in [context_id, question_id, context_id_char, question_id_char]
    ]

    out_ans_prop, out_ans_range, vis_param = model.forward(
        context_var, question_var, context_var_char, question_var_char)
    out_ans_range = out_ans_range.cpu().data.numpy()

    # The predicted end index is inclusive, hence the +1 for slicing.
    start = out_ans_range[0][0]
    end = out_ans_range[0][1] + 1

    out_answer_id = context_id[start:end]
    out_answer = dataset.sentence_id2word(out_answer_id)

    logger.info('Predict Answer: %s', ' '.join(out_answer))

    # Heatmaps are restricted to context tokens s..e.
    s = 0
    e = 48

    x_left = vis_param['match']['left'][0, :, s:e].cpu().data.numpy()
    x_right = vis_param['match']['right'][0, :, s:e].cpu().data.numpy()

    draw_heatmap_sea(x_left,
                     xlabels=context_token[s:e],
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-left.png',
                     bottom=0.45)
    draw_heatmap_sea(x_right,
                     xlabels=context_token[s:e],
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-right.png',
                     bottom=0.45)

    if global_config['model']['interaction']['enable_self_match']:
        x_self_left = vis_param['self']['left'][0, s:e, s:e].cpu().data.numpy()
        x_self_right = vis_param['self']['right'][0, s:e, s:e].cpu().data.numpy()

        draw_heatmap_sea(x_self_left,
                         xlabels=context_token[s:e],
                         ylabels=context_token[s:e],
                         answer=answer,
                         save_path='data/test-self-left.png',
                         inches=(11, 11),
                         bottom=0.2)
        draw_heatmap_sea(x_self_right,
                         xlabels=context_token[s:e],
                         ylabels=context_token[s:e],
                         answer=answer,
                         save_path='data/test-self-right.png',
                         inches=(11, 11),
                         bottom=0.2)
def main(config_path, out_path):
    """Evaluate a trained Match-LSTM model on the SQuAD dev batches.

    Args:
        config_path: path of the config file handed to ``read_config``.
        out_path: when ``None`` the EM/F1/loss scores are only logged;
            otherwise the predicted answers are written to this path as a
            JSON object mapping sample id -> predicted answer.

    Raises:
        ValueError: CUDA is requested in the config but not available.
        AssertionError: the model weight file does not exist.
    """
    logger.info('------------Match-LSTM Evaluate--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    # A bare `torch.no_grad()` call only builds a context manager and never
    # enters it, so it does not disable autograd.  Switch gradients off
    # explicitly so every tensor below has requires_grad=False.
    torch.set_grad_enabled(False)

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    model.eval()  # training = False, so dropout behaves correctly

    # load model weight (read the checkpoint once, straight onto the target device)
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), "not found model weight file on '%s'" % model_weight_path

    def _map_location(storage, loc):
        return storage.cuda() if enable_cuda else storage

    weight = torch.load(model_weight_path, map_location=_map_location)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')
    enable_char = global_config['model']['encoder']['enable_char']
    batch_size = global_config['test']['batch_size']
    batch_dev_data = list(dataset.get_batch_dev(batch_size))

    # either just evaluate the scores or write the answers to a file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device,
                                                     enable_char=enable_char,
                                                     batch_char_func=dataset.gen_batch_with_char)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" % (score_em, score_f1, sum_loss))
    else:
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       enable_char=enable_char,
                                       batch_char_func=dataset.gen_batch_with_char,
                                       id_to_word_func=dataset.sentence_id2word)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logging.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logging.info('finished.')
def __init__(self, config_file="/mnt/sdb/cjm/Match-LSTM/config/id19.yaml"):
    """Load config, dataset, model and weights for interactive prediction.

    After construction the instance exposes ``global_config``, ``device``,
    ``dataset``, ``model`` (in eval mode, weights loaded), ``nlp`` (spaCy
    pipeline) and ``metadata`` (dataset meta data converted with
    ``.tolist()``).

    Args:
        config_file: path of the config file handed to ``read_config``.

    Raises:
        ValueError: CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: the model weight file does not exist.
    """
    logger.info('------------MODEL TEST INPUT--------------')
    logger.info('loading config file...')
    self.global_config = read_config(config_file)

    # set random seed
    seed = self.global_config['global']['random_seed']
    torch.manual_seed(seed)

    # make sure all tensors below have requires_grad=False
    torch.set_grad_enabled(False)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(
        self.global_config['test']["gpu_id"])

    enable_cuda = self.global_config['test']['enable_cuda']
    self.device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning(
            "CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    self.dataset = SquadDataset(self.global_config)

    logger.info('constructing model...')
    model_choose = self.global_config['global']['model']
    dataset_h5_path = self.global_config['data']['dataset_h5']
    if model_choose == 'base':
        model = BaseModel(
            dataset_h5_path,
            model_config=read_config('config/base_model.yaml'))
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(
            dataset_h5_path,
            self.global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' %
                         model_choose)

    model = model.to(self.device)
    model.eval()  # training = False, so dropout behaves correctly
    logging.info('model parameters count: %d' % count_parameters(model))

    # load model weight (read the checkpoint once, straight onto the target device)
    logger.info('loading model weight...')
    model_weight_path = self.global_config['data']['model_path']
    is_exist_model_weight = os.path.exists(model_weight_path)
    assert is_exist_model_weight, "not found model weight file on '%s'" % model_weight_path

    def _map_location(storage, loc):
        return storage.cuda() if enable_cuda else storage

    weight = torch.load(model_weight_path, map_location=_map_location)
    model.load_state_dict(weight, strict=False)

    self.model = model
    self.nlp = spacy.load('en')
    self.metadata = {
        k: v.tolist()
        for k, v in self.dataset.meta_data.items()
    }
def train_no_content(config_path, experiment_info, thread_queue):
    """Train the configured MedQA model and log training metrics.

    Only the training pass runs each epoch; learning rate, average loss and
    problem accuracy are written to tensorboard and the LR scheduler is
    stepped on the training loss.

    Args:
        config_path: path of the config file; its raw text is also logged.
        experiment_info: experiment tag, used as the tensorboard log
            sub-directory and forwarded to ``train_on_model``.
        thread_queue: forwarded unchanged to ``train_on_model``.

    Raises:
        ValueError: CUDA is requested but unavailable, or the model /
            optimizer name in the config is unknown.
    """
    # The declaration must precede every assignment to these names in the
    # function: Python 3 rejects "assigned to before global declaration"
    # as a SyntaxError.
    global gpu_nums, init_embedding_weight, batch_test_data, batch_dev_data, \
        tensorboard_writer, test_epoch, embedding_layer_name, val_epoch, \
        global_config, best_valid_acc

    logger.info('------------MedQA v1.0 Train--------------')
    logger.info(
        '============================loading config file... print config file ========================='
    )
    global_config = read_config(config_path)
    with open(config_path) as config_file:  # close the handle deterministically
        logger.info(config_file.read())
    logger.info(
        '^^^^^^^^^^^^^^^^^^^^^^ config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^'
    )

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    test_epoch = 0
    val_epoch = 0

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    # ---------------------------- load dataset ----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'SeaReader_5c':
        model = SeaReader_5c(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v2':
        model = SeaReader_v2(dataset_h5_path, device)
    elif model_choose == 'SeaReader_v3':
        model = SeaReader_v3(dataset_h5_path, device)
    elif model_choose == 'No_content_model':
        model = No_content_model(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    print_network(model)
    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    task_criterion = SVM_loss().to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = delta_embedding_Loss(c=1).to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param,
                               lr=optimizer_lr,
                               eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' %
                         optimizer_choose)

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.2,
                                  patience=2,
                                  verbose=True)

    # resume from an existing checkpoint when asked to
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(
                weight_path,
                map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path,
                                map_location=lambda storage, loc: storage)
        # TODO: later versions may not need this any more
        if not global_config['train']['keep_embedding']:
            # drop the embedding layer parameters to avoid a size mismatch
            del weight['module.embedding.embedding_layer.weight']
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training............................................')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    test_batch_size = global_config['train']['test_batch_size']

    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    shuffle=False)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size,
                                                shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size,
                                                  shuffle=False)

    clip_grad_max = global_config['train']['clip_grad_norm']

    # tensorboardX writer
    tensorboard_writer = SummaryWriter(
        log_dir=os.path.join('tensorboard_logdir', experiment_info))

    best_valid_acc = None
    try:
        # NOTE: dev/test evaluation is not run here; only training metrics
        # are recorded and the scheduler follows the training loss.
        for epoch in range(global_config['train']['epoch']):
            model.train()  # training = True, so dropout is active
            train_avg_loss, train_avg_problem_acc = train_on_model(
                model=model,
                criterion=all_criterion,
                optimizer=optimizer,
                batch_data=batch_train_data,
                epoch=epoch,
                clip_grad_max=clip_grad_max,
                device=device,
                thread_queue=thread_queue,
                experiment_info=experiment_info)

            tensorboard_writer.add_scalar("train/lr",
                                          optimizer.param_groups[0]['lr'],
                                          epoch)
            tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss,
                                          epoch)
            tensorboard_writer.add_scalar("train/problem_acc",
                                          train_avg_problem_acc, epoch)

            # adjust learning rate
            scheduler.step(train_avg_loss)

        logger.info('finished.................................')
    finally:
        # flush and close the event file even when training raises
        tensorboard_writer.close()
def debug(config_path, experiment_info):
    """Run one small training epoch for debugging purposes.

    Builds dataset, model, criteria, optimizer and scheduler from the config,
    optionally restores a checkpoint, opens an interactive ``embed()`` shell,
    then runs a single ``train_on_model`` pass over the dev loader with a
    fixed batch size of 10.  ``experiment_info`` is accepted but not used.
    """
    global batch_test_data

    logger.info('------------MedQA v1.0 Train--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    torch.manual_seed(global_config['global']['random_seed'])

    train_cfg = global_config['train']
    enable_cuda = train_cfg['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    cuda_present = torch.cuda.is_available()
    if cuda_present and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    if enable_cuda and not cuda_present:
        raise ValueError(
            "CUDA is not available, please unable CUDA in config file")

    # ---------------------------- load dataset ----------------------------
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)

    # Builders are lambdas so a class name is only resolved when selected.
    model_builders = {
        'SeaReader': lambda: SeaReader(dataset_h5_path, device),
        'SimpleSeaReader': lambda: SimpleSeaReader(dataset_h5_path, device),
        'TestModel': lambda: TestModel(dataset_h5_path, device),
        'cnn_model': lambda: cnn_model(dataset_h5_path, device),
        'SeaReader_v2': lambda: SeaReader_v2(dataset_h5_path, device),
        'SeaReader_v3': lambda: SeaReader_v3(dataset_h5_path, device),
        'SeaReader_v4': lambda: SeaReader_v4(dataset_h5_path, device),
        'SeaReader_v4_5': lambda: SeaReader_v4_5(dataset_h5_path, device),
        'SeaReader_v5': lambda: SeaReader_v5(dataset_h5_path, device),
        'SeaReader_v6': lambda: SeaReader_v6(dataset_h5_path, device),
        'No_content_model': lambda: No_content_model(dataset_h5_path),
        'SeaReader_attention':
        lambda: SeaReader_attention(dataset_h5_path, device),
    }
    build_model = (model_builders.get(model_choose)
                   if isinstance(model_choose, str) else None)
    if build_model is None:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)
    model = build_model()

    print_network(model)
    gpu_count = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_count)
    if gpu_count > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    class_weight = torch.tensor([0.2, 0.8]).to(device)
    all_criterion = [
        CrossEntropyLoss(weight=class_weight).to(device),  # task
        gate_Loss().to(device),  # gate
        Embedding_reg_L21_Loss().to(device),  # embedding regulariser
    ]

    # optimizer
    optimizer_choose = train_cfg['optimizer']
    optimizer_lr = train_cfg['learning_rate']
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer_builders = {
        'adamax': lambda: optim.Adamax(trainable_params),
        'adadelta': lambda: optim.Adadelta(trainable_params),
        'adam': lambda: optim.Adam(trainable_params, lr=optimizer_lr),
        'sgd': lambda: optim.SGD(trainable_params, lr=optimizer_lr),
    }
    build_optimizer = (optimizer_builders.get(optimizer_choose)
                       if isinstance(optimizer_choose, str) else None)
    if build_optimizer is None:
        raise ValueError('optimizer "%s" in config file not recoginized' %
                         optimizer_choose)
    optimizer = build_optimizer()

    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.2,
                                  patience=5,
                                  verbose=True)

    # restore an existing checkpoint when asked to continue
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and train_cfg['continue']:
        logger.info('loading existing weight............')

        def _to_target(storage, loc):
            return storage.cuda() if enable_cuda else storage

        weight = torch.load(weight_path, map_location=_to_target)
        model.load_state_dict(weight, strict=False)

    embed()  # drop into an interactive shell before training starts

    # training arguments
    logger.info('start training............................................')
    train_batch_size = valid_batch_size = test_batch_size = 10

    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    shuffle=True)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size,
                                                shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size,
                                                  shuffle=False)

    clip_grad_max = train_cfg['clip_grad_norm']
    enable_char = False
    best_valid_acc = None

    # Single epoch; evaluation, checkpointing and tensorboard logging are
    # not performed in this debug run.
    for epoch in range(1):
        model.train()  # training = True, so dropout is active
        train_avg_loss, train_avg_binary_acc = train_on_model(
            model=model,
            criterion=all_criterion,
            optimizer=optimizer,
            batch_data=batch_dev_data,
            epoch=epoch,
            clip_grad_max=clip_grad_max,
            device=device,
            enable_char=enable_char,
            batch_char_func=dataset.gen_batch_with_char)

        # adjust learning rate
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
def main():
    """Answer one hand-written question and save attention heat maps.

    Loads the configured model and its weights on CPU, runs a single
    context/question pair through it, logs the predicted answer and writes
    the left/right match-attention heat maps under ``data/``.
    """
    logger.info('------------MODEL TEST INPUT--------------')
    logger.info('loading config file...')
    global_config = read_config()

    # set random seed
    torch.manual_seed(global_config['global']['random_seed'])

    # make sure all tensors below have requires_grad=False
    torch.set_grad_enabled(False)

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']

    # Builders are lambdas so a class name is only resolved when selected.
    builders = {
        'base': lambda: BaseModel(
            dataset_h5_path,
            model_config=read_config('config/base_model.yaml')),
        'match-lstm': lambda: MatchLSTM(dataset_h5_path),
        'match-lstm+': lambda: MatchLSTMPlus(dataset_h5_path),
        'r-net': lambda: RNet(dataset_h5_path),
        'm-reader': lambda: MReader(dataset_h5_path),
    }
    build = builders.get(model_choose) if isinstance(model_choose, str) else None
    if build is None:
        raise ValueError('model "%s" in config file not recoginized' % model_choose)
    model = build()

    model.eval()  # training = False, so dropout behaves correctly
    logging.info('model parameters count: %d' % count_parameters(model))

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    is_exist_model_weight = os.path.exists(model_weight_path)
    assert is_exist_model_weight, "not found model weight file on '%s'" % model_weight_path

    def _keep_on_cpu(storage, loc):
        return storage

    weight = torch.load(model_weight_path, map_location=_keep_on_cpu)
    model.load_state_dict(weight, strict=False)

    # manual input qa
    context = (
        "In 1870, Tesla moved to Karlovac, to attend school at the Higher Real Gymnasium, where he was "
        "profoundly influenced by a math teacher Martin Sekuli\u0107.:32 The classes were held in German, "
        "as it was a school within the Austro-Hungarian Military Frontier. Tesla was able to perform integral "
        "calculus in his head, which prompted his teachers to believe that he was cheating. He finished a "
        "four-year term in three years, graduating in 1873.:33 "
    )
    question1 = "What language were classes held in at Tesla's school?"
    answer1 = ["German"]

    question2 = "Who was Tesla influenced by while in school?"
    answer2 = ["Martin Sekuli\u0107"]

    question3 = "Why did Tesla go to Karlovac?"
    answer3 = [
        "attend school at the Higher Real Gymnasium", 'to attend school'
    ]

    # change here to select questions
    question = question1
    answer = answer1[0]

    # preprocess
    nlp = spacy.load('en')
    preprocess_cfg = global_config['preprocess']
    context_doc = DocText(nlp, context, preprocess_cfg)
    question_doc = DocText(nlp, question, preprocess_cfg)

    context_doc.update_em(question_doc)
    question_doc.update_em(context_doc)

    context_token = context_doc.token
    question_token = question_doc.token

    context_id_char = to_long_tensor(dataset.sentence_char2id(context_token))
    question_id_char = to_long_tensor(dataset.sentence_char2id(question_token))

    context_id, context_f = context_doc.to_id(dataset.meta_data)
    question_id, question_f = question_doc.to_id(dataset.meta_data)

    # add a leading batch axis to every input that is present
    raw_inputs = (context_id, question_id, context_id_char,
                  question_id_char, context_f, question_f)
    bat_input = [None if t is None else t.unsqueeze(0) for t in raw_inputs]

    out_ans_prop, out_ans_range, vis_param = model.forward(*bat_input)
    out_ans_range = out_ans_range.numpy()

    first_range = out_ans_range[0]
    start = first_range[0]
    end = first_range[1] + 1

    out_answer_id = context_id[start:end]
    out_answer = dataset.sentence_id2word(out_answer_id)

    logging.info('Predict Answer: ' + ' '.join(out_answer))

    # heat maps are limited to the context window [s, e)
    s = 0
    e = 48
    window_tokens = context_token[s:e]

    match_vis = vis_param['match']
    x_left = match_vis['left']['alpha'][0, :, s:e].numpy()
    x_right = match_vis['right']['alpha'][0, :, s:e].numpy()

    x_left_gated = match_vis['left']['gated'][0, :, s:e].numpy()
    x_right_gated = match_vis['right']['gated'][0, :, s:e].numpy()

    draw_heatmap_sea(x_left,
                     xlabels=window_tokens,
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-left.png',
                     bottom=0.45)
    draw_heatmap_sea(x_right,
                     xlabels=window_tokens,
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-right.png',
                     bottom=0.45)

    enable_self_match = False
    if not enable_self_match:
        return

    self_vis = vis_param['self']
    x_self_left = self_vis['left']['alpha'][0, s:e, s:e].numpy()
    x_self_right = self_vis['right']['alpha'][0, s:e, s:e].numpy()

    draw_heatmap_sea(x_self_left,
                     xlabels=context_token[s:e],
                     ylabels=context_token[s:e],
                     answer=answer,
                     save_path='data/test-self-left.png',
                     inches=(11, 11),
                     bottom=0.2)
    draw_heatmap_sea(x_self_right,
                     xlabels=context_token[s:e],
                     ylabels=context_token[s:e],
                     answer=answer,
                     save_path='data/test-self-right.png',
                     inches=(11, 11),
                     bottom=0.2)
def test(config_path, out_path):
    """Evaluate the configured model, or dump its predictions to JSON.

    The test split is used when ``'test_path'`` is present in
    ``global_config['data']['dataset']``; otherwise the dev split is used.
    The batches, the context right-space information and the sample ids are
    all taken from that same split, so predictions line up with their ids.

    Args:
        config_path: path of the config file handed to ``read_config``.
        out_path: when ``None`` the EM/F1/loss scores are only logged;
            otherwise the predicted answers are written to this path as a
            JSON object mapping sample id -> predicted answer.

    Raises:
        ValueError: CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: the model weight file does not exist.
    """
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    # NOTE(review): the gpu id is read from the 'train' section while every
    # other setting here comes from 'test' -- confirm this is intended.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    # make sure all tensors below have requires_grad=False
    torch.set_grad_enabled(False)

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path,
                              global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' %
                         model_choose)

    model = model.to(device)
    model.eval()  # training = False, so dropout behaves correctly

    # load model weight (read the checkpoint once, straight onto the target device)
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(
        model_weight_path
    ), "not found model weight file on '%s'" % model_weight_path

    def _map_location(storage, loc):
        return storage.cuda() if enable_cuda else storage

    weight = torch.load(model_weight_path, map_location=_map_location)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')

    batch_size = global_config['test']['batch_size']
    num_workers = global_config['global']['num_data_workers']

    # Decide the split once and use it for batches, right-space info and ids.
    use_dev_split = 'test_path' not in global_config['data']['dataset']
    if use_dev_split:
        batch_test_data = dataset.get_dataloader_dev(batch_size, num_workers)
    else:
        batch_test_data = dataset.get_dataloader_test(batch_size, num_workers)

    # either just evaluate the scores or write the answers to a file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(
            model=model,
            criterion=criterion,
            batch_data=batch_test_data,
            epoch=None,
            device=device)
        logger.info(
            "test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
            (score_em, score_f1, sum_loss))
    else:
        if use_dev_split:
            context_right_space = dataset.get_all_ct_right_space_dev()
        else:
            context_right_space = dataset.get_all_ct_right_space_test()

        predict_ans = predict_on_model(
            model=model,
            batch_data=batch_test_data,
            device=device,
            id_to_word_func=dataset.sentence_id2word,
            right_space=context_right_space)

        if use_dev_split:
            samples_id = dataset.get_all_samples_id_dev()
        else:
            samples_id = dataset.get_all_samples_id_test()
        ans_with_id = dict(zip(samples_id, predict_ans))

        logging.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logging.info('finished.')
def main():
    """Answer one hand-written Chinese question and save attention heat maps.

    Loads ``MatchLSTMPlus`` (and, when enabled in the config, a ``ReRanker``)
    with their weights on CPU, runs a single context/question pair through
    the model, logs the predicted answer and writes the left/right
    match-attention heat maps under ``data/``.
    """
    logger.info('------------MODEL TEST INPUT--------------')
    logger.info('loading config file...')

    # manual set
    global_config = read_config('config/global_config.yaml')

    # set random seed
    torch.manual_seed(global_config['global']['random_seed'])

    # make sure all tensors below have requires_grad=False
    torch.set_grad_enabled(False)

    logger.info('reading dataset...')
    dataset = Dataset(global_config)

    logger.info('constructing model...')
    dataset_h5_path = global_config['data']['dataset_h5']
    model = MatchLSTMPlus(dataset_h5_path)
    model.eval()  # training = False, so dropout behaves correctly
    logging.info('model parameters count: %d' % count_parameters(model))

    model_rerank = None
    rank_k = global_config['global']['rank_k']
    if global_config['global']['enable_rerank']:
        model_rerank = ReRanker(dataset_h5_path)
        model_rerank.eval()
        logging.info('rerank model parameters count: %d' % count_parameters(model_rerank))

    def _keep_on_cpu(storage, loc):
        return storage

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    is_exist_model_weight = os.path.exists(model_weight_path)
    assert is_exist_model_weight, "not found model weight file on '%s'" % model_weight_path

    weight = torch.load(model_weight_path, map_location=_keep_on_cpu)
    model.load_state_dict(weight, strict=False)

    if global_config['global']['enable_rerank']:
        rerank_weight_path = global_config['data']['rerank_model_path']
        assert os.path.exists(
            rerank_weight_path
        ), "not found rerank model weight file on '%s'" % rerank_weight_path
        logger.info('loading rerank model weight...')
        weight = torch.load(rerank_weight_path, map_location=_keep_on_cpu)
        model_rerank.load_state_dict(weight, strict=False)

    context = (
        "《战国无双3》()是由光荣和ω-force开发的战国无双系列的正统第三续作。"
        "本作以三大故事为主轴,分别是以武田信玄等人为主的《关东三国志》,织田信长等人为主的《战国三杰》,石田三成等人为主的《关原的年轻武者》,丰富游戏内的剧情。"
        "此部份专门介绍角色,欲知武器情报、奥义字或擅长攻击类型等,请至战国无双系列1.由于乡里大辅先生因故去世,不得不寻找其他声优接手。"
        "从猛将传 and Z开始。2.战国无双 编年史的原创男女主角亦有专属声优。"
        "此模式是任天堂游戏谜之村雨城改编的新增模式。"
        "本作中共有20张战场地图(不含村雨城),后来发行的猛将传再新增3张战场地图。"
        "但游戏内战役数量繁多,部分地图会有兼用的状况,战役虚实则是以光荣发行的2本「战国无双3 人物真书」内容为主,以下是相关介绍。"
        "(注:前方加☆者为猛将传新增关卡及地图。)"
        "合并本篇和猛将传的内容,村雨城模式剔除,战国史模式可直接游玩。"
        "主打两大模式「战史演武」&「争霸演武」。系列作品外传作品"
    )
    question1 = "《战国无双3》是由哪两个公司合作开发的?"
    answer1 = ['光荣和ω-force']

    question2 = '男女主角亦有专属声优这一模式是由谁改编的?'
    answer2 = ['村雨城', '任天堂游戏谜之村雨城']

    question3 = '战国史模式主打哪两个模式?'
    answer3 = ['「战史演武」&「争霸演武」']

    # change here to select questions
    question = question2
    answer = answer2[0]

    # preprocess
    preprocess_config = global_config['preprocess']
    context_doc = DocTextCh(context, preprocess_config)
    question_doc = DocTextCh(question, preprocess_config)

    link_char = ''
    # font that can render the Chinese tick labels
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    context_doc.update_em(question_doc)
    question_doc.update_em(context_doc)

    context_token = context_doc.token
    question_token = question_doc.token

    # character ids are only built when the preprocessing config asks for them
    context_id_char = None
    question_id_char = None
    if preprocess_config['use_char']:
        context_id_char = to_long_tensor(
            dataset.sentence_char2id(context_token))
        question_id_char = to_long_tensor(
            dataset.sentence_char2id(question_token))

    context_id, context_f = context_doc.to_id(dataset.meta_data)
    question_id, question_f = question_doc.to_id(dataset.meta_data)

    # add a leading batch axis to every input that is present
    raw_inputs = (context_id, question_id, context_id_char,
                  question_id_char, context_f, question_f)
    bat_input = [None if t is None else t.unsqueeze(0) for t in raw_inputs]

    # predict
    out_ans_prop, out_ans_range, vis_param = model.forward(*bat_input)
    if model_rerank is not None:
        cand_ans_range = beam_search(out_ans_prop, k=rank_k)
        cand_score, out_ans_range = model_rerank(bat_input[0], bat_input[1],
                                                 cand_ans_range)
    out_ans_range = out_ans_range.numpy()

    first_range = out_ans_range[0]
    start = first_range[0]
    end = first_range[1] + 1

    out_answer_id = context_id[start:end]
    out_answer = dataset.sentence_id2word(out_answer_id)

    logging.info('Predict Answer: ' + link_char.join(out_answer))

    # heat maps are limited to the context window [s, e)
    s = 0
    e = 48
    window_tokens = context_token[s:e]

    match_vis = vis_param['match']
    x_left = match_vis['left']['alpha'][0, :, s:e].numpy()
    x_right = match_vis['right']['alpha'][0, :, s:e].numpy()

    x_left_gated = match_vis['left']['gated'][0, :, s:e].numpy()
    x_right_gated = match_vis['right']['gated'][0, :, s:e].numpy()

    draw_heatmap_sea(x_left,
                     xlabels=window_tokens,
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-left.png',
                     bottom=0.2)
    draw_heatmap_sea(x_right,
                     xlabels=window_tokens,
                     ylabels=question_token,
                     answer=answer,
                     save_path='data/test-right.png',
                     bottom=0.2)
def train(config_path):
    """Train the MatchLSTMPlus reader or, when enabled, a ReRanker on top of it.

    Everything is driven by the config read from ``config_path``:

    * ``global.random_seed`` seeds torch, ``train.enable_cuda`` picks the device.
    * A ``MatchLSTMPlus`` model is always built. When ``global.enable_rerank``
      is truthy a ``ReRanker`` is built as well, and in that case only the
      re-ranker's trainable parameters are handed to the optimizer and the
      criterion is ``torch.nn.NLLLoss`` instead of ``MyNLLLoss``.
    * Existing weight files (``data.model_path`` / ``data.rerank_model_path``)
      are loaded non-strictly if they exist on disk.

    After every epoch the dev set is evaluated, and weights are saved whenever
    the average of the two scores returned by ``eval_on_model`` (logged as EM
    and F1) exceeds the best average seen so far in this run.

    Args:
        config_path: path passed to ``read_config``.

    Raises:
        ValueError: if CUDA is enabled in the config but not available, or if
            ``train.optimizer`` is not one of adamax/adadelta/adam/sgd.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    def _map_location(storage, loc):
        # Place checkpoint tensors directly on the configured device.
        return storage.cuda() if enable_cuda else storage

    def _load_weight(path):
        # Read the checkpoint exactly once (the file used to be read twice
        # when CUDA was enabled: once to CPU, then again to GPU).
        return torch.load(path, map_location=_map_location)

    logger.info('reading dataset...')
    dataset = Dataset(global_config)

    logger.info('constructing model...')
    dataset_h5_path = global_config['data']['dataset_h5']
    model = MatchLSTMPlus(dataset_h5_path)
    model = model.to(device)
    criterion = MyNLLLoss()
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    enable_rerank = global_config['global']['enable_rerank']
    model_rerank = None
    rank_k = global_config['global']['rank_k']
    if enable_rerank:
        # With re-ranking on, the optimizer only sees the re-ranker's parameters.
        model_rerank = ReRanker(dataset_h5_path)
        model_rerank = model_rerank.to(device)
        criterion = torch.nn.NLLLoss()
        optimizer_param = filter(lambda p: p.requires_grad, model_rerank.parameters())

    # optimizer
    # NOTE(review): the configured learning rate is only applied to SGD; the
    # other optimizers run with their library defaults -- confirm intended.
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    # check if exist model weight
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        weight = _load_weight(weight_path)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    rerank_weight_path = global_config['data']['rerank_model_path']
    if enable_rerank and os.path.exists(rerank_weight_path):
        logger.info('loading existing rerank weight...')
        weight = _load_weight(rerank_weight_path)
        model_rerank.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    best_avg = 0.
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        if enable_rerank:
            model_rerank.train()
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  model_rerank=model_rerank,
                                  rank_k=rank_k)
        logger.info('epoch=%d, sum_loss=%.5f', epoch, sum_loss)

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            if enable_rerank:
                model_rerank.eval()
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       model_rerank=model_rerank,
                                                                       rank_k=rank_k)
        valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f",
                    epoch, valid_score_em, valid_score_f1, valid_loss)

        # save model when best avg score
        if valid_avg > best_avg:
            if model_rerank is not None:
                save_model(model_rerank,
                           epoch=epoch,
                           model_weight_path=global_config['data']['rerank_model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                # Goes through the module logger like every other message here
                # (was emitted on the root logger).
                logger.info("saving rerank model weight on epoch=%d", epoch)
            else:
                save_model(model,
                           epoch=epoch,
                           model_weight_path=global_config['data']['model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d", epoch)
            best_avg = valid_avg

    logger.info('finished.')
def train(config_path, experiment_info):
    """Train an image classifier described by the config file and log to tensorboard.

    The config read from ``config_path`` supplies the random seed, device
    choice, train/dev file-list paths, batch sizes, the model name (looked up
    in this module's globals and called with no arguments), optimizer settings
    and output locations.  Each epoch runs ``train_on_model`` followed by
    ``eval_on_model``; the weights are saved whenever the validation accuracy
    beats the best value seen so far in this run, scalars are written to a
    tensorboard writer, and a ``ReduceLROnPlateau`` scheduler is stepped with
    the average training loss.

    Args:
        config_path: path passed to ``read_config``; the raw file content is
            also echoed to the log.
        experiment_info: string used as the tensorboard sub-directory name and
            as part of the saved weight / checkpoint-log file names.

    Raises:
        ValueError: if CUDA is enabled in the config but not available, or if
            ``train.optimizer`` is neither ``adam`` nor ``sgd``.
        AssertionError: if either dataset object is falsy.
    """
    logger.info('------------ flower classification Train --------------')
    logger.info('------------ loading config file ------------')
    global_config = read_config(config_path)
    # Close the handle deterministically instead of leaking it.
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info('------------ config file info above ------------')

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")
    cudnn.benchmark = True

    # ------------------------- build datasets -------------------------
    train_filelist_path = global_config['data']['dataset']['train_path']
    dev_filelist_path = global_config['data']['dataset']['dev_path']

    logger.info('constructing dataset...')
    normalize = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = filelist_DataSet(train_filelist_path,
                                     transform=transforms.Compose([
                                         transforms.RandomResizedCrop(224),
                                         transforms.RandomHorizontalFlip(),
                                         transforms.ToTensor(),
                                         transforms.Normalize(**normalize),
                                     ]))
    # NOTE(review): the dev pipeline also applies RandomHorizontalFlip, so
    # validation inputs are randomly flipped -- confirm this is intended.
    dev_dataset = filelist_DataSet(dev_filelist_path,
                                   transform=transforms.Compose([
                                       transforms.Resize([224, 224]),
                                       # transforms.CenterCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize(**normalize),
                                   ]))
    # The previous `assert train_dataset, dev_dataset` used dev_dataset only as
    # the assertion message and never checked it; check both explicitly.
    # (Truthiness presumably reflects the dataset length -- verify against
    # filelist_DataSet.)
    assert train_dataset, "train dataset built from '%s' is empty" % train_filelist_path
    assert dev_dataset, "dev dataset built from '%s' is empty" % dev_filelist_path

    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               num_workers=4)
    dev_loader = torch.utils.data.DataLoader(dev_dataset,
                                             batch_size=valid_batch_size,
                                             shuffle=False,
                                             num_workers=4)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    logger.info('Using model is: %s ', model_choose)
    # The model constructor is resolved by name from this module's globals;
    # an unknown name surfaces as KeyError.
    model = globals()[model_choose]()
    print_network(model)

    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....', gpu_nums)
    # Only wrap for multi-GPU when the run is actually configured for CUDA;
    # otherwise the model is moved to the CPU below and DataParallel would be
    # pointed at GPUs the parameters do not live on.
    if enable_cuda and gpu_nums > 1:
        model = torch.nn.DataParallel(model)

    model = model.to(device)
    task_criterion = CrossEntropyLoss().to(device)

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param,
                               lr=optimizer_lr,
                               eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param,
                              lr=optimizer_lr,
                              momentum=0.9,
                              weight_decay=1e-4)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=10, verbose=True)

    # check if exist model weight
    weight_path = global_config['train']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        model.load_state_dict(weight, strict=False)

    # save current code
    # save_cur_experiment_code_path = "savedcodes/" + experiment_info
    # save_current_codes(save_cur_experiment_code_path, global_config)

    # tensorboardX writer
    tensorboard_writer = SummaryWriter(log_dir=os.path.join('tensorboard_logdir', experiment_info))

    # training arguments
    logger.info('start training............................................')

    best_valid_acc = None
    try:
        # every epoch
        for epoch in range(global_config['train']['epoch']):
            # train
            model.train()  # set training = True, make sure right dropout
            train_avg_loss, train_avg_binary_acc = train_on_model(model=model,
                                                                  criterion=task_criterion,
                                                                  optimizer=optimizer,
                                                                  batch_data=train_loader,
                                                                  epoch=epoch,
                                                                  device=device)

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                val_loss, val_acc = eval_on_model(model=model,
                                                  criterion=task_criterion,
                                                  batch_data=dev_loader,
                                                  epoch=epoch,
                                                  device=device)

            # save model when best accuracy score
            if best_valid_acc is None or val_acc > best_valid_acc:
                cur_time = time.strftime('%Y-%m-%d-%H_%M_%S', time.localtime())
                epoch_info = "%s epoch=%d, cur best accuracy=%.4f" % (cur_time, epoch, val_acc)
                save_model(model,
                           epoch_info=epoch_info,
                           model_weight_path=global_config['train']['model_weight_dir'] + experiment_info + "_weight.pt",
                           save_ckpt_log_path=global_config['train']['ckpt_log_path'] + experiment_info + "_save.log")
                logger.info("========= saving model weight on epoch=%d =======", epoch)
                best_valid_acc = val_acc

            tensorboard_writer.add_scalar("train/lr", optimizer.param_groups[0]['lr'], epoch)
            tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss, epoch)
            tensorboard_writer.add_scalar("train/binary_acc", train_avg_binary_acc, epoch)
            tensorboard_writer.add_scalar("val/avg_loss", val_loss, epoch)
            tensorboard_writer.add_scalar("val/avg_accuracy", val_acc, epoch)

            # adjust learning rate (driven by the training loss)
            scheduler.step(train_avg_loss)

        logger.info('finished.................................')
    finally:
        # Always release the writer, even if an epoch raised.
        tensorboard_writer.close()
def main(config_path):
    """Train ``MatchLSTMModel`` on the SQuAD dataset described by the config.

    Reads the config at ``config_path``, seeds torch, builds the dataset and
    model, optionally restores weights from ``data.model_path`` (non-strict),
    then alternates ``train_on_model`` / ``eval_on_model`` for
    ``train.epoch`` epochs.  The weights are saved whenever the F1 score
    returned by ``eval_on_model`` beats the best one seen so far in this run
    (the first epoch always saves).

    Args:
        config_path: path passed to ``read_config``.

    Raises:
        ValueError: if CUDA is enabled in the config but not available, or if
            ``train.optimizer`` is not one of adamax/adadelta/adam/sgd.
    """
    logger.info('------------Match-LSTM Train--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    criterion = MyNLLLoss()

    # optimizer
    # NOTE(review): the configured learning rate is only applied to SGD; the
    # other optimizers run with their library defaults -- confirm intended.
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    # check if exist model weight
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # Pick the target placement first so the checkpoint is read once
        # (it used to be read to CPU and then read again for CUDA).
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    # batch_train_data = dataset.get_dataloader_train(train_batch_size)
    # batch_dev_data = dataset.get_dataloader_dev(valid_batch_size)
    # Materialised up front; presumably the getters return one-shot iterators
    # and the same batches must be replayed every epoch -- verify.
    batch_train_data = list(dataset.get_batch_train(train_batch_size))
    batch_dev_data = list(dataset.get_batch_dev(valid_batch_size))

    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = global_config['model']['encoder']['enable_char']

    best_valid_f1 = None
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  enable_char=enable_char,
                                  batch_char_func=dataset.gen_batch_with_char)
        logger.info('epoch=%d, sum_loss=%.5f', epoch, sum_loss)

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       enable_char=enable_char,
                                                                       batch_char_func=dataset.gen_batch_with_char)
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f",
                    epoch, valid_score_em, valid_score_f1, valid_loss)

        # save model when best f1 score
        if best_valid_f1 is None or valid_score_f1 > best_valid_f1:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d", epoch)
            best_valid_f1 = valid_score_f1

    logger.info('finished.')
def main(enable_self_match=False):
    """Run the configured model on one hand-written QA example and save attention heatmaps.

    Loads the default config, builds the model selected by ``global.model``,
    restores its weights from ``data.model_path`` (CPU, non-strict), feeds a
    fixed passage/question pair through ``model.forward`` with gradients
    globally disabled, logs the predicted answer and writes the left/right
    match-attention heatmaps for the first 48 context tokens to
    ``data/test-left.png`` and ``data/test-right.png``.

    Args:
        enable_self_match: when true, additionally plot the ``'self'``
            attention entries of the visualisation dict to
            ``data/test-self-left.png`` / ``data/test-self-right.png``.
            Defaults to ``False``, which matches the previous hard-coded
            behaviour.

    Raises:
        ValueError: if ``global.model`` names an unknown model.
        AssertionError: if the model weight file does not exist.
    """
    logger.info('------------MODEL TEST INPUT--------------')
    logger.info('loading config file...')
    global_config = read_config()

    # set random seed
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    torch.set_grad_enabled(False)  # make sure all tensors below have require_grad=False

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model = BaseModel(dataset_h5_path,
                          model_config=read_config('config/base_model.yaml'))
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' % model_choose)

    model.eval()  # let training = False, make sure right dropout
    # Use the module logger like the rest of this function (was root logging).
    logger.info('model parameters count: %d', count_parameters(model))

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    is_exist_model_weight = os.path.exists(model_weight_path)
    assert is_exist_model_weight, "not found model weight file on '%s'" % model_weight_path

    weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # manual input qa
    context = (
        "In 1870, Tesla moved to Karlovac, to attend school at the Higher Real Gymnasium, where he was "
        "profoundly influenced by a math teacher Martin Sekuli\u0107.:32 The classes were held in German, "
        "as it was a school within the Austro-Hungarian Military Frontier. Tesla was able to perform integral "
        "calculus in his head, which prompted his teachers to believe that he was cheating. He finished a "
        "four-year term in three years, graduating in 1873.:33 "
    )
    # The unused question/answer pairs are kept as alternatives for manual selection.
    question1 = "What language were classes held in at Tesla's school?"
    answer1 = ["German"]

    question2 = "Who was Tesla influenced by while in school?"
    answer2 = ["Martin Sekuli\u0107"]

    question3 = "Why did Tesla go to Karlovac?"
    answer3 = ["attend school at the Higher Real Gymnasium", 'to attend school']

    # change here to select questions
    question = question1
    answer = answer1[0]

    # preprocess
    nlp = spacy.load('en')
    context_doc = DocText(nlp, context, global_config['preprocess'])
    question_doc = DocText(nlp, question, global_config['preprocess'])

    context_doc.update_em(question_doc)
    question_doc.update_em(context_doc)

    context_token = context_doc.token
    question_token = question_doc.token

    context_id_char = to_long_tensor(dataset.sentence_char2id(context_token))
    question_id_char = to_long_tensor(dataset.sentence_char2id(question_token))

    context_id, context_f = context_doc.to_id(dataset.meta_data)
    question_id, question_f = question_doc.to_id(dataset.meta_data)

    # Add a leading dimension of size 1 to every non-None input.
    bat_input = [context_id, question_id, context_id_char, question_id_char, context_f, question_f]
    bat_input = [x.unsqueeze(0) if x is not None else x for x in bat_input]

    _, out_ans_range, vis_param = model.forward(*bat_input)
    out_ans_range = out_ans_range.numpy()

    # The second value of the predicted range is treated as inclusive, hence +1.
    start = out_ans_range[0][0]
    end = out_ans_range[0][1] + 1

    out_answer_id = context_id[start:end]
    out_answer = dataset.sentence_id2word(out_answer_id)

    logger.info('Predict Answer: ' + ' '.join(out_answer))

    # Plot only a fixed window of context tokens.
    win_start = 0
    win_end = 48
    window_tokens = context_token[win_start:win_end]

    # The 'gated' entries of vis_param were previously extracted here but
    # never plotted; that dead computation has been dropped.
    for side in ('left', 'right'):
        alpha = vis_param['match'][side]['alpha'][0, :, win_start:win_end].numpy()
        draw_heatmap_sea(alpha,
                         xlabels=window_tokens,
                         ylabels=question_token,
                         answer=answer,
                         save_path='data/test-%s.png' % side,
                         bottom=0.45)

    if enable_self_match:
        for side in ('left', 'right'):
            self_alpha = vis_param['self'][side]['alpha'][0, win_start:win_end, win_start:win_end].numpy()
            draw_heatmap_sea(self_alpha,
                             xlabels=window_tokens,
                             ylabels=window_tokens,
                             answer=answer,
                             save_path='data/test-self-%s.png' % side,
                             inches=(11, 11),
                             bottom=0.2)