Exemplo n.º 1
0
def train(config_path):
    """Train MatchLSTMPlus (optionally a ReRanker on top) from a YAML config.

    Reads all hyper-parameters from the config file, resumes from existing
    weight files when present, then runs a train/validate loop, saving the
    active model whenever the averaged dev EM/F1 score improves.

    Args:
        config_path: path to the YAML configuration file.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            optimizer name is unknown.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not abaliable, please unable CUDA in config file")

    logger.info('reading dataset...')
    dataset = Dataset(global_config)

    logger.info('constructing model...')
    dataset_h5_path = global_config['data']['dataset_h5']
    model = MatchLSTMPlus(dataset_h5_path)
    model = model.to(device)
    criterion = MyNLLLoss()
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    model_rerank = None
    rank_k = global_config['global']['rank_k']
    if global_config['global']['enable_rerank']:
        # when reranking is on, only the reranker is optimized and a plain
        # NLLLoss replaces the custom criterion
        model_rerank = ReRanker(dataset_h5_path)
        model_rerank = model_rerank.to(device)
        criterion = torch.nn.NLLLoss()
        optimizer_param = filter(lambda p: p.requires_grad, model_rerank.parameters())

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']

    # NOTE(review): the configured learning_rate is only applied for SGD;
    # the adaptive optimizers fall back to their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param,
                              lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    # check if exist model weight: resume (possibly partial) initialization
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # load the checkpoint once, mapped directly onto the target device
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    rerank_weight_path = global_config['data']['rerank_model_path']
    if global_config['global']['enable_rerank'] and os.path.exists(rerank_weight_path):
        logger.info('loading existing rerank weight...')
        if enable_cuda:
            weight = torch.load(rerank_weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(rerank_weight_path, map_location=lambda storage, loc: storage)
        model_rerank.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    best_avg = 0.
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        if global_config['global']['enable_rerank']:
            model_rerank.train()
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  model_rerank=model_rerank,
                                  rank_k=rank_k)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            if global_config['global']['enable_rerank']:
                model_rerank.eval()
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       model_rerank=model_rerank,
                                                                       rank_k=rank_k)
            valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model when best avg score; save whichever model is being trained
        if valid_avg > best_avg:
            if model_rerank is not None:
                save_model(model_rerank,
                           epoch=epoch,
                           model_weight_path=global_config['data']['rerank_model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                # was logging.info(): use the module logger consistently
                logger.info("saving rerank model weight on epoch=%d" % epoch)
            else:
                save_model(model,
                           epoch=epoch,
                           model_weight_path=global_config['data']['model_path'],
                           checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg

    logger.info('finished.')
Exemplo n.º 2
0
def main(config_path):
    """Train MatchLSTMModel on SQuAD from a YAML config.

    Resumes from an existing weight file when present, then runs a
    train/validate loop, saving the model whenever the dev F1 improves.

    Args:
        config_path: path to the YAML configuration file.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            optimizer name is unknown.
    """
    logger.info('------------Match-LSTM Train--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not abaliable, please unable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    # NOTE(review): the configured learning_rate is only applied for SGD;
    # the adaptive optimizers fall back to their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param,
                              lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    # check if exist model weight: resume (possibly partial) initialization
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # load the checkpoint once, mapped directly onto the target device
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    # batch_train_data = dataset.get_dataloader_train(train_batch_size)
    # batch_dev_data = dataset.get_dataloader_dev(valid_batch_size)
    # materialize batches up front so every epoch iterates the same data
    batch_train_data = list(dataset.get_batch_train(train_batch_size))
    batch_dev_data = list(dataset.get_batch_dev(valid_batch_size))

    clip_grad_max = global_config['train']['clip_grad_norm']
    enable_char = global_config['model']['encoder']['enable_char']

    best_valid_f1 = None
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device,
                                  enable_char=enable_char,
                                  batch_char_func=dataset.gen_batch_with_char)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device,
                                                                       enable_char=enable_char,
                                                                       batch_char_func=dataset.gen_batch_with_char)
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model when best f1 score
        if best_valid_f1 is None or valid_score_f1 > best_valid_f1:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_valid_f1 = valid_score_f1

    logger.info('finished.')
Exemplo n.º 3
0
def test(config_path, out_path):
    """Run inference with a trained model on the dev/test split.

    If out_path is None, evaluates EM/F1/loss and logs them; otherwise writes
    the predicted answers (keyed by sample id) to out_path as JSON.

    Args:
        config_path: path to the YAML configuration file.
        out_path: output JSON path for predictions, or None to only score.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model name is unknown.
        AssertionError: if the configured model weight file does not exist.
    """
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not abaliable, please unable CUDA in config file")

    torch.set_grad_enabled(
        False)  # make sure all tensors below have require_grad=False,

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path,
                              global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' %
                         model_choose)

    model = model.to(device)
    model.eval()  # let training = False, make sure right dropout

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(
        model_weight_path
    ), "not found model weight file on '%s'" % model_weight_path

    # load the checkpoint once, mapped directly onto the target device
    if enable_cuda:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')

    batch_size = global_config['test']['batch_size']

    num_workers = global_config['global']['num_data_workers']

    # fall back to the dev split when no test split is configured
    if 'test_path' not in global_config['data']['dataset']:
        batch_test_data = dataset.get_dataloader_dev(batch_size, num_workers)
    else:
        batch_test_data = dataset.get_dataloader_test(batch_size, num_workers)

    # to just evaluate score or write answer to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(
            model=model,
            criterion=criterion,
            batch_data=batch_test_data,
            epoch=None,
            device=device)
        logger.info(
            "test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
            (score_em, score_f1, sum_loss))
    else:
        # context_right_space = dataset.get_all_ct_right_space_dev()
        context_right_space = dataset.get_all_ct_right_space_test()
        predict_ans = predict_on_model(
            model=model,
            batch_data=batch_test_data,
            device=device,
            id_to_word_func=dataset.sentence_id2word,
            right_space=context_right_space)
        # samples_id = dataset.get_all_samples_id_dev()
        samples_id = dataset.get_all_samples_id_test()
        ans_with_id = dict(zip(samples_id, predict_ans))

        # was logging.info(): use the module logger consistently
        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
Exemplo n.º 4
0
def train(config_path):
    """Train one of several reader models (selected by config) on SQuAD.

    Resumes from an existing weight file when present, then runs a
    train/validate loop, saving the model whenever the averaged dev EM/F1
    score improves.

    Args:
        config_path: path to the YAML configuration file.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model/optimizer name is unknown.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not abaliable, please unable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path,
                          model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' % model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    # NOTE(review): the configured learning_rate is only applied for SGD;
    # the adaptive optimizers fall back to their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param,
                              lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' % optimizer_choose)

    # check if exist model weight: resume (possibly partial) initialization
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # load the checkpoint once, mapped directly onto the target device
        if enable_cuda:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path, map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size, num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    best_avg = 0.
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # set training = True, make sure right dropout
        sum_loss = train_on_model(model=model,
                                  criterion=criterion,
                                  optimizer=optimizer,
                                  batch_data=batch_train_data,
                                  epoch=epoch,
                                  clip_grad_max=clip_grad_max,
                                  device=device)
        logger.info('epoch=%d, sum_loss=%.5f' % (epoch, sum_loss))

        # evaluate
        with torch.no_grad():
            model.eval()  # let training = False, make sure right dropout
            valid_score_em, valid_score_f1, valid_loss = eval_on_model(model=model,
                                                                       criterion=criterion,
                                                                       batch_data=batch_dev_data,
                                                                       epoch=epoch,
                                                                       device=device)
            valid_avg = (valid_score_em + valid_score_f1) / 2
        logger.info("epoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" %
                    (epoch, valid_score_em, valid_score_f1, valid_loss))

        # save model when best avg score
        if valid_avg > best_avg:
            save_model(model,
                       epoch=epoch,
                       model_weight_path=global_config['data']['model_path'],
                       checkpoint_path=global_config['data']['checkpoint_path'])
            logger.info("saving model weight on epoch=%d" % epoch)
            best_avg = valid_avg

    logger.info('finished.')
Exemplo n.º 5
0
def train(config_path):
    """Pretrain and optionally fine-tune (twice) a reader model on SQuAD data.

    Runs a pretraining phase on the primary dataloaders, then up to two
    fine-tuning phases on alternate dataloaders when enabled in the config.
    The model is saved whenever the averaged dev EM/F1 score beats the best
    seen so far across ALL phases.

    Args:
        config_path: path to the YAML configuration file.

    Raises:
        ValueError: if CUDA is requested but unavailable, or the configured
            model/optimizer name is unknown.
    """
    logger.info('------------MODEL TRAIN--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    # set default gpu
    os.environ["CUDA_VISIBLE_DEVICES"] = str(global_config['train']["gpu_id"])

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not abaliable, please unable CUDA in config file")

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path, model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path,
                              global_config['preprocess']['use_domain_tag'])
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recoginized' %
                         model_choose)

    model = model.to(device)
    criterion = MyNLLLoss()

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    # NOTE(review): the configured learning_rate is only applied for SGD;
    # the adaptive optimizers fall back to their library defaults.
    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recoginized' %
                         optimizer_choose)

    # check if exist model weight: resume (possibly partial) initialization
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path):
        logger.info('loading existing weight...')
        # load the checkpoint once, mapped directly onto the target device
        if enable_cuda:
            weight = torch.load(
                weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path,
                                map_location=lambda storage, loc: storage)
        # weight = pop_dict_keys(weight, ['pointer', 'init_ptr_hidden'])  # partial initial weight
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training...')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']

    num_workers = global_config['global']['num_data_workers']
    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    num_workers)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size, num_workers)

    clip_grad_max = global_config['train']['clip_grad_norm']

    def _run_epochs(num_epochs, batch_train_data, batch_dev_data, prefix,
                    best_avg):
        """Run one phase of train/eval epochs; save on new best avg(EM, F1).

        prefix ('' / 'finetune ' / 'finetune2 ') tags the log lines so the
        output is identical to the previous hand-unrolled loops. Returns the
        updated best average score.
        """
        for epoch in range(num_epochs):
            # train
            model.train()  # set training = True, make sure right dropout
            sum_loss = train_on_model(model=model,
                                      criterion=criterion,
                                      optimizer=optimizer,
                                      batch_data=batch_train_data,
                                      epoch=epoch,
                                      clip_grad_max=clip_grad_max,
                                      device=device)
            logger.info('%sepoch=%d, sum_loss=%.5f' % (prefix, epoch, sum_loss))

            # evaluate
            with torch.no_grad():
                model.eval()  # let training = False, make sure right dropout
                valid_score_em, valid_score_f1, valid_loss = eval_on_model(
                    model=model,
                    criterion=criterion,
                    batch_data=batch_dev_data,
                    epoch=epoch,
                    device=device)
                valid_avg = (valid_score_em + valid_score_f1) / 2
            logger.info(
                "%sepoch=%d, ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f"
                % (prefix, epoch, valid_score_em, valid_score_f1, valid_loss))

            # save model when best avg score
            if valid_avg > best_avg:
                save_model(
                    model,
                    epoch=epoch,
                    model_weight_path=global_config['data']['model_path'],
                    checkpoint_path=global_config['data']['checkpoint_path'])
                logger.info("saving model weight on epoch=%d" % epoch)
                best_avg = valid_avg
        return best_avg

    # phase 1: pretraining on the primary dataloaders
    best_avg = _run_epochs(global_config['train']['epoch'], batch_train_data,
                           batch_dev_data, '', 0.)
    logger.info('pretraining finished.')

    # phase 2: optional fine-tuning on the second dataloaders
    if global_config['global']['finetune']:
        batch_train_data = dataset.get_dataloader_train2(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev2(valid_batch_size,
                                                     num_workers)
        best_avg = _run_epochs(global_config['train']['finetune_epoch'],
                               batch_train_data, batch_dev_data, 'finetune ',
                               best_avg)

    # phase 3: optional second fine-tuning on the third dataloaders
    if global_config['global']['finetune2']:
        batch_train_data = dataset.get_dataloader_train3(
            train_batch_size, num_workers)
        batch_dev_data = dataset.get_dataloader_dev3(valid_batch_size,
                                                     num_workers)
        best_avg = _run_epochs(global_config['train']['finetune_epoch2'],
                               batch_train_data, batch_dev_data, 'finetune2 ',
                               best_avg)

    logger.info('finished.')
Exemplo n.º 6
0
def main(config_path, out_path):
    """Evaluate a trained MatchLSTMModel on the SQuAD dev set.

    If out_path is None, computes and logs EM/F1/loss; otherwise writes the
    predicted answers (keyed by sample id) to out_path as JSON.

    Args:
        config_path: path to the YAML configuration file.
        out_path: output JSON path for predictions, or None to only score.

    Raises:
        ValueError: if CUDA is requested but unavailable.
        AssertionError: if the configured model weight file does not exist.
    """
    logger.info('------------Match-LSTM Evaluate--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['model']['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is avaliable, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not abaliable, please unable CUDA in config file")

    # BUG FIX: `torch.no_grad()` called as a bare statement builds a context
    # manager and immediately discards it, so gradients were NOT disabled.
    # Disable autograd globally for the rest of this inference-only function.
    torch.set_grad_enabled(False)  # make sure all tensors below have require_grad=False

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model = MatchLSTMModel(global_config).to(device)
    model.eval()  # let training = False, make sure right dropout

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), "not found model weight file on '%s'" % model_weight_path

    # load the checkpoint once, mapped directly onto the target device
    if enable_cuda:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage.cuda())
    else:
        weight = torch.load(model_weight_path, map_location=lambda storage, loc: storage)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')

    enable_char = global_config['model']['encoder']['enable_char']
    batch_size = global_config['test']['batch_size']
    # batch_dev_data = dataset.get_dataloader_dev(batch_size)
    batch_dev_data = list(dataset.get_batch_dev(batch_size))

    # to just evaluate score or write answer to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device,
                                                     enable_char=enable_char,
                                                     batch_char_func=dataset.gen_batch_with_char)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" % (score_em, score_f1, sum_loss))
    else:
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       enable_char=enable_char,
                                       batch_char_func=dataset.gen_batch_with_char,
                                       id_to_word_func=dataset.sentence_id2word)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        # was logging.info(): use the module logger consistently
        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
Exemplo n.º 7
0
def test(config_path, out_path):
    """
    Evaluate a trained model on the SQuAD dev set.

    :param config_path: path to the yaml config file
    :param out_path: when None, only log EM/F1 scores on the dev set;
                     otherwise write the predicted answers as json to this path
    :return: None
    """
    logger.info('------------MODEL PREDICT--------------')
    logger.info('loading config file...')
    global_config = read_config(config_path)

    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)

    enable_cuda = global_config['test']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError("CUDA is not available, please disable CUDA in config file")

    torch.set_grad_enabled(False)  # inference only: no tensor below needs gradients

    logger.info('reading squad dataset...')
    dataset = SquadDataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    if model_choose == 'base':
        model_config = read_config('config/base_model.yaml')
        model = BaseModel(dataset_h5_path,
                          model_config)
    elif model_choose == 'match-lstm':
        model = MatchLSTM(dataset_h5_path)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    elif model_choose == 'm-reader':
        model = MReader(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' % model_choose)

    model = model.to(device)
    model.eval()  # training = False: disable dropout for inference

    # load model weight
    logger.info('loading model weight...')
    model_weight_path = global_config['data']['model_path']
    assert os.path.exists(model_weight_path), "not found model weight file on '%s'" % model_weight_path

    # load the checkpoint exactly once, mapping storages to the right device
    # (the original loaded it twice when CUDA was enabled)
    if enable_cuda:
        map_location = lambda storage, loc: storage.cuda()
    else:
        map_location = lambda storage, loc: storage
    weight = torch.load(model_weight_path, map_location=map_location)
    model.load_state_dict(weight, strict=False)

    # forward
    logger.info('forwarding...')

    batch_size = global_config['test']['batch_size']
    num_workers = global_config['global']['num_data_workers']
    batch_dev_data = dataset.get_dataloader_dev(batch_size, num_workers)

    # either just evaluate scores, or write predicted answers to file
    if out_path is None:
        criterion = MyNLLLoss()
        score_em, score_f1, sum_loss = eval_on_model(model=model,
                                                     criterion=criterion,
                                                     batch_data=batch_dev_data,
                                                     epoch=None,
                                                     device=device)
        logger.info("test: ave_score_em=%.2f, ave_score_f1=%.2f, sum_loss=%.5f" % (score_em, score_f1, sum_loss))
    else:
        context_right_space = dataset.get_all_ct_right_space_dev()
        predict_ans = predict_on_model(model=model,
                                       batch_data=batch_dev_data,
                                       device=device,
                                       id_to_word_func=dataset.sentence_id2word,
                                       right_space=context_right_space)
        samples_id = dataset.get_all_samples_id_dev()
        ans_with_id = dict(zip(samples_id, predict_ans))

        # use the module logger (the original mixed in the root `logging` here)
        logger.info('writing predict answer to file %s' % out_path)
        with open(out_path, 'w') as f:
            json.dump(ans_with_id, f)

    logger.info('finished.')
Exemplo n.º 8
0
def train(config_path, experiment_info, thread_queue):
    """
    Train a MedQA model end-to-end: build dataset/model/optimizer from the
    yaml config, optionally resume from a checkpoint, then run the
    train/validate loop, saving the best weights and logging to tensorboard.

    :param config_path: path to the yaml config file
    :param experiment_info: tag used to name the saved-code snapshot, the
                            weight files and the tensorboard log directory
    :param thread_queue: queue passed through to train_on_model, used as an
                         inter-thread heartbeat channel
    :return: None
    """
    logger.info('------------MedQA v1.0 Train--------------')
    logger.info(
        '============================loading config file... print config file ========================='
    )
    global_config = read_config(config_path)
    # echo the raw config into the log; close the handle (the original
    # leaked it via open(...).read())
    with open(config_path) as config_file:
        logger.info(config_file.read())
    logger.info(
        '^^^^^^^^^^^^^^^^^^^^^^   config file info above ^^^^^^^^^^^^^^^^^^^^^^^^^'
    )
    # set random seed for reproducibility
    seed = global_config['global']['random_seed']
    torch.manual_seed(seed)
    # these globals are read by train_on_model for its periodic test-set eval
    global gpu_nums, init_embedding_weight, batch_test_data, tensorboard_writer, test_epoch, embedding_layer_name
    test_epoch = 0

    enable_cuda = global_config['train']['enable_cuda']
    device = torch.device("cuda" if enable_cuda else "cpu")
    if torch.cuda.is_available() and not enable_cuda:
        logger.warning("CUDA is available, you can enable CUDA in config file")
    elif not torch.cuda.is_available() and enable_cuda:
        raise ValueError(
            "CUDA is not available, please disable CUDA in config file")

    ############################### load dataset ############################
    logger.info('reading MedQA h5file dataset...')
    dataset = MedQADataset(global_config)

    logger.info('constructing model...')
    model_choose = global_config['global']['model']
    dataset_h5_path = global_config['data']['dataset_h5']
    logger.info('Using dataset path is : %s' % dataset_h5_path)
    logger.info('### Using model is: %s ###' % model_choose)
    if model_choose == 'SeaReader':
        model = SeaReader(dataset_h5_path, device)
    elif model_choose == 'SimpleSeaReader':
        model = SimpleSeaReader(dataset_h5_path, device)
    elif model_choose == 'TestModel':
        model = TestModel(dataset_h5_path, device)
    elif model_choose == 'cnn_model':
        model = cnn_model(dataset_h5_path, device)
    elif model_choose == 'match-lstm+':
        model = MatchLSTMPlus(dataset_h5_path)
    elif model_choose == 'r-net':
        model = RNet(dataset_h5_path)
    else:
        raise ValueError('model "%s" in config file not recognized' %
                         model_choose)

    print_network(model)
    gpu_nums = torch.cuda.device_count()
    logger.info('dataParallel using %d GPU.....' % gpu_nums)
    if gpu_nums > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)

    # locate the embedding weight in the state dict (its key gains a
    # 'module.' prefix under DataParallel) and keep a pristine copy for the
    # embedding regularization loss
    embedding_layer_name = 'module.embedding.embedding_layer.weight'
    for name in model.state_dict().keys():
        if 'embedding_layer.weight' in name:
            embedding_layer_name = name
            break
    init_embedding_weight = model.state_dict()[embedding_layer_name].clone()

    # class weights [0.2, 0.8] — presumably compensating the negative/positive
    # label imbalance among answer candidates; TODO confirm against dataset
    task_criterion = CrossEntropyLoss(
        weight=torch.tensor([0.2, 0.8]).to(device)).to(device)
    gate_criterion = gate_Loss().to(device)
    embedding_criterion = Embedding_reg_L21_Loss(c=0.01).to(device)
    all_criterion = [task_criterion, gate_criterion, embedding_criterion]

    # optimizer
    optimizer_choose = global_config['train']['optimizer']
    optimizer_lr = global_config['train']['learning_rate']
    optimizer_eps = float(global_config['train']['eps'])
    optimizer_param = filter(lambda p: p.requires_grad, model.parameters())

    if optimizer_choose == 'adamax':
        optimizer = optim.Adamax(optimizer_param)
    elif optimizer_choose == 'adadelta':
        optimizer = optim.Adadelta(optimizer_param)
    elif optimizer_choose == 'adam':
        optimizer = optim.Adam(optimizer_param,
                               lr=optimizer_lr,
                               eps=optimizer_eps)
    elif optimizer_choose == 'sgd':
        optimizer = optim.SGD(optimizer_param, lr=optimizer_lr)
    else:
        raise ValueError('optimizer "%s" in config file not recognized' %
                         optimizer_choose)

    # shrink the learning rate when the training loss plateaus
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=0.2,
                                  patience=5,
                                  verbose=True)

    # resume from an existing model weight if configured
    weight_path = global_config['data']['model_path']
    if os.path.exists(weight_path) and global_config['train']['continue']:
        logger.info('loading existing weight............')
        if enable_cuda:
            weight = torch.load(
                weight_path, map_location=lambda storage, loc: storage.cuda())
        else:
            weight = torch.load(weight_path,
                                map_location=lambda storage, loc: storage)
        # todo: later versions may not need this
        if not global_config['train']['keep_embedding']:
            # drop the checkpoint's embedding weight to avoid a size mismatch
            # when the vocabulary differs from the checkpoint's
            del weight[
                'module.embedding.embedding_layer.weight']
        model.load_state_dict(weight, strict=False)

    # training arguments
    logger.info('start training............................................')
    train_batch_size = global_config['train']['batch_size']
    valid_batch_size = global_config['train']['valid_batch_size']
    test_batch_size = global_config['train']['test_batch_size']

    batch_train_data = dataset.get_dataloader_train(train_batch_size,
                                                    shuffle=False)
    batch_dev_data = dataset.get_dataloader_dev(valid_batch_size,
                                                shuffle=False)
    batch_test_data = dataset.get_dataloader_test(test_batch_size,
                                                  shuffle=False)

    clip_grad_max = global_config['train']['clip_grad_norm']

    # snapshot the current source code for this experiment
    save_cur_experiment_code_path = "savedcodes/" + experiment_info
    save_current_codes(save_cur_experiment_code_path, global_config)

    # tensorboardX writer
    tensorboard_writer = SummaryWriter(
        log_dir=os.path.join('tensorboard_logdir', experiment_info))

    best_valid_acc = None
    # every epoch
    for epoch in range(global_config['train']['epoch']):
        # train
        model.train()  # training = True: enable dropout
        train_avg_loss, train_avg_binary_acc = train_on_model(
            model=model,
            criterion=all_criterion,
            optimizer=optimizer,
            batch_data=batch_train_data,
            epoch=epoch,
            clip_grad_max=clip_grad_max,
            device=device,
            thread_queue=thread_queue)

        # evaluate on the dev set
        with torch.no_grad():
            model.eval()  # training = False: disable dropout
            val_avg_loss, val_avg_binary_acc, val_avg_problem_acc = eval_on_model(
                model=model,
                criterion=all_criterion,
                batch_data=batch_dev_data,
                epoch=epoch,
                device=device,
                init_embedding_weight=init_embedding_weight,
                eval_dataset='dev')

        # save model weights whenever problem-level validation accuracy improves
        if best_valid_acc is None or val_avg_problem_acc > best_valid_acc:
            epoch_info = 'epoch=%d, val_binary_acc=%.4f, val_problem_acc=%.4f' % (
                epoch, val_avg_binary_acc, val_avg_problem_acc)
            save_model(
                model,
                epoch_info=epoch_info,
                model_weight_path=global_config['data']['model_weight_dir'] +
                experiment_info + "_model_weight.pt",
                checkpoint_path=global_config['data']['checkpoint_path'] +
                experiment_info + "_save.log")
            logger.info("=========  saving model weight on epoch=%d  =======" %
                        epoch)
            best_valid_acc = val_avg_problem_acc

        tensorboard_writer.add_scalar("train/lr",
                                      optimizer.param_groups[0]['lr'], epoch)
        tensorboard_writer.add_scalar("train/avg_loss", train_avg_loss, epoch)
        tensorboard_writer.add_scalar("train/binary_acc", train_avg_binary_acc,
                                      epoch)
        tensorboard_writer.add_scalar("val/avg_loss", val_avg_loss, epoch)
        tensorboard_writer.add_scalar("val/binary_acc", val_avg_binary_acc,
                                      epoch)
        tensorboard_writer.add_scalar("val/problem_acc", val_avg_problem_acc,
                                      epoch)

        # adjust learning rate on training-loss plateau
        scheduler.step(train_avg_loss)

    logger.info('finished.................................')
    tensorboard_writer.close()
Exemplo n.º 9
0
def train_on_model(model, criterion, optimizer, batch_data, epoch,
                   clip_grad_max, device, thread_queue):
    """
    Train the model over one epoch of batches, logging per-batch metrics and
    running a test-set evaluation every 200 batches.

    Relies on module-level globals set by the training entry point:
    ``test_epoch``, ``batch_test_data``, ``tensorboard_writer`` and
    ``init_embedding_weight``.

    :param model: the network being trained
    :param criterion: list [task_criterion, gate_criterion, embedding_criterion]
    :param optimizer: torch optimizer updating the model parameters
    :param batch_data: iterable of training batches, each a tuple of
        (contents, question_ans, sample_labels, sample_ids,
        sample_categorys, sample_logics)
    :param epoch: current epoch index, used only for logging
    :param clip_grad_max: max gradient norm for clipping
    :param device: torch.device to move batch tensors onto
    :param thread_queue: queue used as a heartbeat: cleared and given the
        current timestamp after every batch
    :return: (average loss, average binary accuracy) over the epoch
    """
    global test_epoch
    epoch_loss = AverageMeter()
    epoch_binary_acc = AverageMeter()
    batch_cnt = len(batch_data)
    for i, batch in enumerate(batch_data, 0):
        optimizer.zero_grad()
        # unpack batch; categorys is unused here
        # bat_context, bat_question, bat_context_char, bat_question_char, bat_answer_range = batch_char_func(batch, enable_char=enable_char, device=device)
        contents, question_ans, sample_labels, sample_ids, sample_categorys, sample_logics = batch
        contents = contents.to(device)
        question_ans = question_ans.to(device)
        sample_labels = sample_labels.to(device)
        sample_logics = sample_logics.to(device)
        # contents:batch_size*10*200,  question_ans:batch_size*100  ,sample_labels=batchsize
        # forward
        pred_labels = model.forward(
            contents, question_ans,
            sample_logics)  # pred_labels size=(batch,2)
        # pred_labels=model_output[0:model_output.size()[0]-1]
        # mean_gate_val=model_output[-1][0][0]

        # get task loss
        task_loss = criterion[0].forward(pred_labels, sample_labels)

        # gate loss currently disabled
        # gate_loss=criterion[1].forward(mean_gate_val)
        gate_loss = 0

        # embedding regularized loss, penalizing drift from the initial
        # embedding weights
        # NOTE(review): accesses model.embedding directly — looks like this
        # would fail when model is wrapped in DataParallel (needs
        # model.module.embedding); confirm gpu count used in practice
        embedding_loss = criterion[2].forward(
            model.embedding.embedding_layer.weight, init_embedding_weight)
        # embedding_loss=0
        loss = task_loss + gate_loss + embedding_loss

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       clip_grad_max)  # fix gradient explosion
        optimizer.step()  # update parameters

        # logging
        batch_loss = loss.item()
        epoch_loss.update(batch_loss, len(sample_ids))

        binary_acc = compute_binary_accuracy(pred_labels.data,
                                             sample_labels.data)
        # problem_acc = compute_problems_accuracy(pred_labels.data, sample_labels.data, sample_ids)

        epoch_binary_acc.update(binary_acc.item(), len(sample_ids))
        # epoch_problem_acc.update(problem_acc.item(), int(len(sample_ids) / 5))

        logger.info(
            'epoch=%d, batch=%d/%d, embedding_loss=%.5f  loss=%.5f binary_acc=%.4f '
            % (epoch, i, batch_cnt, embedding_loss, batch_loss, binary_acc))

        # inter-thread heartbeat: keep only the latest batch timestamp
        if thread_queue.qsize() != 0:
            thread_queue.queue.clear()
        thread_queue.put(time.time())

        # manual release memory, todo: really effect?
        del contents, question_ans, sample_labels, sample_ids
        del pred_labels, loss

        # periodic test-set evaluation every 200 batches, tracked on its own
        # global step counter (test_epoch)
        if i % 200 == 0:
            model.eval()
            with torch.no_grad():
                test_avg_loss, test_binary_acc, test_avg_problem_acc = eval_on_model(
                    model=model,
                    criterion=criterion,
                    batch_data=batch_test_data,
                    epoch=test_epoch,
                    device=device,
                    init_embedding_weight=init_embedding_weight,
                    eval_dataset='test')
                tensorboard_writer.add_scalar("test/avg_loss", test_avg_loss,
                                              test_epoch)
                tensorboard_writer.add_scalar("test/binary_acc",
                                              test_binary_acc, test_epoch)
                tensorboard_writer.add_scalar("test/problem_acc",
                                              test_avg_problem_acc, test_epoch)
                test_epoch += 1
            model.train()

    logger.info(
        '===== epoch=%d, batch_count=%d, epoch_average_loss=%.5f, avg_binary_acc=%.4f ===='
        % (epoch, batch_cnt, epoch_loss.avg, epoch_binary_acc.avg))

    return epoch_loss.avg, epoch_binary_acc.avg