Exemplo n.º 1
0
# Default locations used when the caller does not override them.
_DEFAULT_DATA_DIR = '/home/deepracer/DeepRacer/AI_Samurai/Esun_AML/NER_BERT_pytorch/temp'
_DEFAULT_BERT_MODEL_DIR = '/home/deepracer/DeepRacer/AI_Samurai/Esun_AML/NER_BERT_pytorch/bert-base-chinese-pytorch'
_DEFAULT_MODEL_DIR = '/home/deepracer/DeepRacer/AI_Samurai/Esun_AML/NER_BERT_pytorch/experiments/base_model'


def evaluate_config(data_dir=_DEFAULT_DATA_DIR,
                    bert_model_dir=_DEFAULT_BERT_MODEL_DIR,
                    model_dir=_DEFAULT_MODEL_DIR,
                    *,
                    seed=23,
                    restore_file='best',
                    multi_gpu=False,
                    fp16=False):
    """Build the data loader, model and params needed for evaluation.

    Reads ``params.json`` from ``model_dir``, seeds the random number
    generators, configures logging to ``<model_dir>/evaluate.log``, creates
    the ``DataLoader``, instantiates ``BertForTokenClassification`` from
    ``<bert_model_dir>/bert_config.json`` and loads the weights stored in
    ``<model_dir>/<restore_file>.pth.tar``.

    Calling the function without arguments uses the built-in default
    locations and settings.

    Args:
        data_dir: Directory handed to ``DataLoader`` as the dataset location.
        bert_model_dir: Directory containing ``bert_config.json``; also
            handed to ``DataLoader``.
        model_dir: Directory containing ``params.json`` and the checkpoint;
            ``evaluate.log`` is written here as well.
        seed: Random seed for ``random`` and ``torch`` (all GPUs included
            when at least one is visible).
        restore_file: Checkpoint name without the ``.pth.tar`` suffix.
        multi_gpu: Wrap the model in ``torch.nn.DataParallel`` when more
            than one GPU is available.
        fp16: Convert the model to 16-bit floats after loading the weights.

    Returns:
        A ``(data_loader, model, params)`` tuple.

    Raises:
        AssertionError: If ``params.json`` does not exist in ``model_dir``.
    """
    # Load the hyper-parameters stored next to the trained model.
    json_path = os.path.join(model_dir, 'params.json')
    if not os.path.isfile(json_path):
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is kept.
        raise AssertionError(
            "No json configuration file found at {}".format(json_path))
    params = Params(json_path)

    # Use GPUs if available.
    params.device = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    params.n_gpu = torch.cuda.device_count()
    params.multi_gpu = multi_gpu

    # Seed every RNG in use so that runs are reproducible.
    random.seed(seed)
    torch.manual_seed(seed)
    if params.n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
    params.seed = seed

    set_logger(os.path.join(model_dir, 'evaluate.log'))

    logging.info("Loading the dataset...")
    data_loader = DataLoader(data_dir, bert_model_dir, params, token_pad_idx=0)
    logging.info("- done.")

    # Build the model with one output label per tag.
    # NOTE(review): params.tag2idx is presumably populated by DataLoader or
    # params.json -- confirm against those definitions.
    config_path = os.path.join(bert_model_dir, 'bert_config.json')
    config = BertConfig.from_json_file(config_path)
    model = BertForTokenClassification(config, num_labels=len(params.tag2idx))
    model.to(params.device)

    # Restore the trained weights, then apply the optional precision and
    # multi-GPU wrappers.
    load_checkpoint(os.path.join(model_dir, restore_file + '.pth.tar'), model)
    if fp16:
        model.half()
    if params.n_gpu > 1 and multi_gpu:
        model = torch.nn.DataParallel(model)

    return data_loader, model, params