Example #1
0
from data_utils import json2features, get_predictions, get_context

app = Flask(__name__, static_url_path='')

# CLI arguments: checkpoint, model config, vocab, serving port, and the
# Elasticsearch backend used for retrieval.
parser = argparse.ArgumentParser()
parser.add_argument('--init_restore_dir', type=str, required=True)
parser.add_argument('--bert_config_file', type=str, required=True)
# BUG FIX: args.vocab_file is consumed by the tokenizer below but the flag
# was never registered, so the script crashed with AttributeError on start.
parser.add_argument('--vocab_file', type=str, required=True)
parser.add_argument('--port', type=int, required=True)
parser.add_argument('--es_ip', type=str, required=True)
parser.add_argument('--es_port', type=int, required=True)

args = parser.parse_args()

# Build the model and tokenizer once at module load so every request served
# by the Flask app reuses the same instances.
print('init model...')
bert_config = BertConfig.from_json_file(args.bert_config_file)
model = BertForQuestionAnswering(bert_config)
tokenizer = tokenization.BertTokenizer(vocab_file=args.vocab_file,
                                       do_lower_case=True)
model.cuda()  # assumes a CUDA device is available — TODO confirm for CPU-only hosts
utils.torch_show_all_params(model)
utils.torch_init_model(model, args.init_restore_dir)

# Elasticsearch client; presumably used by request handlers defined later
# in the file for document retrieval — verify against callers.
es = Elasticsearch([args.es_ip], port=args.es_port)


def predict(model, eval_examples, eval_features):
    """Run evaluation of *model* over pre-built examples/features.

    NOTE(review): only the prologue of this function is visible here; the
    remainder of the body lies outside this chunk.
    """
    # Evaluation runs on a single CUDA device.
    device = torch.device("cuda")
    print("***** Eval *****")
    # Per-feature model outputs: unique feature id plus start/end logits,
    # later assembled into answer spans.
    RawResult = collections.namedtuple(
        "RawResult", ["unique_id", "start_logits", "end_logits"])
    # Predictions are written to this fixed path in the working directory.
    output_prediction_file = './predictions.json'
Example #2
0
    # Convert the raw test JSON into cached example/feature files on first
    # use; subsequent runs load the cached JSON directly.
    if not os.path.exists(args.test_dir1):
        json2features(input_file=args.test_file, output_files=[args.test_dir1, args.test_dir2],
                      tokenizer=tokenizer, is_training=False, repeat_limit=3, max_query_length=96,
                      max_seq_length=args.max_seq_length, doc_stride=128)
    test_examples = json.load(open(args.test_dir1, 'r'))
    test_features = json.load(open(args.test_dir2, 'r'))

    # Ceiling division: add one step when the feature count is not an exact
    # multiple of the batch size.
    dev_steps_per_epoch = len(test_features) // args.n_batch
    if len(test_features) % args.n_batch != 0:
        dev_steps_per_epoch += 1

    # init model
    print('init model...')
    # Architecture is inferred from the checkpoint path: plain BERT unless
    # the path mentions 'albert'; 'google' selects AlbertForMRC, otherwise
    # the ALBertForQA variant with configurable dropout is used.
    if 'albert' not in args.init_restore_dir:
        model = BertForQuestionAnswering(bert_config)
    else:
        if 'google' in args.init_restore_dir:
            model = AlbertForMRC(bert_config)
        else:
            model = ALBertForQA(bert_config, dropout_rate=args.dropout)
    utils.torch_show_all_params(model)
    utils.torch_init_model(model, args.init_restore_dir)
    if args.float16:
        model.half()  # half-precision inference when requested
    model.to(device)
    # Wrap for multi-GPU data parallelism when more than one GPU is present.
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    test(model, args, test_examples, test_features, device)
Example #3
0
        # Reset best-metric trackers for this seed's run.
        best_f1, best_em = 0, 0
        # Record the seed in the log file so runs can be told apart.
        with open(args.log_file, 'a') as aw:
            aw.write('===================================' + 'SEED:' +
                     str(seed_) + '===================================' + '\n')
        print('SEED:', seed_)

        # Seed every RNG (Python, NumPy, torch CPU and all CUDA devices)
        # for reproducibility under this seed.
        random.seed(seed_)
        np.random.seed(seed_)
        torch.manual_seed(seed_)
        if n_gpu > 0:
            torch.cuda.manual_seed_all(seed_)

        # init model
        print('init model...')
        # Architecture is inferred from the checkpoint path: plain BERT
        # unless the path mentions 'albert'; 'google' selects AlbertForMRC,
        # otherwise ALBertForQA with configurable dropout.
        if 'albert' not in args.init_restore_dir:
            model = BertForQuestionAnswering(bert_config)
        else:
            if 'google' in args.init_restore_dir:
                model = AlbertForMRC(bert_config)
            else:
                model = ALBertForQA(bert_config, dropout_rate=args.dropout)
        utils.torch_show_all_params(model)
        utils.torch_init_model(model, args.init_restore_dir)
        if args.float16:
            model.half()  # half-precision training/eval when requested
        model.to(device)
        # Wrap for multi-GPU data parallelism when more than one GPU is present.
        if n_gpu > 1:
            model = torch.nn.DataParallel(model)
        # NOTE(review): the get_optimization(...) call is truncated at the
        # end of this chunk; remaining arguments lie outside this view.
        optimizer = get_optimization(model=model,
                                     float16=args.float16,
                                     learning_rate=args.lr,