Example #1
def evaluate(args):
    # file paths
    model_file = os.path.join(args['save_dir'], args['save_name']) if args['save_name'] is not None \
        else '{}/{}_nertagger.pt'.format(args['save_dir'], args['shorthand'])

    loaded_args, trainer, vocab = load_model(args, model_file)

    # load data
    logger.info("Loading data with batch size {}...".format(args['batch_size']))
    with open(args['eval_file']) as f:
        doc = Document(json.load(f))
    batch = DataLoader(doc, args['batch_size'], loaded_args, vocab=vocab, evaluation=True)
    utils.warn_missing_tags([i for i in trainer.vocab['tag']], batch.tags, "eval_file")

    logger.info("Start evaluation...")
    preds = []
    for i, b in enumerate(batch):
        preds += trainer.predict(b)

    gold_tags = batch.tags
    _, _, score = scorer.score_by_entity(preds, gold_tags)
    _, _, _, confusion = scorer.score_by_token(preds, gold_tags)

    logger.info("NER tagger score:")
    logger.info("{} {:.2f}".format(args['shorthand'], score*100))
    logger.info("NER token confusion matrix:\n{}".format(format_confusion(confusion)))
Example #2
def test_ner_scorer():
    pred_sequences = [['O', 'S-LOC', 'O', 'O', 'B-PER', 'E-PER'],
                      ['O', 'S-MISC', 'O', 'E-ORG', 'O', 'B-PER', 'I-PER', 'E-PER']]
    gold_sequences = [['O', 'B-LOC', 'E-LOC', 'O', 'B-PER', 'E-PER'],
                      ['O', 'S-MISC', 'B-ORG', 'E-ORG', 'O', 'B-PER', 'E-PER', 'S-LOC']]
    
    token_p, token_r, token_f = score_by_token(pred_sequences, gold_sequences)
    assert pytest.approx(token_p, abs=0.00001) == 0.625
    assert pytest.approx(token_r, abs=0.00001) == 0.5
    assert pytest.approx(token_f, abs=0.00001) == 0.55555

    entity_p, entity_r, entity_f = score_by_entity(pred_sequences, gold_sequences)
    assert pytest.approx(entity_p, abs=0.00001) == 0.4
    assert pytest.approx(entity_r, abs=0.00001) == 0.33333
    assert pytest.approx(entity_f, abs=0.00001) == 0.36363
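
These expected values can be verified by hand. At the token level, 5 of the 8 predicted non-O tags match the gold tag at the same position exactly, while the gold sequences contain 10 non-O tags, so precision = 5/8 = 0.625, recall = 5/10 = 0.5, and F1 = 2PR/(P+R) = 5/9 ≈ 0.55555. At the entity level, decoding the BIOES tags yields 5 predicted spans and 6 gold spans (assuming the scorer's best-effort decoding treats the stray E-ORG as a one-token ORG span, which the expected precision implies); exactly 2 of them, the first sequence's PER and the second's MISC, match a gold span, so precision = 2/5 = 0.4, recall = 2/6 ≈ 0.33333, and F1 ≈ 0.36363.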
Example #3
def evaluate(args):
    # file paths
    model_file = os.path.join(args['save_dir'], args['save_name']) if args['save_name'] is not None \
        else '{}/{}_nertagger.pt'.format(args['save_dir'], args['shorthand'])

    # load model
    use_cuda = args['cuda'] and not args['cpu']
    trainer = Trainer(model_file=model_file, use_cuda=use_cuda)
    loaded_args, vocab = trainer.args, trainer.vocab

    # load config
    for k in args:
        if k.endswith('_dir') or k.endswith('_file') or k in [
                'shorthand', 'mode', 'scheme'
        ]:
            loaded_args[k] = args[k]

    # load data
    logger.info("Loading data with batch size {}...".format(
        args['batch_size']))
    with open(args['eval_file']) as f:
        doc = Document(json.load(f))
    batch = DataLoader(doc,
                       args['batch_size'],
                       loaded_args,
                       vocab=vocab,
                       evaluation=True)

    logger.info("Start evaluation...")
    preds = []
    for i, b in enumerate(batch):
        preds += trainer.predict(b)

    gold_tags = batch.tags
    _, _, score = scorer.score_by_entity(preds, gold_tags)

    logger.info("NER tagger score:")
    logger.info("{} {:.2f}".format(args['shorthand'], score * 100))
Example #4
def train(args):
    utils.ensure_dir(args['save_dir'])
    model_file = os.path.join(args['save_dir'], args['save_name']) if args['save_name'] is not None \
        else '{}/{}_nertagger.pt'.format(args['save_dir'], args['shorthand'])

    # load pretrained vectors
    if len(args['wordvec_file']) == 0:
        vec_file = utils.get_wordvec_file(args['wordvec_dir'],
                                          args['shorthand'])
    else:
        vec_file = args['wordvec_file']
    # do not save pretrained embeddings individually
    pretrain = Pretrain(None,
                        vec_file,
                        args['pretrain_max_vocab'],
                        save_to_file=False)

    if args['charlm']:
        if args['charlm_shorthand'] is None:
            raise ValueError("CharLM shorthand is required for loading a pretrained CharLM model")
        logger.info('Using pretrained contextualized char embedding')
        args['charlm_forward_file'] = '{}/{}_forward_charlm.pt'.format(
            args['charlm_save_dir'], args['charlm_shorthand'])
        args['charlm_backward_file'] = '{}/{}_backward_charlm.pt'.format(
            args['charlm_save_dir'], args['charlm_shorthand'])

    # load data
    logger.info("Loading data with batch size {}...".format(
        args['batch_size']))
    with open(args['train_file']) as f:
        train_doc = Document(json.load(f))
    train_batch = DataLoader(train_doc,
                             args['batch_size'],
                             args,
                             pretrain,
                             evaluation=False)
    vocab = train_batch.vocab
    with open(args['eval_file']) as f:
        dev_doc = Document(json.load(f))
    dev_batch = DataLoader(dev_doc,
                           args['batch_size'],
                           args,
                           pretrain,
                           vocab=vocab,
                           evaluation=True)
    dev_gold_tags = dev_batch.tags

    # skip training if the language does not have training or dev data
    if len(train_batch) == 0 or len(dev_batch) == 0:
        logger.info("Skip training because no data available...")
        sys.exit(0)

    logger.info("Training tagger...")
    trainer = Trainer(args=args,
                      vocab=vocab,
                      pretrain=pretrain,
                      use_cuda=args['cuda'])
    logger.info(trainer.model)

    global_step = 0
    max_steps = args['max_steps']
    dev_score_history = []
    best_dev_preds = []
    current_lr = trainer.optimizer.param_groups[0]['lr']
    global_start_time = time.time()
    format_str = '{}: step {}/{}, loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'

    # LR scheduling
    if args['lr_decay'] > 0:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            trainer.optimizer, mode='max', factor=args['lr_decay'],
            patience=args['patience'], verbose=True, min_lr=args['min_lr'])
    else:
        scheduler = None

    # start training
    train_loss = 0
    while True:
        should_stop = False
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = trainer.update(batch, eval=False)  # update step
            train_loss += loss
            if global_step % args['log_step'] == 0:
                duration = time.time() - start_time
                logger.info(format_str.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                              global_step, max_steps, loss, duration, current_lr))

            if global_step % args['eval_interval'] == 0:
                # eval on dev
                logger.info("Evaluating on dev set...")
                dev_preds = []
                for batch in dev_batch:
                    preds = trainer.predict(batch)
                    dev_preds += preds
                _, _, dev_score = scorer.score_by_entity(
                    dev_preds, dev_gold_tags)

                train_loss = train_loss / args['eval_interval']  # avg loss per batch
                logger.info(
                    "step {}: train_loss = {:.6f}, dev_score = {:.4f}".format(
                        global_step, train_loss, dev_score))
                train_loss = 0

                # save best model
                if len(dev_score_history) == 0 or dev_score > max(dev_score_history):
                    trainer.save(model_file)
                    logger.info("New best model saved.")
                    best_dev_preds = dev_preds

                dev_score_history += [dev_score]
                logger.info("")

                # lr schedule
                if scheduler is not None:
                    scheduler.step(dev_score)

            # check stopping
            current_lr = trainer.optimizer.param_groups[0]['lr']
            if global_step >= args['max_steps'] or current_lr <= args['min_lr']:
                should_stop = True
                break

        if should_stop:
            break

        train_batch.reshuffle()

    logger.info("Training ended with {} steps.".format(global_step))

    if len(dev_score_history) > 0:
        best_f, best_eval = max(dev_score_history) * 100, np.argmax(dev_score_history) + 1
        logger.info("Best dev F1 = {:.2f}, at iteration = {}".format(
            best_f, best_eval * args['eval_interval']))
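
For orientation, a sketch of the configuration dictionary this train() consumes. Every key below is read somewhere in the function body; the values are illustrative assumptions, not documented defaults.

# Illustrative values only; each key is read inside train() above.
args = {
    'save_dir': 'saved_models/ner', 'save_name': None, 'shorthand': 'en_sample',
    'train_file': 'data/ner/en_sample.train.json', 'eval_file': 'data/ner/en_sample.dev.json',
    'batch_size': 32, 'cuda': True,
    'wordvec_file': '', 'wordvec_dir': 'extern_data/wordvec', 'pretrain_max_vocab': 100000,
    'charlm': False, 'charlm_shorthand': None, 'charlm_save_dir': 'saved_models/charlm',
    'max_steps': 200000, 'log_step': 20, 'eval_interval': 100,
    'lr_decay': 0.5, 'patience': 3, 'min_lr': 1e-6,
}
train(args)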
Example #5
def train(args):
    utils.ensure_dir(args['save_dir'])
    model_file = os.path.join(args['save_dir'], args['save_name']) if args['save_name'] is not None \
        else '{}/{}_nertagger.pt'.format(args['save_dir'], args['shorthand'])

    pretrain = None
    vocab = None
    trainer = None

    if args['finetune'] and args['finetune_load_name']:
        logger.warning('Finetune is ON. Using model from "{}"'.format(args['finetune_load_name']))
        _, trainer, vocab = load_model(args, args['finetune_load_name'])
    elif args['finetune'] and os.path.exists(model_file):
        logger.warning('Finetune is ON. Using model from "{}"'.format(model_file))
        _, trainer, vocab = load_model(args, model_file)
    else:
        if args['finetune']:
            raise FileNotFoundError('Finetune is set to true but model file is not found: {}'.format(model_file))

        # load pretrained vectors
        if args['wordvec_pretrain_file']:
            pretrain_file = args['wordvec_pretrain_file']
            pretrain = Pretrain(pretrain_file, None, args['pretrain_max_vocab'], save_to_file=False)
        else:
            if len(args['wordvec_file']) == 0:
                vec_file = utils.get_wordvec_file(args['wordvec_dir'], args['shorthand'])
            else:
                vec_file = args['wordvec_file']
            # do not save pretrained embeddings individually
            pretrain = Pretrain(None, vec_file, args['pretrain_max_vocab'], save_to_file=False)

        if pretrain is not None:
            word_emb_dim = pretrain.emb.shape[1]
            if args['word_emb_dim'] and args['word_emb_dim'] != word_emb_dim:
                logger.warning("Embedding file has a dimension of {}.  Model will be built with that size instead of {}".format(word_emb_dim, args['word_emb_dim']))
            args['word_emb_dim'] = word_emb_dim

        if args['charlm']:
            if args['charlm_shorthand'] is None:
                raise ValueError("CharLM Shorthand is required for loading pretrained CharLM model...")
            logger.info('Using pretrained contextualized char embedding')
            if not args['charlm_forward_file']:
                args['charlm_forward_file'] = '{}/{}_forward_charlm.pt'.format(args['charlm_save_dir'], args['charlm_shorthand'])
            if not args['charlm_backward_file']:
                args['charlm_backward_file'] = '{}/{}_backward_charlm.pt'.format(args['charlm_save_dir'], args['charlm_shorthand'])

    # load data
    logger.info("Loading data with batch size {}...".format(args['batch_size']))
    with open(args['train_file']) as f:
        train_doc = Document(json.load(f))
    train_batch = DataLoader(train_doc, args['batch_size'], args, pretrain, vocab=vocab, evaluation=False)
    vocab = train_batch.vocab
    with open(args['eval_file']) as f:
        dev_doc = Document(json.load(f))
    dev_batch = DataLoader(dev_doc, args['batch_size'], args, pretrain, vocab=vocab, evaluation=True)
    dev_gold_tags = dev_batch.tags

    if args['finetune']:
        utils.warn_missing_tags([i for i in trainer.vocab['tag']], train_batch.tags, "training set")
    utils.warn_missing_tags(train_batch.tags, dev_batch.tags, "dev set")

    # skip training if the language does not have training or dev data
    if len(train_batch) == 0 or len(dev_batch) == 0:
        logger.info("Skip training because no data available...")
        sys.exit(0)

    logger.info("Training tagger...")
    if trainer is None: # init if model was not loaded previously from file
        trainer = Trainer(args=args, vocab=vocab, pretrain=pretrain, use_cuda=args['cuda'],
                          train_classifier_only=args['train_classifier_only'])
    logger.info(trainer.model)

    global_step = 0
    max_steps = args['max_steps']
    dev_score_history = []
    best_dev_preds = []
    current_lr = trainer.optimizer.param_groups[0]['lr']
    global_start_time = time.time()
    format_str = '{}: step {}/{}, loss = {:.6f} ({:.3f} sec/batch), lr: {:.6f}'

    # LR scheduling
    if args['lr_decay'] > 0:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            trainer.optimizer, mode='max', factor=args['lr_decay'],
            patience=args['patience'], verbose=True, min_lr=args['min_lr'])
    else:
        scheduler = None

    # start training
    train_loss = 0
    while True:
        should_stop = False
        for i, batch in enumerate(train_batch):
            start_time = time.time()
            global_step += 1
            loss = trainer.update(batch, eval=False) # update step
            train_loss += loss
            if global_step % args['log_step'] == 0:
                duration = time.time() - start_time
                logger.info(format_str.format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                              global_step, max_steps, loss, duration, current_lr))

            if global_step % args['eval_interval'] == 0:
                # eval on dev
                logger.info("Evaluating on dev set...")
                dev_preds = []
                for batch in dev_batch:
                    preds = trainer.predict(batch)
                    dev_preds += preds
                _, _, dev_score = scorer.score_by_entity(dev_preds, dev_gold_tags)

                train_loss = train_loss / args['eval_interval'] # avg loss per batch
                logger.info("step {}: train_loss = {:.6f}, dev_score = {:.4f}".format(global_step, train_loss, dev_score))
                train_loss = 0

                # save best model
                if len(dev_score_history) == 0 or dev_score > max(dev_score_history):
                    trainer.save(model_file)
                    logger.info("New best model saved.")
                    best_dev_preds = dev_preds

                dev_score_history += [dev_score]
                logger.info("")

                # lr schedule
                if scheduler is not None:
                    scheduler.step(dev_score)
            
            # check stopping
            current_lr = trainer.optimizer.param_groups[0]['lr']
            if global_step >= args['max_steps'] or current_lr <= args['min_lr']:
                should_stop = True
                break

        if should_stop:
            break

        train_batch.reshuffle()

    logger.info("Training ended with {} steps.".format(global_step))

    if len(dev_score_history) > 0:
        best_f, best_eval = max(dev_score_history)*100, np.argmax(dev_score_history)+1
        logger.info("Best dev F1 = {:.2f}, at iteration = {}".format(best_f, best_eval * args['eval_interval']))