Example #1
    # negative log-likelihood loss (expects log-probabilities as input)
    criterion = nn.NLLLoss()

    # RAdam (Rectified Adam) optimizer from the radam module
    optimizer = radam.RAdam(model.parameters(), lr=learning_rate)
    print(model)
    
    total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('The number of trainable parameters is: %d' % total_trainable_params)



    # train the model unless training is skipped, then restore the saved weights
    if not skip_training:
        train(model, word_num_layers, char_num_layers, morph_num_layers, num_epochs, pairs_batch_train, pairs_batch_dev,
              word_hidden_size, char_hidden_size, morph_hidden_size, batch_size, criterion, optimizer, patience, device)
    model.load_state_dict(torch.load('weights/model_lower.pt'))

    model.eval()

    batch_size = 1

 
    print('\nTEST DATA \n')
    all_predicted, all_true = evaluate.get_predictions(data_test, model, word_num_layers, char_num_layers, morph_num_layers, word_hidden_size, 
                                                        char_hidden_size, morph_hidden_size, batch_size, device)
    evaluate.print_scores(all_predicted, all_true, tag2idx)


    print('\nWIKI DATA \n')
Example #2
    indexed_morph = morph_to_idx(whole_data, morph2idx, word2morph)
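    # merge the word-, character- and morph-level indices into a single dataset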
    indexed_whole_data = combine_data(indexed_data, indexed_char,
                                      indexed_morph, MAX_SEQ_LENGTH)

    # initialize the model
    model = NERModel(word_embedding_dim, char_embedding_dim,
                     morph_embedding_dim, word_hidden_size,
                     char_hidden_size, morph_hidden_size, len(char2idx),
                     len(morph2idx), num_tags, word_num_layers,
                     char_num_layers, morph_num_layers,
                     dropout_prob).to(device)

    # load the pretrained weights; map_location makes the checkpoint load on CPU-only machines
    if lowercase_model:
        model.load_state_dict(
            torch.load('weights/model_lower.pt',
                       map_location=torch.device('cpu')))
    else:
        model.load_state_dict(
            torch.load('weights/model_upper.pt',
                       map_location=torch.device('cpu')))

    model.eval()
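    # the document is processed with a batch size of 1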
    batch_size = 1

    print('Processing the document')
    evaluate_document(output_path, word_num_layers, char_num_layers,
                      morph_num_layers, word_hidden_size, char_hidden_size,
                      morph_hidden_size, batch_size, indexed_whole_data,
                      whole_data, model, device)
    print('Done')
Example #3
            if arr[i][j] < 3:
                arr[i][j] = enc_tags.inverse_transform([arr[i][j]])[0]
            elif arr[i][j] == 3:
                arr[i][j] = 'X'
            else:
                raise KeyError(f'{arr[i][j]} not found as a key in the label encoder')
    return arr

if __name__ == "__main__":
    my_parser = argparse.ArgumentParser()
    my_parser.version = '1.0'
    my_parser.add_argument('-g', '--grouped_entities', action='store_true',
                           help='if set, evaluate all metrics with exact entity-level matching instead of wordpiece-level token matching')
    args = my_parser.parse_args()
    grouped_entities = args.grouped_entities

    meta_data = joblib.load(config.METADATA_PATH)
    enc_tags = meta_data['enc_tags']

    num_tags = len(list(enc_tags.classes_))
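    # build the test sentences and tags, then wrap them in a DataLoader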
    sentences, tags = preprocess_data(enc_tags)
    test_dataloader = get_dataloader(sentences, tags)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
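    # rebuild the model and load the trained weights, mapping them to the selected device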
    model = NERModel(num_tags)
    model.load_state_dict(torch.load(config.MODEL_PATH, map_location=device))
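    # collect predicted and gold tag id sequences from the test set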
    tags_ypred, tags_ytrue = evaluate(test_dataloader, model, device, num_tags, grouped_entities=grouped_entities)
    # tags_ypred = enc_tags.inverse_transform(tags_ypred)
    # tags_ytrue = enc_tags.inverse_transform(tags_ytrue)
    tags_ypred = decode_transform(tags_ypred, enc_tags)
    tags_ytrue = decode_transform(tags_ytrue, enc_tags)
    # print(tags_ytrue,tags_ypred)
    print(seqeval_classification_report(tags_ytrue, tags_ypred))