Exemplo n.º 1
0
        l_set = functools.reduce(lambda x, y: x | y, map(lambda t: set(t), test_labels), l_set)

    # Bucket the three splits. Only the training call passes ALLOW_SPANLEN and
    # train_set=True, and only its result is unpacked into two values.
    print('constructing dataset')
    dataset, dataset_onlycrf = utils.construct_bucket_mean_vb_wc(
        train_features,
        train_labels,
        CRF_l_map,
        SCRF_l_map,
        c_map,
        f_map,
        SCRF_stop_tag=SCRF_l_map['<STOP>'],
        ALLOW_SPANLEN=args.allowspan,
        train_set=True)
    dev_dataset = utils.construct_bucket_mean_vb_wc(
        dev_features,
        dev_labels,
        CRF_l_map,
        SCRF_l_map,
        c_map,
        f_map,
        SCRF_stop_tag=SCRF_l_map['<STOP>'],
        train_set=False)
    test_dataset = utils.construct_bucket_mean_vb_wc(
        test_features,
        test_labels,
        CRF_l_map,
        SCRF_l_map,
        c_map,
        f_map,
        SCRF_stop_tag=SCRF_l_map['<STOP>'],
        train_set=False)

    # One DataLoader per bucket. Training loaders shuffle; dev/test loaders
    # keep their order and use a fixed batch size of 50.
    dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=args.batch_size, shuffle=True, drop_last=False)
        for bucket in dataset
    ]
    # The CRF-only buckets may be empty/None, in which case no loaders are built.
    if dataset_onlycrf:
        dataset_loader_crf = [
            torch.utils.data.DataLoader(bucket, batch_size=3, shuffle=True, drop_last=False)
            for bucket in dataset_onlycrf
        ]
    else:
        dataset_loader_crf = None
    dev_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in dev_dataset
    ]
    test_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in test_dataset
    ]

    # Positional arguments are passed in exactly the order ner_model expects;
    # hyperparameters come from the parsed command-line `args`.
    print('building model')
    model = ner_model(
        args.word_embedding_dim,
        args.word_hidden_dim,
        args.word_lstm_layers,
        len(f_map),
        len(c_map),
        args.char_embedding_dim,
        args.char_lstm_hidden_dim,
        args.cnn_filter_num,
        args.char_lstm_layers,
        args.char_lstm,
        args.dropout_ratio,
        args.high_way,
        args.highway_layers,
        CRF_l_map['<start>'],
        CRF_l_map['<pad>'],
        len(CRF_l_map),
        SCRF_l_map,
        args.scrf_dense_dim,
        in_doc_words,
        args.index_embeds_dim,
        args.allowspan,
        SCRF_l_map['<START>'],
        SCRF_l_map['<STOP>'],
        args.grconv)

    # Fresh run: load the pretrained word embeddings, then call rand_init().
    # Resumed run: restore all weights from the checkpoint instead.
    if not args.load_check_point:
        model.word_rep.load_pretrained_word_embedding(embedding_tensor)
        model.word_rep.rand_init()
    else:
        model.load_state_dict(checkpoint_file['state_dict'])

    # SGD with momentum is the active optimizer
    # (optim.Adam(model.parameters()) was the alternative tried here).
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    # Optimizer state is restored only when resuming AND explicitly requested.
    if args.load_check_point:
        if args.load_opt:
            optimizer.load_state_dict(checkpoint_file['optimizer'])
Exemplo n.º 2
0
    # One unshuffled DataLoader (batch size 50) per bucket of each evaluation split.
    dev_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in dev_dataset]
    test_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in test_dataset]

    # Rebuild the network from the hyperparameters stored in the `jd` mapping;
    # the positional order below is the order ner_model expects.
    print('build model')
    model = ner_model(
        jd['word_embedding_dim'],
        jd['word_hidden_dim'],
        jd['word_lstm_layers'],
        len(f_map),
        len(c_map),
        jd['char_embedding_dim'],
        jd['char_lstm_hidden_dim'],
        jd['cnn_filter_num'],
        jd['char_lstm_layers'],
        jd['char_lstm'],
        jd['dropout_ratio'],
        jd['high_way'],
        jd['highway_layers'],
        CRF_l_map['<start>'],
        CRF_l_map['<pad>'],
        len(CRF_l_map),
        SCRF_l_map,
        jd['scrf_dense_dim'],
        in_doc_words,
        jd['index_embeds_dim'],
        jd['allowspan'],
        SCRF_l_map['<START>'],
        SCRF_l_map['<STOP>'],
        jd['grconv'])

    # Restore the trained weights from the checkpoint.
    print('load model')
    model.load_state_dict(checkpoint_file['state_dict'])

    # Device placement goes through `local_device` rather than a hard-coded
    # model.cuda() call.
    model.to(local_device)
    packer = Repack()

    # NOTE: this rebinds the name `evaluator` from the callable to the object
    # it returns, so the callable is no longer reachable under this name.
    evaluator = evaluator(packer, CRF_l_map, SCRF_l_map)

    print('dev...')
Exemplo n.º 3
0

    # Parse the raw dev/test lines into parallel feature and label sequences.
    dev_features, dev_labels = utils.read_corpus(dev_lines)
    test_features, test_labels = utils.read_corpus(test_lines)

    # Bucket both evaluation splits (train_set=False for each).
    dev_dataset = utils.construct_bucket_mean_vb_wc(
        dev_features,
        dev_labels,
        CRF_l_map,
        SCRF_l_map,
        c_map,
        f_map,
        SCRF_stop_tag=SCRF_l_map['<STOP>'],
        train_set=False)
    test_dataset = utils.construct_bucket_mean_vb_wc(
        test_features,
        test_labels,
        CRF_l_map,
        SCRF_l_map,
        c_map,
        f_map,
        SCRF_stop_tag=SCRF_l_map['<STOP>'],
        train_set=False)

    # One unshuffled DataLoader (batch size 50) per bucket.
    dev_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in dev_dataset
    ]
    test_dataset_loader = [
        torch.utils.data.DataLoader(bucket, batch_size=50, shuffle=False, drop_last=False)
        for bucket in test_dataset
    ]

    # Rebuild the network from the hyperparameters stored in the `jd` mapping;
    # the positional order below is the order ner_model expects.
    print('build model')
    model = ner_model(
        jd['word_embedding_dim'],
        jd['word_hidden_dim'],
        jd['word_lstm_layers'],
        len(f_map),
        len(c_map),
        jd['char_embedding_dim'],
        jd['char_lstm_hidden_dim'],
        jd['cnn_filter_num'],
        jd['char_lstm_layers'],
        jd['char_lstm'],
        jd['dropout_ratio'],
        jd['high_way'],
        jd['highway_layers'],
        CRF_l_map['<start>'],
        CRF_l_map['<pad>'],
        len(CRF_l_map),
        SCRF_l_map,
        jd['scrf_dense_dim'],
        in_doc_words,
        jd['index_embeds_dim'],
        jd['allowspan'],
        SCRF_l_map['<START>'],
        SCRF_l_map['<STOP>'],
        jd['grconv'])

    # Restore the trained weights from the checkpoint.
    print('load model')
    model.load_state_dict(checkpoint_file['state_dict'])

    # Unconditional .cuda() call: this path assumes a CUDA device is present.
    model.cuda()
    packer = Repack()

    # NOTE: this rebinds the name `evaluator` from the callable to the object
    # it returns, so the callable is no longer reachable under this name.
    evaluator = evaluator(packer, CRF_l_map, SCRF_l_map)


    print('dev...')
    dev_f1_crf, dev_pre_crf, dev_rec_crf, dev_acc_crf, dev_f1_scrf, dev_pre_scrf, dev_rec_scrf, dev_acc_scrf, dev_f1_jnt, dev_pre_jnt, dev_rec_jnt, dev_acc_jnt = \