Example #1
import sys
import time

import numpy as np
import theano

# project-local modules assumed by this snippet
import io_utils
import preprocessor


def test(args):
    print '\nNEURAL POS TAGGER START\n'

    print '\tINITIAL EMBEDDING\t%s %s' % (args.word_list, args.emb_list)
    print '\tWORD\t\t\tEmb Dim: %d  Hidden Dim: %d' % (args.w_emb_dim, args.w_hidden_dim)
    print '\tCHARACTER\t\tEmb Dim: %d  Hidden Dim: %d' % (args.c_emb_dim, args.c_hidden_dim)
    print '\tOPTIMIZATION\t\tMethod: %s  Learning Rate: %f\n' % (args.opt, args.lr)

    """ load vocab """
    print 'Loading vocabularies...\n'
    vocab_word = io_utils.load_data('vocab_word')
    vocab_char = io_utils.load_data('vocab_char')
    vocab_tag = io_utils.load_data('vocab_tag')
    print '\tWord size: %d  Char size: %d' % (vocab_word.size(), vocab_char.size())

    """ load data """
    print '\nLoading data set...\n'
    # note: this test entry point reads the file passed as dev_data
    test_corpus, test_vocab_word, test_vocab_char, test_vocab_tag = io_utils.load_conll(args.dev_data)
    print '\tTest Sentences: %d' % len(test_corpus)

    """ converting into ids """
    print '\nConverting into IDs...\n'
    test_x, test_c, test_b, test_y = preprocessor.convert_into_ids(test_corpus, vocab_word, vocab_char, vocab_tag)

    """ tagger set up """
    tagger = io_utils.load_data(args.load)

    dev_f = theano.function(
        inputs=tagger.input[:-1],
        outputs=tagger.result,
        mode='FAST_RUN'
    )

    """ Prediction """
    print '\nPREDICTION START\n'

    print '\tBatch Index: ',
    start = time.time()

    total = 0.0  # float, so correct / total is true division under Python 2
    correct = 0

    for index in xrange(len(test_x)):
        if index % 100 == 0 and index != 0:
            print index,
            sys.stdout.flush()

        if tagger.name == 'char':
            corrects = dev_f(test_x[index], test_c[index], test_b[index], test_y[index])
        else:
            corrects = dev_f(test_x[index], test_y[index])

        total += len(corrects)
        correct += np.sum(corrects)

    end = time.time()

    print '\n\tTime: %f seconds' % (end - start)
    print '\tAccuracy:%f  Total:%d  Correct:%d' % ((correct / total), total, correct)
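Example #1's test() reads a number of attributes off its args namespace. Below is a minimal sketch of how it might be wired to argparse; the flag names are inferred from those attributes, and the defaults are illustrative assumptions only.

# Hypothetical CLI wiring for test() above; flag names mirror the
# attributes the function reads, defaults are illustrative only.
import argparse

parser = argparse.ArgumentParser(description='Neural POS tagger: test mode')
parser.add_argument('--word_list', help='initial word list')
parser.add_argument('--emb_list', help='initial embedding list')
parser.add_argument('--w_emb_dim', type=int, default=100)
parser.add_argument('--w_hidden_dim', type=int, default=100)
parser.add_argument('--c_emb_dim', type=int, default=10)
parser.add_argument('--c_hidden_dim', type=int, default=50)
parser.add_argument('--opt', default='sgd', help='optimization method')
parser.add_argument('--lr', type=float, default=0.01, help='learning rate')
parser.add_argument('--dev_data', help='CoNLL file to evaluate on')
parser.add_argument('--load', help='path to a pickled tagger')

test(parser.parse_args())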
Example #2

def load_weights(model, pth):
    # Restore each layer's weights, matching saved arrays by a sanitized
    # variable name ('/' replaced with '_').
    for layer in model.layers:
        for w in layer.weights:
            vn = str(w).replace('/', '_')
            wv = load_data(pth, vn)[vn]
            w.set_value(wv)
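load_weights() assumes a load_data(pth, vn) helper whose result can be indexed by the sanitized variable name. A minimal sketch, assuming the weights were saved as a NumPy .npz archive keyed by those names:

# Hypothetical load_data() counterpart: returns a dict-like mapping from
# sanitized variable names to arrays, here backed by a NumPy .npz file.
import numpy as np

def load_data(pth, vn):
    # np.load on an .npz archive returns an NpzFile, which load_weights()
    # indexes with the same sanitized name it passed in; vn is unused here.
    return np.load(pth)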
Example #3
import io_utils

# corpus_statistics, sample_format, theano_format, set_predict_f and
# predict are project-local helpers assumed by this snippet.


def test(argv):
    print '\nSETTING UP A TEST SETTING\n'

    task = argv.task
    batch_size = argv.batch_size
    window = argv.window

    print '\tTASK: %s\tBATCH: %d\tWINDOW: %d' % (task, batch_size, window)

    ##############
    # LOAD FILES #
    ##############

    """ Load files """
    # corpus: 1D: n_sents, 2D: n_words, 3D: (word, pas_info, pas_id)
    vocab_word = io_utils.load_data(argv.load_vocab)

    if argv.dev_data:
        dev_corpus, _ = io_utils.load(argv.dev_data, vocab_word, False)
        print '\nDEV CORPUS'
        corpus_statistics(dev_corpus)

    if argv.test_data:
        test_corpus, _ = io_utils.load(argv.test_data, vocab_word, False)
        print '\nTEST CORPUS'
        corpus_statistics(test_corpus)

    print '\nVocab: %d' % vocab_word.size()

    ##############
    # PREPROCESS #
    ##############

    """ Preprocessing """
    # samples: 1D: n_sents, 2D: [word_ids, tag_ids, prd_indices, contexts]
    if argv.dev_data:
        dev_samples = sample_format(dev_corpus, vocab_word, window)
        n_dev_samples = len(dev_samples)

    if argv.test_data:
        test_samples = sample_format(test_corpus, vocab_word, window)
        n_te_samples = len(test_samples)

    # dataset = [x, y, l]
    # x=features: 1D: n_samples * n_words, 2D: window; elem=word id
    # y=labels: 1D: n_samples; elem=scalar
    # l=question length: 1D: n_samples * 2; elem=scalar
    # bb_x=batch indices for x: 1D: n_samples / batch_size + 1; elem=(bob, eob)
    # bb_y=batch indices for y: 1D: n_samples / batch_size + 1; elem=(bob, eob)

    if argv.dev_data:
        dev_dataset, dev_bb_x, dev_bb_y = theano_format(dev_samples, batch_size)

    if argv.test_data:
        te_dataset, te_bb_x, te_bb_y = theano_format(test_samples, batch_size)

    ######################
    # BUILD ACTUAL MODEL #
    ######################

    """ Set a model """
    print '\n\nBuilding a model...'
    model = io_utils.load_data(argv.load_model)

    if argv.dev_data:
        dev_f = set_predict_f(model, dev_dataset)
    if argv.test_data:
        test_f = set_predict_f(model, te_dataset)

    ########
    # TEST #
    ########

    if argv.dev_data:
        print '\n\tDEV\n\t',
        predict(dev_f, dev_bb_x, dev_bb_y, n_dev_samples)

    if argv.test_data:
        print '\n\tTEST\n\t',
        predict(test_f, te_bb_x, te_bb_y, n_te_samples)
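predict() itself is not shown. Below is a minimal sketch in the spirit of the evaluation loop from Example #1, under the assumption that the compiled Theano function takes a batch index and returns per-sample correctness flags for that batch; bb_y is accepted only for symmetry with the call site.

# Hypothetical predict() helper: iterates over batch boundaries, calls the
# compiled function per batch and reports timing and accuracy.
import sys
import time

import numpy as np

def predict(f, bb_x, bb_y, n_samples):
    start = time.time()
    correct = 0.0
    for index in xrange(len(bb_x)):
        if index % 100 == 0 and index != 0:
            print index,
            sys.stdout.flush()
        # assumed interface: f(index) -> array of 0/1 correctness flags
        correct += np.sum(f(index))
    end = time.time()
    print '\n\tTime: %f seconds' % (end - start)
    print '\tAccuracy:%f  Total:%d  Correct:%d' % (correct / n_samples, n_samples, correct)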
Example #4
            # A block of n_samples[i] consecutive samples shares one label;
            # count it as correct when its accumulated vote matches that label.
            if accum_vote[i] == Validation_Label_List[current_ind]:
                running_corrects_vote += 1
            current_ind += n_samples[i]
        epoch_acc_vote = running_corrects_vote / len(n_samples)

        print('Epoch %d, Vote accuracy: %f' % (epoch, epoch_acc_vote))
    return epoch_acc_single, epoch_acc_vote, loss


# Define input data: (batch_size, seq_len, dims).
# LoadinRHS, load_data, LSTM, input_file, BATCH_SIZE, NumOfCategory and
# use_gpu are defined elsewhere in this script.
import time

print('Loading. Please wait... It may take 2-3 minutes')
since = time.time()
SampleRHS = LoadinRHS(input_file)

Train_loader, _ = load_data(SampleRHS['Train_RHS_Sample'],
                            SampleRHS['Train_RHS_Label_Sample'],
                            BATCH_SIZE, True)
Validation_loader, Validation_Label_List = load_data(
    SampleRHS['Validation_RHS_Sample'],
    SampleRHS['Validation_RHS_Label_Sample'], 1, False)
n_samples_train = SampleRHS['Train_NSamples']
n_samples_val = SampleRHS['Validation_NSamples']
LenTrain = len(SampleRHS['Train_RHS_Label_Sample'])
LenValidation = len(SampleRHS['Validation_RHS_Label_Sample'])
print('Number of training samples: ', LenTrain)
print('Number of validation samples: ', LenValidation)

# hidden_dim is a tunable hyperparameter; pick it to fit the task.
model = LSTM(in_dim=2, hidden_dim=100, n_layer=1, n_class=NumOfCategory)
# print(model)
if use_gpu:
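The script above leans on a load_data() helper that is not shown (the snippet is truncated at the use_gpu check). A minimal sketch, assuming the helper wraps the arrays in a PyTorch DataLoader and also returns the flat label list used for vote counting:

# Hypothetical load_data() for the script above: wraps (samples, labels)
# in a PyTorch DataLoader and returns the label list alongside it.
import torch
from torch.utils.data import DataLoader, TensorDataset

def load_data(samples, labels, batch_size, shuffle):
    dataset = TensorDataset(torch.as_tensor(samples, dtype=torch.float32),
                            torch.as_tensor(labels, dtype=torch.long))
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader, list(labels)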