示例#1
0

def to_seq(*args):
    """Cut every input into consecutive rows of ``SEQ_LEN`` items.

    Each positional argument is trimmed so that its length is a multiple of
    ``SEQ_LEN`` (the trailing remainder is dropped) and is then reshaped to
    ``[-1, SEQ_LEN]``.

    Returns:
        A list holding one reshaped array per argument, in call order.
    """
    return [
        np.reshape(seq[:len(seq) - len(seq) % SEQ_LEN], [-1, SEQ_LEN])
        for seq in args
    ]


if __name__ == '__main__':
    # pos.load_data() returns eight values, unpacked here as the train/test
    # arrays, the vocabulary size, the class count and two lookup dicts.
    (x_train, y_train, x_test, y_test,
     vocab_size, n_class, word2idx, tag2idx) = pos.load_data()

    # Reshape all four flat arrays into rows of SEQ_LEN items.
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    fit_options = dict(
        val_data=(X_test, Y_test),
        keep_prob=0.8,
        n_epoch=5,
        batch_size=BATCH_SIZE,
    )
    clf.fit(X_train, Y_train, **fit_options)

    # Accuracy is the mean of the element-wise match against the flattened
    # test labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    hits = y_pred == Y_test.ravel()
    final_acc = hits.mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Invert tag -> index so the inferred label ids can be printed as tags.
    idx2tag = dict((idx, tag) for tag, idx in tag2idx.items())
    word_ids = [word2idx[w] for w in sample]
    labels = clf.infer(word_ids)
    print(' '.join(sample))
    print(' '.join(idx2tag[idx] for idx in labels))
示例#2
0
# Demo sentence that is segmented once training has finished.
sample = '我来到大学读书,希望学到知识'
# Major interpreter version (2 or 3). Taken from sys.version_info instead of
# the first character of sys.version, which is a human-readable display
# string and not meant to be parsed.
py = sys.version_info[0]


def to_seq(*args):
    """Reshape each argument into rows of ``SEQ_LEN`` elements.

    Only the longest prefix whose length is a multiple of ``SEQ_LEN`` is
    kept; any leftover elements at the end are discarded before reshaping.

    Returns:
        A list of arrays shaped ``[-1, SEQ_LEN]``, one per argument.
    """
    reshaped = []
    for arr in args:
        # Number of leading elements that fill complete rows.
        usable = (len(arr) // SEQ_LEN) * SEQ_LEN
        reshaped.append(np.reshape(arr[:usable], [-1, SEQ_LEN]))
    return reshaped


if __name__ == '__main__':
    # chseg.load_data() returns seven values, unpacked here as the train/test
    # arrays, the vocabulary size and the two character lookup tables.
    (x_train, y_train, x_test, y_test,
     vocab_size, char2idx, idx2char) = chseg.load_data()
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, val_data=(X_test, Y_test), n_epoch=N_EPOCH)

    # Decode the sample exactly once: on Python 2 it has to be decoded from
    # UTF-8 before it can be split into characters.
    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])

    # Append a space after every character labelled 2 or 3 (presumably the
    # word-end / single-character tags -- confirm against chseg).
    # Assumes infer() returns one label per input character.
    res = ''.join(
        ch + ' ' if label in (2, 3) else ch
        for ch, label in zip(chars, labels))
    print(res)
示例#3
0
from birnn_crf_clf import BiRNN_CRF


# Pre-tokenised demo sentence that is tagged after training.
sample = ['I', 'love', 'you']
# Mini-batch size passed to fit() / predict().
BATCH_SIZE = 32
# Row width used when the flat corpus arrays are reshaped.
SEQ_LEN = 20


def to_seq(*args):
    """Turn each flat input into a 2-D array with ``SEQ_LEN`` columns.

    Elements past the last complete row (``len(x) % SEQ_LEN`` of them) are
    dropped from each input before it is reshaped.

    Returns:
        A list with one ``[-1, SEQ_LEN]`` array per positional argument.
    """
    def _as_rows(flat):
        # Keep only the prefix that divides evenly into SEQ_LEN-wide rows.
        keep = len(flat) - len(flat) % SEQ_LEN
        return np.reshape(flat[:keep], [-1, SEQ_LEN])

    return [_as_rows(flat) for flat in args]


if __name__ == '__main__':
    # pos.load_data() yields eight values: the data splits, the vocabulary
    # size, the class count and the word / tag lookup dicts.
    loaded = pos.load_data()
    x_train, y_train, x_test, y_test, vocab_size, n_class, word2idx, tag2idx = loaded

    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, n_class)
    clf.fit(
        X_train,
        Y_train,
        val_data=(X_test, Y_test),
        keep_prob=0.8,
        n_epoch=5,
        batch_size=BATCH_SIZE,
    )

    # Compare predictions with the flattened ground-truth labels.
    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    y_true = Y_test.ravel()
    final_acc = (y_pred == y_true).mean()
    print("final testing accuracy: %.4f" % final_acc)

    # Reverse the tag -> index mapping so label ids can be shown as tags.
    idx2tag = dict(zip(tag2idx.values(), tag2idx.keys()))
    sample_ids = [word2idx[w] for w in sample]
    labels = clf.infer(sample_ids)
    print(' '.join(sample))
    tags = [idx2tag[idx] for idx in labels]
    print(' '.join(tags))
示例#4
0
# Number of training epochs passed to fit().
N_EPOCH = 1
# Demo sentence that is segmented after training.
sample = '我来到大学读书,希望学到知识'
# Major interpreter version (2 or 3). sys.version is a display string, so the
# structured sys.version_info tuple is used instead of parsing its first
# character.
py = sys.version_info[0]


def to_seq(*args):
    """Reshape every argument to ``[-1, SEQ_LEN]``.

    Before reshaping, each input is cut down to the largest length that is a
    multiple of ``SEQ_LEN``; the remaining tail elements are ignored.

    Returns:
        A list of the reshaped arrays, in the order the inputs were given.
    """
    def _rows():
        for flat in args:
            remainder = len(flat) % SEQ_LEN
            trimmed = flat[:len(flat) - remainder]
            yield np.reshape(trimmed, [-1, SEQ_LEN])

    return list(_rows())


if __name__ == '__main__':
    # chseg.load_data() returns seven values: the data splits, the vocabulary
    # size and the character <-> index lookup tables.
    (x_train, y_train, x_test, y_test,
     vocab_size, char2idx, idx2char) = chseg.load_data()
    X_train, X_test, Y_train, Y_test = to_seq(x_train, x_test, y_train, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, val_data=(X_test, Y_test), n_epoch=N_EPOCH)

    # Split the sample into characters once (decoding from UTF-8 first on
    # Python 2) and reuse that list for both inference and output.
    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])

    # A space follows each character whose label is 2 or 3 (presumably the
    # end-of-word and single-character tags -- verify against chseg).
    # Assumes infer() yields exactly one label per character.
    pieces = []
    for ch, label in zip(chars, labels):
        pieces.append(ch + ' ' if label in (2, 3) else ch)
    res = ''.join(pieces)
    print(res)
    
示例#5
0
        x = x[:(len(x) - len(x) % SEQ_LEN)]
        data.append(np.reshape(x, [-1, SEQ_LEN]))
    return data


def iter_seq(x, text_iter_step=5, seq_len=None):
    """Slice ``x`` into overlapping fixed-length windows.

    Args:
        x: Sequence supporting ``len()`` and slicing.
        text_iter_step: Distance between the start positions of two
            consecutive windows.
        seq_len: Window length. Defaults to the module-level ``SEQ_LEN``.

    Returns:
        ``np.array`` built from the list of windows, one window per row.
        The result is empty when ``x`` is shorter than one window.
    """
    if seq_len is None:
        seq_len = SEQ_LEN
    # The last full window starts at len(x) - seq_len; the +1 makes the range
    # include that start position instead of silently dropping it.
    stop = len(x) - seq_len + 1
    return np.array(
        [x[i:i + seq_len] for i in range(0, stop, text_iter_step)])


if __name__ == '__main__':
    # chseg.load_data() returns seven values: the data splits, the vocabulary
    # size and the character <-> index lookup tables.
    (x_train, y_train, x_test, y_test,
     vocab_size, char2idx, idx2char) = chseg.load_data()
    X_train, Y_train = to_train_seq(x_train, y_train)
    X_test, Y_test = to_test_seq(x_test, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(SEQ_LEN, vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, n_epoch=N_EPOCH, batch_size=BATCH_SIZE)

    # Decode / split the sample a single time; the resulting characters are
    # reused when the segmented string is assembled below.
    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])

    # Characters labelled 2 or 3 are followed by a space (presumably these
    # are the word-end / single-character tags -- confirm against chseg).
    # Assumes infer() returns one label per input character.
    res = ''.join(
        ch + ' ' if label in (2, 3) else ch
        for ch, label in zip(chars, labels))
    print(res)
示例#6
0

def iter_seq(x, text_iter_step=10, seq_len=None):
    """Collect sliding windows of ``x`` into an array.

    Args:
        x: Sequence supporting ``len()`` and slicing.
        text_iter_step: Stride between the starts of consecutive windows.
        seq_len: Length of each window. Defaults to the module-level
            ``SEQ_LEN``.

    Returns:
        ``np.array`` built from the list of windows, one window per row.
        The result is empty when ``x`` is shorter than one window.
    """
    if seq_len is None:
        seq_len = SEQ_LEN
    # Highest valid start index is len(x) - seq_len; range() excludes its
    # stop value, so add 1 to keep the final full window.
    last_start = len(x) - seq_len
    starts = range(0, last_start + 1, text_iter_step)
    return np.array([x[i:i + seq_len] for i in starts])


if __name__ == '__main__':
    x_train, y_train, x_test, y_test, vocab_size, char2idx, idx2char = chseg.load_data(
    )
    X_train, Y_train = to_train_seq(x_train, y_train)
    X_test, Y_test = to_test_seq(x_test, y_test)
    print('Vocab size: %d' % vocab_size)

    clf = BiRNN_CRF(vocab_size, N_CLASS)
    clf.fit(X_train, Y_train, n_epoch=N_EPOCH, batch_size=BATCH_SIZE)

    y_pred = clf.predict(X_test, batch_size=BATCH_SIZE)
    print(
        classification_report(Y_test.ravel(),
                              y_pred.ravel(),
                              target_names=['B', 'M', 'E', 'S']))

    chars = list(sample) if py == 3 else list(sample.decode('utf-8'))
    labels = clf.infer([char2idx[c] for c in chars])
    res = ''
    for i, l in enumerate(labels):
        c = sample[i] if py == 3 else sample.decode('utf-8')[i]
        if l == 2 or l == 3:
            c += ' '