Exemplo n.º 1
0
def main():
    """Run the end-to-end pipeline: load resources, train a ``CnnModel``, test it.

    Stages, each wrapped in a ``Timer`` start/stop pair:

    1. Load the vocabulary and the trimmed word2vec vectors from the paths
       in ``config.VOCAB_DATA`` / ``config.W2V_DATA``.
    2. Load the training data (``opt.train``) and, when given, the
       validation (``opt.val``) and test (``opt.test``) data.
    3. Build the model on top of the loaded embeddings.
    4. Train using ``opt.e`` epochs, ``opt.b`` batch size, ``opt.p``
       patience, and ``opt.pre`` as the optional ``cont`` argument.
    5. If test data was loaded, predict on it and print binary
       precision / recall / F1.

    NOTE(review): ``opt`` and ``config`` are not defined in this function;
    presumably module-level parsed CLI options and a settings module —
    confirm against the rest of the file.
    """
    timer = Timer()

    # --- Stage 1: vocabulary and word vectors -------------------------------
    timer.start('Load word2vec models...')
    vocab = load_vocab(config.VOCAB_DATA)
    word_vectors = get_trimmed_w2v_vectors(config.W2V_DATA)
    timer.stop()

    # --- Stage 2: datasets --------------------------------------------------
    timer.start('Load data...')
    train_set = process_data(opt.train, vocab)

    val_set = None
    if opt.val == '1vs9':
        # '1vs9' is a sentinel rather than a path: the validation set is
        # taken from the training set itself via ``one_vs_nine()``, which
        # also returns the replacement training set.
        val_set, train_set = train_set.one_vs_nine()
    elif opt.val is not None:
        val_set = process_data(opt.val, vocab)

    test_set = process_data(opt.test, vocab) if opt.test is not None else None
    timer.stop()

    # --- Stage 3: model construction ----------------------------------------
    timer.start('Build model...')
    model = CnnModel(embeddings=word_vectors)
    model.build()
    timer.stop()

    # --- Stage 4: training --------------------------------------------------
    timer.start('Train model...')
    model_name = opt.name

    # An empty ``opt.pre`` means "nothing to continue from".
    resume_from = opt.pre
    if resume_from == '':
        resume_from = None

    model.train(
        model_name,
        train=train_set,
        validation=val_set,
        epochs=opt.e,
        batch_size=opt.b,
        # A patience of 0 switches early stopping off.
        early_stopping=bool(opt.p != 0),
        patience=opt.p,
        cont=resume_from,
    )
    timer.stop()

    # --- Stage 5: optional evaluation ---------------------------------------
    if test_set is None:
        return

    timer.start('Test model...')
    predictions = model.predict(test_set, model_name)
    precision, recall, f_score, _ = precision_recall_fscore_support(
        test_set.labels, predictions, average='binary')
    print('Testing result:P=\t{}\tR={}\tF1={}'.format(precision, recall, f_score))
    timer.stop()
Exemplo n.º 2
0
from model import CnnModel

# Train the text CNN twice with identical data and settings; the two runs
# differ only in the ``full`` flag (True first, then False).
_VOCAB_PATH = '/Users/chengyiwu/GitHub/nlp/vocab.txt'
_SAVE_DIR = '/Users/chengyiwu/GitHub/nlp/sentiment/textcnn'
_TRAIN_PATH = '/Users/chengyiwu/GitHub/nlp2/text-classification-cnn-rnn-sentiment/data/cnews/cnews.train.txt'
_VAL_PATH = '/Users/chengyiwu/GitHub/nlp2/text-classification-cnn-rnn-sentiment/data/cnews/cnews.val.txt'

for _use_full in (True, False):
    CnnModel.train(
        vocab_dir=_VOCAB_PATH,
        # Built fresh on every iteration so each call gets its own list,
        # exactly as with two separately written calls.
        categories=['正面', '负面', '中立'],
        save_dir=_SAVE_DIR,
        train_dir=_TRAIN_PATH,
        val_dir=_VAL_PATH,
        config=None,
        full=_use_full,
        num_epochs=1)