def test_naive_model(model: MultinomialNB, vocabulary: CountVectorizer):
    """Score a naive Bayes model on the sentiment140 test split and time it.

    Args:
        model: Classifier whose ``score`` method is called on the vectorized
            test texts and their labels.
        vocabulary: Vectorizer whose ``transform`` method turns the raw test
            texts into the features handed to ``model.score``.

    Returns:
        A tuple ``(acc, elapsed)``: the value returned by ``model.score`` and
        the number of seconds spent vectorizing and scoring. Loading the
        dataset is not included in the timing.
    """
    # Only the test split is needed; the train and validation splits are
    # discarded.
    _, _, (x_test, y_test) = sentiment_140.load_data()

    # perf_counter() is monotonic and high resolution, so the measured
    # duration cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    starttime = time.perf_counter()
    x_test = vocabulary.transform(x_test)

    acc = model.score(x_test, y_test)
    endtime = time.perf_counter()

    return acc, endtime - starttime
def test_keras_model(model: Sequential, tokenizer, arguments):
    """Test model on sentiment140 dataset. Both accuracy and throughput.

    Args:
        model: Model whose ``evaluate`` method is called on the padded test
            sequences and their labels.
        tokenizer: Object whose ``texts_to_sequences`` method converts the raw
            test texts into integer sequences.
        arguments: Mapping read via ``arguments["maxlen"]``. A falsy value
            means no explicit padding length was requested.

    Returns:
        A tuple ``(score, acc, elapsed)``: the two values unpacked from
        ``model.evaluate`` and the number of seconds spent tokenizing,
        padding and evaluating. Loading the dataset is not included in the
        timing.
    """
    # A falsy "maxlen" means "not set". pad_sequences documents None as the
    # value that pads to the longest sequence; a literal 0 would instead
    # request zero-width rows, which cannot hold any non-empty sequence.
    maxlen = arguments["maxlen"] or None

    # Only the test split is needed; the train and validation splits are
    # discarded.
    _, _, (x_test, y_test) = sentiment_140.load_data()

    # perf_counter() is monotonic and high resolution, so the measured
    # duration cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    starttime = time.perf_counter()
    x_test = tokenizer.texts_to_sequences(x_test)

    x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

    score, acc = model.evaluate(x_test, y_test)
    endtime = time.perf_counter()

    return score, acc, endtime - starttime
# Script fragment: declares hyperparameters, loads the sentiment140 splits,
# fits a tokenizer and converts every split to integer sequences.
# NOTE(review): the code that consumes the hyperparameters below and the
# padding step announced by the final print are not part of this excerpt.

# Convolution hyperparameters
kernel_size = 5
filters = 64
pool_size = 4

# RNN hyperparameters
rnn_output_size = 70

# Training hyperparameters
batch_size = 512
epochs = 5

print('Loading data...')
# load_data() is unpacked into three (texts, labels) pairs:
# train, validation and test.
(x_train, y_train), (x_val, y_val), (x_test,
                                     y_test) = sentiment_140.load_data()

print('Fitting tokenizer...')
tokenizer = Tokenizer()
# The tokenizer is fitted on the texts of all three splits together, so
# validation and test texts also contribute to what it learns.
tokenizer.fit_on_texts(np.concatenate((x_train, x_val, x_test)))

print('Convert text to sequences')
# Each split is rebound in place to its sequence form; the raw texts are not
# kept under these names afterwards.
x_train = tokenizer.texts_to_sequences(x_train)
x_val = tokenizer.texts_to_sequences(x_val)
x_test = tokenizer.texts_to_sequences(x_test)

# Report how many sequences each split holds.
print(len(x_train), 'train sequences')
print(len(x_val), 'validation sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
# Example #4
# 0
# Script fragment: declares hyperparameters, loads the sentiment140 splits,
# fits a tokenizer, converts every split to integer sequences and pads them.
# NOTE(review): the code that consumes the hyperparameters below is not part
# of this excerpt.

# Convolution hyperparameters
kernel_size = 5
filters = 96
pool_size = 4

# RNN hyperparameters
rnn_output_size = 70

# Training hyperparameters
batch_size = 512
epochs = 5


print('Loading data...')
# load_data() is unpacked into three (texts, labels) pairs:
# train, validation and test.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = sentiment_140.load_data()

print('Fitting tokenizer...')
tokenizer = Tokenizer()
# The tokenizer is fitted on the texts of all three splits together, so
# validation and test texts also contribute to what it learns.
tokenizer.fit_on_texts(np.concatenate((x_train, x_val, x_test)))

print('Convert text to sequences')
# Each split is rebound in place to its sequence form; the raw texts are not
# kept under these names afterwards.
x_train = tokenizer.texts_to_sequences(x_train)
x_val = tokenizer.texts_to_sequences(x_val)
x_test = tokenizer.texts_to_sequences(x_test)

print('Pad sequences (samples x time)')

# NOTE(review): `maxlen` is not defined anywhere in this excerpt; presumably
# it is set earlier in the original script. Confirm before running.
# All three splits are padded with the same maxlen.
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = sequence.pad_sequences(x_val, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)