Пример #1
0
def getPreds(df, out=None):
    """Label every article in ``df`` as "Real" or "Fake" using the saved model.

    The text is tokenized with the pickled tokenizer stored in
    ``./onion_tokenizer.pyc``, padded to ``DataProcessor().getMaxWords()``
    and scored by the Keras model in ``static/onion_connoisseur.h5``.
    A score below 0.75 is labelled "Real"; anything else is "Fake".

    Args:
        df: Table of articles. It is passed to ``processData(df, ['body'])``
            for the model input, and the first column of ``df.to_numpy()``
            is used as the raw text in the CSV report.
            NOTE(review): presumably a pandas DataFrame with a ``body``
            column - confirm against the caller.
        out: Treated purely as a flag. When truthy, one row per article
            (raw text, label, score) is written to ``predictions.csv`` in
            the current working directory.

    Returns:
        A two-element list ``[preds, predVals]`` holding the labels and the
        raw output of ``model.predict``.
    """
    # Original author's note: the GPU won't work without this memory-growth
    # setup. Only the first detected GPU is configured.
    physical_devices = tf.config.list_physical_devices('GPU')
    if physical_devices:
        tf.config.experimental.set_memory_growth(physical_devices[0],
                                                 enable=True)
    dp = DataProcessor()

    # First column of the processed table feeds the model; first column of
    # the untouched table is kept for the report.
    test_articles = [row[0] for row in processData(df, ['body']).to_numpy()]
    test_articles_raw = [row[0] for row in df.to_numpy()]

    # SECURITY: pickle.load can execute arbitrary code. This file must only
    # ever be the tokenizer produced by the project's own training script.
    with open('./onion_tokenizer.pyc', 'rb') as pickleHand:
        tokenizer = pickle.load(pickleHand)
    assert isinstance(tokenizer, Tokenizer)

    max_len = dp.getMaxWords()
    seqs = pad_sequences(tokenizer.texts_to_sequences(test_articles), max_len)
    model = keras.models.load_model('static/onion_connoisseur.h5')
    assert isinstance(model, keras.models.Model)
    print(test_articles)
    predVals = model.predict(seqs)
    preds = ["Real" if score < 0.75 else "Fake" for score in predVals]
    print(preds)
    if out:
        # newline='' is required by the csv module; without it every row is
        # followed by a blank line on platforms that translate '\n'.
        with open('predictions.csv', 'w', encoding='utf-8',
                  newline='') as outHand:
            # A separate name keeps the caller's `out` flag from being
            # overwritten by the writer object.
            writer = csv.writer(outHand)
            writer.writerows(zip(test_articles_raw, preds, predVals))

    return [preds, predVals]
Пример #2
0
# Turn on memory growth for the first detected GPU (if there is one) before
# any other TensorFlow work happens.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
# The bare string below is a leftover note from the original author; it is
# evaluated and discarded.
'''what version is this'''

# Upper bound on the vocabulary size handed to the tokenizer.
num_words = 200000

dp = DataProcessor()
x, y = dp.getTrainingData()

# Fit a tokenizer on the training texts so each word gets an integer id.
tokenizer = Tokenizer(
    num_words=num_words,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n\'`’‘\\',
)
tokenizer.fit_on_texts(x)

# Encode the texts as id sequences and pad them to the length reported by
# the data processor.
max_len = dp.getMaxWords()
trainX = pad_sequences(tokenizer.texts_to_sequences(x), max_len)
indexLen = len(tokenizer.word_index)

# Save the fitted tokenizer to disk as a pickle.
with open('onion_tokenizer.pyc', 'wb') as pickleHand:
    pickle.dump(tokenizer, pickleHand)

# Network: embedding -> two stacked LSTMs -> dense ReLU layer with dropout
# -> single sigmoid output unit.
model = Sequential()
model.add(Embedding(indexLen + 1, 256))
model.add(LSTM(512, dropout=0.2, return_sequences=True))
model.add(LSTM(256, dropout=0.2))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))