# Example no. 1
# 0
# Shuffle the rows, renumber the index, then keep the first
# int(len(test_data) * args.size) rows.
test_data = test_data.sample(frac=1.0).reset_index(drop=True)
test_data = test_data.head(int(len(test_data) * args.size))

print(test_data.columns)

# Raise explicitly instead of using `assert`: assert statements are removed
# under `python -O`, which would silently skip this check. The exception type
# and message are kept the same as before.
if len(test_data.columns) <= 1:
    raise AssertionError("Labels of Test set not available.")

# Split the frame into parallel lists. Note that `test_data` is rebound from
# a DataFrame to the plain list of STORY values here.
test_label = list(test_data.SECTION)
test_data = list(test_data.STORY)

# Class ids scored by the model; shared by the sentence-pair inputs below and
# by the final report so the two cannot drift apart.
label_ids = [0, 1, 2, 3]

predictions = []

if args.model == 'sentence_pair':
    # The label-side inputs do not depend on the story, so build them once
    # here instead of once per story inside the loop.
    label_inputs = [
        np.array([reader.embedding[reader.label_map[label_id]]])
        for label_id in label_ids
    ]
else:
    label_inputs = None

for story in tqdm(test_data):
    # Lower-case, tokenize and embed the story; wrapping in a list gives the
    # model a batch containing this single story.
    words = nltk.word_tokenize(story.lower())
    embedding = np.array([reader.sent2vec(words)])

    if label_inputs is None:
        # Single-input model: one call returns the scores for this story.
        prediction = model.predict(embedding)[0]
    else:
        # Sentence-pair model: score the story against every label input.
        # NOTE(review): np.argmax below works on the flattened list of
        # outputs, so its result is a label id only if each output holds a
        # single score -- confirm against the model's output shape.
        prediction = [model.predict([embedding, x2])[0] for x2 in label_inputs]

    predictions.append(np.argmax(prediction))

print(classification_report(test_label, predictions, labels=label_ids))