def test_train(self):
    """Training a vector holding two bogus features on empty data must not raise.

    Any ordinary exception raised by ``train`` is reported as a test failure
    with an explicit message.
    """
    fv = FeatureVector()
    fv.append(TestFeatureVector.BogusFeature())
    fv.append(TestFeatureVector.BogusFeature())
    try:
        fv.train([], [])
    except Exception:
        # Catch ``Exception`` rather than everything: a bare ``except`` would
        # also intercept KeyboardInterrupt/SystemExit and pytest's own
        # control-flow exceptions, mis-reporting them as a training failure.
        pytest.fail('Training of features failed')
def test_score(self):
    """Scoring two tokenised sentences with three bogus features.

    Asserts that the result has three entries (matching the three appended
    features, not the two input sentences) and that every entry is ``0.0``.
    """
    raw_sentences = (
        'I was just wondering why you were here?',
        'The long and winding road',
    )

    vector = FeatureVector()
    for _ in range(3):
        vector.append(TestFeatureVector.BogusFeature())

    # Tokenise afresh for each call so the two calls never share input lists.
    first_scores = vector.score([sentence.split() for sentence in raw_sentences])
    assert len(first_scores) == 3

    second_scores = vector.score([sentence.split() for sentence in raw_sentences])
    assert second_scores == [0.0, 0.0, 0.0]
def test_train_improper_arguments(self):
    """SVM training must raise TypeError when given truncated labels.

    The labels passed are ``TestSVM.sample_labels[:-1, :]``, i.e. the sample
    labels with the last row sliced off (presumably leaving fewer labels than
    reviews -- confirm against the fixture definitions).
    """
    features = FeatureVector()
    for _ in range(3):
        features.append(TestSVM.BogusFeature())

    with pytest.raises(TypeError):
        classifier = SVM(feature_vector=features)
        classifier.train(
            reviews=TestSVM.sample_reviews,
            labels=TestSVM.sample_labels[:-1, :],
        )
def test_score(self):
    """Check the length and values of the scores from three bogus features.

    Two tokenised sentences are scored twice: the first result must contain
    three entries, the second must equal ``[0.0, 0.0, 0.0]``.
    """
    def tokenized_reviews():
        # Build brand-new token lists on every call so each ``score``
        # invocation receives its own input objects.
        return [
            'I was just wondering why you were here?'.split(),
            'The long and winding road'.split(),
        ]

    vector = FeatureVector()
    vector.append(TestFeatureVector.BogusFeature())
    vector.append(TestFeatureVector.BogusFeature())
    vector.append(TestFeatureVector.BogusFeature())

    assert len(vector.score(tokenized_reviews())) == 3
    assert vector.score(tokenized_reviews()) == [0.0, 0.0, 0.0]
def test_train(self):
    """Training an SVM on the sample reviews and labels must not raise.

    The SVM is built over a feature vector holding three bogus features. Any
    ordinary exception raised while constructing or training the SVM is
    reported as a test failure with an explicit message.
    """
    fv = FeatureVector()
    fv.append(TestSVM.BogusFeature())
    fv.append(TestSVM.BogusFeature())
    fv.append(TestSVM.BogusFeature())
    try:
        SVM(feature_vector=fv).train(reviews=TestSVM.sample_reviews,
                                     labels=TestSVM.sample_labels)
    except Exception:
        # Catch ``Exception`` rather than everything: a bare ``except`` would
        # also intercept KeyboardInterrupt/SystemExit and pytest's own
        # control-flow exceptions, mis-reporting them as a training failure.
        pytest.fail('SVM training failed')
def test_svm_predict(self):
    """A trained SVM must predict either 0 or 1 for single-token inputs.

    The SVM is trained on the sample reviews and labels using three bogus
    features, then queried with ``['HI']`` and ``['earth']``.
    """
    features = FeatureVector()
    for _ in range(3):
        features.append(TestSVM.BogusFeature())

    classifier = SVM(feature_vector=features)
    classifier.train(reviews=TestSVM.sample_reviews,
                     labels=TestSVM.sample_labels)

    for word in ('HI', 'earth'):
        # A fresh one-token list is built for every predict call; the second
        # call only happens when the first prediction is not 0.
        assert classifier.predict([word]) == 0 or classifier.predict([word]) == 1
def main():
    """Train the features and the SVM on reviews, then report test accuracy.

    Calls ``retrieve_reviews(5000)``, uses the first 4000 results for training
    and every remaining result for testing. Each review is unpacked into a
    ``(text, label)`` pair. After training, prints the percentage of test
    reviews whose predicted score equals the actual score, followed by a
    histogram of absolute prediction errors, one ``error : count`` line each.
    """
    train_size = 4000
    reviews = retrieve_reviews(5000)

    # Split reviews into a training and testing portion. Slices are
    # half-open, so the test set must start exactly where the training set
    # ends; starting any later silently drops reviews from both sets.
    train_reviews = reviews[:train_size]
    test_reviews = reviews[train_size:]

    # Separate text and label to use during the training process
    text, labels = zip(*train_reviews)

    # Add features into feature vector
    vector = FeatureVector()
    vector.append(average_word_length.AverageWordLength())
    vector.append(sentiment_analysis.SentimentAnalysis())
    vector.append(rarity_analysis.Rarity())
    vector.append(tfidf.TfIdf())
    vector.append(readability.Readability())
    vector.append(spelling.Spelling())

    # Train all of the features individually, then the model on top of them
    vector.train(text, labels)
    model = SVM(vector)
    model.train(text, labels)

    # Separate text and label to use during the testing process
    text, labels = zip(*test_reviews)

    matches = 0
    error_histogram = {}
    for review_text, actual_score in zip(text, labels):
        predicted_score = model.predict(review_text)

        # count how many predicted scores match with the actual ones
        if predicted_score == actual_score:
            matches += 1

        # get a histogram of how far predicted scores differ from the actual
        error = abs(predicted_score - actual_score)
        error_histogram[error] = error_histogram.get(error, 0) + 1

    print('Matches = {0:.2f}%'.format((matches / len(labels)) * 100))
    # Distinct loop names: the histogram dict is not rebound while iterating.
    for error, count in error_histogram.items():
        print("{} : {}".format(error, count))
def test_fail_on_append_non_feature(self):
    """Appending the integer ``0`` to a FeatureVector must raise TypeError."""
    not_a_feature = 0
    with pytest.raises(TypeError):
        vector = FeatureVector()
        vector.append(not_a_feature)
def test_append(self):
    """Smoke test: appending a bogus feature to a new vector must not raise."""
    vector = FeatureVector()
    vector.append(TestFeatureVector.BogusFeature())
def main():
    """Train the features and the SVM on reviews, then report test accuracy.

    Calls ``retrieve_reviews(5000)``, uses the first 4500 results for training
    and every remaining result for testing. Each review is unpacked into a
    ``(text, label)`` pair. After training, prints the percentage of test
    reviews whose predicted score equals the actual score, followed by a
    histogram of absolute prediction errors, one ``error : count`` line each.
    """
    train_size = 4500
    reviews = retrieve_reviews(5000)

    # Split reviews into a training and testing portion. Slices are
    # half-open, so the test set must start exactly where the training set
    # ends; starting one later silently drops a review from both sets.
    train_reviews = reviews[:train_size]
    test_reviews = reviews[train_size:]

    # Separate text and label to use during the training process
    text, labels = zip(*train_reviews)

    # Add features into feature vector
    vector = FeatureVector()
    vector.append(sentiment.SentimentAnalysis())
    vector.append(tfidf.TfIdf())
    vector.append(readability.Readability())
    vector.append(food_sophistication.FoodSophistication())
    vector.append(average_word_length.AverageWordLength())
    vector.append(rarity.Rarity())
    vector.append(spelling.Spelling())
    vector.append(sentence_topic.SentenceTopic())

    # Train all of the features individually, then the model on top of them
    vector.train(text, labels)
    model = SVM(vector)
    model.train(text, labels)

    # Separate text and label to use during the testing process
    text, labels = zip(*test_reviews)

    matches = 0
    error_histogram = {}
    for review_text, actual_score in zip(text, labels):
        predicted_score = model.predict(review_text)

        # count how many predicted scores match with the actual ones
        if predicted_score == actual_score:
            matches += 1

        # get a histogram of how far predicted scores differ from the actual
        error = abs(predicted_score - actual_score)
        error_histogram[error] = error_histogram.get(error, 0) + 1

    print('Matches = {:.2%}'.format(matches / len(labels)))
    # Distinct loop names: the histogram dict is not rebound while iterating.
    for error, count in error_histogram.items():
        print("{} : {}".format(error, count))