def test_simple_duplicates(self):
    """A phrase that also appears in training must get its training label
    back when duplicates=True, even if another label dominates the data."""
    repeated = Datapoint(phraseid="a", sentenceid="b", phrase="b a", sentiment="1")
    # Flood training with "2" sentiments so a frequency baseline would say "2".
    training = [
        Datapoint(phraseid=str(i), sentenceid=str(i), phrase="a b", sentiment="2")
        for i in range(10)
    ]
    training.append(repeated)
    clf = PhraseSentimentPredictor(duplicates=True)
    clf.fit(training)
    # The duplicated phrase must be answered with its own stored label.
    self.assertEqual(clf.predict([Datapoint(*repeated)])[0], "1")
def test_simple_predict(self):
    """Sanity checks on predict(): one prediction per test datapoint, and no
    predicted label that was never seen during training."""
    train, test = corpus.make_train_test_split("inhaler")
    clf = PhraseSentimentPredictor()
    clf.fit(train)
    labels = clf.predict(test)
    # Same amount of predictions than input values
    self.assertEqual(len(labels), len(test))
    # Predicted labels where seen during training
    seen = set(point.sentiment for point in train)
    self.assertEqual(set(labels) - seen, set())
def test_simple_duplicates(self):
    """With duplicates=True, a test phrase identical to a training phrase is
    labeled with that training phrase's sentiment, not the majority label."""
    dupe = Datapoint(phraseid="a", sentenceid="b", phrase="b a", sentiment="1")
    # Majority of the training examples carry sentiment "2".
    train = [
        Datapoint(phraseid=str(i), sentenceid=str(i), phrase="a b", sentiment="2")
        for i in range(10)
    ] + [dupe]
    predictor = PhraseSentimentPredictor(duplicates=True)
    predictor.fit(train)
    predicted = predictor.predict([Datapoint(*dupe)])[0]
    self.assertEqual(predicted, "1")
value = float(value) except ValueError: pass new[key] = value return new if __name__ == "__main__": import argparse import json import csv import sys from samr.corpus import iter_corpus, iter_test_corpus from samr.predictor import PhraseSentimentPredictor parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("filename") config = parser.parse_args() config = json.load(open(config.filename)) predictor = PhraseSentimentPredictor(**config) predictor.fit(list(iter_corpus())) test = list(iter_test_corpus()) prediction = predictor.predict(test) writer = csv.writer(sys.stdout) writer.writerow(("PhraseId", "Sentiment")) for datapoint, sentiment in zip(test, prediction): writer.writerow((datapoint.phraseid, sentiment))
# Fit on the full training corpus, then label the phrases listed in an
# externally prepared CSV, echoing each input row with the predicted
# sentiment appended as a final column.
predictor.fit(list(iter_corpus()))

# Close the data file deterministically (the original open() leaked the handle).
with open("../data/outfinal2.csv") as data_file:
    header_csv = data_file.readline()
    data = data_file.read().splitlines()

# NOTE(review): Datapoint is built from only the first CSV field plus None —
# presumably (phrase, sentiment); confirm against the Datapoint definition.
test = [Datapoint(d.split(",")[0], None) for d in data]
prediction = predictor.predict(test)

# Re-emit the header (minus its trailing newline) with the new column name.
print(header_csv[:-1] + ",Sentiment")
for i, sentiment in enumerate(prediction):
    data[i] += "," + str(sentiment)
    print(data[i])