Exemplo n.º 1
0
 def test_make_train_test_split_seed_works(self):
     """Check that a seed reproduces its split and that seeds differ."""
     # Seed "a" is used twice, with seed "b" in between.
     train_a, test_a = corpus.make_train_test_split("a")
     train_b, test_b = corpus.make_train_test_split("b")
     train_a_again, test_a_again = corpus.make_train_test_split("a")

     # The same seed must reproduce both halves.
     for first, repeat in ((train_a, train_a_again), (test_a, test_a_again)):
         self.assertEqual(first, repeat)

     # A different seed must change both halves.
     for ours, theirs in ((train_a, train_b), (test_a, test_b)):
         self.assertNotEqual(ours, theirs)
Exemplo n.º 2
0
 def test_make_train_test_split_seed_works(self):
     """Check that a seed reproduces its split and that seeds differ."""
     # Seed "a" is used twice, with seed "b" in between.
     train_a, test_a = corpus.make_train_test_split("a")
     train_b, test_b = corpus.make_train_test_split("b")
     train_a_again, test_a_again = corpus.make_train_test_split("a")

     # The same seed must reproduce both halves.
     for first, repeat in ((train_a, train_a_again), (test_a, test_a_again)):
         self.assertEqual(first, repeat)

     # A different seed must change both halves.
     for ours, theirs in ((train_a, train_b), (test_a, test_b)):
         self.assertNotEqual(ours, theirs)
 def setUp(self):
     """Prepare the seeded split, the extra test corpus and one vocabulary."""
     self.size = 50
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
     self.realTest = corpus.iter_test_corpus()
     # The three collections are joined with ``+``, so they are presumably
     # all lists -- confirm against the ``corpus`` module.
     everything = self.train + self.test + self.realTest
     phrases = getPhrases(everything)
     self.corpus = buildVocabulary(phrases)
Exemplo n.º 4
0
 def test_make_train_test_split_no_shared_sentences(self):
     """
     Verify that no sentence id appears in both halves of the split.
     """
     train, test = corpus.make_train_test_split("semis")
     ids_in_train = {item.sentenceid for item in train}
     ids_in_test = {item.sentenceid for item in test}
     overlap = ids_in_train.intersection(ids_in_test)
     self.assertEqual(overlap, set())
 def setUp(self):
     """Create the seeded split, capped vocabularies and label collections."""
     self.samples = 50000
     self.size = 150
     self.train, self.test = corpus.make_train_test_split("mySeed")

     # Vocabularies are built from at most ``self.samples`` leading items.
     limit = self.samples
     train_phrases = getPhrases(self.train[:limit])
     self.xTrain = buildVocabulary(train_phrases)
     test_phrases = getPhrases(self.test[:limit])
     self.xTest = buildVocabulary(test_phrases)

     # NOTE(review): labels are taken from the *uncapped* splits, so they
     # may cover more items than the capped inputs above -- confirm intent.
     self.labelsTrain = getLabels(self.train)
     self.labelsTest = getLabels(self.test)
Exemplo n.º 6
0
 def setUp(self):
     """Prepare the seeded split, the extra test corpus and one vocabulary."""
     self.size = 50
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
     self.realTest = corpus.iter_test_corpus()
     # The three collections are joined with ``+``, so they are presumably
     # all lists -- confirm against the ``corpus`` module.
     everything = self.train + self.test + self.realTest
     phrases = getPhrases(everything)
     self.corpus = buildVocabulary(phrases)
 def setUp(self):
     """Create the seeded split, capped vocabularies and label collections."""
     self.samples = 50000
     self.size = 150
     self.train, self.test = corpus.make_train_test_split("mySeed")

     # Vocabularies are built from at most ``self.samples`` leading items.
     limit = self.samples
     train_phrases = getPhrases(self.train[:limit])
     self.xTrain = buildVocabulary(train_phrases)
     test_phrases = getPhrases(self.test[:limit])
     self.xTest = buildVocabulary(test_phrases)

     # NOTE(review): labels are taken from the *uncapped* splits, so they
     # may cover more items than the capped inputs above -- confirm intent.
     self.labelsTrain = getLabels(self.train)
     self.labelsTest = getLabels(self.test)
Exemplo n.º 8
0
 def test_make_train_test_split_no_shared_sentences(self):
     """
     Verify that no sentence id appears in both halves of the split.
     """
     train, test = corpus.make_train_test_split("semis")
     ids_in_train = {item.sentenceid for item in train}
     ids_in_test = {item.sentenceid for item in test}
     overlap = ids_in_train.intersection(ids_in_test)
     self.assertEqual(overlap, set())
Exemplo n.º 9
0
def cross_validation(factory, seed, K=10, callback=None):
    """Return the mean score of ``K`` independently seeded train/test rounds.

    Args:
        factory: Zero-argument callable returning a fresh predictor that
            offers ``fit(train)`` and ``score(test)``.
        seed: Base seed; it is stringified and the round index is appended
            to derive a distinct split seed for every round.
        K: Number of rounds to run.
        callback: Optional callable invoked with each round's score.

    Returns:
        The sum of the per-round scores divided by the number of rounds.

    Raises:
        ZeroDivisionError: If no round is run (for example ``K=0``).
    """
    seed_prefix = str(seed)
    round_scores = []
    for round_index in range(K):
        round_seed = seed_prefix + str(round_index)
        train_part, test_part = make_train_test_split(round_seed)
        model = factory()
        model.fit(train_part)
        round_score = model.score(test_part)
        if callback:
            callback(round_score)
        round_scores.append(round_score)
    total = sum(round_scores)
    return total / len(round_scores)
    def test_simple_predict(self):
        """Fit on the train half and sanity-check predictions on the test half."""
        train, test = corpus.make_train_test_split("inhaler")
        model = PhraseSentimentPredictor()
        model.fit(train)
        predicted = model.predict(test)

        # There must be exactly one prediction per input item.
        self.assertEqual(len(predicted), len(test))

        # Every predicted label must have been seen during training.
        seen_labels = {item.sentiment for item in train}
        unseen = set(predicted).difference(seen_labels)
        self.assertEqual(unseen, set())
    def test_simple_error_matrix(self):
        """Check that the error matrix is consistent with the reported score.

        The keys of the error matrix are unpacked as ``(real, predicted)``
        pairs and its values are sized collections; the total number of
        recorded entries is compared with the accuracy given by ``score``.
        """
        train, test = corpus.make_train_test_split("reflektor", proportion=0.4)
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        error = predictor.error_matrix(test)

        # Correct predictions must never be recorded as errors.
        for real, predicted in error.keys():
            self.assertNotEqual(real, predicted)

        score = predictor.score(test)
        # Precondition of the test. A TestCase assertion is used instead of a
        # bare ``assert`` so the check still runs under ``python -O``.
        self.assertGreater(
            score, 0, "Test is valid only if score is more than 0")

        total = float(len(test))
        wrong = sum(len(entries) for entries in error.values())
        # Compare with a tolerance: the predictor may compute its accuracy
        # with a different (but equivalent) floating-point formula.
        self.assertAlmostEqual((total - wrong) / total, score)
    def test_simple_predict(self):
        """Fit on the train half and sanity-check predictions on the test half."""
        train, test = corpus.make_train_test_split("inhaler")
        model = PhraseSentimentPredictor()
        model.fit(train)
        predicted = model.predict(test)

        # There must be exactly one prediction per input item.
        self.assertEqual(len(predicted), len(test))

        # Every predicted label must have been seen during training.
        seen_labels = {item.sentiment for item in train}
        unseen = set(predicted).difference(seen_labels)
        self.assertEqual(unseen, set())
    def test_simple_error_matrix(self):
        """Check that the error matrix is consistent with the reported score.

        The keys of the error matrix are unpacked as ``(real, predicted)``
        pairs and its values are sized collections; the total number of
        recorded entries is compared with the accuracy given by ``score``.
        """
        train, test = corpus.make_train_test_split("reflektor", proportion=0.4)
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        error = predictor.error_matrix(test)

        # Correct predictions must never be recorded as errors.
        for real, predicted in error.keys():
            self.assertNotEqual(real, predicted)

        score = predictor.score(test)
        # Precondition of the test. A TestCase assertion is used instead of a
        # bare ``assert`` so the check still runs under ``python -O``.
        self.assertGreater(
            score, 0, "Test is valid only if score is more than 0")

        total = float(len(test))
        wrong = sum(len(entries) for entries in error.values())
        # Compare with a tolerance: the predictor may compute its accuracy
        # with a different (but equivalent) floating-point formula.
        self.assertAlmostEqual((total - wrong) / total, score)
 def setUp(self):
     """Load the seeded train/test split and the model configuration."""
     self.train, self.test = corpus.make_train_test_split("mySeed")
     # Read the config through a context manager so the file handle is
     # closed right away instead of being left to the garbage collector.
     with open("../data/model2.json") as config_file:
         self.config = json.load(config_file)
     # The sample count equals the number of items in the training split.
     self.samples = len(self.train)
Exemplo n.º 15
0
 def setUp(self):
     """Create the train/test split used by the tests, seeded with "mySeed"."""
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
Exemplo n.º 16
0
 def test_make_train_test_split_simple(self):
     """Check phrase presence and sentence-id counts for the "blitz" split."""
     train, test = corpus.make_train_test_split("blitz")

     # The phrase must show up somewhere across both halves.
     phrases = [item.phrase for item in train + test]
     self.assertIn("word play", phrases)

     # The test half must hold exactly one distinct sentence id ...
     test_sentence_ids = {item.sentenceid for item in test}
     self.assertEqual(len(test_sentence_ids), 1)

     # ... and both halves together exactly four.
     all_sentence_ids = {item.sentenceid for item in test + train}
     self.assertEqual(len(all_sentence_ids), 4)
 def setUp(self):
     """Create the train/test split used by the tests, seeded with "mySeed"."""
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
 def test_fit_returns_self(self):
     """The value returned by ``fit`` must compare equal to the predictor."""
     training_data, _ = corpus.make_train_test_split("defiant order")
     model = PhraseSentimentPredictor()
     returned = model.fit(training_data)
     self.assertEqual(model, returned)
 def test_fit_returns_self(self):
     """The value returned by ``fit`` must compare equal to the predictor."""
     training_data, _ = corpus.make_train_test_split("defiant order")
     model = PhraseSentimentPredictor()
     returned = model.fit(training_data)
     self.assertEqual(model, returned)
def getTrainingAndTestSplitOnSize(size):
    """Split the corpus with seed "mySeed" and partition each half by ``size``.

    Each half is passed to ``getWordListsGreaterThan(size, half)``, which
    returns a pair. Presumably the first element holds the labelled entries
    above ``size`` and the second the shorter ones -- confirm against that
    helper.

    Returns:
        A 4-tuple: both results for the training half followed by both
        results for the test half.
    """
    train_half, test_half = corpus.make_train_test_split("mySeed")
    train_long, train_short = getWordListsGreaterThan(size, train_half)
    test_long, test_short = getWordListsGreaterThan(size, test_half)
    return (train_long, train_short, test_long, test_short)
Exemplo n.º 21
0
 def test_make_train_test_split_simple(self):
     """Check phrase presence and sentence-id counts for the "blitz" split."""
     train, test = corpus.make_train_test_split("blitz")

     # The phrase must show up somewhere across both halves.
     phrases = [item.phrase for item in train + test]
     self.assertIn("word play", phrases)

     # The test half must hold exactly one distinct sentence id ...
     test_sentence_ids = {item.sentenceid for item in test}
     self.assertEqual(len(test_sentence_ids), 1)

     # ... and both halves together exactly four.
     all_sentence_ids = {item.sentenceid for item in test + train}
     self.assertEqual(len(all_sentence_ids), 4)
def getVocabularyOfSizeGreaterThan(size):
    """Return the first ``getWordListsGreaterThan`` result for each split half.

    The corpus is split with the fixed seed "mySeed"; each half is passed to
    ``getWordListsGreaterThan(size, half)`` and only the first element of
    the returned pair is kept.

    Returns:
        A pair ``(training_result, test_result)``.
    """
    train_half, test_half = corpus.make_train_test_split("mySeed")
    train_selected, _ = getWordListsGreaterThan(size, train_half)
    test_selected, _ = getWordListsGreaterThan(size, test_half)
    return train_selected, test_selected