Exemplos de make_train_test_split em Python, exemplos de samr.corpus.make_train_test_split em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: test_corpus.py Projeto: DeeeFOX/samr

 def test_make_train_test_split_seed_works(self):
     """Check that the split is determined by the seed string.

     Two calls with seed "a" must give equal train and test parts, while
     seed "b" must give parts that differ from those of seed "a".
     """
     first_train, first_test = corpus.make_train_test_split("a")
     other_train, other_test = corpus.make_train_test_split("b")
     repeat_train, repeat_test = corpus.make_train_test_split("a")
     # Same seed -> same split.
     self.assertEqual(first_train, repeat_train)
     self.assertEqual(first_test, repeat_test)
     # Different seed -> different split.
     self.assertNotEqual(first_train, other_train)
     self.assertNotEqual(first_test, other_test)

Exemplo n.º 2

0

Exibir arquivo

 def test_make_train_test_split_seed_works(self):
     """Verify seed reproducibility of ``corpus.make_train_test_split``.

     The split for seed "a" is computed twice and compared for equality;
     the split for seed "b" is required to differ from it.
     """
     train_a, test_a = corpus.make_train_test_split("a")
     train_b, test_b = corpus.make_train_test_split("b")
     train_a_again, test_a_again = corpus.make_train_test_split("a")
     # Repeating the seed reproduces both halves.
     self.assertEqual(train_a, train_a_again)
     self.assertEqual(test_a, test_a_again)
     # Changing the seed changes both halves.
     self.assertNotEqual(train_a, train_b)
     self.assertNotEqual(test_a, test_b)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: test_word2vec.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def setUp(self):
     """Prepare the fixture shared by the tests of this case.

     Stores the train/test split for seed "mySeed", the result of
     ``corpus.iter_test_corpus()``, and the value ``buildVocabulary``
     returns for the phrases of all three concatenated.
     """
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
     self.realTest = corpus.iter_test_corpus()
     everything = self.train + self.test + self.realTest
     phrases = getPhrases(everything)
     self.corpus = buildVocabulary(phrases)
     # Disabled alternative: one vocabulary per split.
     # self.training = buildVocabulary(getPhrases(self.train))
     # self.testing = buildVocabulary(getPhrases(self.test))
     self.size = 50

Exemplo n.º 4

0

Exibir arquivo

 def test_make_train_test_split_no_shared_sentences(self):
     """
     Test that the train and test parts have no sentence id in common.
     """
     train, test = corpus.make_train_test_split("semis")
     ids_in_train = {item.sentenceid for item in train}
     ids_in_test = {item.sentenceid for item in test}
     shared_ids = ids_in_train & ids_in_test
     self.assertEqual(shared_ids, set())

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_doc2vec.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def setUp(self):
     """Prepare the train/test inputs and labels used by the tests.

     Only the first ``self.samples`` items of each split are passed through
     ``getPhrases`` and ``buildVocabulary``.
     """
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split
     limit = 50000
     self.samples = limit
     train_phrases = getPhrases(self.train[:limit])
     self.xTrain = buildVocabulary(train_phrases)
     test_phrases = getPhrases(self.test[:limit])
     self.xTest = buildVocabulary(test_phrases)
     self.size = 150
     # NOTE(review): labels come from the full splits while the inputs above
     # are truncated to ``limit`` items -- confirm the lengths are meant to
     # differ when a split holds more than ``limit`` items.
     self.labelsTrain = getLabels(self.train)
     self.labelsTest = getLabels(self.test)

Exemplo n.º 6

0

Exibir arquivo

 def setUp(self):
     """Build the combined-corpus fixture for the tests of this case.

     Keeps the seeded train/test split and the result of
     ``corpus.iter_test_corpus()``, then feeds the phrases of their
     concatenation to ``buildVocabulary``.
     """
     seeded_split = corpus.make_train_test_split("mySeed")
     self.train, self.test = seeded_split
     self.realTest = corpus.iter_test_corpus()
     combined = self.train + self.test + self.realTest
     self.corpus = buildVocabulary(getPhrases(combined))
     # Disabled alternative: one vocabulary per split.
     # self.training = buildVocabulary(getPhrases(self.train))
     # self.testing = buildVocabulary(getPhrases(self.test))
     self.size = 50

Exemplo n.º 7

0

Exibir arquivo

Arquivo: test_doc2vec.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def setUp(self):
     """Set up truncated train/test inputs plus their labels.

     At most ``self.samples`` leading items of each split are passed
     through ``getPhrases`` and ``buildVocabulary``.
     """
     seeded_split = corpus.make_train_test_split("mySeed")
     self.train, self.test = seeded_split
     self.samples = 50000
     head_of_train = self.train[:self.samples]
     head_of_test = self.test[:self.samples]
     self.xTrain = buildVocabulary(getPhrases(head_of_train))
     self.xTest = buildVocabulary(getPhrases(head_of_test))
     self.size = 150
     # NOTE(review): labels are taken from the untruncated splits, unlike
     # xTrain/xTest -- verify this is intended.
     self.labelsTrain = getLabels(self.train)
     self.labelsTest = getLabels(self.test)

Exemplo n.º 8

0

Exibir arquivo

Arquivo: test_corpus.py Projeto: DeeeFOX/samr

 def test_make_train_test_split_no_shared_sentences(self):
     """
     Test that no sentence id appears in both the train and the test part.
     """
     train, test = corpus.make_train_test_split("semis")
     train_sentence_ids = {entry.sentenceid for entry in train}
     test_sentence_ids = {entry.sentenceid for entry in test}
     overlap = train_sentence_ids & test_sentence_ids
     self.assertEqual(overlap, set())

Exemplo n.º 9

0

Exibir arquivo

def cross_validation(factory, seed, K=10, callback=None):
    """Return the mean score of ``K`` predictors trained on seeded splits.

    For each round ``k`` in ``range(K)`` a split is drawn with
    ``make_train_test_split(str(seed) + str(k))``, a fresh predictor is
    created by calling ``factory()``, fitted on the train part and scored on
    the test part.

    Args:
        factory: Zero-argument callable returning an object with
            ``fit(train)`` and ``score(test)`` methods.
        seed: Base seed; converted with ``str`` and suffixed with the round
            index.
        K: Number of rounds; must be at least 1.
        callback: Optional callable invoked with each round's score.

    Returns:
        The sum of the round scores divided by the number of rounds.

    Raises:
        ValueError: If ``K`` is smaller than 1 (previously this surfaced as
            an opaque ``ZeroDivisionError`` when averaging zero scores).
    """
    if K < 1:
        raise ValueError("K must be at least 1, got {!r}".format(K))
    seed = str(seed)
    scores = []
    for k in range(K):
        train, test = make_train_test_split(seed + str(k))
        predictor = factory()
        predictor.fit(train)
        score = predictor.score(test)
        if callback:
            callback(score)
        scores.append(score)
    return sum(scores) / len(scores)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

    def test_simple_predict(self):
        """Fit a predictor and sanity-check the shape and labels of its output.

        There must be one prediction per test item, and no predicted label
        may be absent from the training labels.
        """
        train, test = corpus.make_train_test_split("inhaler")
        model = PhraseSentimentPredictor()
        model.fit(train)
        predictions = model.predict(test)

        # One prediction for every input value.
        self.assertEqual(len(predictions), len(test))

        # Predicted labels were all seen during training.
        known_labels = {item.sentiment for item in train}
        unseen_labels = set(predictions) - known_labels
        self.assertEqual(unseen_labels, set())

Exemplo n.º 11

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

    def test_simple_error_matrix(self):
        """Check the error matrix against the predictor's score.

        Every key of the error matrix is a ``(real, predicted)`` pair whose
        members must differ, and the fraction of test items not listed in the
        matrix must equal ``predictor.score(test)``.
        """
        train, test = corpus.make_train_test_split("reflektor", proportion=0.4)
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        error = predictor.error_matrix(test)
        for real, predicted in error.keys():
            self.assertNotEqual(real, predicted)

        score = predictor.score(test)
        # Use a unittest assertion instead of a bare ``assert`` so this
        # precondition is still enforced when Python runs with -O.
        self.assertGreater(score, 0, "Test is valid only if score is more than 0")
        total = float(len(test))
        wrong = sum(len(items) for items in error.values())
        self.assertEqual((total - wrong) / total, score)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

    def test_simple_predict(self):
        """Train on one part of the split and validate predictions on the other.

        Asserts that the number of predictions matches the number of test
        items and that the predicted labels are a subset of the training
        labels.
        """
        train, test = corpus.make_train_test_split("inhaler")
        fitted = PhraseSentimentPredictor()
        fitted.fit(train)
        predictions = fitted.predict(test)

        # As many predictions as input values.
        self.assertEqual(len(predictions), len(test))

        # No label may be predicted that never occurred in training.
        labels_in_train = {sample.sentiment for sample in train}
        labels_predicted = set(predictions)
        self.assertEqual(labels_predicted - labels_in_train, set())

Exemplo n.º 13

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

    def test_simple_error_matrix(self):
        """Validate that the error matrix is consistent with the score.

        Keys of the matrix are ``(real, predicted)`` pairs that must never be
        equal, and ``(N - wrong) / N`` computed from the matrix must equal
        the value returned by ``predictor.score(test)``.
        """
        train, test = corpus.make_train_test_split("reflektor", proportion=0.4)
        predictor = PhraseSentimentPredictor()
        predictor.fit(train)
        error = predictor.error_matrix(test)
        for real, predicted in error.keys():
            self.assertNotEqual(real, predicted)

        score = predictor.score(test)
        # A bare ``assert`` is removed under ``python -O``; the unittest
        # assertion keeps the precondition active in every run mode.
        self.assertGreater(score, 0, "Test is valid only if score is more than 0")
        n_items = float(len(test))
        n_wrong = sum(len(entries) for entries in error.values())
        self.assertEqual((n_items - n_wrong) / n_items, score)

Exemplo n.º 14

0

Exibir arquivo

Arquivo: test_different_setups.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def setUp(self):
     """Load the train/test split and the model configuration.

     The configuration is parsed from ``../data/model2.json``, a path that
     is resolved relative to the current working directory.
     """
     self.train, self.test = corpus.make_train_test_split("mySeed")
     # Open the file in a context manager so the handle is closed right
     # after parsing instead of being left to the garbage collector.
     with open("../data/model2.json") as config_file:
         self.config = json.load(config_file)
     self.samples = len(self.train)

Exemplo n.º 15

0

Exibir arquivo

 def setUp(self):
     """Store the train/test split for seed "mySeed" on the test case."""
     split = corpus.make_train_test_split("mySeed")
     self.train, self.test = split

Exemplo n.º 16

0

Exibir arquivo

Arquivo: test_corpus.py Projeto: DeeeFOX/samr

 def test_make_train_test_split_simple(self):
     """Check basic properties of the split for seed "blitz".

     The phrase "word play" must occur somewhere in the data, the test part
     must cover exactly one sentence id and both parts together exactly four.
     """
     train, test = corpus.make_train_test_split("blitz")
     phrases = [item.phrase for item in train + test]
     self.assertIn("word play", phrases)
     test_sentence_ids = {item.sentenceid for item in test}
     self.assertEqual(len(test_sentence_ids), 1)
     all_sentence_ids = {item.sentenceid for item in test + train}
     self.assertEqual(len(all_sentence_ids), 4)

Exemplo n.º 17

0

Exibir arquivo

Arquivo: test_svm.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def setUp(self):
     """Keep the train and test parts of the "mySeed" split as attributes."""
     seeded_split = corpus.make_train_test_split("mySeed")
     self.train, self.test = seeded_split

Exemplo n.º 18

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def test_fit_returns_self(self):
     """Check that ``fit`` returns a value equal to the predictor itself."""
     split = corpus.make_train_test_split("defiant order")
     train = split[0]
     predictor = PhraseSentimentPredictor()
     returned = predictor.fit(train)
     self.assertEqual(predictor, returned)

Exemplo n.º 19

0

Exibir arquivo

Arquivo: test_predictor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

 def test_fit_returns_self(self):
     """Verify that the result of ``fit`` compares equal to the predictor."""
     train, _unused_test = corpus.make_train_test_split("defiant order")
     predictor = PhraseSentimentPredictor()
     fit_result = predictor.fit(train)
     self.assertEqual(predictor, fit_result)

Exemplo n.º 20

0

Exibir arquivo

Arquivo: preprocessor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

def getTrainingAndTestSplitOnSize(size):
    """Partition the "mySeed" train and test splits with ``getWordListsGreaterThan``.

    Args:
        size: Threshold forwarded unchanged to ``getWordListsGreaterThan``.

    Returns:
        A 4-tuple: the two values ``getWordListsGreaterThan`` returns for the
        train part, followed by the two values it returns for the test part.
        (Presumably the "greater than size" and "short" word lists with
        labels -- confirm against ``getWordListsGreaterThan``.)
    """
    train, test = corpus.make_train_test_split("mySeed")
    train_long, train_short = getWordListsGreaterThan(size, train)
    test_long, test_short = getWordListsGreaterThan(size, test)
    return train_long, train_short, test_long, test_short

Exemplo n.º 21

0

Exibir arquivo

 def test_make_train_test_split_simple(self):
     """Sanity-check the "blitz" split.

     Expects the phrase "word play" among all phrases, one distinct sentence
     id in the test part, and four distinct sentence ids overall.
     """
     train, test = corpus.make_train_test_split("blitz")
     every_phrase = [entry.phrase for entry in train + test]
     self.assertIn("word play", every_phrase)
     ids_in_test = {entry.sentenceid for entry in test}
     self.assertEqual(len(ids_in_test), 1)
     ids_overall = {entry.sentenceid for entry in test + train}
     self.assertEqual(len(ids_overall), 4)

Exemplo n.º 22

0

Exibir arquivo

Arquivo: preprocessor.py Projeto: EspenAlbert/sentimentAnalysisMovieReviews

def getVocabularyOfSizeGreaterThan(size):
    """Return the first ``getWordListsGreaterThan`` result for train and test.

    Args:
        size: Threshold forwarded unchanged to ``getWordListsGreaterThan``.

    Returns:
        A 2-tuple holding the first value ``getWordListsGreaterThan`` returns
        for the train part and for the test part of the "mySeed" split; the
        second value of each call is discarded.
    """
    train, test = corpus.make_train_test_split("mySeed")
    train_selected, _ = getWordListsGreaterThan(size, train)
    test_selected, _ = getWordListsGreaterThan(size, test)
    return train_selected, test_selected