Exemplo n.º 1
0
 def test_fit_ok(self):
     """Fitting succeeds on a full random sample and on a single tuple."""
     random.seed("sofi needs a ladder")
     sample = list(self._get_random_tuples())
     bulk_flattener = FeatureMappingFlattener()
     bulk_flattener.fit(sample)
     # A brand-new flattener must also accept a one-element dataset.
     single_flattener = FeatureMappingFlattener()
     single_flattener.fit([next(self._get_random_tuples())])
Exemplo n.º 2
0
 def test_sparse_is_equivalent(self):
     """Sparse and dense flatteners give equal values from fit_transform."""
     random.seed("the man who sold the world")
     data = list(self._get_random_tuples())
     # Sparse output, densified so it can be compared element-wise.
     sparse_flattener = FeatureMappingFlattener(sparse=True)
     from_sparse = sparse_flattener.fit_transform(data).todense()
     # Dense output, used as produced.
     dense_flattener = FeatureMappingFlattener(sparse=False)
     from_dense = dense_flattener.fit_transform(data)
     self.assertTrue(numpy.array_equal(from_sparse, from_dense))
Exemplo n.º 3
0
 def test_transform_empty(self):
     """Transforming an empty dataset yields a result with zero rows."""
     training = list(self._get_random_tuples())
     for use_sparse in (True, False):
         flattener = FeatureMappingFlattener(sparse=use_sparse)
         flattener.fit(training)
         row_count = flattener.transform([]).shape[0]
         self.assertEqual(row_count, 0)
Exemplo n.º 4
0
 def test_transform_produce_the_expected_values_on_the_result(self):
     """Check each transformed cell against the value it was built from.

     For position ``i`` of every input tuple:

     * a number must appear unchanged at column ``indexes[(i, None)]``;
     * a string must put 1.0 at ``indexes[(i, value)]`` and 0.0 at the
       column of every other entry of ``self.DRINKS``;
     * anything else is iterated, and its ``j``-th item must appear at
       column ``indexes[(i, j)]``.
     """
     random.seed("lady smith")
     # Materialise the training data now. Left as a one-shot iterator it
     # would only be drawn inside ``fit``, i.e. after the re-seed below, so
     # this seed would not determine X (assuming the helper draws lazily).
     X = list(self._get_random_tuples())
     random.seed("black mambazo")
     Y = list(self._get_random_tuples())
     V = FeatureMappingFlattener(sparse=False)
     V.fit(X)
     Z = V.transform(Y)
     for y, z in zip(Y, Z):
         for i, v in enumerate(y):
             if isinstance(v, (int, float)):
                 # Numeric values are looked up under the (position, None) key.
                 vector_idx = V.indexes[(i, None)]
                 self.assertEqual(v, z[vector_idx])
             elif isinstance(v, str):
                 # we know that there's only ENUM type, with DRINKS
                 vector_idx = V.indexes[(i, v)]
                 self.assertEqual(1.0, z[vector_idx])
                 # Every other enum value must be switched off in this row.
                 for other_value in self.DRINKS:
                     if other_value != v:
                         vector_idx = V.indexes[(i, other_value)]
                         self.assertEqual(0.0, z[vector_idx])
             else:
                 # It's an array: one column per element index.
                 for j, v_j in enumerate(v):
                     vector_idx = V.indexes[(i, j)]
                     self.assertEqual(v_j, z[vector_idx])
Exemplo n.º 5
0
 def check_fit_fails(self, X):
     """Assert that ``fit`` raises ValueError for ``X`` and its variants.

     The variants are the listed results of ``make_every_list_(X, set)``
     and ``make_every_list_(X, tuple)``.
     """
     flattener = FeatureMappingFlattener()
     with self.assertRaises(ValueError):
         flattener.fit(X)
     for container in (set, tuple):
         # Build the variant outside the assertion so that only ``fit``
         # itself is expected to raise.
         variant = list(self.make_every_list_(X, container))
         with self.assertRaises(ValueError):
             flattener.fit(variant)
Exemplo n.º 6
0
 def __init__(self, features, tolerant=False, sparse=True):
     """Build the evaluator and the flattener used by this object.

     Each entry of ``features`` is passed through ``make_feature``.
     ``tolerant`` selects ``TolerantFeatureEvaluator`` instead of
     ``FeatureEvaluator``; ``sparse`` is forwarded to
     ``FeatureMappingFlattener``.
     """
     upgraded = [make_feature(feature) for feature in features]
     evaluator_class = (
         TolerantFeatureEvaluator if tolerant else FeatureEvaluator
     )
     self.evaluator = evaluator_class(upgraded)
     self.flattener = FeatureMappingFlattener(sparse=sparse)
Exemplo n.º 7
0
 def test_fit_bad_values(self):
     """Each malformed dataset must make ``fit`` raise ValueError."""
     flattener = FeatureMappingFlattener()
     malformed_datasets = (
         [tuple()],  # an empty tuple
         [({},)],  # a dict as the value
         [([1], u"a"), ([], u"a")],  # lists of different lengths
         [(random,)],  # a module object as the value
         [([1, u"a"],)],  # a list mixing an int and a string
         [(u"a",), (1,)],  # a string, then an int, in the same position
     )
     for dataset in malformed_datasets:
         with self.assertRaises(ValueError):
             flattener.fit(dataset)
Exemplo n.º 8
0
 def test_sparse_single_zero(self):
     """A sparse transform of a set with only an unseen item has one row."""
     random.seed("something about us")
     flattener = FeatureMappingFlattener(sparse=True)
     alphabet = list(map(chr, range(65, 123)))
     training = [
         ({random.choice(alphabet) for _ in range(20)},)
         for _ in range(7)
     ]
     # Code point 32 lies outside the 65..122 range used for training.
     unseen = chr(32)
     flattener.fit(training)
     transformed = flattener.transform([(set(unseen),)])
     self.assertEqual(transformed.shape[0], 1)
Exemplo n.º 9
0
 def test_fit_transform_consumes_data_only_once(self):
     """``fit_transform`` on a one-shot generator matches fit + transform."""
     random.seed("a kiss to build a dream on")
     data = list(self._get_random_tuples())
     # A generator can be walked only once, unlike the list it wraps.
     one_shot = (item for item in data)
     flattener = FeatureMappingFlattener(sparse=False)
     flattener.fit(data)
     expected = flattener.transform(data)
     actual = flattener.fit_transform(one_shot)
     self.assertTrue(numpy.array_equal(expected, actual))
Exemplo n.º 10
0
    def test_fit_transform_equivalent(self):
        """``fit_transform`` must agree with ``fit`` followed by ``transform``.

        Both the output values and the ``indexes`` / ``reverse`` attributes
        of the two flatteners are compared, in sparse and dense mode.
        """
        random.seed("j0hny guitar")
        data = list(self._get_random_tuples())

        for use_sparse in (True, False):
            # Two-step path: fit, then transform.
            two_step = FeatureMappingFlattener(sparse=use_sparse)
            two_step.fit(data)
            two_step_out = two_step.transform(data)

            # One-step path: fit_transform.
            one_step = FeatureMappingFlattener(sparse=use_sparse)
            one_step_out = one_step.fit_transform(data)

            if use_sparse:
                # Densify both sides so they can be compared element-wise.
                two_step_out = two_step_out.todense()
                one_step_out = one_step_out.todense()
            self.assertTrue(numpy.array_equal(two_step_out, one_step_out))
            self.assertEqual(two_step.indexes, one_step.indexes)
            self.assertEqual(two_step.reverse, one_step.reverse)
Exemplo n.º 11
0
 def test_fit_transform_ok(self):
     """``fit_transform`` output has the expected shape in both modes.

     The full dataset must give a 100-row result and a single extra tuple a
     1-row result, both with ``4 + 3 + 5`` columns.
     """
     random.seed("a kiss to build a dream on")
     data = list(self._get_random_tuples())
     expected_rows = 100
     expected_cols = 4 + 3 + 5  # 4 float, 1 enum, 1 list
     for use_sparse in (True, False):
         flattener = FeatureMappingFlattener(sparse=use_sparse)
         fitted_output = flattener.fit_transform(data)
         self.assertEqual(
             fitted_output.shape, (expected_rows, expected_cols)
         )
         # A dataset holding a single tuple must work as well.
         extra = next(self._get_random_tuples())
         single_output = flattener.transform([extra])
         self.assertEqual(single_output.shape, (1, expected_cols))
Exemplo n.º 12
0
 def test_transform_returns_a_matrix(self):
     """``transform`` returns a CSR matrix when sparse, else an ndarray."""
     random.seed("lady smith")
     training = list(self._get_random_tuples())
     random.seed("black mambazo")
     samples = list(self._get_random_tuples())
     for use_sparse in (True, False):
         flattener = FeatureMappingFlattener(sparse=use_sparse)
         flattener.fit(training)
         output = flattener.transform(samples)
         expected_type = (
             scipy.sparse.csr_matrix if use_sparse else numpy.ndarray
         )
         self.assertIsInstance(output, expected_type)
Exemplo n.º 13
0
 def test_transform_bad_values(self):
     """``transform`` must reject tuples that do not match the fitted data.

     One tuple is held out of the training set and corrupted in three ways
     (last value dropped, an extra value appended, first value replaced by
     a string). Each corrupted tuple must raise ValueError, for both the
     sparse and the dense flattener.
     """
     random.seed("king of the streets")
     X = list(self._get_random_tuples())
     d = X.pop()
     for sparse in [True, False]:
         V = FeatureMappingFlattener(sparse=sparse)
         # The fit and every assertion live inside the loop so that both
         # modes are exercised, not just the last flattener constructed.
         V.fit(X)
         dd = tuple(list(d)[:-1])  # Missing value
         self.assertRaises(ValueError, V.transform, [dd])
         dd = d + (10, )  # Extra value
         self.assertRaises(ValueError, V.transform, [dd])
         dd = tuple([u"a string"] + list(d)[1:])  # Changed type
         self.assertRaises(ValueError, V.transform, [dd])
Exemplo n.º 14
0
 def test_transforming_non_fitted_word_is_ignored(self):
     """An item never seen by ``fit`` adds nothing to ``transform`` output."""
     # Neither training bag contains COLORS[-1].
     training = [(self.COLORS[:-2],), (self.COLORS[:-1],)]
     known_count = len(self.COLORS) - 1
     flattener = FeatureMappingFlattener(sparse=False)
     flattener.fit(training)
     only_unknown = (self.COLORS[-1:],)
     every_color = (self.COLORS[:],)
     result = flattener.transform([only_unknown, every_color])
     # The unknown colour alone gives an all-zero row ...
     all_zeros = [0.0] * known_count
     self.assertTrue(numpy.array_equal(result[0], all_zeros))
     # ... and the full palette sets each known column to one.
     all_ones = [1.0] * known_count
     self.assertTrue(numpy.array_equal(result[1], all_ones))
Exemplo n.º 15
0
 def test_transform_ok(self):
     """``transform`` output has the expected shape in both modes.

     A flattener fitted on one random sample must turn a second sample into
     a 100-row result, and a single tuple into a 1-row result, both with
     ``4 + 3 + 5`` columns.
     """
     random.seed("i am the program")
     X = list(self._get_random_tuples())
     random.seed("dream on")
     # Materialised because it is transformed once per loop iteration; a
     # one-shot iterator would already be exhausted on the second pass.
     Y = list(self._get_random_tuples())
     for sparse in [True, False]:
         V = FeatureMappingFlattener(sparse=sparse)
         # The fit and every assertion live inside the loop so that both
         # modes are exercised, not just the last flattener constructed.
         V.fit(X)
         Z = V.transform(Y)
         n = 100
         # Presumably 4 numeric columns, one enum and one list -- confirm
         # the breakdown against _get_random_tuples.
         m = 4 + 3 + 5
         self.assertEqual(Z.shape, (n, m))
         d = next(self._get_random_tuples())
         Z = V.transform([d])  # A single-tuple dataset must work too
         self.assertEqual(Z.shape, (1, m))
Exemplo n.º 16
0
 def test_transform_produce_expected_values_on_the_result(self):
     """Each transformed cell holds the occurrence count of its bag item.

     For every bag at position ``i`` and every distinct item in it, the
     column ``indexes[(i, item)]`` must equal how often the item occurs.
     """
     random.seed("Lady smith")
     training = list(self._get_random_tuples())
     random.seed("black mambazo")
     samples = list(self._get_random_tuples())
     flattener = FeatureMappingFlattener(sparse=False)
     flattener.fit(training)
     transformed = flattener.transform(samples)
     for sample, row in zip(samples, transformed):
         for position, bag in enumerate(sample):
             # Only bag-like values are expected here: a bag of strings
             # (COLORS) and a bag of Persons.
             assert isinstance(bag, (list, set, tuple))
             for item, occurrences in Counter(bag).items():
                 column = flattener.indexes[(position, item)]
                 self.assertEqual(occurrences, row[column])
Exemplo n.º 17
0
    def test_fit_transform_bad_values(self):
        """``fit_transform`` raises ValueError for every malformed dataset.

        The datasets cover the typical ``fit`` failures plus a valid sample
        with one corrupted tuple appended, in sparse and dense mode.
        """
        random.seed("king of the streets")
        data = list(self._get_random_tuples())
        held_out = data.pop()
        for use_sparse in (True, False):
            flattener = FeatureMappingFlattener(sparse=use_sparse)
            rejected_datasets = [
                # Typical fit failures
                [tuple()],
                [({},)],
                [([1], u"a"), ([], u"a")],
                [(random,)],
                [([1, u"a"],)],
                [("a",), (1,)],
                # Typical transform failures
                data + [tuple(list(held_out)[:-1])],  # Missing value
                data + [held_out + (10, )],  # Extra value
                data + [tuple([u"a string"] + list(held_out)[1:])],  # Changed type
            ]
            for dataset in rejected_datasets:
                with self.assertRaises(ValueError):
                    flattener.fit_transform(dataset)
Exemplo n.º 18
0
 def test_fit_transform_empty(self):
     """``fit_transform`` on an empty dataset raises ValueError."""
     for use_sparse in (True, False):
         flattener = FeatureMappingFlattener(sparse=use_sparse)
         with self.assertRaises(ValueError):
             flattener.fit_transform([])
Exemplo n.º 19
0
 def test_fit_empty(self):
     """``fit`` on an empty dataset raises ValueError."""
     flattener = FeatureMappingFlattener()
     with self.assertRaises(ValueError):
         flattener.fit([])
Exemplo n.º 20
0
 def check_fit_ok(self, X):
     """Fit ``X`` and its two variants on a single flattener.

     The variants are the listed results of ``make_every_list_(X, set)``
     and ``make_every_list_(X, tuple)``; no call is expected to raise.
     """
     flattener = FeatureMappingFlattener()
     flattener.fit(X)
     for container in (set, tuple):
         variant = list(self.make_every_list_(X, container))
         flattener.fit(variant)