Exemplo n.º 1
0
    def test_domain_basic_stats(self):
        """DomainBasicStats must match the per-variable BasicStats.

        By default the domain statistics are compared against the stats of
        the attributes followed by the class variables; with
        ``include_metas=True`` the meta stats are expected after them.
        """
        table = self.zoo
        variables = table.domain

        def stats_for(columns):
            # One BasicStats per variable, in the order the variables appear.
            return [BasicStats(table, column) for column in columns]

        expected = stats_for(variables.attributes)
        expected += stats_for(variables.class_vars)
        expected_with_metas = expected + stats_for(variables.metas)

        without_metas = DomainBasicStats(table)
        self.assertStatsEqual(without_metas.stats, expected)

        with_metas = DomainBasicStats(table, include_metas=True)
        self.assertStatsEqual(with_metas.stats, expected_with_metas)
Exemplo n.º 2
0
 def test_speed(self):
     """Check that computing BasicStats for every column stays fast.

     Builds a table from a random 10 x 10000 array, computes BasicStats for
     each of the 10000 column indices and fails unless the whole loop
     finishes in under 10 seconds.
     """
     n, m = 10, 10000
     data = Table.from_numpy(None, np.random.rand(n, m))
     # perf_counter() is monotonic; time.time() follows the wall clock and
     # can jump when the system clock is adjusted, skewing the measurement.
     start = time.perf_counter()
     for i in range(m):
         BasicStats(data, i)
     elapsed = time.perf_counter() - start
     self.assertLess(elapsed, 10.0)
Exemplo n.º 3
0
 def __call__(self, data, attribute, fixed=None):
     """Build an equal-width discretized variable for ``attribute``.

     The value range used for splitting is taken from, in order:

     * ``fixed[attribute.name]`` when ``fixed`` is given and truthy; the
       entry must unpack into a (low, high) pair;
     * ``BasicStats(data, attribute)`` when ``data`` is exactly an
       ``SqlTable``;
     * the NaN-ignoring minimum and maximum of the column otherwise.

     An empty column yields no cut points instead of failing inside the
     min/max reduction.

     Returns whatever ``Discretizer.create_discretized_var`` produces for
     ``data.domain[attribute]`` and the computed points.
     """
     if fixed:
         # Named low/high rather than min/max so the builtins stay usable.
         low, high = fixed[attribute.name]
         points = self._split_eq_width(low, high)
     elif type(data) == SqlTable:
         stats = BasicStats(data, attribute)
         points = self._split_eq_width(stats.min, stats.max)
     else:
         column = data[:, attribute]
         # Use X when it holds the column; otherwise fall back to Y.
         values = column.X if column.X.size else column.Y
         if values.size:
             low, high = ut.nanmin(values), ut.nanmax(values)
             points = self._split_eq_width(low, high)
         else:
             # Nothing to compute a range from: no cut points.
             points = []
     return Discretizer.create_discretized_var(
         data.domain[attribute], points)
Exemplo n.º 4
0
 def __call__(self, data: Table, attribute, fixed=None):
     """Build an equal-width discretized variable for ``attribute``.

     Cut points come from ``self._split_eq_width`` applied to a value range
     chosen as follows:

     * a truthy ``fixed`` supplies the range via ``fixed[attribute.name]``;
     * a ``data`` whose type is exactly ``SqlTable`` supplies it through
       ``BasicStats``;
     * any other ``data`` supplies it from the NaN-ignoring min and max of
       the column view, or gives no cut points if that view is empty.

     Returns the result of ``Discretizer.create_discretized_var`` for
     ``data.domain[attribute]`` and the cut points.
     """
     if fixed:
         low, high = fixed[attribute.name]
         cuts = self._split_eq_width(low, high)
     elif type(data) == SqlTable:
         summary = BasicStats(data, attribute)
         cuts = self._split_eq_width(summary.min, summary.max)
     else:
         column, _ = data.get_column_view(attribute)
         if not column.size:
             # Empty column: there is no range to split.
             cuts = []
         else:
             low, high = ut.nanmin(column), ut.nanmax(column)
             cuts = self._split_eq_width(low, high)
     return Discretizer.create_discretized_var(
         data.domain[attribute], cuts)
Exemplo n.º 5
0
    def test_basic_stats(self):
        """BasicStats and DomainBasicStats agree on 'sepal length'.

        The same expectations are checked on a directly computed BasicStats
        and on the entry looked up in a DomainBasicStats built with
        ``include_metas=True``.
        """
        table = SqlTable(self.conn, self.iris, inspect_values=True)

        def check_sepal_length(column_stats):
            # Expected summary of the 'sepal length' column.
            self.assertAlmostEqual(column_stats.min, 4.3)
            self.assertAlmostEqual(column_stats.max, 7.9)
            self.assertAlmostEqual(column_stats.mean, 5.8, places=1)
            self.assertEqual(column_stats.nans, 0)
            self.assertEqual(column_stats.non_nans, 150)

        check_sepal_length(BasicStats(table, table.domain['sepal length']))

        all_stats = DomainBasicStats(table, include_metas=True)
        # One stats entry per domain variable plus one per meta.
        expected_count = len(table.domain) + len(table.domain.metas)
        self.assertEqual(len(all_stats.stats), expected_count)
        check_sepal_length(all_stats['sepal length'])
Exemplo n.º 6
0
    def test_basic_stats_on_large_data(self):
        """Repeat the 'sepal length' stats checks for the large-table case."""
        # By setting LARGE_TABLE to 100, iris will be treated as a large
        # table and sampling will be used. As the table is actually small,
        # time-based sampling should return all rows, so the same
        # assertions can be used.
        # NOTE(review): the LARGE_TABLE override is not visible in this
        # method; presumably it is applied from outside (e.g. a decorator
        # or patch) -- confirm.
        source = SqlTable(self.conn, self.iris, inspect_values=True)

        def verify(summary):
            # Expected summary of the 'sepal length' column.
            self.assertAlmostEqual(summary.min, 4.3)
            self.assertAlmostEqual(summary.max, 7.9)
            self.assertAlmostEqual(summary.mean, 5.8, places=1)
            self.assertEqual(summary.nans, 0)
            self.assertEqual(summary.non_nans, 150)

        single = BasicStats(source, source.domain['sepal length'])
        verify(single)

        whole_domain = DomainBasicStats(source, include_metas=True)
        # One stats entry per domain variable plus one per meta.
        n_expected = len(source.domain) + len(source.domain.metas)
        self.assertEqual(len(whole_domain.stats), n_expected)
        verify(whole_domain['sepal length'])