Пример #1
0
    def test_sum(self):
        """
        Exercise ``Sum`` validation and computation against ``self.table``.

        Validating column ``'three'`` is expected to raise
        :class:`DataTypeError`, validating column ``'one'`` is expected to
        pass, and the sums of columns ``'one'`` and ``'two'`` are compared
        with fixed :class:`decimal.Decimal` values.
        """
        table = self.table

        # Validation must reject column 'three' ...
        with self.assertRaises(DataTypeError):
            Sum('three').validate(table)

        # ... and accept column 'one' without raising.
        Sum('one').validate(table)

        expected_totals = (
            ('one', Decimal('6.5')),
            ('two', Decimal('13.13')),
        )

        for column_name, expected in expected_totals:
            self.assertEqual(Sum(column_name).run(table), expected)
Пример #2
0
 def test_sum(self):
     """
     Check ``Sum('test')`` against ``self.time_delta_table``.

     Verifies the reported aggregate data type is a ``TimeDelta``
     instance, that validation does not raise, and that the computed sum
     equals a 30 second :class:`datetime.timedelta`.
     """
     table = self.time_delta_table

     aggregate_type = Sum('test').get_aggregate_data_type(table)
     self.assertIsInstance(aggregate_type, TimeDelta)

     # Must not raise.
     Sum('test').validate(table)

     total = Sum('test').run(table)
     self.assertEqual(total, datetime.timedelta(seconds=30))
Пример #3
0
    def test_aggregeate_bad_column(self):
        """
        Aggregating a ``TableSet`` with ``Sum`` over column ``'one'`` or
        column ``'bad'`` is expected to raise :class:`KeyError`.
        """
        tableset = TableSet(self.tables.values(), self.tables.keys())

        # (name of the aggregate column, name of the column being summed)
        cases = (
            ('one_sum', 'one'),
            ('bad_sum', 'bad'),
        )

        for aggregate_name, column_name in cases:
            with self.assertRaises(KeyError):
                tableset.aggregate([(aggregate_name, Sum(column_name))])
Пример #4
0
    def run(self, table):
        """
        Express each row's value in the configured column as a percentage
        of a total.

        The denominator is ``self._total`` when it is not ``None``;
        otherwise it is the :class:`Sum` aggregate of the column over
        ``table``.

        :param table:
            The table whose rows are read.
        :returns:
            A list with one entry per row of ``table``: the value divided
            by the total and multiplied by 100, or ``None`` for rows whose
            value is ``None``.
        :raises DataTypeError:
            If the total had to be computed from the column and that sum
            is less than or equal to zero.
        """
        column_name = self._column_name
        denominator = self._total

        if denominator is None:
            # No total was supplied, so the column's own sum is the
            # denominator.
            denominator = table.aggregate(Sum(column_name))

            # Only a computed sum is checked here; a supplied total is used
            # as-is.
            if denominator <= 0:
                raise DataTypeError(
                    'The sum of column values must be a positive number')

        values = (row[column_name] for row in table.rows)

        # Nulls pass straight through; everything else is scaled to a
        # percentage (divide first, then multiply by 100).
        return [
            None if value is None else (value / denominator) * 100
            for value in values
        ]
Пример #5
0
 def test_multiple(self):
     """
     Aggregating ``self.table`` with a list of ``(name, aggregation)``
     pairs is expected to return a mapping from each name to its result.
     """
     aggregations = [
         ('count', Count()),
         ('sum', Sum('two')),
     ]
     expected = {'count': 3, 'sum': 9}

     self.assertEqual(self.table.aggregate(aggregations), expected)
Пример #6
0
    def test_pivot_sum(self):
        """
        Pivot on ``'race'`` by ``'gender'`` while aggregating ``Sum('age')``
        and check the resulting column names, column types and rows.
        """
        source = Table(self.rows, self.column_names, self.column_types)
        pivoted = source.pivot('race', 'gender', Sum('age'))

        expected_rows = (
            ('white', 20, 45),
            ('black', 20, 0),
            ('latino', 25, 0),
            ('asian', 0, 25),
        )

        self.assertColumnNames(pivoted, ['race', 'male', 'female'])
        self.assertColumnTypes(pivoted, [Text, Number, Number])
        self.assertRows(pivoted, expected_rows)
Пример #7
0
    def test_aggregate_sum(self):
        """
        Aggregate a ``TableSet`` with ``Count`` and ``Sum('number')`` and
        check that the result is a ``Table`` with the expected column
        names, column types and rows.
        """
        tableset = TableSet(self.tables.values(), self.tables.keys())

        aggregations = [
            ('count', Count()),
            ('number_sum', Sum('number')),
        ]
        summary = tableset.aggregate(aggregations)

        expected_rows = [
            ('table1', 3, 6),
            ('table2', 3, 7),
            ('table3', 3, 6),
        ]

        self.assertIsInstance(summary, Table)
        self.assertColumnNames(summary, ('group', 'count', 'number_sum'))
        self.assertColumnTypes(summary, [Text, Number, Number])
        self.assertRows(summary, expected_rows)
Пример #8
0
    def test_having_complex(self):
        """
        Filter a ``TableSet`` with ``having`` using two aggregations and a
        compound predicate, then check that only ``'table2'`` remains, that
        the surviving table is the very same object, and that the key name
        is carried over.
        """
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')

        def keep(aggregates):
            # At least three rows and a number sum strictly above six.
            return aggregates['count'] >= 3 and aggregates['number_sum'] > 6

        aggregations = [
            ('count', Count()),
            ('number_sum', Sum('number')),
        ]
        filtered = tableset.having(aggregations, keep)

        self.assertIsInstance(filtered, TableSet)
        self.assertSequenceEqual(filtered.keys(), ['table2'])
        self.assertIs(filtered.values()[0], tableset['table2'])
        self.assertEqual(filtered.key_name, 'test')
Пример #9
0
    def pearson_correlation(self, column_one, column_two):
        """
        Compute the `Pearson correlation coefficient <http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient>`_
        of :code:`column_one` and :code:`column_two`.

        The coefficient lies between -1 and 1. A value of 0 implies no
        correlation, a value near 1 implies a high positive correlation
        (as x increases so does y) and a value near -1 implies a high
        negative correlation (as x increases, y decreases).

        Note: this implementation is borrowed from the MIT licensed
        `latimes-calculate <https://github.com/datadesk/latimes-calculate/blob/master/calculate/pearson.py>`_.
        Thanks, LAT!

        :param column_one: The name of a column.
        :param column_two: The name of a column.
        :returns: The quotient of the Pearson numerator and denominator;
            the plain integer ``0`` when the denominator is zero.
        :raises NullComputationError: If either column contains nulls.
        """
        first = self.columns[column_one]
        second = self.columns[column_two]

        if first.has_nulls() or second.has_nulls():
            raise NullComputationError

        count = len(first)

        total_first = first.aggregate(Sum())
        total_second = second.aggregate(Sum())

        squares_first = sum(value ** 2 for value in first)
        squares_second = sum(value ** 2 for value in second)

        cross_products = sum(a * b for a, b in zip(first, second))

        numerator = cross_products - (total_first * total_second / count)

        spread_first = squares_first - total_first ** 2 / count
        spread_second = squares_second - total_second ** 2 / count
        denominator = (spread_first * spread_second).sqrt()

        # A zero denominator would make the quotient undefined; report it
        # as no correlation instead.
        if denominator == 0:
            return 0

        return numerator / denominator
Пример #10
0
    def test_nested_aggregation(self):
        """
        Group a ``TableSet`` by ``'letter'``, aggregate the nested result
        with ``Count`` and ``Sum('number')``, and check the resulting
        table's type, column names, column types and rows.
        """
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')
        grouped = tableset.group_by('letter')

        aggregations = [
            ('count', Count()),
            ('number_sum', Sum('number')),
        ]
        summary = grouped.aggregate(aggregations)

        expected_rows = [
            ('table1', 'a', 2, 4),
            ('table1', 'b', 1, 2),
            ('table2', 'b', 1, 0),
            ('table2', 'a', 1, 2),
            ('table2', 'c', 1, 5),
            ('table3', 'a', 2, 3),
            ('table3', 'c', 1, 3),
        ]

        self.assertIsInstance(summary, Table)
        self.assertColumnNames(summary,
                               ('test', 'letter', 'count', 'number_sum'))
        self.assertColumnTypes(summary, (Text, Text, Number, Number))
        self.assertRows(summary, expected_rows)
Пример #11
0
    def test_nested_aggregate_row_names(self):
        """
        Group a ``TableSet`` by ``'letter'``, aggregate the nested result,
        and check that rows are named by ``(table key, letter)`` tuples
        and can be looked up by those names.
        """
        tableset = TableSet(self.tables.values(),
                            self.tables.keys(),
                            key_name='test')
        grouped = tableset.group_by('letter')

        aggregations = [
            ('count', Count()),
            ('number_sum', Sum('number')),
        ]
        summary = grouped.aggregate(aggregations)

        expected_names = [
            ('table1', 'a'),
            ('table1', 'b'),
            ('table2', 'b'),
            ('table2', 'a'),
            ('table2', 'c'),
            ('table3', 'a'),
            ('table3', 'c'),
        ]
        self.assertRowNames(summary, expected_names)

        # (row name, expected row contents)
        lookups = (
            (('table1', 'a'), ('table1', 'a', 2, 4)),
            (('table2', 'c'), ('table2', 'c', 1, 5)),
        )

        for row_name, expected_row in lookups:
            self.assertSequenceEqual(summary.rows[row_name], expected_row)
Пример #12
0
 def test_sum(self):
     """Aggregating ``self.table`` with ``Sum('two')`` is expected to give 9."""
     total = self.table.aggregate(Sum('two'))

     self.assertEqual(total, 9)
Пример #13
0
 def test_sum_all_nulls(self):
     """``Sum('four')`` over ``self.table`` is expected to be a zero Decimal."""
     total = Sum('four').run(self.table)

     self.assertEqual(total, Decimal('0'))
Пример #14
0
 def test_sum_all_nulls(self):
     """
     ``Sum('null')`` over ``self.time_delta_table`` is expected to be a
     zero-length :class:`datetime.timedelta`.
     """
     total = Sum('null').run(self.time_delta_table)

     self.assertEqual(total, datetime.timedelta(0))
Пример #15
0
 def test_multiple(self):
     """
     Aggregating ``self.table`` with a list of aggregations is expected to
     return their results as a sequence in the same order.
     """
     aggregations = [Count(), Sum('two')]
     results = self.table.aggregate(aggregations)

     self.assertSequenceEqual(results, [3, 9])
Пример #16
0
    def test_aggregate_sum_invalid(self):
        """
        Aggregating a ``TableSet`` with ``Sum('letter')`` is expected to
        raise :class:`DataTypeError`.
        """
        tableset = TableSet(self.tables.values(), self.tables.keys())
        aggregations = [('letter_sum', Sum('letter'))]

        with self.assertRaises(DataTypeError):
            tableset.aggregate(aggregations)