Пример #1
0
    def run(self, column):
        """
        Compute the 0th through 100th percentiles of a column's non-null values.

        :param column: The column to summarize. It must provide ``aggregate``
            and ``values_without_nulls_sorted``.
        :returns: A :class:`Quantiles` built from 101 values, where index
            ``p`` holds the ``p``-th percentile.
        :raises IndexError: If the sorted non-null data is an empty sequence.
        """
        if column.aggregate(HasNulls()):
            warn_null_calculation(self, column)

        data = column.values_without_nulls_sorted()

        # Zeroth percentile is first datum
        quantiles = [data[0]]

        count = len(data)

        for percentile in range(1, 100):
            # The 1-based rank is count * percentile / 100. Split it exactly
            # into quotient and remainder; the float product is inexact
            # (100 * 0.07 == 7.000000000000001) and would misclassify
            # whole-number ranks as fractional ones, or the reverse.
            whole, remainder = divmod(count * percentile, 100)

            # Equivalent to ceil(rank) and floor(rank + 1), computed exactly.
            low = max(1, whole + (1 if remainder else 0))
            high = min(count, whole + 1)

            # Fractional rank: both bounds land on the same datum
            if low == high:
                value = data[low - 1]
            # Whole-number rank: average that datum and the next one
            else:
                value = (data[low - 1] + data[high - 1]) / 2

            quantiles.append(value)

        # Hundredth percentile is final datum
        quantiles.append(data[-1])

        return Quantiles(quantiles)
Пример #2
0
 def validate(self, table):
     """
     Check that this computation can be applied to ``table``.

     Looks up the column named by ``self._column_name``, requires it to hold
     Number data, rejects an explicit total that is zero or negative, and
     calls ``warn_null_calculation`` when ``HasNulls`` reports nulls.

     :param table: The table containing the configured column.
     :raises DataTypeError: If the column's data type is not a ``Number``,
         or if ``self._total`` is set and is less than or equal to zero.
     """
     target = table.columns[self._column_name]

     if not isinstance(target.data_type, Number):
         raise DataTypeError('Percent column must contain Number data.')

     total = self._total
     if total is not None:
         if total <= 0:
             raise DataTypeError('The total must be a positive number')

     # Nulls do not fail validation; they only trigger a warning.
     contains_nulls = HasNulls(self._column_name).run(table)
     if contains_nulls:
         warn_null_calculation(self, target)
Пример #3
0
 def validate(self, table):
     """
     Check that this computation can be applied to ``table``.

     The configured column must hold Number data and, when a total was
     supplied, that total must be greater than zero. Nulls in the column
     do not fail validation; they result in a call to
     ``warn_null_calculation``.

     :param table: The table containing the column named by
         ``self._column_name``.
     :raises DataTypeError: On a non-Number column or a total that is less
         than or equal to zero.
     """
     source_column = table.columns[self._column_name]

     is_number = isinstance(source_column.data_type, Number)
     if not is_number:
         raise DataTypeError('Percent column must contain Number data.')

     explicit_total = self._total
     if explicit_total is not None:
         if explicit_total <= 0:
             raise DataTypeError('The total must be a positive number')

     if HasNulls(self._column_name).run(table):
         warn_null_calculation(self, source_column)
Пример #4
0
    def validate(self, table):
        """
        Check that the configured column is suitable for this aggregation.

        :param table: The table containing the column named by
            ``self._column_name``.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        target = table.columns[self._column_name]

        if isinstance(target.data_type, Number):
            # Nulls are tolerated, but the caller is warned about them.
            if HasNulls(self._column_name).run(table):
                warn_null_calculation(self, target)
            return

        raise DataTypeError('Deciles can only be applied to columns containing Number data.')
Пример #5
0
    def validate(self, table):
        """
        Check that the configured column is suitable for this aggregation.

        :param table: The table containing the column named by
            ``self._column_name``.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        target = table.columns[self._column_name]

        if isinstance(target.data_type, Number):
            # Nulls are tolerated, but the caller is warned about them.
            if HasNulls(self._column_name).run(table):
                warn_null_calculation(self, target)
            return

        raise DataTypeError('Median can only be applied to columns containing Number data.')
Пример #6
0
    def run(self, column):
        """
        Compute the mean of a column's non-null values.

        The result is the column's ``Sum`` aggregate divided by the number
        of values returned by ``values_without_nulls``.

        :param column: The column to summarize.
        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        is_numeric = isinstance(column.data_type, Number)
        if not is_numeric:
            raise DataTypeError('Mean can only be applied to columns containing Number data.')

        nulls_present = column.aggregate(HasNulls())
        if nulls_present:
            warn_null_calculation(self, column)

        total = column.aggregate(Sum())
        count = len(column.values_without_nulls())

        return total / count
Пример #7
0
    def run(self, column):
        """
        Compute the interquartile range of a column.

        The result is the 75th percentile minus the 25th percentile, both
        taken from the column's ``Percentiles`` aggregate.

        :param column: The column to summarize.
        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        is_numeric = isinstance(column.data_type, Number)
        if not is_numeric:
            raise DataTypeError('IQR can only be applied to columns containing Number data.')

        nulls_present = column.aggregate(HasNulls())
        if nulls_present:
            warn_null_calculation(self, column)

        quantiles = column.aggregate(Percentiles())
        upper_quartile = quantiles[75]
        lower_quartile = quantiles[25]

        return upper_quartile - lower_quartile
Пример #8
0
    def validate(self, table):
        """
        Check that both configured columns are suitable for this computation.

        Both columns are looked up first, then both are type-checked (before
        column first), and finally each is checked for nulls. Nulls do not
        fail validation; they result in a call to ``warn_null_calculation``.

        :param table: The table containing the columns named by
            ``self._before_column_name`` and ``self._after_column_name``.
        :raises DataTypeError: If either column's data type is not a
            ``Number``.
        """
        names = (self._before_column_name, self._after_column_name)
        before, after = (table.columns[name] for name in names)

        requirements = (
            (before, 'PercentChange before column must contain Number data.'),
            (after, 'PercentChange after column must contain Number data.'),
        )

        for candidate, message in requirements:
            if not isinstance(candidate.data_type, Number):
                raise DataTypeError(message)

        for name, candidate in zip(names, (before, after)):
            if HasNulls(name).run(table):
                warn_null_calculation(self, candidate)
Пример #9
0
    def run(self, column):
        """
        Compute the median absolute deviation of a column.

        The center is the 50th entry of the column's ``Percentiles``
        aggregate. The result is ``median`` applied to the absolute
        differences between each sorted non-null value and that center.

        :param column: The column to summarize.
        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        is_numeric = isinstance(column.data_type, Number)
        if not is_numeric:
            raise DataTypeError('MAD can only be applied to columns containing Number data.')

        nulls_present = column.aggregate(HasNulls())
        if nulls_present:
            warn_null_calculation(self, column)

        values = column.values_without_nulls_sorted()
        center = column.aggregate(Percentiles())[50]

        deviations = tuple(abs(value - center) for value in values)

        return median(deviations)
Пример #10
0
    def run(self, column):
        """
        Compute the population variance of a column's non-null values.

        The result is the sum of squared differences from the column's
        ``Mean`` aggregate, divided by the number of non-null values.

        :param column: The column to summarize.
        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        is_numeric = isinstance(column.data_type, Number)
        if not is_numeric:
            raise DataTypeError('PopulationVariance can only be applied to columns containing Number data.')

        nulls_present = column.aggregate(HasNulls())
        if nulls_present:
            warn_null_calculation(self, column)

        values = column.values_without_nulls()
        center = column.aggregate(Mean())

        # Accumulate from 0, as the built-in sum() does.
        squared_total = 0
        for value in values:
            squared_total = squared_total + (value - center) ** 2

        return squared_total / len(values)
Пример #11
0
    def run(self, table):
        """
        Compute the interquartile range of the configured column.

        The result is the 75th percentile minus the 25th percentile, both
        taken from ``Percentiles`` run against the same column.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        name = self._column_name
        target = table.columns[name]

        is_numeric = isinstance(target.data_type, Number)
        if not is_numeric:
            raise DataTypeError('IQR can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        quantiles = Percentiles(name).run(table)
        upper_quartile = quantiles[75]
        lower_quartile = quantiles[25]

        return upper_quartile - lower_quartile
Пример #12
0
    def validate(self, table):
        """
        Check that the before and after columns can be compared.

        The before column must hold one of Number, Date, DateTime or
        TimeDelta data, and the after column must match the first of those
        types that the before column satisfies. Nulls in either column do
        not fail validation; they result in a call to
        ``warn_null_calculation``.

        :param table: The table containing the columns named by
            ``self._before_column_name`` and ``self._after_column_name``.
        :raises DataTypeError: If the before column is not one of the
            supported types, or if the after column is of a different type.
        """
        before = table.columns[self._before_column_name]
        after = table.columns[self._after_column_name]

        supported = (Number, Date, DateTime, TimeDelta)

        # First supported type the before column satisfies, in declared order.
        matched = next(
            (kind for kind in supported if isinstance(before.data_type, kind)),
            None
        )

        if matched is None:
            raise DataTypeError('Change before and after columns must both contain data that is one of: Number, Date, DateTime or TimeDelta.')

        if not isinstance(after.data_type, matched):
            raise DataTypeError('Specified columns must be of the same type')

        pairs = (
            (self._before_column_name, before),
            (self._after_column_name, after),
        )

        for name, candidate in pairs:
            if HasNulls(name).run(table):
                warn_null_calculation(self, candidate)
Пример #13
0
    def run(self, column):
        """
        Find the most frequent non-null value in a column.

        When several values share the highest count, the one encountered
        first in ``values_without_nulls`` is returned.

        :param column: The column to summarize.
        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        is_numeric = isinstance(column.data_type, Number)
        if not is_numeric:
            raise DataTypeError('Mode can only be applied to columns containing Number data.')

        nulls_present = column.aggregate(HasNulls())
        if nulls_present:
            warn_null_calculation(self, column)

        # Tally occurrences of each distinct value.
        counts = defaultdict(int)
        for value in column.values_without_nulls():
            counts[value] += 1

        return max(counts, key=counts.get)
Пример #14
0
    def run(self, table):
        """
        Compute the mean of the configured column's non-null values.

        The result is ``Sum`` run against the column, divided by the number
        of values returned by ``values_without_nulls``.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        name = self._column_name
        target = table.columns[name]

        is_numeric = isinstance(target.data_type, Number)
        if not is_numeric:
            raise DataTypeError('Mean can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        total = Sum(name).run(table)
        count = len(target.values_without_nulls())

        return total / count
Пример #15
0
    def validate(self, table):
        """
        Check that the before and after columns can be compared.

        The before column must hold one of Number, Date, DateTime or
        TimeDelta data, and the after column must match the first of those
        types that the before column satisfies. Nulls in either column do
        not fail validation; they result in a call to
        ``warn_null_calculation``.

        :param table: The table containing the columns named by
            ``self._before_column_name`` and ``self._after_column_name``.
        :raises DataTypeError: If the before column is not one of the
            supported types, or if the after column is of a different type.
        """
        start_column = table.columns[self._before_column_name]
        end_column = table.columns[self._after_column_name]

        allowed_types = (Number, Date, DateTime, TimeDelta)

        # First allowed type the before column satisfies, in declared order.
        shared_type = next(
            (kind for kind in allowed_types if isinstance(start_column.data_type, kind)),
            None
        )

        if shared_type is None:
            raise DataTypeError('Change before and after columns must both contain data that is one of: Number, Date, DateTime or TimeDelta.')

        if not isinstance(end_column.data_type, shared_type):
            raise DataTypeError('Specified columns must be of the same type')

        if HasNulls(self._before_column_name).run(table):
            warn_null_calculation(self, start_column)

        if HasNulls(self._after_column_name).run(table):
            warn_null_calculation(self, end_column)
Пример #16
0
    def run(self, table):
        """
        Compute the median absolute deviation of the configured column.

        The center is ``Median`` run against the column. The result is
        ``median`` applied to the absolute differences between each sorted
        non-null value and that center.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        name = self._column_name
        target = table.columns[name]

        is_numeric = isinstance(target.data_type, Number)
        if not is_numeric:
            raise DataTypeError('MAD can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        values = target.values_without_nulls_sorted()
        center = Median(name).run(table)

        deviations = tuple(abs(value - center) for value in values)

        return median(deviations)
Пример #17
0
    def run(self, table):
        """
        Compute the population variance of the configured column.

        The result is the sum of squared differences from ``Mean`` run
        against the column, divided by the number of non-null values.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        name = self._column_name
        target = table.columns[name]

        is_numeric = isinstance(target.data_type, Number)
        if not is_numeric:
            raise DataTypeError('PopulationVariance can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        values = target.values_without_nulls()
        center = Mean(name).run(table)

        # Accumulate from 0, as the built-in sum() does.
        squared_total = 0
        for value in values:
            squared_total = squared_total + (value - center) ** 2

        return squared_total / len(values)
Пример #18
0
    def run(self, table):
        """
        Find the most frequent non-null value in the configured column.

        When several values share the highest count, the one encountered
        first in ``values_without_nulls`` is returned.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError: If the column's data type is not a ``Number``.
        """
        name = self._column_name
        target = table.columns[name]

        is_numeric = isinstance(target.data_type, Number)
        if not is_numeric:
            raise DataTypeError('Mode can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        # Tally occurrences of each distinct value.
        counts = defaultdict(int)
        for value in target.values_without_nulls():
            counts[value] += 1

        return max(counts, key=counts.get)
Пример #19
0
    def run(self, table):
        """
        Compute the 0th through 100th percentiles of the configured column.

        Only non-null values are used. If ``HasNulls`` reports nulls in the
        column, ``warn_null_calculation`` is called before computing.

        :param table: The table containing the column named by
            ``self._column_name``.
        :returns:
            An instance of :class:`Quantiles` built from 101 values, where
            index ``p`` holds the ``p``-th percentile.
        :raises IndexError: If the sorted non-null data is an empty sequence.
        """
        column = table.columns[self._column_name]

        has_nulls = HasNulls(self._column_name).run(table)

        if has_nulls:
            warn_null_calculation(self, column)

        data = column.values_without_nulls_sorted()

        # Zeroth percentile is first datum
        quantiles = [data[0]]

        count = len(data)

        for percentile in range(1, 100):
            # The 1-based rank is count * percentile / 100. Split it exactly
            # into quotient and remainder; the float product is inexact
            # (100 * 0.07 == 7.000000000000001) and would misclassify
            # whole-number ranks as fractional ones, or the reverse.
            whole, remainder = divmod(count * percentile, 100)

            # Equivalent to ceil(rank) and floor(rank + 1), computed exactly.
            low = max(1, whole + (1 if remainder else 0))
            high = min(count, whole + 1)

            # Fractional rank: both bounds land on the same datum
            if low == high:
                value = data[low - 1]
            # Whole-number rank: average that datum and the next one
            else:
                value = (data[low - 1] + data[high - 1]) / 2

            quantiles.append(value)

        # Hundredth percentile is final datum
        quantiles.append(data[-1])

        return Quantiles(quantiles)