def validate(self, table):
    """
    Check that the configured column and optional total are usable.

    :param table:
        Object exposing ``columns``; also handed to :class:`HasNulls`.
    :raises DataTypeError:
        If the column's data type is not a :class:`Number` instance, or if
        a total was supplied and it is less than or equal to zero.
    """
    name = self._column_name
    target = table.columns[name]

    if not isinstance(target.data_type, Number):
        raise DataTypeError('Percent column must contain Number data.')

    total = self._total
    if total is not None and total <= 0:
        raise DataTypeError('The total must be a positive number')

    # Nulls are not an error here; they only trigger the warning helper.
    contains_nulls = HasNulls(name).run(table)
    if contains_nulls:
        warn_null_calculation(self, target)
示例#2
0
    def prepare(self, table):
        """
        Check that the before and after columns both hold Number data.

        :raises DataTypeError:
            If either column's data type is not a :class:`Number` instance.
        """
        columns = table.columns
        before = columns[self._before_column_name]
        after = columns[self._after_column_name]

        # Both columns are looked up before either type check runs.
        checks = (
            (before, 'PercentChange before column must contain Number data.'),
            (after, 'PercentChange after column must contain Number data.'),
        )

        for candidate, message in checks:
            if not isinstance(candidate.data_type, Number):
                raise DataTypeError(message)
示例#3
0
    def validate(self, table):
        """
        Check that both columns hold Number data and flag any nulls.

        :raises DataTypeError:
            If either column's data type is not a :class:`Number` instance.
        """
        columns = table.columns
        before = columns[self._before_column_name]
        after = columns[self._after_column_name]

        pairs = (
            (self._before_column_name, before,
             'PercentChange before column must contain Number data.'),
            (self._after_column_name, after,
             'PercentChange after column must contain Number data.'),
        )

        # Type checks for both columns come first, then the null checks.
        for _, candidate, message in pairs:
            if not isinstance(candidate.data_type, Number):
                raise DataTypeError(message)

        for name, candidate, _ in pairs:
            if HasNulls(name).run(table):
                warn_null_calculation(self, candidate)
    def run(self, table):
        """
        Express each value of the configured column as a percentage.

        The denominator is the user-supplied total when one was given;
        otherwise it is the :class:`Sum` aggregate of the column.

        :returns:
            A list with one entry per row of ``table.rows``: ``None`` where
            the row's value is ``None``, else ``value / total * 100``.
        :raises DataTypeError:
            If no total was supplied and the computed sum is less than or
            equal to zero.
        """
        denominator = self._total

        if denominator is None:
            # No explicit total: fall back to the column sum.
            denominator = table.aggregate(Sum(self._column_name))

            if denominator <= 0:
                raise DataTypeError(
                    'The sum of column values must be a positive number')

        values = (row[self._column_name] for row in table.rows)

        # Null inputs stay null in the output rather than being dropped.
        return [
            None if value is None else value / denominator * 100
            for value in values
        ]
示例#5
0
    def validate(self, table):
        """
        Check that the configured column holds a type Min can handle.

        :raises DataTypeError:
            If the column's data type is not a :class:`Number`,
            :class:`Date` or :class:`DateTime` instance.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, (Number, Date, DateTime)):
            # Message previously read "orNumber" (missing space).
            raise DataTypeError('Min can only be applied to columns containing DateTime or Number data.')
示例#6
0
    def validate(self, table):
        """
        Check that the configured column holds Number or TimeDelta data.

        :raises DataTypeError:
            If the column's data type is an instance of neither type.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Number) or isinstance(data_type, TimeDelta):
            return

        raise DataTypeError(
            'Sum can only be applied to columns containing Number or TimeDelta data.'
        )
示例#7
0
    def validate(self, table):
        """
        Check that the configured column holds Number data.

        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Number):
            return

        raise DataTypeError(
            'MaxPrecision can only be applied to columns containing Number data.'
        )
示例#8
0
    def validate(self, table):
        """
        Check that the configured column holds Text data.

        :raises DataTypeError:
            If the column's data type is not a :class:`Text` instance.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Text):
            return

        raise DataTypeError(
            'MaxLength can only be applied to columns containing Text data.'
        )
示例#9
0
    def prepare(self, table):
        """
        Validate the column type and cache the column's percentiles.

        The result of running :class:`Percentiles` on the table is stored
        on ``self._percentiles``.

        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        data_type = table.columns[name].data_type

        if not isinstance(data_type, Number):
            raise DataTypeError(
                'PercentileRank column must contain Number data.')

        percentiles = Percentiles(name)
        self._percentiles = percentiles.run(table)
示例#10
0
def merge(cls, tables, row_names=None, column_names=None):
    """
    Create a new table from a sequence of similar tables.

    This method will not carry over row names from the merged tables, but new
    row names can be specified with the :code:`row_names` argument.

    It is possible to limit the columns included in the new :class:`.Table`
    with :code:`column_names` argument. For example, to only include columns
    from a specific table, set :code:`column_names` equal to
    :code:`table.column_names`.

    :param tables:
        A sequence of :class:`.Table` instances.
    :param row_names:
        See :class:`.Table` for the usage of this parameter.
    :param column_names:
        A sequence of column names to include in the new :class:`.Table`. If
        not specified, all distinct column names from `tables` are included.
    :returns:
        A new :class:`.Table`.
    :raises DataTypeError:
        If two tables share a column name but the later table's column type
        is not an instance of the first one's type class.
    """
    from agate.table import Table

    # The tables are walked twice (schema pass, then row pass); materialize
    # them so a one-shot iterable is not exhausted after the first pass.
    tables = tuple(tables)

    new_columns = OrderedDict()

    for table in tables:
        for column_name, column_type in zip(table.column_names, table.column_types):
            if column_names is not None and column_name not in column_names:
                continue

            if column_name in new_columns:
                if not isinstance(column_type, type(new_columns[column_name])):
                    raise DataTypeError('Tables contain columns with the same names, but different types.')
            else:
                new_columns[column_name] = column_type

    # Materialize as tuples: on Python 3 `.keys()`/`.values()` are view
    # objects, which never compare equal to a tuple (so the fast path below
    # could never be taken) and cannot be indexed by consumers.
    column_keys = tuple(new_columns.keys())
    column_types = tuple(new_columns.values())

    rows = []

    for table in tables:
        # Performance optimization for identical table structures.
        # NOTE(review): assumes `column_names`/`column_types` are tuples; if
        # they are not, the comparison is False and the slower (still
        # correct) branch below is used.
        if table.column_names == column_keys and table.column_types == column_types:
            rows.extend(table.rows)
        else:
            for row in table.rows:
                # Columns absent from this table's rows are filled with None.
                data = [row.get(column_key, None) for column_key in column_keys]
                rows.append(Row(data, column_keys))

    return Table(rows, column_keys, column_types, row_names=row_names, _is_fork=True)
示例#11
0
    def run(self, column):
        """
        Take the square root of the column's :class:`Variance` aggregate.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if isinstance(column.data_type, Number):
            variance = column.aggregate(Variance())
            return variance.sqrt()

        raise DataTypeError(
            'StDev can only be applied to columns containing Number data.')
示例#12
0
文件: change.py 项目: sethk/agate
    def validate(self, table):
        """
        Check that the before and after columns share a supported type.

        Supported types are tried in the order Number, Date, DateTime,
        TimeDelta; the first one matching the before column must also match
        the after column. Nulls in either column trigger the warning helper.

        :raises DataTypeError:
            If the before column matches none of the supported types, or the
            after column does not match the before column's type.
        """
        before = table.columns[self._before_column_name]
        after = table.columns[self._after_column_name]

        supported = (Number, Date, DateTime, TimeDelta)
        matched = next(
            (kind for kind in supported if isinstance(before.data_type, kind)),
            None
        )

        if matched is None:
            raise DataTypeError('Change before and after columns must both contain data that is one of: Number, Date, DateTime or TimeDelta.')

        if not isinstance(after.data_type, matched):
            raise DataTypeError('Specified columns must be of the same type')

        named_columns = (
            (self._before_column_name, before),
            (self._after_column_name, after),
        )

        for name, candidate in named_columns:
            if HasNulls(name).run(table):
                warn_null_calculation(self, candidate)
示例#13
0
    def run(self, column):
        """
        Add up the column's non-null values.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if isinstance(column.data_type, Number):
            values = column.values_without_nulls()
            return sum(values)

        raise DataTypeError(
            'Sum can only be applied to columns containing Number data.')
示例#14
0
    def run(self, column):
        """
        Compute the length of the longest non-null value in a Text column.

        :returns: :class:`int`. ``0`` when the column has no non-null
            values.
        :raises DataTypeError:
            If the column's data type is not a :class:`Text` instance.
        """
        if not isinstance(column.data_type, Text):
            raise DataTypeError(
                'MaxLength can only be applied to columns containing Text data.'
            )

        lengths = [len(d) for d in column.values_without_nulls()]

        # max() raises ValueError on an empty sequence; an empty (or
        # all-null) column has no text, so report a length of zero instead.
        if not lengths:
            return 0

        return max(lengths)
    def validate(self, table):
        """
        Check that the configured column holds Number data and flag nulls.

        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('Deciles can only be applied to columns containing Number data.')

        # Nulls do not fail validation; they only trigger the warning helper.
        if HasNulls(name).run(table):
            warn_null_calculation(self, target)
示例#16
0
    def run(self, table):
        """
        Take the square root of the column's :class:`PopulationVariance`.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        data_type = table.columns[name].data_type

        if not isinstance(data_type, Number):
            raise DataTypeError('PopulationStDev can only be applied to columns containing Number data.')

        variance = PopulationVariance(name).run(table)

        return variance.sqrt()
示例#17
0
    def run(self, table):
        """
        Pass the column's non-null values to :func:`max_precision`.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        target = table.columns[self._column_name]

        if isinstance(target.data_type, Number):
            values = target.values_without_nulls()
            return max_precision(values)

        raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')
示例#18
0
    def run(self, column):
        """
        Divide the column's :class:`Sum` by its count of non-null values.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if not isinstance(column.data_type, Number):
            raise DataTypeError(
                'Mean can only be applied to columns containing Number data.')

        contains_nulls = column.aggregate(HasNulls())
        if contains_nulls:
            warn_null_calculation(self, column)

        # Nulls are excluded from the denominator as well as the sum's input.
        total = column.aggregate(Sum())
        count = len(column.values_without_nulls())

        return total / count
示例#19
0
    def run(self, column):
        """
        Find the largest non-null value in the column.

        :returns: The result of :func:`max` over the non-null values.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number`,
            :class:`Date` or :class:`DateTime` instance.
        """
        accepted = (Number, Date, DateTime)

        if isinstance(column.data_type, accepted):
            return max(column.values_without_nulls())

        raise DataTypeError(
            'Max can only be applied to columns containing DateTime or Number data.'
        )
示例#20
0
    def run(self, table):
        """
        Find the largest non-null value in the configured column.

        :returns:
            A single value whose type is dependent on the type of the column.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number`,
            :class:`Date` or :class:`DateTime` instance.
        """
        target = table.columns[self._column_name]
        accepted = (Number, Date, DateTime)

        if not isinstance(target.data_type, accepted):
            raise DataTypeError('Max can only be applied to columns containing DateTime or Number data.')

        values = target.values_without_nulls()

        return max(values)
示例#21
0
    def run(self, column):
        """
        Subtract the 25th percentile from the 75th percentile.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if not isinstance(column.data_type, Number):
            raise DataTypeError(
                'IQR can only be applied to columns containing Number data.')

        contains_nulls = column.aggregate(HasNulls())
        if contains_nulls:
            warn_null_calculation(self, column)

        ranks = column.aggregate(Percentiles())
        upper = ranks[75]
        lower = ranks[25]

        return upper - lower
示例#22
0
    def validate(self, table):
        """
        Check that every slug source column is Text and free of nulls.

        ``self._column_name`` may be a single name or a sequence of names
        (as decided by :func:`issequence`).

        :raises DataTypeError:
            If a column's data type is not a :class:`Text` instance.
        :raises ValueError:
            If :class:`HasNulls` reports nulls for a column.
        """
        configured = self._column_name
        names = configured if issequence(configured) else [configured]

        # Each column is fully checked (type, then nulls) before the next.
        for name in names:
            data_type = table.columns[name].data_type

            if not isinstance(data_type, Text):
                raise DataTypeError('Slug column must contain Text data.')

            if HasNulls(name).run(table):
                raise ValueError('Slug column cannot contain `None`.')
示例#23
0
    def run(self, column):
        """
        Take the median of absolute deviations from the 50th percentile.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if not isinstance(column.data_type, Number):
            raise DataTypeError(
                'MAD can only be applied to columns containing Number data.')

        contains_nulls = column.aggregate(HasNulls())
        if contains_nulls:
            warn_null_calculation(self, column)

        values = column.values_without_nulls_sorted()
        center = column.aggregate(Percentiles())[50]

        deviations = tuple(abs(value - center) for value in values)

        return median(deviations)
示例#24
0
    def run(self, column):
        """
        Average the squared deviations of non-null values from the mean.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if not isinstance(column.data_type, Number):
            raise DataTypeError(
                'PopulationVariance can only be applied to columns containing Number data.'
            )

        contains_nulls = column.aggregate(HasNulls())
        if contains_nulls:
            warn_null_calculation(self, column)

        values = column.values_without_nulls()
        center = column.aggregate(Mean())

        squared_deviations = [(value - center) ** 2 for value in values]

        # Divides by the full count of values, not count - 1.
        return sum(squared_deviations) / len(values)
示例#25
0
    def run(self, table):
        """
        Subtract the 25th percentile from the 75th percentile.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('IQR can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        ranks = Percentiles(name).run(table)
        upper = ranks[75]
        lower = ranks[25]

        return upper - lower
示例#26
0
    def run(self, column):
        """
        Find the most frequent non-null value in the column.

        When several values share the highest count, the one :func:`max`
        meets first while iterating the tally wins.

        :returns: :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        if not isinstance(column.data_type, Number):
            raise DataTypeError(
                'Mode can only be applied to columns containing Number data.')

        contains_nulls = column.aggregate(HasNulls())
        if contains_nulls:
            warn_null_calculation(self, column)

        # Tally how many times each distinct value occurs.
        tally = defaultdict(int)
        for value in column.values_without_nulls():
            tally[value] += 1

        return max(tally, key=tally.get)
示例#27
0
    def run(self, table):
        """
        Divide the column's :class:`Sum` by its count of non-null values.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('Mean can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        # Nulls are excluded from the denominator.
        numerator = Sum(name).run(table)
        count = len(target.values_without_nulls())

        return numerator / count
示例#28
0
    def run(self, table):
        """
        Average the squared deviations of non-null values from the mean.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('PopulationVariance can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        values = target.values_without_nulls()
        center = Mean(name).run(table)

        squared_deviations = [(value - center) ** 2 for value in values]

        # Divides by the full count of values, not count - 1.
        return sum(squared_deviations) / len(values)
示例#29
0
    def run(self, table):
        """
        Take the median of absolute deviations from the column's median.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('MAD can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        values = target.values_without_nulls_sorted()
        center = Median(name).run(table)

        deviations = tuple(abs(value - center) for value in values)

        return median(deviations)
示例#30
0
    def run(self, table):
        """
        Find the most frequent non-null value in the configured column.

        When several values share the highest count, the one :func:`max`
        meets first while iterating the tally wins.

        :returns:
            :class:`decimal.Decimal`.
        :raises DataTypeError:
            If the column's data type is not a :class:`Number` instance.
        """
        name = self._column_name
        target = table.columns[name]

        if not isinstance(target.data_type, Number):
            raise DataTypeError('Mode can only be applied to columns containing Number data.')

        if HasNulls(name).run(table):
            warn_null_calculation(self, target)

        # Tally how many times each distinct value occurs.
        tally = defaultdict(int)
        for value in target.values_without_nulls():
            tally[value] += 1

        return max(tally, key=tally.get)