示例#1
0
    def test_max_length(self):
        # 'gobble' is the longest of the three values, so the expected
        # MaxLength aggregate is its length, 6.
        values = ('a', 'gobble', 'w')
        rows = [Row([value], ['test']) for value in values]

        text_column = Column(0, 'test', Text(), rows)
        longest = text_column.aggregate(MaxLength())

        self.assertEqual(longest, 6)
示例#2
0
    def test_max_length(self):
        # Three single-value rows; the longest string ('gobble') has 6
        # characters, which is what the MaxLength aggregate must return.
        rows = [Row([text], ['test']) for text in ('a', 'gobble', 'w')]
        column = Column(0, 'test', Text(), rows)

        result = column.aggregate(MaxLength())

        self.assertEqual(result, 6)
示例#3
0
    def test_max(self):
        # Three timestamps on the same morning; 06:31:00 is the latest one.
        timestamps = (
            datetime.datetime(1994, 3, 3, 6, 31),
            datetime.datetime(1994, 3, 3, 6, 30, 30),
            datetime.datetime(1994, 3, 3, 6, 30),
        )
        rows = [Row([stamp], ['test']) for stamp in timestamps]

        column = Column(0, 'test', DateTime(), rows)

        # The aggregate's declared data type is checked before its value.
        aggregate_type = Max().get_aggregate_data_type(column)
        self.assertIsInstance(aggregate_type, DateTime)

        expected = datetime.datetime(1994, 3, 3, 6, 31)
        self.assertEqual(column.aggregate(Max()), expected)
示例#4
0
    def test_all(self):
        # Each case pairs the column's values with the expected All() result:
        # a None among the values yields False, three True values yield True.
        cases = (
            ((True, True, None), False),
            ((True, True, True), True),
        )

        for values, expected in cases:
            rows = [Row([value], ['test']) for value in values]
            column = Column(0, 'test', Boolean(), rows)
            self.assertEqual(column.aggregate(All()), expected)
示例#5
0
    def test_all(self):
        # First a column containing a None (expected aggregate: False),
        # then a column of only True values (expected aggregate: True).
        scenarios = [
            ([True, True, None], False),
            ([True, True, True], True),
        ]

        for cell_values, expected in scenarios:
            rows = [Row([cell], ['test']) for cell in cell_values]
            column = Column(0, 'test', Boolean(), rows)
            outcome = column.aggregate(All())
            self.assertEqual(outcome, expected)
示例#6
0
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Set up column names, column types, rows and columns.

        :param rows:
            A sequence of row value sequences. Unless ``_is_fork`` is set,
            short rows are padded with ``None`` up to the column count and
            every value is passed through its column type's ``cast``.
        :param column_names:
            Optional sequence of names. ``None`` entries are replaced with
            ``utils.letter_name(i)``. When omitted, names are generated from
            the width of the first row; if there are no rows the table has
            no columns.
        :param column_types:
            A sequence of ``DataType`` instances, a ``TypeTester``, or
            ``None`` to run a default ``TypeTester`` over ``rows``.
        :param row_names:
            A column name, a callable invoked with each row, or a sequence
            of names. A falsy value means the rows are unnamed.
        :param _is_fork:
            When true, ``rows`` is stored as given, without length checks
            or casting.
        :raises ValueError:
            For non-string or duplicate column names, column types that are
            not ``DataType`` instances, a names/types length mismatch, rows
            longer than the column count, or an unsupported ``row_names``.
        """
        # Validate column names
        if column_names:
            final_column_names = []

            for i, column_name in enumerate(column_names):
                if column_name is None:
                    final_column_names.append(utils.letter_name(i))
                elif isinstance(column_name, six.string_types):
                    final_column_names.append(column_name)
                else:
                    raise ValueError('Column names must be strings or None.')

            if len(set(final_column_names)) != len(final_column_names):
                raise ValueError('Duplicate column names are not allowed.')

            self._column_names = tuple(final_column_names)
        else:
            try:
                first_row = rows[0]
            except IndexError:
                # No rows to infer a width from: the table has no columns.
                self._column_names = ()
            else:
                self._column_names = tuple(
                    utils.letter_name(i) for i in range(len(first_row)))

        len_column_names = len(self._column_names)

        # Validate column_types
        if column_types is None:
            column_types = TypeTester()
        elif not isinstance(column_types, TypeTester):
            for column_type in column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

        if isinstance(column_types, TypeTester):
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len_column_names != len(self._column_types):
            raise ValueError(
                'column_names and column_types must be the same length.')

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows so every row spans all columns.
                    row = chain(row, [None] * (len_column_names - len_row))

                # ``j`` (not ``i``) so the row index above is never shadowed.
                new_rows.append(
                    Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)),
                        self._column_names))
        else:
            new_rows = rows

        if row_names:
            if isinstance(row_names, six.string_types):
                # A string selects the column whose values name the rows.
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif isinstance(row_names, Sequence):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(
                zip(self._column_names, self._column_types)):
            column = Column(i,
                            name,
                            data_type,
                            self._rows,
                            row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
示例#7
0
    def __init__(self, rows, column_info, row_names=None, _is_fork=False):
        """
        Set up column names, column types, rows and columns.

        :param rows:
            A sequence of row value sequences. Unless ``_is_fork`` is set,
            short rows are padded with ``None`` up to the column count and
            every value is passed through its column type's ``cast``.
        :param column_info:
            A non-empty iterable of either ``Column`` instances (their
            ``name`` and ``data_type`` are reused) or ``(name, data_type)``
            pairs. The first element decides which form is assumed. In the
            pair form, falsy names are replaced with ``letter_name(i)``.
        :param row_names:
            A column name, a callable invoked with each row, or a sequence
            of names. A falsy value means the rows are unnamed.
        :param _is_fork:
            When true, ``rows`` is stored as given, without length checks
            or casting.
        :raises ValueError:
            For non-string or duplicate column names, column types that are
            not ``DataType`` instances, rows longer than the column count,
            or an unsupported ``row_names``.
        """
        column_info = list(column_info)

        if isinstance(column_info[0], Column):
            self._column_names = tuple(c.name for c in column_info)
            self._column_types = tuple(c.data_type for c in column_info)
        else:
            column_names, self._column_types = zip(*column_info)

            final_column_names = []

            # Validation
            for i, column_name in enumerate(column_names):
                if not column_name:
                    final_column_names.append(letter_name(i))
                else:
                    if not isinstance(column_name, six.string_types):
                        raise ValueError('Column names must be strings.')

                    final_column_names.append(column_name)

            if len(set(final_column_names)) != len(final_column_names):
                raise ValueError('Duplicate column names are not allowed.')

            self._column_names = tuple(final_column_names)

            for column_type in self._column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError('Column types must be instances of DataType.')

        # Computed after both branches: the row checks below need it even
        # when column_info held Column instances.
        len_column_names = len(self._column_names)

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError('Row %i has %i values, but Table only has %i columns.' % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows so every row spans all columns.
                    row = chain(row, [None] * (len_column_names - len_row))

                # ``j`` (not ``i``) so the row index above is never shadowed.
                new_rows.append(Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)), self._column_names))
        else:
            new_rows = rows

        if row_names:
            if isinstance(row_names, six.string_types):
                # A string selects the column whose values name the rows.
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif isinstance(row_names, Sequence):
                computed_row_names = row_names
            else:
                raise ValueError('row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(zip(self._column_names, self._column_types)):
            column = Column(i, name, data_type, self._rows, row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
示例#8
0
文件: __init__.py 项目: ejmurra/agate
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Set up column names, column types, rows and columns.

        :param rows:
            A sequence of row value sequences (a string is rejected).
            Unless ``_is_fork`` is set, short rows are padded with ``None``
            up to the column count and every value is passed through its
            column type's ``cast``.
        :param column_names:
            Optional sequence of names. ``None`` entries are replaced with
            ``utils.letter_name(i)`` and duplicates get a ``_2``, ``_3``,
            ... suffix; both cases emit a warning. When omitted, names are
            generated from the width of the first row (with a warning); if
            there are no rows the table has no columns.
        :param column_types:
            A sequence of ``DataType`` instances, a dict of ``DataType``
            values passed to ``TypeTester(force=...)``, a ``TypeTester``,
            or ``None`` to run a default ``TypeTester`` over ``rows``.
        :param row_names:
            A column name, a callable invoked with each row, or a sequence
            of names. A falsy value means the rows are unnamed.
        :param _is_fork:
            When true, ``rows`` is stored as given, without length checks
            or casting.
        :raises ValueError:
            If ``rows`` is a string, a column name is neither a string nor
            ``None``, a column type is not a ``DataType``, names and types
            differ in length, a row is longer than the column count, or
            ``row_names`` is of an unsupported kind.
        """
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be a sequence of rows. Did you want agate.Table.from_csv?'
            )

        # Validate column names
        if column_names:
            final_column_names = []

            for i, column_name in enumerate(column_names):
                if column_name is None:
                    new_column_name = utils.letter_name(i)
                    warnings.warn(
                        'Column name not specified. "%s" will be used as name.'
                        % new_column_name, RuntimeWarning)
                elif isinstance(column_name, six.string_types):
                    new_column_name = column_name
                else:
                    raise ValueError('Column names must be strings or None.')

                # Append _2, _3, ... until the name no longer collides with
                # one that was already accepted.
                final_column_name = new_column_name
                duplicates = 0
                while final_column_name in final_column_names:
                    final_column_name = new_column_name + '_' + str(
                        duplicates + 2)
                    duplicates += 1

                if duplicates > 0:
                    warn_duplicate_column(new_column_name, final_column_name)

                final_column_names.append(final_column_name)

            self._column_names = tuple(final_column_names)
        elif rows:
            self._column_names = tuple(
                utils.letter_name(i) for i in range(len(rows[0])))
            warnings.warn(
                'Column names not specified. "%s" will be used as names.' %
                str(self._column_names),
                RuntimeWarning,
                stacklevel=2)
        else:
            # Keep the attribute a tuple, like in the other branches.
            self._column_names = ()

        len_column_names = len(self._column_names)

        # Validate column_types
        if column_types is None:
            column_types = TypeTester()

        elif isinstance(column_types, dict):
            for v in six.itervalues(column_types):
                if not isinstance(v, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')
            column_types = TypeTester(force=column_types)

        elif not isinstance(column_types, TypeTester):
            for column_type in column_types:
                if not isinstance(column_type, DataType):
                    raise ValueError(
                        'Column types must be instances of DataType.')

        if isinstance(column_types, TypeTester):
            self._column_types = column_types.run(rows, self._column_names)
        else:
            self._column_types = tuple(column_types)

        if len_column_names != len(self._column_types):
            raise ValueError(
                'column_names and column_types must be the same length.')

        if not _is_fork:
            new_rows = []
            cast_funcs = [c.cast for c in self._column_types]

            for i, row in enumerate(rows):
                len_row = len(row)

                if len_row > len_column_names:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (i, len_row, len_column_names))
                elif len_row < len_column_names:
                    # Pad short rows so every row spans all columns.
                    row = chain(row, [None] * (len_column_names - len_row))

                # ``j`` (not ``i``) so the row index above is never shadowed.
                new_rows.append(
                    Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)),
                        self._column_names))
        else:
            new_rows = rows

        if row_names:
            if isinstance(row_names, six.string_types):
                # A string selects the column whose values name the rows.
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            self._row_names = tuple(computed_row_names)
        else:
            self._row_names = None

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        new_columns = []

        for i, (name, data_type) in enumerate(
                zip(self._column_names, self._column_types)):
            column = Column(i,
                            name,
                            data_type,
                            self._rows,
                            row_names=self._row_names)
            new_columns.append(column)

        self._columns = MappedSequence(new_columns, self._column_names)
示例#9
0
    def test_max_length_invalid(self):
        # Aggregating a Number column with MaxLength is expected to raise
        # DataTypeError.
        raw_rows = ([1], [2], [3])
        number_column = Column(0, 'test', Number(), raw_rows)

        with self.assertRaises(DataTypeError):
            number_column.aggregate(MaxLength())
示例#10
0
    def __init__(self,
                 rows,
                 column_names=None,
                 column_types=None,
                 row_names=None,
                 _is_fork=False):
        """
        Set up column names, column types, rows and columns.

        :param rows:
            A sequence of row value sequences (a string is rejected).
            Unless ``_is_fork`` is set, short rows are padded with ``None``
            up to the column count and every value is passed through its
            column type's ``cast``.
        :param column_names:
            Optional sequence of names, passed through
            ``utils.deduplicate``. When omitted, names are generated from
            the width of the first row (with a warning); if there are no
            rows the table has no columns.
        :param column_types:
            A sequence of ``DataType`` instances, a dict of ``DataType``
            values passed to ``TypeTester(force=...)``, a ``TypeTester``,
            or ``None`` to run a default ``TypeTester`` over ``rows``.
        :param row_names:
            A column name, a callable invoked with each row, or a sequence
            of names. A falsy value means the rows are unnamed.
        :param _is_fork:
            When true, ``rows`` is stored as given, without length checks
            or casting.
        :raises ValueError:
            If ``rows`` is a string, a column type is not a ``DataType``,
            names and types differ in length, a row is longer than the
            column count, ``row_names`` is of an unsupported kind, or a
            row name is exactly of type ``int``.
        :raises CastError:
            If a value cannot be cast; the message is extended with the
            row index and column name.
        """
        if isinstance(rows, six.string_types):
            raise ValueError(
                'When created directly, the first argument to Table must be a sequence of rows. '
                'Did you want agate.Table.from_csv?')

        # Resolve column names
        if column_names:
            resolved_names = utils.deduplicate(column_names,
                                               column_names=True)
        elif rows:
            first_row_width = len(rows[0])
            resolved_names = tuple(
                utils.letter_name(n) for n in range(first_row_width))
            warnings.warn(
                'Column names not specified. "%s" will be used as names.' %
                str(resolved_names),
                RuntimeWarning,
                stacklevel=2)
        else:
            resolved_names = tuple()

        self._column_names = resolved_names
        len_column_names = len(self._column_names)

        # Resolve column types
        if column_types is None:
            column_types = TypeTester()
        elif isinstance(column_types, dict):
            if not all(isinstance(forced, DataType)
                       for forced in column_types.values()):
                raise ValueError('Column types must be instances of DataType.')

            column_types = TypeTester(force=column_types)
        elif not isinstance(column_types, TypeTester):
            if not all(isinstance(candidate, DataType)
                       for candidate in column_types):
                raise ValueError('Column types must be instances of DataType.')

        if not isinstance(column_types, TypeTester):
            self._column_types = tuple(column_types)
        else:
            self._column_types = column_types.run(rows, self._column_names)

        if len(self._column_types) != len_column_names:
            raise ValueError(
                'column_names and column_types must be the same length.')

        # Cast rows, unless they come from an existing table
        if _is_fork:
            new_rows = rows
        else:
            casters = [data_type.cast for data_type in self._column_types]
            new_rows = []

            for row_index, raw_row in enumerate(rows):
                len_row = len(raw_row)

                if len_row > len_column_names:
                    raise ValueError(
                        'Row %i has %i values, but Table only has %i columns.'
                        % (row_index, len_row, len_column_names))

                if len_row < len_column_names:
                    # Pad short rows so every row spans all columns.
                    padding = [None] * (len_column_names - len_row)
                    raw_row = chain(raw_row, padding)

                cast_values = []

                for col_index, value in enumerate(raw_row):
                    try:
                        cast_values.append(casters[col_index](value))
                    except CastError as e:
                        # Re-raise with the failing cell's location appended.
                        location = ' Error at row %s column %s.' % (
                            row_index, self._column_names[col_index])
                        raise CastError(str(e) + location)

                new_rows.append(Row(cast_values, self._column_names))

        # Resolve row names
        if not row_names:
            self._row_names = None
        else:
            if isinstance(row_names, six.string_types):
                # A string selects the column whose values name the rows.
                computed_row_names = [row[row_names] for row in new_rows]
            elif hasattr(row_names, '__call__'):
                computed_row_names = [row_names(row) for row in new_rows]
            elif utils.issequence(row_names):
                computed_row_names = row_names
            else:
                raise ValueError(
                    'row_names must be a column name, function or sequence')

            # Exact type check: only plain ints are rejected.
            if any(type(candidate) is int for candidate in computed_row_names):
                raise ValueError(
                    'Row names cannot be of type int. Use Decimal for numbered row names.'
                )

            self._row_names = tuple(computed_row_names)

        self._rows = MappedSequence(new_rows, self._row_names)

        # Build columns
        built_columns = [
            Column(position,
                   self._column_names[position],
                   self._column_types[position],
                   self._rows,
                   row_names=self._row_names)
            for position in range(len_column_names)
        ]

        self._columns = MappedSequence(built_columns, self._column_names)
示例#11
0
    def test_max_length_invalid(self):
        # A MaxLength aggregation over a Number column must fail with
        # DataTypeError.
        numeric_rows = ([1], [2], [3])
        column = Column(0, 'test', Number(), numeric_rows)

        with self.assertRaises(DataTypeError):
            column.aggregate(MaxLength())