def from_csv(cls, path, column_info=None, row_names=None, header=True, **kwargs):
    """
    Create a new table from a CSV.

    This method will use csvkit if it is available, otherwise it will
    use Python's builtin csv module. ``kwargs`` will be passed through to
    :meth:`csv.reader`.

    If you are using Python 2 and not using csvkit, this method is not
    unicode-safe.

    :param path:
        Filepath or file-like object from which to read CSV data.
    :param column_info:
        May be any valid input to :meth:`Table.__init__` or an instance of
        :class:`.TypeTester`. Or, None, in which case a generic
        :class:`.TypeTester` will be created.
    :param row_names:
        See :meth:`Table.__init__`.
    :param header:
        If `True`, the first row of the CSV is assumed to contain headers
        and will be skipped.
    :raises ValueError:
        If an explicit ``column_info`` sequence does not match the number
        of columns found in the CSV.
    """
    # Default to type inference when the caller gave us nothing.
    if column_info is None:
        column_info = TypeTester()

    use_inference = isinstance(column_info, TypeTester)

    # Accept either an already-open file-like object or a filesystem path.
    if hasattr(path, 'read'):
        rows = list(csv.reader(path, **kwargs))
    else:
        # NOTE(review): on Python 3 the csv module recommends opening with
        # newline=''; left unchanged here to preserve Python 2 support.
        with open(path) as f:
            rows = list(csv.reader(f, **kwargs))

    if header:
        # First row supplies the column names.
        column_names = rows.pop(0)
    else:
        # No header: names will be generated downstream (letter names).
        column_names = [None] * len(rows[0])

    if use_inference:
        # Infer a DataType for each column from the data itself.
        column_info = column_info.run(rows, column_names)
    elif len(column_names) != len(column_info):
        # The mismatch can go either way, so don't claim "more columns".
        raise ValueError(
            'CSV contains a different number of columns than were specified.')

    # Pass names and types separately, matching Table.__init__'s
    # (rows, column_names, column_types, ...) signature. Use cls so
    # subclasses construct instances of themselves.
    return cls(rows, column_names, column_info, row_names=row_names)
def __init__(self, rows, column_names=None, column_types=None, row_names=None, _is_fork=False):
    """
    Construct a table from a sequence of rows.

    :param rows:
        Sequence of row value sequences.
    :param column_names:
        Sequence of names (strings or ``None``). Missing/None entries get
        generated letter names; if omitted entirely, names are generated
        from the width of the first row.
    :param column_types:
        Sequence of :class:`DataType` instances, a :class:`TypeTester`, or
        ``None`` (a generic :class:`TypeTester` is created).
    :param row_names:
        A column name, a callable, or a sequence used to name the rows.
    :param _is_fork:
        Internal flag: when True, ``rows`` are already-cast :class:`Row`
        instances and validation/casting is skipped.
    :raises ValueError:
        On invalid/duplicate column names, invalid column types, a
        name/type length mismatch, an over-long row, or invalid
        ``row_names``.
    """
    # --- Validate and normalize column names -----------------------------
    if column_names:
        final_column_names = []

        for i, column_name in enumerate(column_names):
            if column_name is None:
                # Fill gaps with generated names ('a', 'b', ...).
                final_column_names.append(utils.letter_name(i))
            elif isinstance(column_name, six.string_types):
                final_column_names.append(column_name)
            else:
                raise ValueError('Column names must be strings or None.')

        if len(set(final_column_names)) != len(final_column_names):
            raise ValueError('Duplicate column names are not allowed.')

        self._column_names = tuple(final_column_names)
    else:
        # No names supplied: generate one per column of the first row.
        # NOTE(review): raises IndexError on an empty ``rows`` — presumably
        # callers guarantee at least one row here; confirm upstream.
        self._column_names = tuple(
            utils.letter_name(i) for i in range(len(rows[0])))

    len_column_names = len(self._column_names)

    # --- Validate and resolve column types -------------------------------
    if column_types is None:
        column_types = TypeTester()

    if isinstance(column_types, TypeTester):
        # Infer types from the data.
        self._column_types = column_types.run(rows, self._column_names)
    else:
        for column_type in column_types:
            if not isinstance(column_type, DataType):
                raise ValueError(
                    'Column types must be instances of DataType.')

        self._column_types = tuple(column_types)

    if len_column_names != len(self._column_types):
        raise ValueError(
            'column_names and column_types must be the same length.')

    # --- Cast rows (skipped for forks, whose rows are already cast) ------
    if not _is_fork:
        new_rows = []
        # Hoist the bound cast methods out of the per-row loop.
        cast_funcs = [c.cast for c in self._column_types]

        for i, row in enumerate(rows):
            len_row = len(row)

            if len_row > len_column_names:
                raise ValueError(
                    'Row %i has %i values, but Table only has %i columns.'
                    % (i, len_row, len_column_names))
            elif len_row < len_column_names:
                # Pad short rows with None so every row has full width.
                # (Uses the hoisted length rather than re-reading the
                # column_names property each iteration.)
                row = chain(row, [None] * (len_column_names - len_row))

            new_rows.append(
                Row(tuple(cast_funcs[j](d) for j, d in enumerate(row)),
                    self._column_names))
    else:
        new_rows = rows

    # --- Compute row names -----------------------------------------------
    if row_names:
        if isinstance(row_names, six.string_types):
            # A column name: take each row's value in that column.
            computed_row_names = [row[row_names] for row in new_rows]
        elif callable(row_names):
            # A function of the row.
            computed_row_names = [row_names(row) for row in new_rows]
        elif isinstance(row_names, Sequence):
            # An explicit sequence of names.
            computed_row_names = row_names
        else:
            raise ValueError(
                'row_names must be a column name, function or sequence')

        self._row_names = tuple(computed_row_names)
    else:
        self._row_names = None

    self._rows = MappedSequence(new_rows, self._row_names)

    # --- Build column views over the rows --------------------------------
    new_columns = []

    for i, (name, data_type) in enumerate(
            zip(self._column_names, self._column_types)):
        column = Column(
            i, name, data_type, self._rows, row_names=self._row_names)
        new_columns.append(column)

    self._columns = MappedSequence(new_columns, self._column_names)