def test_reader_func(self):
    """
    Check that ``fixed.reader`` parses the fixed-width fixture into the same
    header and rows as the pre-converted CSV fixture.
    """
    # Expected values come from the CSV rendering of the same data.
    with open('examples/testfixed_converted.csv') as csv_file:
        expected = csv.reader(csv_file)
        expected_header = next(expected)
        expected_rows = list(expected)

    # Actual values come from the fixed-width file plus its schema.
    with open('examples/testfixed') as fixed_file, \
            open('examples/testfixed_schema.csv') as schema_file:
        actual = fixed.reader(fixed_file, schema_file)
        actual_rows = list(actual)
        actual_header = actual.fieldnames

    self.assertEqual(expected_header, actual_header)
    self.assertEqual(expected_rows, actual_rows)
def __init__(self, f, schema_f):
    """
    Prepare a reader from a data file and a CSV schema describing its fields.

    :param f:
        File-like object containing the data to be read. It is stored on
        the instance as ``self.file`` and is not read here.
    :param schema_f:
        File-like object containing the schema as CSV. Its first row must
        be exactly ``column,start,length``; every following row is turned
        into a :class:`Field` built from the column name and the integer
        start and length.
    :raises ValueError:
        If the schema is empty or its header row is not exactly
        ``column``, ``start``, ``length``. Also raised by ``int()`` when a
        start or length value is not an integer.
    """
    from agate import csv

    self.file = f
    self.fields = []

    reader = csv.reader(schema_f)

    # An empty schema has no header row at all. Supplying a default keeps a
    # bare StopIteration from escaping the constructor and reports the
    # problem with the same error as any other malformed header.
    header = next(reader, None)

    if header != ['column', 'start', 'length']:
        raise ValueError('Schema must contain exactly three columns: "column", "start", and "length".')

    for row in reader:
        self.fields.append(Field(row[0], int(row[1]), int(row[2])))
def __init__(self, f, schema_f):
    """
    Build the list of field definitions from a CSV schema and remember the
    data file to read from.

    :param f:
        File-like object holding the data; kept as ``self.file`` and left
        untouched by the constructor.
    :param schema_f:
        File-like object holding the CSV schema. The header row must be
        exactly ``column,start,length``. Each subsequent row produces one
        :class:`Field` from its first three cells, with the second and
        third converted using ``int()``.
    :raises ValueError:
        If the schema has no rows, if its header row differs from
        ``column``, ``start``, ``length``, or if a start/length cell cannot
        be converted to an integer.
    """
    from agate import csv

    self.file = f
    self.fields = []

    reader = csv.reader(schema_f)

    # ``next`` with a default: an empty schema yields ``None`` here and falls
    # through to the ValueError below rather than raising StopIteration.
    header = next(reader, None)

    if header != ['column', 'start', 'length']:
        raise ValueError(
            'Schema must contain exactly three columns: "column", "start", and "length".'
        )

    for row in reader:
        self.fields.append(Field(row[0], int(row[1]), int(row[2])))
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0, encoding='utf-8', **kwargs):
    """
    Create a new table from a CSV.

    This method uses agate's builtin CSV reader, which supplies encoding
    support for both Python 2 and Python 3.

    :code:`kwargs` will be passed through to the CSV reader.

    :param path:
        Filepath or file-like object from which to read CSV data. A
        file-like object is read with ``readlines()`` and is not closed.
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        Either a single number indicating the number of lines to skip from
        the top of the file or a sequence of line indexes to skip where the
        first line is index 0. A negative number skips nothing.
    :param header:
        If `True`, the first row of the CSV is assumed to contain headers
        and will be skipped. If `header` and `column_names` are both
        specified then a row will be skipped, but `column_names` will be
        used.
    :param sniff_limit:
        Limit CSV dialect sniffing to the specified number of bytes. Set to
        None to sniff the entire file. Defaults to 0 or no sniffing.
    :param encoding:
        Character encoding of the CSV file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    :raises ValueError:
        If `skip_lines` is neither an int nor a sequence.
    :returns:
        A ``Table`` built from the parsed rows.
    """
    from agate import csv
    from agate.table import Table

    if hasattr(path, 'read'):
        lines = path.readlines()
    else:
        with io.open(path, encoding=encoding) as f:
            lines = f.readlines()

    if utils.issequence(skip_lines):
        # Build the lookup once so each line costs a constant-time check
        # instead of a scan over the whole sequence of indexes.
        skipped = frozenset(skip_lines)
        contents = ''.join(line for i, line in enumerate(lines) if i not in skipped)
    elif isinstance(skip_lines, int):
        # Clamp at zero: a negative value would otherwise slice from the
        # END of the file and silently discard almost all of the data.
        contents = ''.join(lines[max(skip_lines, 0):])
    else:
        raise ValueError('skip_lines argument must be an int or sequence')

    if sniff_limit is None:
        kwargs['dialect'] = csv.Sniffer().sniff(contents)
    elif sniff_limit > 0:
        kwargs['dialect'] = csv.Sniffer().sniff(contents[:sniff_limit])

    # The reader on Python 2 is handed UTF-8 encoded bytes rather than text.
    if six.PY2:
        contents = contents.encode('utf-8')

    rows = list(csv.reader(six.StringIO(contents), header=header, **kwargs))

    if header:
        # The header row is always consumed; it only supplies the column
        # names when the caller did not provide any.
        if column_names is None:
            column_names = rows.pop(0)
        else:
            rows.pop(0)

    return Table(rows, column_names, column_types, row_names=row_names)
def from_csv(cls, path, column_names=None, column_types=None, row_names=None, skip_lines=0, header=True, sniff_limit=0, encoding='utf-8', row_limit=None, **kwargs):
    """
    Build a table from CSV data.

    Parsing is delegated to agate's own CSV reader, and any extra
    :code:`kwargs` are forwarded to it.

    :param path:
        Either a filesystem path or an object exposing ``read``. A path is
        opened here and closed again before returning; a file-like object
        is used as given and left open. On Python 2 a path is opened in
        binary mode and :code:`encoding` is handed to the reader, so a
        caller-supplied file object should presumably be binary as well.
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        How many lines to discard from the top of the input before parsing.
    :param header:
        When :code:`True`, the first parsed row is consumed as the header.
        It becomes the column names unless :code:`column_names` was given,
        in which case it is discarded.
    :param sniff_limit:
        Number of leading characters handed to the dialect sniffer.
        :code:`None` sniffs the whole input; 0 (the default) disables
        sniffing.
    :param encoding:
        Character encoding used when opening a path. A file handle passed
        in is assumed to have been opened with the right encoding already.
    :param row_limit:
        Maximum number of data rows to read; :code:`None` reads them all.
    :raises ValueError:
        If :code:`skip_lines` is not an int.
    """
    from agate import csv
    from agate.table import Table

    opened_here = False

    try:
        if hasattr(path, 'read'):
            handle = path
        else:
            handle = open(path, 'Urb') if six.PY2 else io.open(path, encoding=encoding)
            opened_here = True

        if not isinstance(skip_lines, int):
            raise ValueError('skip_lines argument must be an int')

        # Throw away the requested number of leading lines.
        for _ in range(skip_lines):
            handle.readline()

        # Everything that is left is buffered in memory for the reader.
        buffered = six.StringIO(handle.read())

        if sniff_limit is None or sniff_limit > 0:
            sample = buffered.getvalue()

            if sniff_limit is not None:
                sample = sample[:sniff_limit]

            kwargs['dialect'] = csv.Sniffer().sniff(sample)

        if six.PY2:
            kwargs['encoding'] = encoding

        parsed = csv.reader(buffered, header=header, **kwargs)

        if header:
            # The header row is always consumed, but only used when the
            # caller supplied no column names of their own.
            first_row = next(parsed)

            if column_names is None:
                column_names = first_row

        source = parsed if row_limit is None else itertools.islice(parsed, row_limit)
        rows = tuple(source)
    finally:
        if opened_here:
            handle.close()

    return Table(rows, column_names, column_types, row_names=row_names)
def from_csv(
    cls,
    path,
    column_names=None,
    column_types=None,
    row_names=None,
    skip_lines=0,
    header=True,
    sniff_limit=0,
    encoding="utf-8",
    **kwargs
):
    """
    Create a new table from a CSV.

    This method uses agate's builtin CSV reader, which supplies encoding
    support for both Python 2 and Python 3.

    :code:`kwargs` will be passed through to the CSV reader.

    :param path:
        Filepath or file-like object from which to read CSV data. If a
        file-like object is specified, it must be seekable (``tell`` and
        ``seek`` are used to rewind after dialect sniffing) and it is left
        open. A file opened here from a path is always closed, including
        when an error occurs.
    :param column_names:
        See :meth:`.Table.__init__`.
    :param column_types:
        See :meth:`.Table.__init__`.
    :param row_names:
        See :meth:`.Table.__init__`.
    :param skip_lines:
        The number of lines to skip from the top of the file.
    :param header:
        If :code:`True`, the first row of the CSV is assumed to contain
        column names. If :code:`header` and :code:`column_names` are both
        specified then a row will be skipped, but :code:`column_names` will
        be used.
    :param sniff_limit:
        Limit CSV dialect sniffing to the specified number of bytes. Set to
        None to sniff the entire file. Defaults to 0 (no sniffing).
    :param encoding:
        Character encoding of the CSV file. Note: if passing in a file
        handle it is assumed you have already opened it with the correct
        encoding specified.
    :raises ValueError:
        If :code:`skip_lines` is not an int.
    :returns:
        A ``Table`` built from the parsed rows.
    """
    from agate import csv
    from agate.table import Table

    if hasattr(path, "read"):
        f = path
        close = False
    else:
        f = io.open(path, encoding=encoding)
        close = True

    # Remember the real handle: on Python 2 ``f`` is rebound to an in-memory
    # buffer below, and closing that buffer would leave the file open.
    source = f

    try:
        if not isinstance(skip_lines, int):
            raise ValueError("skip_lines argument must be an int")

        while skip_lines > 0:
            f.readline()
            skip_lines -= 1

        # Sniffing consumes input, so note where the data starts and rewind
        # to that position before handing the file to the reader.
        start = f.tell()

        if sniff_limit is None:
            kwargs["dialect"] = csv.Sniffer().sniff(f.read())
        elif sniff_limit > 0:
            kwargs["dialect"] = csv.Sniffer().sniff(f.read(sniff_limit))

        f.seek(start)

        # The reader on Python 2 is handed UTF-8 encoded bytes, not text.
        if six.PY2:
            f = six.StringIO(f.read().encode("utf-8"))

        reader = csv.reader(f, header=header, **kwargs)

        if header:
            # The header row is always consumed; it only supplies the
            # column names when the caller did not provide any.
            if column_names is None:
                column_names = next(reader)
            else:
                next(reader)

        rows = tuple(reader)
    finally:
        # Runs on success and on every error path, so a file opened from a
        # path can never be leaked.
        if close:
            source.close()

    return Table(rows, column_names, column_types, row_names=row_names)