Exemplo n.º 1
0
def rowlengths(table):
    """
    Tabulate how many data rows of each length the table contains. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    The result has the fields ``length`` and ``count``, with the most
    frequent lengths listed first. Handy for spotting ragged rows in data
    files.

    """

    # tally the length of every data row in a single pass
    tally = Counter(len(row) for row in data(table))
    report = [('length', 'count')]
    report += tally.most_common()
    return wrap(report)
Exemplo n.º 2
0
def rowlengths(table):
    """
    Summarise the table by row length: one output row per distinct length,
    together with the number of data rows having that length. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    Useful for finding potential problems (e.g. short or long rows) in
    data files.

    """

    length_counts = Counter(len(record) for record in data(table))
    # most_common() orders the (length, count) pairs by descending count
    return wrap([('length', 'count')] + length_counts.most_common())
Exemplo n.º 3
0
def tojsonarrays(table,
                 source=None,
                 prefix=None,
                 suffix=None,
                 output_header=False,
                 *args,
                 **kwargs):
    """
    Serialise a table as JSON, emitting each row as a JSON array. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tojsonarrays(table1, 'example.file4.json')
        >>> # check what it did
        ... print(open('example.file4.json').read())
        [["a", 1], ["b", 2], ["c", 2]]

    The header row is included only when `output_header` is true. Any
    extra positional and keyword arguments are forwarded to the JSON
    writer.

    Note that this is not streaming: every row is collected into a list in
    memory before anything is written.

    """

    # keep the header row only when the caller asked for it
    rows = table if output_header else data(table)
    _writejson(source, list(rows), prefix, suffix, *args, **kwargs)
Exemplo n.º 4
0
def counts(request, uuid):
    """
    Render value counts for the requested columns of a downloaded CSV.

    The ``columns`` query parameter is a comma-separated list of column
    names; blank entries are ignored and the remaining names are sorted.

    Responses:

      - 404 when no ``CSVDownload`` with the given `uuid` exists;
      - a redirect to the ``people_list`` view when no columns are given;
      - 400 when any requested column is not in the CSV header;
      - otherwise the ``counts.html`` template rendered with the value
        counts (the ``frequency`` field is cut out).
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    columns = sorted([c for c in columns_str.split(',') if c.strip()])
    if not columns:
        # nothing to count: fall back to the plain listing
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Look the header up once instead of once per requested column
    # (each header() call presumably goes back to the CSV file).
    available_columns = header(people)
    if any(column not in available_columns for column in columns):
        return HttpResponseBadRequest('Bad request.')

    # Named so that it does not shadow this view function.
    value_counts = valuecounts(people, *columns)
    value_counts = cutout(value_counts, 'frequency')

    return render(
        request, 'counts.html', {
            'csvdownload': csvdownload,
            'columns': available_columns,
            'headers': header(value_counts),
            'counts': data(value_counts),
            'queryparams': {
                'columns': columns
            }
        })
Exemplo n.º 5
0
Arquivo: xlsx.py Projeto: juarezr/petl
def appendxlsx(tbl, filename, sheet=None, write_header=False):
    """
    Append the rows of a table to a sheet of an existing Excel .xlsx file.

    `sheet` selects the worksheet: ``None`` means the first sheet, an int
    is used as an index into the workbook's sheet names, and anything else
    is converted to a string and used as the sheet name.

    When `write_header` is true the header row is appended as well, with
    every field converted to text; otherwise only the data rows are
    appended.
    """

    import openpyxl
    reader = read_source_from_arg(filename)
    with reader.open('rb') as infile:
        workbook = openpyxl.load_workbook(filename=infile, read_only=False)

        # resolve the target worksheet's name, then fetch the worksheet
        if sheet is None:
            sheet_name = workbook.sheetnames[0]
        elif isinstance(sheet, int):
            sheet_name = workbook.sheetnames[sheet]
        else:
            sheet_name = str(sheet)
        worksheet = workbook[sheet_name]

        if write_header:
            remaining = iter(tbl)
            fields = [text_type(field) for field in next(remaining)]
            rows = itertools.chain([fields], remaining)
        else:
            rows = data(tbl)
        for row in rows:
            worksheet.append(row)

        # NOTE(review): the read handle above is still open while the same
        # file is rewritten here - confirm this is intended.
        writer = write_source_from_arg(filename)
        with writer.open('wb') as outfile:
            workbook.save(outfile)
Exemplo n.º 6
0
def toxlsx(tbl, filename, sheet=None, write_header=True, mode="replace"):
    """
    Write a table to a sheet of an Excel .xlsx file.

    N.B., the sheet name is case sensitive.

    How the target file and sheet are handled depends on `mode`:

      - `replace` (the default): replaces the named sheet, or adds it if
        it is absent. With no sheet name given, all sheets are replaced,
        i.e. the entire file is overwritten.

      - `overwrite`: always overwrites the file, producing a file with a
        single sheet.

      - `add`: adds a new sheet; `ValueError` is raised if a sheet with
        the given name already exists.

    `sheet` may be omitted in every mode, in which case the new sheet gets
    a default name. A missing file is created, except in `replace` mode
    with a named sheet, where the file must already exist and be a valid
    .xlsx file.

    The header row is written only when `write_header` is true.
    """
    workbook = _load_or_create_workbook(filename, mode, sheet)
    worksheet = _insert_sheet_on_workbook(mode, sheet, workbook)

    rows = tbl if write_header else data(tbl)
    for row in rows:
        worksheet.append(row)

    destination = write_source_from_arg(filename)
    with destination.open('wb') as outfile:
        workbook.save(outfile)
def _insert(table, h5table):
    """
    Append every data row of `table` to `h5table`, then flush it.

    Values are matched to the HDF5 columns by position, so the field order
    of the input table must agree with the column order of `h5table`; the
    field names themselves do not have to match.
    """
    # the header row is skipped: only data rows are inserted
    for record in data(table):
        for position, colname in enumerate(h5table.colnames):
            h5table.row[colname] = record[position]
        h5table.row.append()
    h5table.flush()
Exemplo n.º 8
0
def _insert(table, h5table):
    """
    Copy the data rows of `table` into `h5table` and flush the result.

    Columns are paired up purely by index: the n-th value of each input
    row goes into the n-th column named in ``h5table.colnames``. Field
    names of the input table are not consulted.
    """
    records = data(table)  # header row is not needed
    for values in records:
        for idx, name in enumerate(h5table.colnames):
            h5table.row[name] = values[idx]
        h5table.row.append()
    h5table.flush()
Exemplo n.º 9
0
def nrows(table):
    """
    Return how many data rows the table has (the header is not counted).
    E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2]]
        >>> etl.nrows(table)
        2

    """

    count = 0
    for _ in data(table):
        count += 1
    return count
Exemplo n.º 10
0
def nrows(table):
    """
    Count the data rows of a table, i.e. every row except the header.
    E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2]]
        >>> etl.nrows(table)
        2

    """

    # enumerate from 1 so the last index seen is the row count; stays 0
    # when there are no data rows at all
    total = 0
    for total, _ in enumerate(data(table), 1):
        pass
    return total
Exemplo n.º 11
0
def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs):
    """
    Write `table` as CSV to `source`, opened with the given `mode`.

    The header row is written only when `write_header` is true. The binary
    buffer obtained from `source` is wrapped for text output using
    `encoding` and `errors`; `csvargs` are passed on to ``csv.writer``.
    """
    if write_header:
        rows = table
    else:
        rows = data(table)

    with source.open(mode) as buf:
        # text layer on top of the binary buffer
        text_buf = io.TextIOWrapper(buf, encoding=encoding, errors=errors,
                                    newline='')
        try:
            csv.writer(text_buf, **csvargs).writerows(rows)
            text_buf.flush()
        finally:
            # separate the wrapper from the buffer, leaving the buffer to
            # the enclosing ``with`` block
            text_buf.detach()
Exemplo n.º 12
0
def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs):
    """
    Dump the rows of `table` in CSV form into `source`.

    `source` is opened with `mode` and yields a binary buffer, which is
    wrapped in a text layer configured with `encoding` and `errors`.
    Remaining keyword arguments go to ``csv.writer``. The header row is
    skipped unless `write_header` is true.
    """
    records = table if write_header else data(table)
    with source.open(mode) as binary_buf:
        wrapper = io.TextIOWrapper(
            binary_buf, encoding=encoding, errors=errors, newline='')
        try:
            emit = csv.writer(wrapper, **csvargs).writerow
            for record in records:
                emit(record)
            wrapper.flush()
        finally:
            # hand the binary buffer back; the ``with`` block manages it
            wrapper.detach()
Exemplo n.º 13
0
def toxlsx(tbl, filename, sheet=None, write_header=True):
    """
    Write a table to a new Excel .xlsx file.

    A write-only workbook is created with a single sheet titled `sheet`,
    the rows are appended to it and the workbook is saved to `filename`.
    The header row is included only when `write_header` is true.

    """

    import openpyxl
    workbook = openpyxl.Workbook(write_only=True)
    worksheet = workbook.create_sheet(title=sheet)
    rows = tbl if write_header else data(tbl)
    for row in rows:
        worksheet.append(row)
    workbook.save(filename)
Exemplo n.º 14
0
def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs):
    """
    Write `table` as CSV to `source`, opened with the given `mode`.

    For the ascii codec the rows are written with a plain ``csv.writer``
    directly on the buffer; for any other codec a ``UnicodeWriter``
    configured with `encoding` and `errors` is used. The header row is
    written only when `write_header` is true.
    """
    if write_header:
        rows = table
    else:
        rows = data(table)

    with source.open(mode) as buf:
        # the codec decides which writer is needed
        if getcodec(encoding).name == "ascii":
            # ascii: no encoding step required
            writer = csv.writer(buf, **csvargs)
        else:
            writer = UnicodeWriter(buf, encoding=encoding, errors=errors,
                                   **csvargs)

        for row in rows:
            writer.writerow(row)
Exemplo n.º 15
0
def itercrossjoin(sources, prefix):
    """
    Generate the rows of the cross join (cartesian product) of `sources`.

    The first row yielded is the combined header: the headers of all
    sources concatenated in order. When `prefix` is true every field name
    is prefixed with the one-based position of its source and an
    underscore. Each later row is the concatenation of one data row from
    every source.
    """

    # construct fields, numbering the sources from one
    fields = []
    for position, source in enumerate(sources, 1):
        if prefix:
            fields += [str(position) + '_' + str(f) for f in header(source)]
        else:
            fields += header(source)
    yield tuple(fields)

    row_iterables = [data(source) for source in sources]
    for combination in itertools.product(*row_iterables):
        # flatten the tuple of rows into a single output row
        yield tuple(itertools.chain.from_iterable(combination))
Exemplo n.º 16
0
def _writecsv(table, source, mode, write_header, encoding, errors, **csvargs):
    """
    Dump the rows of `table` in CSV form into `source`.

    The writer is chosen from the codec that `encoding` resolves to:
    ``csv.writer`` on the raw buffer for ascii, otherwise a
    ``UnicodeWriter`` given `encoding` and `errors`. `csvargs` are passed
    to whichever writer is used. The header row is skipped unless
    `write_header` is true.
    """
    records = table if write_header else data(table)
    with source.open(mode) as buf:

        codec = getcodec(encoding)
        needs_encoding = codec.name != 'ascii'

        if needs_encoding:
            writer = UnicodeWriter(buf, encoding=encoding, errors=errors,
                                   **csvargs)
        else:
            # ascii output bypasses the encoding layer
            writer = csv.writer(buf, **csvargs)

        for record in records:
            writer.writerow(record)
Exemplo n.º 17
0
def appendxlsx(tbl, filename, sheet=None, write_header=False):
    """
    Append the rows of a table to a sheet of an existing Excel .xlsx file.

    `sheet` selects the worksheet: ``None`` means the first sheet, an int
    is used as an index into the workbook's sheet names, and anything else
    is converted to a string and used as the sheet name. The header row is
    appended only when `write_header` is true. The workbook is saved back
    to `filename`.
    """

    import openpyxl
    workbook = openpyxl.load_workbook(filename=filename, read_only=False)

    # resolve the target worksheet's name, then fetch the worksheet
    if sheet is None:
        sheet_name = workbook.sheetnames[0]
    elif isinstance(sheet, int):
        sheet_name = workbook.sheetnames[sheet]
    else:
        sheet_name = str(sheet)
    worksheet = workbook[sheet_name]

    rows = tbl if write_header else data(tbl)
    for row in rows:
        worksheet.append(row)
    workbook.save(filename)
Exemplo n.º 18
0
def people_list(request, uuid):
    """
    Render the leading rows of a downloaded CSV, sorted by one column.

    Query parameters:

      - ``sortby``: column to sort on (default ``name``); must be in the
        CSV header;
      - ``ordering``: ``asc`` (default) or ``desc``;
      - ``count``: number of rows to show (default ``10``); must be an
        integer of at least 1.

    Responds with 404 when no ``CSVDownload`` matches `uuid` and with 400
    when any query parameter is invalid; otherwise renders
    ``people_list.html``.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    csv_path = os.path.join(settings.CSV_DIR,
                            '{0}.csv'.format(csvdownload.uuid))
    people = fromcsv(csv_path)

    params = request.GET
    sortby = params.get('sortby', 'name')
    ordering = params.get('ordering', 'asc')
    count_str = params.get('count', '10')

    # validate the query parameters; any failure is a plain 400
    if sortby not in header(people) or ordering not in ('asc', 'desc'):
        return HttpResponseBadRequest('Bad request.')
    try:
        count = int(count_str)
    except ValueError:
        return HttpResponseBadRequest('Bad request.')
    if count < 1:
        return HttpResponseBadRequest('Bad request.')

    descending = ordering == 'desc'
    people = head(sort(people, sortby, reverse=descending), count)

    # NOTE(review): 'has_more' is measured on the table after head() has
    # already truncated it - confirm this gives the intended result.
    context = {
        'csvdownload': csvdownload,
        'headers': header(people),
        'people': data(people),
        'has_more': len(people) > count,
        'queryparams': {
            'sortby': sortby,
            'ordering': ordering,
            # the "show more" link asks for ten additional rows
            'count': str(count + 10)
        }
    }
    return render(request, 'people_list.html', context)
Exemplo n.º 19
0
Arquivo: json.py Projeto: sv1jsb/petl
def tojsonarrays(table, source=None, prefix=None, suffix=None,
                 output_header=False, *args, **kwargs):
    """
    Serialise a table as JSON, emitting each row as a JSON array. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tojsonarrays(table1, 'example.json')
        >>> # check what it did
        ... print(open('example.json').read())
        [["a", 1], ["b", 2], ["c", 2]]

    The header row is part of the output only when `output_header` is
    true. Extra positional and keyword arguments are forwarded to the JSON
    writer.

    Note that this is not streaming: all rows are loaded into memory
    before being written to the file.

    """

    # with the header requested the whole table is dumped, otherwise only
    # its data rows
    selected = table if output_header else data(table)
    rows = list(selected)
    _writejson(source, rows, prefix, suffix, *args, **kwargs)
Exemplo n.º 20
0
def test_data():
    """data() should yield only the rows that follow the header."""
    fields = ('foo', 'bar')
    body = (('a', 1), ('b', 2))
    ieq(body, data((fields,) + body))
Exemplo n.º 21
0
 def __iter__(self):
     """Yield each value of the wrapped table's data rows, row by row."""
     for record in data(self.table):
         for cell in record:
             yield cell
Exemplo n.º 22
0
def test_data():
    """The header row must be dropped by data(), leaving the data rows."""
    rows_without_header = (('a', 1), ('b', 2))
    full_table = (('foo', 'bar'),) + rows_without_header
    result = data(full_table)
    ieq(rows_without_header, result)