Exemplo n.º 1
0
def test_cutout():
    """Check ``cutout`` when fields are selected by name and by index."""

    # Input is deliberately ragged: one row is longer than the header.
    source = (('foo', 'bar', 'baz'),
              ('A', 1, 2),
              ('B', '2', '3.4'),
              (u'B', u'3', u'7.8', True),
              ('D', 'xyz', 9.0),
              ('E', None))

    # Dropping both 'bar' and 'baz' leaves only the 'foo' column.
    foo_only = (('foo',),
                ('A',),
                ('B',),
                (u'B',),
                ('D',),
                ('E',))
    ieq(foo_only, cutout(source, 'bar', 'baz'))

    # Dropping 'bar' is expected to give the same rows whether the field
    # is named or addressed by its index (1).
    without_bar = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))
    ieq(without_bar, cutout(source, 'bar'))
    ieq(without_bar, cutout(source, 1))
Exemplo n.º 2
0
def test_cutout():
    """Exercise ``cutout`` with several fields, one name and one index."""

    rows = (('foo', 'bar', 'baz'),
            ('A', 1, 2),
            ('B', '2', '3.4'),
            (u'B', u'3', u'7.8', True),
            ('D', 'xyz', 9.0),
            ('E', None))

    # Case 1: remove two named fields at once.
    expected_foo = (('foo',),
                    ('A',),
                    ('B',),
                    (u'B',),
                    ('D',),
                    ('E',))
    actual_foo = cutout(rows, 'bar', 'baz')
    ieq(expected_foo, actual_foo)

    # Cases 2 and 3: remove the middle field, first by name and then by
    # index; both are compared against the same expected rows.
    expected_foo_baz = (('foo', 'baz'),
                        ('A', 2),
                        ('B', '3.4'),
                        (u'B', u'7.8'),
                        ('D', 9.0),
                        ('E', None))
    for selector in ('bar', 1):
        ieq(expected_foo_baz, cutout(rows, selector))
Exemplo n.º 3
0
def counts(request, uuid):
    """
    Render the distinct value combinations for selected CSV columns.

    The CSV file is located via the ``CSVDownload`` record identified by
    `uuid`. The columns to aggregate are read from the comma-separated
    ``columns`` query parameter.

    Responses:

    * 404 if no ``CSVDownload`` with the given `uuid` exists;
    * redirect to ``people_list`` if no columns were requested;
    * 400 if any requested column is not in the CSV header;
    * otherwise the rendered ``counts.html`` template.

    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    # Blank entries (e.g. from "a,,b" or a trailing comma) are discarded.
    columns = sorted(c for c in columns_str.split(',') if c.strip())
    if not columns:
        # Nothing to aggregate; checked first so the CSV header is not
        # read at all for this case.
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # Read the header once instead of once per requested column.
    fields = header(people)
    for column in columns:
        if column not in fields:
            return HttpResponseBadRequest('Bad request.')

    # valuecounts() also emits a 'frequency' field, which is not shown.
    counts_table = cutout(valuecounts(people, *columns), 'frequency')

    return render(
        request, 'counts.html', {
            'csvdownload': csvdownload,
            'columns': fields,
            'headers': header(counts_table),
            'counts': data(counts_table),
            'queryparams': {
                'columns': columns
            }
        })
Exemplo n.º 4
0
def fetch_people_table():
    """
    Fetch every page of people and return them as a single table.

    The first page is fetched synchronously to learn the total record
    count. Its length is taken as the page size, and the remaining pages
    are then requested concurrently. Pages are concatenated in page-number
    order so the row order is the same on every run.

    After concatenation the table is post-processed:

    * a ``date`` field is derived from the ISO timestamp in ``edited``
      (date part only), and ``edited`` is removed;
    * each ``homeworld`` URL is replaced by the ``name`` of the planet
      returned by the planet fetcher.

    """
    planet_fetcher = CachedPlanetFetcher()

    first_page_response = _fetch_people_page(1).json()
    total_count = first_page_response['count']
    fetched_results = first_page_response['results']
    fetched_count = len(fetched_results)
    remaining_count = total_count - fetched_count
    if fetched_count:
        remaining_pages = math.ceil(remaining_count / fetched_count)
    else:
        # An empty first page gives no page size to divide by; treat it
        # as "nothing more to fetch" rather than raising ZeroDivisionError.
        remaining_pages = 0

    table = fromdicts(fetched_results, header=PEOPLE_HEADER)

    with ThreadPoolExecutor(max_workers=8) as executor:
        # All requests are submitted up front, so they still run
        # concurrently.
        response_futures = [
            executor.submit(_fetch_people_page, page_number)
            for page_number in range(2, 2 + remaining_pages)
        ]
        # Results are consumed in submission (page) order instead of
        # completion order, which keeps the row order deterministic.
        for future in response_futures:
            page_response = future.result().json()
            table = cat(
                table, fromdicts(page_response['results'],
                                 header=PEOPLE_HEADER))

    # 'Z' is rewritten to an explicit UTC offset before parsing with
    # datetime.fromisoformat().
    table = addfields(table, [('date', lambda rec: datetime.fromisoformat(rec[
        'edited'].replace('Z', '+00:00')).date().isoformat())])
    table = cutout(table, 'edited')
    table = convert(
        table, 'homeworld', lambda homeworld_url: planet_fetcher.fetch(
            homeworld_url).json()['name'])

    return table
Exemplo n.º 5
0
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Reverse an inner join, returning a ``(left, right)`` pair of tables.

    When `key` names a field that already exists in the table, `left` is
    the distinct rows of the table without the `value` field and `right`
    is the distinct (`key`, `value`) pairs::

        >>> from petl import look, unjoin
        >>> look(table1)
        +-------+-------+----------+
        | 'foo' | 'bar' | 'baz'    |
        +=======+=======+==========+
        | 'A'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'B'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'C'   | 2     | 'orange' |
        +-------+-------+----------+

        >>> table2, table3 = unjoin(table1, 'baz', key='bar')
        >>> look(table2)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'A'   | 1     |
        +-------+-------+
        | 'B'   | 1     |
        +-------+-------+
        | 'C'   | 2     |
        +-------+-------+

        >>> look(table3)
        +-------+----------+
        | 'bar' | 'baz'    |
        +=======+==========+
        | 1     | 'apple'  |
        +-------+----------+
        | 2     | 'orange' |
        +-------+----------+

    When `key` is omitted, an integer key is generated instead::

        >>> look(table4)
        +-------+----------+
        | 'foo' | 'bar'    |
        +=======+==========+
        | 'A'   | 'apple'  |
        +-------+----------+
        | 'B'   | 'apple'  |
        +-------+----------+
        | 'C'   | 'orange' |
        +-------+----------+

        >>> table5, table6 = unjoin(table4, 'bar')
        >>> look(table5)
        +-------+----------+
        | 'foo' | 'bar_id' |
        +=======+==========+
        | 'A'   | 1        |
        +-------+----------+
        | 'B'   | 1        |
        +-------+----------+
        | 'C'   | 2        |
        +-------+----------+

        >>> look(table6)
        +------+----------+
        | 'id' | 'bar'    |
        +======+==========+
        | 1    | 'apple'  |
        +------+----------+
        | 2    | 'orange' |
        +------+----------+

    The `presorted`, `buffersize`, `tempdir` and `cache` arguments are
    only used when `key` is omitted: unless `presorted` is true the table
    is sorted by `value`, and the other three are passed to that sort.
    `autoincrement` is passed to the views that generate the integer key.

    .. versionadded:: 0.12

    """

    if key is not None:
        # The key is already in the table, so both sides are projections
        # of the input with duplicate rows removed.
        without_value = cutout(table, value)
        left = distinct(without_value)
        key_and_value = cut(table, key, value)
        right = distinct(key_and_value)
        return left, right

    # No key given: work from the table ordered by the value field.
    ordered = table if presorted else sort(table, value,
                                           buffersize=buffersize,
                                           tempdir=tempdir, cache=cache)
    # Left side: the table with the value field replaced by an
    # incrementing integer.
    left = ConvertToIncrementingCounterView(ordered, value, autoincrement)
    # Right side: the distinct values of the field, enumerated.
    right = EnumerateDistinctView(ordered, value, autoincrement)
    return left, right
Exemplo n.º 6
0
def unjoin(table,
           value,
           key=None,
           autoincrement=(1, 1),
           presorted=False,
           buffersize=None,
           tempdir=None,
           cache=True):
    """
    Undo an inner join by splitting one table into a ``(left, right)``
    pair. E.g.::

        >>> import petl as etl
        >>> # join key is present in the table
        ... table1 = (('foo', 'bar', 'baz'),
        ...           ('A', 1, 'apple'),
        ...           ('B', 1, 'apple'),
        ...           ('C', 2, 'orange'))
        >>> table2, table3 = etl.unjoin(table1, 'baz', key='bar')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   1 |
        +-----+-----+
        | 'B' |   1 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+

        >>> table3
        +-----+----------+
        | bar | baz      |
        +=====+==========+
        |   1 | 'apple'  |
        +-----+----------+
        |   2 | 'orange' |
        +-----+----------+

        >>> # an integer join key can also be reconstructed
        ... table4 = (('foo', 'bar'),
        ...           ('A', 'apple'),
        ...           ('B', 'apple'),
        ...           ('C', 'orange'))
        >>> table5, table6 = etl.unjoin(table4, 'bar')
        >>> table5
        +-----+--------+
        | foo | bar_id |
        +=====+========+
        | 'A' |      1 |
        +-----+--------+
        | 'B' |      1 |
        +-----+--------+
        | 'C' |      2 |
        +-----+--------+

        >>> table6
        +----+----------+
        | id | bar      |
        +====+==========+
        |  1 | 'apple'  |
        +----+----------+
        |  2 | 'orange' |
        +----+----------+

    If `key` is given, the left table is the distinct rows of the input
    without the `value` field and the right table is the distinct
    (`key`, `value`) pairs. If `key` is omitted, an integer key is
    reconstructed; `autoincrement` controls how, and should be a tuple
    of (`start`, `step`).

    The `presorted`, `buffersize`, `tempdir` and `cache` arguments are
    only used when `key` is omitted: unless `presorted` is true the table
    is sorted by `value`, and the other three are passed to that sort.

    """

    if key is not None:
        # The join key already exists: project and de-duplicate each side.
        left = distinct(cutout(table, value))
        right = distinct(cut(table, key, value))
        return left, right

    # No key supplied, so start from the table ordered by the value field.
    if presorted:
        ordered = table
    else:
        ordered = sort(table,
                       value,
                       buffersize=buffersize,
                       tempdir=tempdir,
                       cache=cache)

    # Left: the table with the value field swapped for an incrementing
    # integer. Right: the distinct values of that field, enumerated.
    left = ConvertToIncrementingCounterView(ordered, value, autoincrement)
    right = EnumerateDistinctView(ordered, value, autoincrement)
    return left, right
Exemplo n.º 7
0
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Split one table into a ``(left, right)`` pair, the reverse of an
    inner join. E.g.::

        >>> import petl as etl
        >>> # join key is present in the table
        ... table1 = (('foo', 'bar', 'baz'),
        ...           ('A', 1, 'apple'),
        ...           ('B', 1, 'apple'),
        ...           ('C', 2, 'orange'))
        >>> table2, table3 = etl.unjoin(table1, 'baz', key='bar')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   1 |
        +-----+-----+
        | 'B' |   1 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+

        >>> table3
        +-----+----------+
        | bar | baz      |
        +=====+==========+
        |   1 | 'apple'  |
        +-----+----------+
        |   2 | 'orange' |
        +-----+----------+

        >>> # an integer join key can also be reconstructed
        ... table4 = (('foo', 'bar'),
        ...           ('A', 'apple'),
        ...           ('B', 'apple'),
        ...           ('C', 'orange'))
        >>> table5, table6 = etl.unjoin(table4, 'bar')
        >>> table5
        +-----+--------+
        | foo | bar_id |
        +=====+========+
        | 'A' |      1 |
        +-----+--------+
        | 'B' |      1 |
        +-----+--------+
        | 'C' |      2 |
        +-----+--------+

        >>> table6
        +----+----------+
        | id | bar      |
        +====+==========+
        |  1 | 'apple'  |
        +----+----------+
        |  2 | 'orange' |
        +----+----------+

    With `key` given, both results are de-duplicated projections of the
    input: everything except `value` on the left, and just `key` and
    `value` on the right. Without `key`, an integer join key is
    reconstructed; the `autoincrement` parameter controls how, and should
    be a tuple of (`start`, `step`).

    `presorted`, `buffersize`, `tempdir` and `cache` only matter when
    `key` is omitted: the table is sorted by `value` unless `presorted`
    is true, and the remaining three are handed to that sort.

    """

    have_key = key is not None
    if have_key:
        # Left: distinct rows of the original table minus the value field.
        trimmed = cutout(table, value)
        left = distinct(trimmed)
        # Right: distinct rows holding only the key and value fields.
        pairs = cut(table, key, value)
        right = distinct(pairs)
    else:
        # The reconstruction works from the table sorted by the value
        # field; skip the sort if the caller says it is already sorted.
        if not presorted:
            by_value = sort(table, value, buffersize=buffersize,
                            tempdir=tempdir, cache=cache)
        else:
            by_value = table
        # Left: the table with the value field replaced by an
        # incrementing integer.
        left = ConvertToIncrementingCounterView(by_value, value,
                                                autoincrement)
        # Right: a new table of the distinct values from that field.
        right = EnumerateDistinctView(by_value, value, autoincrement)
    return left, right