Пример #1
def test_cutout():
    """Check that ``cutout`` removes the selected fields from every row.

    The fixture mixes value types and includes one row that is longer than
    the header and one that is shorter, so the expectations below also pin
    what comes back for ragged rows: the surplus value is not carried over,
    and the short row is expected to yield ``None`` for the missing field.
    """
    table = (('foo', 'bar', 'baz'),
             ('A', 1, 2),
             ('B', '2', '3.4'),
             (u'B', u'3', u'7.8', True),
             ('D', 'xyz', 9.0),
             ('E', None))

    # cutting out two fields by name leaves only the 'foo' column
    cut1 = cutout(table, 'bar', 'baz')
    expectation = (('foo',),
                   ('A',),
                   ('B',),
                   (u'B',),
                   ('D',),
                   ('E',))
    ieq(expectation, cut1)

    # cutting out a single field by name
    cut2 = cutout(table, 'bar')
    expectation = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))
    ieq(expectation, cut2)

    # the same field selected by index instead of by name
    cut3 = cutout(table, 1)
    expectation = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))
    ieq(expectation, cut3)
Пример #2
def test_cutout():
    """Verify ``cutout`` against a table containing ragged rows.

    Three selections are exercised: two fields by name, one field by name,
    and one field by position. The expected tables record that a row longer
    than the header loses its surplus value and that a row shorter than the
    header is expected to come back with ``None`` in the absent position.
    """
    table = (('foo', 'bar', 'baz'),
             ('A', 1, 2),
             ('B', '2', '3.4'),
             (u'B', u'3', u'7.8', True),
             ('D', 'xyz', 9.0),
             ('E', None))

    # two fields removed by name: only 'foo' remains
    cut1 = cutout(table, 'bar', 'baz')
    expectation = (('foo',),
                   ('A',),
                   ('B',),
                   (u'B',),
                   ('D',),
                   ('E',))
    ieq(expectation, cut1)

    # one field removed by name
    cut2 = cutout(table, 'bar')
    expectation = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))
    ieq(expectation, cut2)

    # one field removed by index; must match the by-name result above
    cut3 = cutout(table, 1)
    expectation = (('foo', 'baz'),
                   ('A', 2),
                   ('B', '3.4'),
                   (u'B', u'7.8'),
                   ('D', 9.0),
                   ('E', None))
    ieq(expectation, cut3)
Пример #3
def counts(request, uuid):
    """Render the distinct value combinations of selected CSV columns.

    The CSV file is located through the ``CSVDownload`` record identified by
    ``uuid``. The columns to combine come from the comma-separated
    ``columns`` query parameter.

    Responses:
        * 404 if no ``CSVDownload`` with this ``uuid`` exists.
        * 400 if any requested column is not in the CSV header.
        * redirect to ``people_list`` if no column was requested.
        * otherwise the rendered ``counts.html`` template.
    """
    try:
        csvdownload = CSVDownload.objects.get(uuid=uuid)
    except CSVDownload.DoesNotExist:
        return HttpResponseNotFound("Not found.")

    fname = '{0}.csv'.format(csvdownload.uuid)
    full_fname = os.path.join(settings.CSV_DIR, fname)
    people = fromcsv(full_fname)

    columns_str = request.GET.get('columns', '')
    # blank entries (e.g. from "a,,b" or a trailing comma) are ignored
    columns = sorted([c for c in columns_str.split(',') if c.strip()])

    # read the header once instead of once per requested column
    available_columns = header(people)
    for column in columns:
        if column not in available_columns:
            return HttpResponseBadRequest('Bad request.')
    if not columns:
        return redirect(to=reverse('people_list', kwargs={'uuid': uuid}))

    # only the combinations are displayed, so the 'frequency' field
    # produced by valuecounts is dropped
    value_counts = valuecounts(people, *columns)
    value_counts = cutout(value_counts, 'frequency')

    return render(
        request, 'counts.html', {
            'csvdownload': csvdownload,
            'columns': available_columns,
            'headers': header(value_counts),
            'counts': data(value_counts),
            'queryparams': {
                'columns': columns
            }
        })
Пример #4
def fetch_people_table():
    """Fetch every page of the people listing and return it as one table.

    The first page is fetched synchronously to learn the total record count
    and the page size; the remaining pages are then fetched concurrently.
    Afterwards a ``date`` field is derived from ``edited`` (which is then
    removed) and each ``homeworld`` value is passed through a
    ``CachedPlanetFetcher``.

    Pages are concatenated in the order their requests complete, so the
    row order across pages is not guaranteed to follow the page number.
    """
    planet_fetcher = CachedPlanetFetcher()

    first_page_response = _fetch_people_page(1).json()
    total_count = first_page_response['count']
    fetched_results = first_page_response['results']
    fetched_count = len(fetched_results)
    remaining_count = total_count - fetched_count
    # an empty first page gives no page size to divide by; there is
    # nothing further to fetch in that case
    if fetched_count:
        remaining_pages = math.ceil(remaining_count / fetched_count)
    else:
        remaining_pages = 0

    table = fromdicts(fetched_results, header=PEOPLE_HEADER)

    with ThreadPoolExecutor(max_workers=8) as executor:
        response_futures = [
            executor.submit(_fetch_people_page, page_number)
            for page_number in range(2, 2 + remaining_pages)
        ]
        for future in as_completed(response_futures):
            page_response = future.result().json()
            table = cat(
                table, fromdicts(page_response['results'],
                                 header=PEOPLE_HEADER))

    # 'edited' is an ISO timestamp with a trailing 'Z'; fromisoformat needs
    # an explicit offset, so the 'Z' is rewritten before parsing
    table = addfields(table, [('date', lambda rec: datetime.fromisoformat(rec[
        'edited'].replace('Z', '+00:00')).date().isoformat())])
    table = cutout(table, 'edited')
    table = convert(
        table, 'homeworld', lambda homeworld_url: planet_fetcher.fetch(
            homeworld_url))

    return table
Пример #5
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Split a table into two tables by reversing an inner join.

    E.g., if the join key is present in the table::

        >>> from petl import look, unjoin
        >>> look(table1)
        +-------+-------+----------+
        | 'foo' | 'bar' | 'baz'    |
        +=======+=======+==========+
        | 'A'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'B'   | 1     | 'apple'  |
        +-------+-------+----------+
        | 'C'   | 2     | 'orange' |
        +-------+-------+----------+

        >>> table2, table3 = unjoin(table1, 'baz', key='bar')
        >>> look(table2)
        +-------+-------+
        | 'foo' | 'bar' |
        +=======+=======+
        | 'A'   | 1     |
        +-------+-------+
        | 'B'   | 1     |
        +-------+-------+
        | 'C'   | 2     |
        +-------+-------+

        >>> look(table3)
        +-------+----------+
        | 'bar' | 'baz'    |
        +=======+==========+
        | 1     | 'apple'  |
        +-------+----------+
        | 2     | 'orange' |
        +-------+----------+

    An integer join key can also be reconstructed, e.g.::

        >>> look(table4)
        +-------+----------+
        | 'foo' | 'bar'    |
        +=======+==========+
        | 'A'   | 'apple'  |
        +-------+----------+
        | 'B'   | 'apple'  |
        +-------+----------+
        | 'C'   | 'orange' |
        +-------+----------+

        >>> table5, table6 = unjoin(table4, 'bar')
        >>> look(table5)
        +-------+----------+
        | 'foo' | 'bar_id' |
        +=======+==========+
        | 'A'   | 1        |
        +-------+----------+
        | 'B'   | 1        |
        +-------+----------+
        | 'C'   | 2        |
        +-------+----------+

        >>> look(table6)
        +------+----------+
        | 'id' | 'bar'    |
        +======+==========+
        | 1    | 'apple'  |
        +------+----------+
        | 2    | 'orange' |
        +------+----------+

    Parameters: `value` is the field to split out into the second table;
    `key` is an existing join key field, or `None` to have an integer key
    generated; `autoincrement` is handed to the views that generate that
    key; `presorted` declares the table already sorted by `value`, in which
    case no sort is performed; `buffersize`, `tempdir` and `cache` are
    forwarded to the sort.

    Returns a `(left, right)` pair of tables.

    .. versionadded:: 0.12

    """

    if key is None:
        # first sort the table by the value field
        if presorted:
            tbl_sorted = table
        else:
            tbl_sorted = sort(table, value, buffersize=buffersize,
                              tempdir=tempdir, cache=cache)
        # on the left, return the original table but with the value field
        # replaced by an incrementing integer
        left = ConvertToIncrementingCounterView(tbl_sorted, value,
                                                autoincrement)
        # on the right, return a new table with distinct values from the
        # given field
        right = EnumerateDistinctView(tbl_sorted, value, autoincrement)
    else:
        # on the left, return distinct rows from the original table
        # with the value field cut out
        left = distinct(cutout(table, value))
        # on the right, return distinct rows from the original table
        # with all fields but the key and value cut out
        right = distinct(cut(table, key, value))
    return left, right
Пример #6
def unjoin(table,
           value,
           key=None,
           autoincrement=(1, 1),
           presorted=False,
           buffersize=None,
           tempdir=None,
           cache=True):
    """
    Split a table into two tables by reversing an inner join. E.g.::

        >>> import petl as etl
        >>> # join key is present in the table
        ... table1 = (('foo', 'bar', 'baz'),
        ...           ('A', 1, 'apple'),
        ...           ('B', 1, 'apple'),
        ...           ('C', 2, 'orange'))
        >>> table2, table3 = etl.unjoin(table1, 'baz', key='bar')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   1 |
        +-----+-----+
        | 'B' |   1 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+

        >>> table3
        +-----+----------+
        | bar | baz      |
        +=====+==========+
        |   1 | 'apple'  |
        +-----+----------+
        |   2 | 'orange' |
        +-----+----------+

        >>> # an integer join key can also be reconstructed
        ... table4 = (('foo', 'bar'),
        ...           ('A', 'apple'),
        ...           ('B', 'apple'),
        ...           ('C', 'orange'))
        >>> table5, table6 = etl.unjoin(table4, 'bar')
        >>> table5
        +-----+--------+
        | foo | bar_id |
        +=====+========+
        | 'A' |      1 |
        +-----+--------+
        | 'B' |      1 |
        +-----+--------+
        | 'C' |      2 |
        +-----+--------+

        >>> table6
        +----+----------+
        | id | bar      |
        +====+==========+
        |  1 | 'apple'  |
        +----+----------+
        |  2 | 'orange' |
        +----+----------+

    The `autoincrement` parameter controls how an integer join key is
    reconstructed, and should be a tuple of (`start`, `step`).

    If `presorted` is true the table is taken to be sorted by `value`
    already and is not sorted again; otherwise `buffersize`, `tempdir` and
    `cache` are forwarded to the sort. These only matter when `key` is
    `None`.

    """

    if key is None:
        # first sort the table by the value field
        if presorted:
            tbl_sorted = table
        else:
            tbl_sorted = sort(table,
                              value,
                              buffersize=buffersize,
                              tempdir=tempdir,
                              cache=cache)
        # on the left, return the original table but with the value field
        # replaced by an incrementing integer
        left = ConvertToIncrementingCounterView(tbl_sorted, value,
                                                autoincrement)
        # on the right, return a new table with distinct values from the
        # given field
        right = EnumerateDistinctView(tbl_sorted, value, autoincrement)
    else:
        # on the left, return distinct rows from the original table
        # with the value field cut out
        left = distinct(cutout(table, value))
        # on the right, return distinct rows from the original table
        # with all fields but the key and value cut out
        right = distinct(cut(table, key, value))
    return left, right
Пример #7
def unjoin(table, value, key=None, autoincrement=(1, 1), presorted=False,
           buffersize=None, tempdir=None, cache=True):
    """
    Split a table into two tables by reversing an inner join. E.g.::

        >>> import petl as etl
        >>> # join key is present in the table
        ... table1 = (('foo', 'bar', 'baz'),
        ...           ('A', 1, 'apple'),
        ...           ('B', 1, 'apple'),
        ...           ('C', 2, 'orange'))
        >>> table2, table3 = etl.unjoin(table1, 'baz', key='bar')
        >>> table2
        +-----+-----+
        | foo | bar |
        +=====+=====+
        | 'A' |   1 |
        +-----+-----+
        | 'B' |   1 |
        +-----+-----+
        | 'C' |   2 |
        +-----+-----+

        >>> table3
        +-----+----------+
        | bar | baz      |
        +=====+==========+
        |   1 | 'apple'  |
        +-----+----------+
        |   2 | 'orange' |
        +-----+----------+

        >>> # an integer join key can also be reconstructed
        ... table4 = (('foo', 'bar'),
        ...           ('A', 'apple'),
        ...           ('B', 'apple'),
        ...           ('C', 'orange'))
        >>> table5, table6 = etl.unjoin(table4, 'bar')
        >>> table5
        +-----+--------+
        | foo | bar_id |
        +=====+========+
        | 'A' |      1 |
        +-----+--------+
        | 'B' |      1 |
        +-----+--------+
        | 'C' |      2 |
        +-----+--------+

        >>> table6
        +----+----------+
        | id | bar      |
        +====+==========+
        |  1 | 'apple'  |
        +----+----------+
        |  2 | 'orange' |
        +----+----------+

    The `autoincrement` parameter controls how an integer join key is
    reconstructed, and should be a tuple of (`start`, `step`).

    When `key` is `None` the table is sorted by `value` first, unless
    `presorted` is true; `buffersize`, `tempdir` and `cache` are passed
    through to that sort. When `key` names an existing field no sort is
    performed and those arguments are unused.

    """

    if key is None:
        # first sort the table by the value field
        if presorted:
            tbl_sorted = table
        else:
            tbl_sorted = sort(table, value, buffersize=buffersize,
                              tempdir=tempdir, cache=cache)
        # on the left, return the original table but with the value field
        # replaced by an incrementing integer
        left = ConvertToIncrementingCounterView(tbl_sorted, value,
                                                autoincrement)
        # on the right, return a new table with distinct values from the
        # given field
        right = EnumerateDistinctView(tbl_sorted, value, autoincrement)
    else:
        # on the left, return distinct rows from the original table
        # with the value field cut out
        left = distinct(cutout(table, value))
        # on the right, return distinct rows from the original table
        # with all fields but the key and value cut out
        right = distinct(cut(table, key, value))
    return left, right