示例#1
0
文件: counting.py 项目: zli69/petl
def rowlengths(table):
    """
    Tabulate how many data rows there are of each length. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    The result has one row per distinct row length, ordered from the most
    frequent length to the least frequent. Handy for spotting ragged or
    truncated rows in data files.

    """

    # Tally the length of every row produced by data(table); as the example
    # above shows, the header row is not part of the tally.
    length_counts = Counter(len(row) for row in data(table))

    # Header first, then (length, count) pairs in descending count order.
    report = [('length', 'count')]
    for length, count in length_counts.most_common():
        report.append((length, count))
    return wrap(report)
示例#2
0
文件: counting.py 项目: DeanWay/petl
def rowlengths(table):
    """
    Summarise the distribution of row lengths in the table. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None],
        ...          ['F', 9]]
        >>> etl.rowlengths(table)
        +--------+-------+
        | length | count |
        +========+=======+
        |      3 |     3 |
        +--------+-------+
        |      2 |     2 |
        +--------+-------+
        |      4 |     1 |
        +--------+-------+

    Each output row pairs a row length with the number of data rows having
    that length, most common length first. Useful for finding potential
    problems in data files.

    """

    # Count occurrences of each row length across the rows from data(table).
    tally = Counter(map(len, data(table)))

    # most_common() already yields (length, count) pairs sorted by count.
    header = [('length', 'count')]
    return wrap(header + tally.most_common())
示例#3
0
def _get_error_details(target, num, err, record, schema):
    """Build a troubleshooting message showing the row that failed to write.

    :param target: output destination; only mentioned in the message when it
        is a string (``string_types``)
    :param num: number of the failing row, formatted with ``%d``
    :param err: the error raised while writing, rendered with ``%s``
    :param record: the failing row, either a dict or a sequence of values
    :param schema: the schema in use; rendered in the message and used to
        obtain the header names
    :returns: the formatted, multi-line description string
    """
    field_names = _get_schema_header_names(schema)

    # NOTE(review): for dict records the values are taken in the dict's own
    # order, which is presumably the same as the header order -- confirm.
    if isinstance(record, dict):
        row = list(record.values())
    else:
        row = record

    # Render a two-row table (headers + failing row) for display.
    preview = wrap([field_names, row]).look()

    if isinstance(target, string_types):
        destination = " output: %s" % target
    else:
        destination = ''

    template = "failed writing on row #%d: %s\n%s\n schema: %s\n%s"
    return template % (num, err, destination, schema, preview)
示例#4
0
文件: counting.py 项目: zli69/petl
def stringpatterns(table, field):
    """
    Build a profile of the string patterns occurring in the given field,
    as a table of patterns with their counts and relative frequencies.
    E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['Mr. Foo', '123-1254'],
        ...          ['Mrs. Bar', '234-1123'],
        ...          ['Mr. Spo', '123-1254'],
        ...          [u'Mr. Baz', u'321 1434'],
        ...          [u'Mrs. Baz', u'321 1434'],
        ...          ['Mr. Quux', '123-1254-XX']]
        >>> etl.stringpatterns(table, 'foo')
        +------------+-------+---------------------+
        | pattern    | count | frequency           |
        +============+=======+=====================+
        | 'Aa. Aaa'  |     3 |                 0.5 |
        +------------+-------+---------------------+
        | 'Aaa. Aaa' |     2 |  0.3333333333333333 |
        +------------+-------+---------------------+
        | 'Aa. Aaaa' |     1 | 0.16666666666666666 |
        +------------+-------+---------------------+

        >>> etl.stringpatterns(table, 'bar')
        +---------------+-------+---------------------+
        | pattern       | count | frequency           |
        +===============+=======+=====================+
        | '999-9999'    |     3 |                 0.5 |
        +---------------+-------+---------------------+
        | '999 9999'    |     2 |  0.3333333333333333 |
        +---------------+-------+---------------------+
        | '999-9999-AA' |     1 | 0.16666666666666666 |
        +---------------+-------+---------------------+

    Rows are ordered from the most common pattern to the least common;
    frequency is the pattern's count divided by the sum of all counts.

    """

    # (pattern, count) pairs, most frequent first.
    ranked = stringpatterncounter(table, field).most_common()
    grand_total = sum(count for _, count in ranked)

    rows = [('pattern', 'count', 'frequency')]
    for pattern, count in ranked:
        # float() keeps this a true division regardless of Python version.
        rows.append((pattern, count, float(count) / grand_total))
    return wrap(rows)
示例#5
0
文件: counting.py 项目: DeanWay/petl
def stringpatterns(table, field):
    """
    Report the string patterns found in the given field, together with how
    often each occurs (absolute count and fraction of the total). E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['Mr. Foo', '123-1254'],
        ...          ['Mrs. Bar', '234-1123'],
        ...          ['Mr. Spo', '123-1254'],
        ...          [u'Mr. Baz', u'321 1434'],
        ...          [u'Mrs. Baz', u'321 1434'],
        ...          ['Mr. Quux', '123-1254-XX']]
        >>> etl.stringpatterns(table, 'foo')
        +------------+-------+---------------------+
        | pattern    | count | frequency           |
        +============+=======+=====================+
        | 'Aa. Aaa'  |     3 |                 0.5 |
        +------------+-------+---------------------+
        | 'Aaa. Aaa' |     2 |  0.3333333333333333 |
        +------------+-------+---------------------+
        | 'Aa. Aaaa' |     1 | 0.16666666666666666 |
        +------------+-------+---------------------+

        >>> etl.stringpatterns(table, 'bar')
        +---------------+-------+---------------------+
        | pattern       | count | frequency           |
        +===============+=======+=====================+
        | '999-9999'    |     3 |                 0.5 |
        +---------------+-------+---------------------+
        | '999 9999'    |     2 |  0.3333333333333333 |
        +---------------+-------+---------------------+
        | '999-9999-AA' |     1 | 0.16666666666666666 |
        +---------------+-------+---------------------+

    The output is sorted by descending count.

    """

    pattern_counts = stringpatterncounter(table, field).most_common()
    # Denominator for the frequency column: the sum of all pattern counts.
    total = sum(entry[1] for entry in pattern_counts)

    def with_frequency(entry):
        # Extend a (pattern, count) pair with its share of the total;
        # float() forces true division on every Python version.
        pattern, count = entry
        return (pattern, count, float(count) / total)

    header = [('pattern', 'count', 'frequency')]
    return wrap(header + [with_frequency(entry) for entry in pattern_counts])