Example No. 1
def construct_dtype(flds, peek, dtype):
    import numpy as np

    if dtype is None:
        dtype = infer_dtype(peek)

    elif isinstance(dtype, string_types):
        # insert field names from source table
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = [(f, t) for f, t in zip(flds, typestrings)]

    elif (isinstance(dtype, dict)
          and ('names' not in dtype or 'formats' not in dtype)):
        # allow for partial specification of dtype
        cols = columns(peek)
        newdtype = {'names': [], 'formats': []}
        for f in flds:
            newdtype['names'].append(f)
            if f in dtype and isinstance(dtype[f], tuple):
                # assume fully specified
                newdtype['formats'].append(dtype[f][0])
            elif f not in dtype:
                # not specified at all
                a = np.array(cols[f])
                newdtype['formats'].append(a.dtype)
            else:
                # assume the format was specified directly (e.g. as a type string)
                newdtype['formats'].append(dtype[f])
        dtype = newdtype

    return dtype
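
This helper normalizes the three accepted forms of a dtype specification: None (infer everything), a comma-separated format string, or a dict covering only some fields. The names `infer_dtype`, `string_types` and `columns` are helpers from the enclosing module (the code appears to come from petl's numpy integration) and are not shown here. As a minimal sketch of the string branch, with made-up field names and format codes, zipping the field names with the split format string gives (name, format) pairs that numpy accepts directly as a structured dtype:

import numpy as np

# made-up field names and format codes, mirroring the string branch above
flds = ['foo', 'bar', 'baz']
dtype = 'S4, i2, f4'

typestrings = [s.strip() for s in dtype.split(',')]
pairs = [(f, t) for f, t in zip(flds, typestrings)]
# pairs == [('foo', 'S4'), ('bar', 'i2'), ('baz', 'f4')]

# numpy accepts the (name, format) pairs directly as a structured dtype
structured = np.dtype(pairs)
print(structured)  # [('foo', 'S4'), ('bar', '<i2'), ('baz', '<f4')]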
Example No. 2
def construct_dtype(flds, peek, dtype):
    import numpy as np

    if dtype is None:
        dtype = infer_dtype(peek)

    elif isinstance(dtype, string_types):
        # insert field names from source table
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = [(f, t) for f, t in zip(flds, typestrings)]

    elif (isinstance(dtype, dict) and
          ('names' not in dtype or 'formats' not in dtype)):
        # allow for partial specification of dtype
        cols = columns(peek)
        newdtype = {'names': [], 'formats': []}
        for f in flds:
            newdtype['names'].append(f)
            if f in dtype and isinstance(dtype[f], tuple):
                # assume fully specified
                newdtype['formats'].append(dtype[f][0])
            elif f not in dtype:
                # not specified at all
                a = np.array(cols[f])
                newdtype['formats'].append(a.dtype)
            else:
                # assume the format was specified directly (e.g. as a type string)
                newdtype['formats'].append(dtype[f])
        dtype = newdtype

    return dtype
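
The dict branch handles a partially specified dtype: every field the caller named keeps its given format, and every other field gets a format inferred by numpy from a sample of its column values. A small standalone sketch of that idea, with invented field names, sample data and a partial specification:

import numpy as np

# invented sample data and a partial dtype, for illustration only
flds = ['foo', 'bar']
cols = {'foo': ['apples', 'oranges', 'pears'], 'bar': [1, 3, 7]}
dtype = {'foo': 'S4'}  # only 'foo' is specified by the caller

newdtype = {'names': [], 'formats': []}
for f in flds:
    newdtype['names'].append(f)
    if f in dtype:
        # caller-specified format
        newdtype['formats'].append(dtype[f])
    else:
        # infer the format from the sample column values
        newdtype['formats'].append(np.array(cols[f]).dtype)

# a names/formats dict is itself a valid structured dtype specification
a = np.zeros(3, dtype=np.dtype(newdtype))
print(a.dtype)  # e.g. [('foo', 'S4'), ('bar', '<i8')]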
Example No. 3
def make_sqlalchemy_table(table, tablename, schema=None, constraints=True, metadata=None):
    """
    Create an SQLAlchemy table definition based on data in `table`.

    Keyword arguments:

    table : table container
        Table data to use to infer types etc.
    tablename : text
        Name of the table
    schema : text
        Name of the database schema to create the table in
    constraints : bool
        If True, use length and nullable constraints
    metadata : sqlalchemy.MetaData
        Custom table metadata

    """

    import sqlalchemy

    if not metadata:
        metadata = sqlalchemy.MetaData()

    sql_table = sqlalchemy.Table(tablename, metadata, schema=schema)
    cols = columns(table)
    flds = list(cols.keys())
    for f in flds:
        sql_column = make_sqlalchemy_column(cols[f], f, constraints=constraints)
        sql_table.append_column(sql_column)

    return sql_table
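
The function returns a plain sqlalchemy.Table, so the result can be inspected or compiled to DDL with SQLAlchemy's standard tools. make_sqlalchemy_column is defined elsewhere in the same module and is not shown here; the sketch below appends two hand-built columns (illustrative names and types only) to show what can be done with a table assembled this way:

import sqlalchemy

metadata = sqlalchemy.MetaData()
sql_table = sqlalchemy.Table('example', metadata)

# stand-ins for the columns that make_sqlalchemy_column() would produce
sql_table.append_column(sqlalchemy.Column('foo', sqlalchemy.Text()))
sql_table.append_column(sqlalchemy.Column('bar', sqlalchemy.Integer()))

# render the CREATE TABLE statement for the generic dialect
print(sqlalchemy.schema.CreateTable(sql_table))
# CREATE TABLE example (foo TEXT, bar INTEGER)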
Example No. 4
def make_sqlalchemy_table(table,
                          tablename,
                          schema=None,
                          constraints=True,
                          metadata=None):
    """
    Create an SQLAlchemy table definition based on data in `table`.

    Keyword arguments:

    table : table container
        Table data to use to infer types etc.
    tablename : text
        Name of the table
    schema : text
        Name of the database schema to create the table in
    constraints : bool
        If True, use length and nullable constraints
    metadata : sqlalchemy.MetaData
        Custom table metadata

    """

    import sqlalchemy

    if not metadata:
        metadata = sqlalchemy.MetaData()

    sql_table = sqlalchemy.Table(tablename, metadata, schema=schema)
    cols = columns(table)
    flds = list(cols.keys())
    for f in flds:
        sql_column = make_sqlalchemy_column(cols[f],
                                            f,
                                            constraints=constraints)
        sql_table.append_column(sql_column)

    return sql_table
Example No. 5
def test_columns():

    table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]]
    cols = columns(table)
    eq_(['a', 'b', 'b'], cols['foo'])
    eq_([1, 2, 3], cols['bar'])
Example No. 6
def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Load data from the given `table` into a
    `numpy <http://www.numpy.org/>`_ structured array. E.g.::

        >>> import petl as etl
        >>> table = [('foo', 'bar', 'baz'),
        ...          ('apples', 1, 2.5),
        ...          ('oranges', 3, 4.4),
        ...          ('pears', 7, .1)]
        >>> a = etl.toarray(table)
        >>> a
        array([('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)],
              dtype=[('foo', '<U7'), ('bar', '<i8'), ('baz', '<f8')])
        >>> # the dtype can be specified as a string
        ... a = etl.toarray(table, dtype='a4, i2, f4')
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.400000095367432),
               (b'pear', 7, 0.10000000149011612)],
              dtype=[('foo', 'S4'), ('bar', '<i2'), ('baz', '<f4')])
        >>> # the dtype can also be partially specified
        ... a = etl.toarray(table, dtype={'foo': 'a4'})
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.4), (b'pear', 7, 0.1)],
              dtype=[('foo', 'S4'), ('bar', '<i8'), ('baz', '<f8')])

    If the dtype is not completely specified, `sample` rows will be
    examined to infer an appropriate dtype.

    """
    
    import numpy as np
    it = iter(table)
    peek, it = iterpeek(it, sample)
    hdr = next(it)
    flds = list(map(str, hdr))

    if dtype is None:
        dtype = guessdtype(peek)

    elif isinstance(dtype, string_types):
        # insert field names from source table
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = [(f, t) for f, t in zip(flds, typestrings)]

    elif (isinstance(dtype, dict)
          and ('names' not in dtype or 'formats' not in dtype)):
        # allow for partial specification of dtype
        cols = columns(peek)
        newdtype = {'names': [], 'formats': []}
        for f in flds:
            newdtype['names'].append(f)
            if f in dtype and isinstance(dtype[f], tuple):
                # assume fully specified
                newdtype['formats'].append(dtype[f][0])
            elif f not in dtype:
                # not specified at all
                a = np.array(cols[f])
                newdtype['formats'].append(a.dtype)
            else:
                # assume the format was specified directly (e.g. as a type string)
                newdtype['formats'].append(dtype[f])
        dtype = newdtype

    else:
        pass  # leave dtype as-is

    # numpy requires each row to be a tuple for a structured dtype, so convert
    it = (tuple(row) for row in it)
    sa = np.fromiter(it, dtype=dtype, count=count)

    return sa
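
The last two lines do the actual conversion: np.fromiter with a structured dtype expects each item to be a tuple, which is why the generator wraps every row in tuple(). A standalone illustration of just that step, with made-up rows and dtype:

import numpy as np

# made-up rows and dtype; in toarray these come from the table and the dtype built above
rows = [['apples', 1], ['oranges', 3], ['pears', 7]]
dtype = [('foo', 'U7'), ('bar', 'i8')]

# convert each row to a tuple before handing it to np.fromiter
it = (tuple(row) for row in rows)
sa = np.fromiter(it, dtype=dtype, count=-1)
print(sa['foo'])  # ['apples' 'oranges' 'pears']
print(sa['bar'])  # [1 3 7]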