def construct_dtype(flds, peek, dtype):
    import numpy as np

    if dtype is None:
        dtype = guessdtype(peek)

    elif isinstance(dtype, string_types):
        # insert field names from source table
        typestrings = [s.strip() for s in dtype.split(',')]
        dtype = [(f, t) for f, t in zip(flds, typestrings)]

    elif (isinstance(dtype, dict)
          and ('names' not in dtype or 'formats' not in dtype)):
        # allow for partial specification of dtype
        cols = columns(peek)
        newdtype = {'names': [], 'formats': []}
        for f in flds:
            newdtype['names'].append(f)
            if f in dtype and isinstance(dtype[f], tuple):
                # assume fully specified
                newdtype['formats'].append(dtype[f][0])
            elif f not in dtype:
                # not specified at all, infer from the sampled rows
                a = np.array(cols[f])
                newdtype['formats'].append(a.dtype)
            else:
                # assume the format was directly specified
                newdtype['formats'].append(dtype[f])
        dtype = newdtype

    return dtype
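# A hedged usage sketch (not part of the original module): demonstrates how
# construct_dtype resolves a partially specified dtype, assuming the
# guessdtype/columns helpers defined elsewhere in this module. The sample
# data and field names here are hypothetical.
def _example_construct_dtype():
    # `peek` includes the header row, as produced by iterpeek in toarray below
    peek = [('foo', 'bar'), ('a', 1), ('b', 2)]
    flds = ['foo', 'bar']
    # give an explicit format for 'foo'; let 'bar' be inferred from the data
    dt = construct_dtype(flds, peek, {'foo': 'U1'})
    # expected result, with 'bar' typically inferred as dtype('int64')
    # on 64-bit platforms:
    #     {'names': ['foo', 'bar'], 'formats': ['U1', dtype('int64')]}
    return dt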
def make_sqlalchemy_table(table, tablename, schema=None, constraints=True,
                          metadata=None):
    """
    Create an SQLAlchemy table definition based on data in `table`.

    Keyword arguments:

    table : table container
        Table data to use to infer types etc.
    tablename : text
        Name of the table
    schema : text
        Name of the database schema to create the table in
    constraints : bool
        If True use length and nullable constraints
    metadata : sqlalchemy.MetaData
        Custom table metadata

    """

    import sqlalchemy
    if not metadata:
        metadata = sqlalchemy.MetaData()
    sql_table = sqlalchemy.Table(tablename, metadata, schema=schema)
    cols = columns(table)
    flds = list(cols.keys())
    for f in flds:
        sql_column = make_sqlalchemy_column(cols[f], f,
                                            constraints=constraints)
        sql_table.append_column(sql_column)
    return sql_table
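# A hedged usage sketch (not part of the original module): render the DDL
# that an inferred table definition would produce. Assumes the
# make_sqlalchemy_column helper defined elsewhere in this module infers a
# column type from the column values; the sample data is hypothetical.
def _example_make_sqlalchemy_table():
    import sqlalchemy
    tbl = [('foo', 'bar'),
           ('apples', 1),
           ('oranges', 3)]
    sql_table = make_sqlalchemy_table(tbl, 'fruit')
    # CreateTable compiles the definition to a CREATE TABLE statement
    print(sqlalchemy.schema.CreateTable(sql_table))
    return sql_table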
def test_columns():
    table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]]
    cols = columns(table)
    eq_(['a', 'b', 'b'], cols['foo'])
    eq_([1, 2, 3], cols['bar'])
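# For reference, a simplified sketch of the `columns` behaviour exercised
# above (petl's real implementation differs; this is illustrative only):
# it maps each header name to the list of values appearing under it.
def _columns_sketch(table):
    it = iter(table)
    flds = list(map(str, next(it)))
    cols = {f: [] for f in flds}
    for row in it:
        for f, v in zip(flds, row):
            cols[f].append(v)
    return cols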
def toarray(table, dtype=None, count=-1, sample=1000):
    """
    Load data from the given `table` into a
    `numpy <http://www.numpy.org/>`_ structured array. E.g.::

        >>> import petl as etl
        >>> table = [('foo', 'bar', 'baz'),
        ...          ('apples', 1, 2.5),
        ...          ('oranges', 3, 4.4),
        ...          ('pears', 7, .1)]
        >>> a = etl.toarray(table)
        >>> a
        array([('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, 0.1)],
              dtype=[('foo', '<U7'), ('bar', '<i8'), ('baz', '<f8')])
        >>> # the dtype can be specified as a string
        ... a = etl.toarray(table, dtype='a4, i2, f4')
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.400000095367432),
               (b'pear', 7, 0.10000000149011612)],
              dtype=[('foo', 'S4'), ('bar', '<i2'), ('baz', '<f4')])
        >>> # the dtype can also be partially specified
        ... a = etl.toarray(table, dtype={'foo': 'a4'})
        >>> a
        array([(b'appl', 1, 2.5), (b'oran', 3, 4.4), (b'pear', 7, 0.1)],
              dtype=[('foo', 'S4'), ('bar', '<i8'), ('baz', '<f8')])

    If the dtype is not completely specified, `sample` rows will be
    examined to infer an appropriate dtype.

    """

    import numpy as np
    it = iter(table)
    peek, it = iterpeek(it, sample)
    hdr = next(it)
    flds = list(map(str, hdr))
    # delegate dtype resolution to construct_dtype above, which handles
    # None, string, and partial dict specifications identically
    dtype = construct_dtype(flds, peek, dtype)
    # numpy is fussy about having tuples, need to make sure
    it = (tuple(row) for row in it)
    sa = np.fromiter(it, dtype=dtype, count=count)
    return sa
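# A hedged usage sketch (not part of the original module): `count` bounds how
# many rows numpy materialises via np.fromiter, and a larger `sample` widens
# the slice of rows inspected when inferring the dtype. The data here is
# hypothetical.
def _example_toarray():
    import petl as etl
    tbl = [('foo', 'bar'),
           ('a', 1),
           ('b', 2),
           ('c', 3)]
    # only convert the first two data rows
    a = etl.toarray(tbl, count=2)
    return a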