Exemplo n.º 1
0
def append_iterator_to_table(t, rows, dshape=None, **kwargs):
    """Insert the rows of an iterable into the SQL table ``t``.

    The first row is inspected to decide how rows are handled: tuple or
    list rows are zipped with column names into dicts, any other row type
    is handed to the insert statement unchanged.

    Parameters
    ----------
    t
        Table instance (not a class) exposing ``bind`` and ``insert()``.
    rows
        Iterable of rows to insert.
    dshape
        Optional datashape of the incoming data.  When its measure is a
        ``datashape.Record`` its field names label tuple/list rows and
        must match the table's column names as a set.
    **kwargs
        Accepted for interface compatibility; unused here.

    Returns
    -------
    The table ``t``, including when ``rows`` is empty.

    Raises
    ------
    ValueError
        If the names in ``dshape`` differ from the names discovered on ``t``.
    """
    assert not isinstance(t, type)
    rows = iter(rows)

    # Peek at the first row to learn whether rows are sequences or not.
    try:
        row = next(rows)
    except StopIteration:
        # Nothing to insert: still return the table so that every code
        # path yields the same kind of result.
        return t
    rows = chain([row], rows)  # put the peeked row back in front
    if isinstance(row, (tuple, list)):
        # Discover the table's column names once instead of per use.
        table_names = discover(t).measure.names
        if dshape and isinstance(dshape.measure, datashape.Record):
            names = dshape.measure.names
            if set(names) != set(table_names):
                raise ValueError("Column names of incoming data don't match "
                                 "column names of existing SQL table\n"
                                 "Names in SQL table: %s\n"
                                 "Names from incoming data: %s\n" %
                                 (table_names, names))
        else:
            names = table_names
        rows = (dict(zip(names, r)) for r in rows)

    engine = t.bind
    with engine.connect() as conn:
        for chunk in partition_all(1000, rows):  # TODO: 1000 is hardcoded
            conn.execute(t.insert(), chunk)

    return t
Exemplo n.º 2
0
Arquivo: test_sql.py Projeto: EGQM/odo
def test_discover_numeric_column():
    """Discovering a table with a NUMERIC column gives a truthy result."""
    assert discover(sa.String()) == datashape.string
    meta = sa.MetaData()
    numeric_column = sa.Column('name', sa.types.NUMERIC)
    table = sa.Table('name', meta, numeric_column)

    assert discover(table)
Exemplo n.º 3
0
 def _dtype(self):
     """Return ``ct.bool_``, wrapped in ``Option`` if either side is one."""
     # .schema / .datashape cannot be used directly because an operand may
     # be a bare integer, for example
     left = discover(self.lhs).measure
     right = discover(self.rhs).measure
     either_optional = isinstance(left, Option) or isinstance(right, Option)
     return Option(ct.bool_) if either_optional else ct.bool_
Exemplo n.º 4
0
    def _dtype(self):
        """Return the left measure, optionified; the left must be a string.

        Raises ``TypeError`` when the left measure (or its ``ty`` attribute,
        when present) is not a ``String``.
        """
        left = discover(self.lhs).measure
        right = discover(self.rhs).measure
        left_base = getattr(left, 'ty', left)
        if isinstance(left_base, String):
            return optionify(left, right, left)
        raise TypeError('can only interp strings got: %s' % left)
Exemplo n.º 5
0
Arquivo: sql.py Projeto: jcrist/odo
def discover_sqlalchemy_selectable(t):
    """Build ``var * Record(...)`` from the columns and foreign keys of ``t``.

    Each column contributes the fields of its discovered record; entries
    for foreign-key columns are then replaced by what discovering the
    foreign key returns.
    """
    position = {key: idx for idx, key in enumerate(t.columns.keys())}
    per_column = [discover(col).parameters[0] for col in t.columns]
    fields = list(sum(per_column, ()))
    relationships = [
        discover(fk, t, parent_measure=Record(fields))
        for fk in t.foreign_keys
    ]
    # Overwrite the plain column entry with the foreign-key measure.
    for field_name, measure in merge(*relationships).items():
        fields[position[field_name]] = (field_name, measure)
    return var * Record(fields)
Exemplo n.º 6
0
def test_categorical_pandas():
    """A pandas 'category' column is discovered as ``Categorical``."""
    letters = list('a'*5 + 'b'*5 + 'c'*5)
    frame = pd.DataFrame({'x': letters, 'y': range(15)}, columns=['x', 'y'])
    frame.x = frame.x.astype('category')

    expected_frame = 15 * Record([
        ('x', Categorical(['a', 'b', 'c'])),
        ('y', int64),
    ])
    assert_dshape_equal(discover(frame), expected_frame)

    expected_column = 15 * Categorical(['a', 'b', 'c'])
    assert_dshape_equal(discover(frame.x), expected_column)
Exemplo n.º 7
0
def test_datetimetz_pandas():
    """Naive and tz-aware datetime columns keep their timezone on discovery."""
    zones = ('Europe/Moscow', 'UTC', 'US/Eastern')
    columns = [('naive', pd.date_range('2014', periods=5))]
    columns.extend(
        (zone, pd.date_range('2014', periods=5, tz=zone)) for zone in zones
    )
    frame = pd.DataFrame(OrderedDict(columns))

    expected = 5 * Record[
        'naive': Option(DateTime(tz=None)),
        'Europe/Moscow': Option(DateTime(tz='Europe/Moscow')),
        'UTC': Option(DateTime(tz='UTC')),
        'US/Eastern': Option(DateTime(tz='US/Eastern')),
    ]
    assert_dshape_equal(discover(frame), expected)

    assert_dshape_equal(discover(frame.naive), 5 * Option(DateTime(tz=None)))
    for zone in zones:
        column_shape = discover(frame[zone])
        assert_dshape_equal(column_shape, 5 * Option(DateTime(tz=zone)))
Exemplo n.º 8
0
 def _dtype(self):
     """Return the integer measure wide enough for both operands.

     The result is unsigned only when both operand measures are in
     ``unsigned``; its width is the larger of the two item sizes.
     """
     left = discover(self.lhs).measure
     right = discover(self.rhs).measure
     both_unsigned = left in unsigned and right in unsigned
     widest = max(left.itemsize, right.itemsize)
     type_name = '%sint%d' % ('u' if both_unsigned else '', widest * 8)
     return optionify(left, right, getattr(ct, type_name))
Exemplo n.º 9
0
def Data(data, dshape=None, name=None, fields=None, columns=None, schema=None,
         **kwargs):
    """Wrap ``data`` in an ``InteractiveSymbol`` with a resolved datashape.

    Parameters
    ----------
    data
        The object to wrap.  A string is passed to ``resource``; any text
        after ``'::'`` is split off first and later applied as
        ``'/'``-separated field lookups on the result.
    dshape
        Datashape, or a string that is parsed with ``datashape.dshape``.
        When falsy it is discovered from ``data``.
    name
        Passed through to ``InteractiveSymbol``.
    fields
        Field names used to build a record schema when the datashape is
        discovered.
    columns
        Deprecated alias for ``fields``; triggers a ``DeprecationWarning``.
    schema
        Measure of the data; when given without ``dshape`` the datashape
        becomes ``var * schema``.
    **kwargs
        Forwarded to ``resource`` when ``data`` is a string.

    Raises
    ------
    ValueError
        If both ``schema`` and ``dshape`` are given, or if ``fields``
        differs from the names of a discovered record measure.
    """
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            # NOTE(review): split() has no maxsplit, so a string with more
            # than one '::' makes this two-target unpacking raise.
            data, sub_uri = data.split('::')
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)
    # Materialise one-shot iterators, except the types listed in
    # not_an_iterator.
    if (isinstance(data, Iterator) and
            not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if columns:
        warnings.warn("columns kwarg deprecated.  Use fields instead",
                      DeprecationWarning)
    if columns and not fields:
        fields = columns
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        if isinstance(dshape.measure, Tuple) and fields:
            # Tuple measure: pair the given field names with the tuple's types.
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))
        elif isscalar(dshape.measure) and fields:
            # Scalar measure: the last dimension is replaced by a record in
            # which every field has the scalar type.
            types = (dshape.measure,) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema,)))
        elif isrecord(dshape.measure) and fields:
            # Record measure: the given fields must equal the discovered names.
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            if names != fields:
                raise ValueError('data column names %s\n'
                                 '\tnot equal to fields parameter %s,\n'
                                 '\tuse Data(data).relabel(%s) to rename fields'
                                 % (names,
                                    fields,
                                    ', '.join('%s=%r' % (k, v)
                                              for k, v in zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))

    ds = datashape.dshape(dshape)
    result = InteractiveSymbol(data, ds, name)

    # Apply the part of the URI that followed '::' as nested field access,
    # skipping empty path segments.
    if sub_uri:
        for field in sub_uri.split('/'):
            if field:
                result = result[field]

    return result
Exemplo n.º 10
0
def test_compute_up_on_dict():
    """A dict of lists is discovered as a record and computes by field."""
    mapping = {'a': [1, 2, 3], 'b': [4, 5, 6]}

    actual_text = str(discover(mapping))
    expected_text = str(dshape('{a: 3 * int64, b: 3 * int64}'))
    assert actual_text == expected_text

    sym = symbol('s', discover(mapping))

    assert compute(sym.a, {sym: mapping}) == [1, 2, 3]
Exemplo n.º 11
0
def test_concat_arr():
    """Concatenating two 1-d arrays matches ``np.arange(30)``."""
    first = np.arange(15)
    second = np.arange(15, 30)

    s = symbol("s", discover(first))
    t = symbol("t", discover(second))

    result = compute(concat(s, t), {s: first, t: second})
    assert (result == np.arange(30)).all()
Exemplo n.º 12
0
def test_binary_math(funcname):
    """The blaze binary function ``funcname`` agrees with its numpy twin."""
    left_data = np.arange(15).reshape(5, 3)
    right_data = np.arange(15, 30).reshape(5, 3)
    left = symbol("s", discover(left_data))
    right = symbol("t", discover(right_data))

    expr = getattr(blaze, funcname)(left, right)
    result = compute(expr, {left: left_data, right: right_data})

    # Some blaze names map to a different numpy name.
    numpy_name = binary_name_map.get(funcname, funcname)
    expected = getattr(np, numpy_name)(left_data, right_data)
    assert np.all(result == expected)
Exemplo n.º 13
0
def test_floating_binary_math(func, kwargs):
    """The blaze function ``func`` is close to numpy's result."""
    left_data = np.arange(15).reshape(5, 3)
    right_data = np.arange(15, 30).reshape(5, 3)
    left = symbol('s', discover(left_data))
    right = symbol('t', discover(right_data))

    expr = getattr(blaze, func)(left, right)
    result = compute(expr, {left: left_data, right: right_data}, **kwargs)

    # Some blaze names map to a different numpy name.
    numpy_name = binary_name_map.get(func, func)
    expected = getattr(np, numpy_name)(left_data, right_data)
    np.testing.assert_allclose(result, expected)
Exemplo n.º 14
0
def test_concat_mat():
    """Concatenating 2-d arrays works on the default axis and on axis 1."""
    top = np.arange(15).reshape(5, 3)
    bottom = np.arange(15, 30).reshape(5, 3)

    s = symbol("s", discover(top))
    t = symbol("t", discover(bottom))

    stacked = compute(concat(s, t), {s: top, t: bottom})
    assert (stacked == np.arange(30).reshape(10, 3)).all()

    side_by_side = compute(concat(s, t, axis=1), {s: top, t: bottom})
    assert (side_by_side == np.concatenate((top, bottom), axis=1)).all()
Exemplo n.º 15
0
def test_least_mixed(dtype):
    """``least`` matches ``np.minimum`` when neither array dominates."""
    left_data = np.array([2, 1], dtype=dtype)
    right_data = np.array([1, 2], dtype=dtype)
    left = symbol("s", discover(left_data))
    right = symbol("t", discover(right_data))

    result = compute(least(left, right), {left: left_data, right: right_data})

    assert np.all(result == np.minimum(left_data, right_data))
Exemplo n.º 16
0
def test_least(dtype):
    """``least`` matches ``np.minimum`` on two 5x3 arrays."""
    left_data = np.arange(15, dtype=dtype).reshape(5, 3)
    right_data = np.arange(15, 30, dtype=dtype).reshape(5, 3)
    left = symbol("s", discover(left_data))
    right = symbol("t", discover(right_data))

    result = compute(least(left, right), {left: left_data, right: right_data})

    assert np.all(result == np.minimum(left_data, right_data))
Exemplo n.º 17
0
def test_datetimes_persist():
    """The discovered datashape survives a chain of container conversions."""
    stamps = [datetime.datetime.now()] * 3
    expected = discover(stamps)

    current = stamps
    # Each conversion starts from the result of the previous one.
    for target in (list, tuple, np.ndarray, tuple):
        current = convert(target, current)
        assert discover(current) == expected
Exemplo n.º 18
0
def discover_jsonlines(j, n=10, encoding='utf-8', **kwargs):
    """Discover the datashape of a JSON-lines file from its first records.

    At most ``n`` non-empty lines of ``j.path`` are parsed.  If fewer than
    ``n`` were found the discovered shape is used as is; otherwise the
    leading dimension is replaced by ``var``.  The result is passed through
    ``date_to_datetime_dshape``.
    """
    # filter/map/take are called with a single argument here, so they are
    # presumably the curried toolz variants -- confirm against the imports.
    with json_lines(j.path, encoding=encoding) as lines:
        sample = pipe(lines, filter(nonempty), map(json.loads), take(n), list)

    shape = discover(sample)
    if len(sample) >= n:
        shape = var * shape.subshape[0]
    return date_to_datetime_dshape(shape)
Exemplo n.º 19
0
def test_greatest(dtype):
    """``greatest`` matches ``np.maximum`` on two 5x3 arrays."""
    left_data = np.arange(15, dtype=dtype).reshape(5, 3)
    right_data = np.arange(15, 30, dtype=dtype).reshape(5, 3)
    left = symbol('s', discover(left_data))
    right = symbol('t', discover(right_data))

    result = compute(greatest(left, right),
                     {left: left_data, right: right_data})

    assert np.all(result == np.maximum(left_data, right_data))
Exemplo n.º 20
0
def test_greatest_mixed(dtype):
    """``greatest`` matches ``np.maximum`` when neither array dominates."""
    left_data = np.array([2, 1], dtype=dtype)
    right_data = np.array([1, 2], dtype=dtype)
    left = symbol('s', discover(left_data))
    right = symbol('t', discover(right_data))

    result = compute(greatest(left, right),
                     {left: left_data, right: right_data})

    assert np.all(result == np.maximum(left_data, right_data))
Exemplo n.º 21
0
def test_discover():
    """A three-column table is discovered with the expected datashape."""
    assert discover(sa.String()) == datashape.string
    meta = sa.MetaData()
    accounts = sa.Table(
        'accounts', meta,
        sa.Column('name', sa.String),
        sa.Column('amount', sa.Integer),
        sa.Column('timestamp', sa.DateTime, primary_key=True),
    )

    actual = discover(accounts)
    expected = dshape(
        'var * {name: ?string, amount: ?int32, timestamp: datetime}')
    assert actual == expected
Exemplo n.º 22
0
def test_discover_views():
    """Discovering the metadata lists the view next to the table."""
    engine, table = single_table_engine()
    meta = table.metadata
    with engine.connect() as conn:
        conn.execute('''CREATE VIEW myview AS
                        SELECT name, amount
                        FROM accounts
                        WHERE amount > 0''')

    actual = str(discover(meta))
    expected = str(discover({'accounts': table, 'myview': table}))
    assert actual == expected
Exemplo n.º 23
0
Arquivo: test_sql.py Projeto: EGQM/odo
def test_discover():
    """Table discovery gives the expected datashape with string names."""
    assert discover(sa.String()) == datashape.string
    meta = sa.MetaData()
    accounts = sa.Table(
        'accounts', meta,
        sa.Column('name', sa.String),
        sa.Column('amount', sa.Integer),
        sa.Column('timestamp', sa.DateTime, primary_key=True),
    )
    expected = dshape(
        'var * {name: ?string, amount: ?int32, timestamp: datetime}')
    assert_dshape_equal(discover(accounts), expected)
    # Every field name of the expected measure must be a string type.
    assert all(isinstance(field, string_types)
               for field in expected.measure.names)
Exemplo n.º 24
0
def test_compute_up_on_dict():
    """A dict of lists is discovered as a record and computes by field."""
    mapping = {'a': [1, 2, 3], 'b': [4, 5, 6]}
    expected = dshape('{a: 3 * int64, b: 3 * int64}').measure

    assert_dshape_equal(
        discover(mapping),
        expected,
        check_record_order=False,  # dict order undefined
    )

    sym = symbol('s', discover(mapping))
    result = compute(sym.a, {sym: mapping})
    assert result == [1, 2, 3]
Exemplo n.º 25
0
    def _dtype(self):
        """Return the left measure, optionified, for a string repeat.

        Raises ``TypeError`` unless the left measure is a ``String`` and the
        right measure is in ``integral`` (the ``ty`` attribute is used for
        either side when present).
        """
        left = discover(self.lhs).measure
        right = discover(self.rhs).measure
        left_is_string = isinstance(getattr(left, 'ty', left), String)
        if not left_is_string or getattr(right, 'ty', right) not in integral:
            raise TypeError(
                'can only repeat strings by an integer amount, got: %s * %s' %
                (left, right),
            )

        return optionify(left, right, left)
Exemplo n.º 26
0
def test_discover():
    """A three-column table is discovered with the expected datashape."""
    assert discover(sa.String()) == datashape.string
    meta = sa.MetaData()
    columns = [
        sa.Column("name", sa.String),
        sa.Column("amount", sa.Integer),
        sa.Column("timestamp", sa.DateTime, primary_key=True),
    ]
    accounts = sa.Table("accounts", meta, *columns)

    actual = discover(accounts)
    expected = dshape(
        "var * {name: ?string, amount: ?int32, timestamp: datetime}")
    assert actual == expected
Exemplo n.º 27
0
def test_groups():
    """An HDF store with nested groups is discovered and computed by path.

    The module-level ``df`` is written to ``/data/fixed`` in a temporary
    file, reopened through an ``hdfstore://`` URI and compared against the
    equivalent nested dict.
    """
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, '/data/fixed')

        hdf = data('hdfstore://%s' % fn)
        try:
            assert dshape(discover(hdf)) == dshape(
                discover({'data': {'fixed': df}}))

            s = symbol('s', discover(hdf))

            assert list(compute(s.data.fixed, hdf).a) == [1, 2, 3, 4]
        finally:
            # Close the store even when an assertion fails, so the handle
            # is released before tmpfile cleans up the file.
            hdf.data.close()
Exemplo n.º 28
0
def Data(data, dshape=None, name=None, fields=None, columns=None, schema=None,
         **kwargs):
    """Wrap ``data`` in an ``InteractiveSymbol`` with a resolved datashape.

    Parameters
    ----------
    data
        The object to wrap.  An ``InteractiveSymbol`` is unwrapped and
        re-wrapped; a string is passed to ``resource``.
    dshape
        Datashape, or a string that is parsed with ``datashape.dshape``.
        When falsy it is discovered from ``data``.
    name
        Passed through to ``InteractiveSymbol``.
    fields
        Field names used to build a record schema when the datashape is
        discovered.
    columns
        No longer supported; any truthy value raises ``ValueError``.
    schema
        Measure of the data; when given without ``dshape`` the datashape
        becomes ``var * schema``.
    **kwargs
        Forwarded to ``resource`` when ``data`` is a string.

    Raises
    ------
    ValueError
        If ``columns`` is given, if both ``schema`` and ``dshape`` are
        given, or if ``fields`` differs from the names of a discovered
        record measure.
    """
    if columns:
        raise ValueError("columns argument deprecated, use fields instead")
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")

    # Unwrap an existing symbol and build a fresh one from its data.
    if isinstance(data, InteractiveSymbol):
        return Data(data.data, dshape, name, fields, columns, schema, **kwargs)

    if isinstance(data, _strtypes):
        # columns is always falsy here because of the check at the top.
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)
    # Materialise one-shot iterators, except the types listed in
    # not_an_iterator.
    if (isinstance(data, Iterator) and
            not isinstance(data, tuple(not_an_iterator))):
        data = tuple(data)
    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)
    if not dshape:
        dshape = discover(data)
        types = None
        if isinstance(dshape.measure, Tuple) and fields:
            # Tuple measure: pair the given field names with the tuple's types.
            types = dshape[1].dshapes
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))
        elif isscalar(dshape.measure) and fields:
            # Scalar measure: the last dimension is replaced by a record in
            # which every field has the scalar type.
            types = (dshape.measure,) * int(dshape[-2])
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape[:-1] + (schema,)))
        elif isrecord(dshape.measure) and fields:
            # Record measure: the given fields must equal the discovered names.
            ds = discover(data)
            assert isrecord(ds.measure)
            names = ds.measure.names
            if names != fields:
                raise ValueError('data column names %s\n'
                                 '\tnot equal to fields parameter %s,\n'
                                 '\tuse Data(data).relabel(%s) to rename '
                                 'fields' % (names,
                                             fields,
                                             ', '.join('%s=%r' % (k, v)
                                                       for k, v in
                                                       zip(names, fields))))
            types = dshape.measure.types
            schema = Record(list(zip(fields, types)))
            dshape = DataShape(*(dshape.shape + (schema,)))

    ds = datashape.dshape(dshape)
    return InteractiveSymbol(data, ds, name)
Exemplo n.º 29
0
Arquivo: test_sql.py Projeto: EGQM/odo
def test_foreign_keys_as_compound_primary_key():
    """Two foreign keys forming a compound primary key discover as maps."""
    with tmpfile('db') as path:

        def table_uri(table_name):
            return 'sqlite:///%s::%s' % (path, table_name)

        suppliers = resource(
            table_uri('suppliers'),
            dshape='var * {id: int64, name: string}',
            primary_key=['id'],
        )
        parts = resource(
            table_uri('parts'),
            dshape='var * {id: int64, name: string, region: string}',
            primary_key=['id'],
        )
        references = {
            'supp_id': suppliers.c.id,
            'part_id': parts.c.id
        }
        suppart = resource(
            table_uri('suppart'),
            dshape='var * {supp_id: map[int64, T], part_id: map[int64, U]}',
            foreign_keys=references,
            primary_key=['supp_id', 'part_id'],
        )
        expected = dshape("""
            var * {
                supp_id: map[int64, {id: int64, name: string}],
                part_id: map[int64, {id: int64, name: string, region: string}]
            }
        """)
        assert discover(suppart) == expected
Exemplo n.º 30
0
Arquivo: test_sql.py Projeto: EGQM/odo
def test_compound_primary_key_with_single_reference():
    """A foreign key to one column of a compound primary key is discovered."""
    with tmpfile('db') as path:
        products_uri = 'sqlite:///%s::products' % path
        products = resource(products_uri,
                            dshape="""
                                var * {
                                    product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64
                                }
                            """, primary_key=['product_no', 'product_sku'])
        # TODO: should this fail everywhere? e.g., this fails in postgres, but
        # not in sqlite because postgres doesn't allow partial foreign keys
        # might be best to let the backend handle this
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          quantity: ?int32
                        }""")
        orders_uri = 'sqlite:///%s::orders' % path
        orders = resource(orders_uri, dshape=orders_shape,
                          foreign_keys=dict(product_no=products.c.product_no),
                          primary_key=['order_id'])
        actual = discover(orders)
        expected = dshape(
            """var * {
                order_id: int32,
                product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                quantity: ?int32
            }
            """
        )
        assert actual == expected
Exemplo n.º 31
0
def test_discovery_engine():
    """Discovering through the engine agrees with discovering the table."""
    engine, table = single_table_engine()

    by_name = discover(engine, 'accounts')
    assert by_name == discover(table)

    whole_engine = str(discover(engine))
    assert whole_engine == str(discover({'accounts': table}))
Exemplo n.º 32
0
def test_discover_selectable():
    """A limited single-column select is discovered as a one-field record."""
    table = resource('sqlite:///:memory:::mytable',
                     dshape='var * {x: int, y: int}')
    query = sa.select([table.c.x]).limit(5)
    actual = discover(query)
    assert actual == dshape('var * {x: int}')
Exemplo n.º 33
0
Arquivo: h5py.py Projeto: quasiben/odo
def discover_h5py_group_file(g):
    """Return a record datashape with one field per item of ``g``."""
    members = [[key, discover(value)] for key, value in g.items()]
    return DataShape(Record(members))
Exemplo n.º 34
0
from datetime import datetime, date

from blaze.compute.core import compute, compute_up
from blaze.expr import symbol, by, exp, summary, Broadcast, join, concat
from blaze.expr import greatest, least
from blaze import sin
import blaze
from odo import into
from datashape import discover, to_numpy, dshape

# Structured array of (id, name, amount) records used as module-level data.
x = np.array([(1, 'Alice', 100), (2, 'Bob', -200), (3, 'Charlie', 300),
              (4, 'Denis', 400), (5, 'Edith', -500)],
             dtype=[('id', 'i8'), ('name', 'S7'), ('amount', 'i8')])

# Symbol named 't' whose datashape is discovered from ``x``.
t = symbol('t', discover(x))


def eq(a, b):
    """Compare ``a`` and ``b``; reduce an ndarray result with ``.all()``.

    Any other comparison result is returned unchanged.
    """
    outcome = (a == b)
    return outcome.all() if isinstance(outcome, np.ndarray) else outcome


def test_symbol():
    """Computing the bare symbol against ``x`` gives ``x`` back."""
    computed = compute(t, x)
    assert eq(computed, x)


def test_eq():
    """An equality expression matches the same comparison done in numpy."""
    computed = compute(t['amount'] == 100, x)
    expected = x['amount'] == 100
    assert eq(computed, expected)
Exemplo n.º 35
0
def test_query_with_strings():
    """Filtering on a bytes field selects only the matching record."""
    records = np.array([('a', 1), ('b', 2), ('c', 3)],
                       dtype=[('x', 'S1'), ('y', 'i4')])

    s = symbol('s', discover(records))
    selected = compute(s[s.x == b'b'], records)
    assert selected.tolist() == [(b'b', 2)]
Exemplo n.º 36
0
def test_str_interp():
    """``interp`` on a string array matches ``np.char.mod``."""
    templates = np.array(('%s', '%s', '%s'))
    s = symbol('s', discover(templates))
    result = compute(s.interp(1), templates)
    assert all(result == np.char.mod(templates, 1))
Exemplo n.º 37
0
 def __init__(self, categories, type=None, ordered=False):
     """Store the categories, their measure and the ``ordered`` flag.

     When ``type`` is falsy it is discovered from the categories tuple.
     """
     self.categories = tuple(categories)
     # ``type`` shadows the builtin here; the name is part of the signature.
     base = type or datashape.discover(self.categories)
     self.type = base.measure
     self.ordered = ordered
Exemplo n.º 38
0
Arquivo: sql.py Projeto: luizirber/odo
def discover(engine):
    """Discover the metadata obtained from ``engine``."""
    # NOTE(review): as written this calls the function named ``discover``
    # itself; presumably the original is registered on a dispatcher --
    # confirm against the full file.
    return discover(metadata_of_engine(engine))
Exemplo n.º 39
0
Arquivo: sql.py Projeto: luizirber/odo
def discover_sqlalchemy_selectable(t):
    """Return ``var * Record(...)`` built from every column of ``t``."""
    per_column = [discover(col).parameters[0] for col in t.columns]
    fields = list(sum(per_column, ()))
    return var * Record(fields)
Exemplo n.º 40
0
Arquivo: sql.py Projeto: luizirber/odo
def discover_sqlalchemy_column(col):
    """Return a one-field record for ``col``, optional when it is nullable."""
    wrap = Option if col.nullable else identity
    field = [col.name, wrap(discover(col.type))]
    return Record([field])
Exemplo n.º 41
0
def test_discover_null_columns():
    """A column's ``nullable`` flag decides whether its type is optional."""
    nullable_column = sa.Column('name', sa.String, nullable=True)
    assert dshape(discover(nullable_column)) == dshape('{name: ?string}')

    required_column = sa.Column('name', sa.String, nullable=False)
    assert dshape(discover(required_column)) == dshape('{name: string}')
Exemplo n.º 42
0
def test_discover_oracle_intervals(freq):
    """An Oracle INTERVAL column is discovered as an optional timedelta."""
    interval = sa.dialects.oracle.base.INTERVAL(
        day_precision={'D': 9}.get(freq),
        second_precision=prec.get(freq, 0),
    )
    table = sa.Table('t', sa.MetaData(), sa.Column('dur', interval))
    actual = discover(table)
    assert actual == dshape('var * {dur: ?timedelta[unit="%s"]}' % freq)
Exemplo n.º 43
0
def test_discover_postgres_intervals(freq):
    """A PostgreSQL INTERVAL column is discovered as an optional timedelta."""
    interval = sa.dialects.postgresql.base.INTERVAL(precision=prec.get(freq))
    table = sa.Table('t', sa.MetaData(), sa.Column('dur', interval))
    actual = discover(table)
    assert actual == dshape('var * {dur: ?timedelta[unit="%s"]}' % freq)
Exemplo n.º 44
0
def discover_tables_node(f):
    """Discover the node returned by ``f.getNode('/')``."""
    root = f.getNode('/')
    return discover(root)
Exemplo n.º 45
0
def discover_tables_node(n):
    """Discover the children mapping of node ``n``."""
    children = n._v_children  # subclasses dict
    return discover(children)
Exemplo n.º 46
0
Arquivo: sql.py Projeto: user32000/odo
def discover_foreign_key_relationship(fk, parent, parent_measure=None):
    """Return ``{column name: Map(key type, referenced measure)}`` for ``fk``.

    ``parent_measure`` is used as given when the foreign key refers to
    ``parent`` itself; otherwise the referenced table is discovered.
    """
    if fk.column.table is not parent:
        parent_measure = discover(fk.column.table).measure
    key_type = discover(fk.parent.type)
    return {fk.parent.name: Map(key_type, parent_measure)}
Exemplo n.º 47
0
def test_coerce():
    """Coercing an integer array to float64 gives the float equivalents."""
    values = np.arange(1, 3)
    s = symbol('s', discover(values))
    result = compute(s.coerce('float64'), values)
    np.testing.assert_array_equal(result, np.arange(1.0, 3.0))
Exemplo n.º 48
0
Arquivo: sql.py Projeto: user32000/odo
def discover_sqlalchemy_column(c):
    """Return a one-field record for ``c``.

    The type is optional unless ``c.nullable`` exists and is falsy.
    """
    nullable = getattr(c, 'nullable', True)
    wrap = Option if nullable else identity
    return Record([(c.name, wrap(discover(c.type)))])
Exemplo n.º 49
0
def test_str_repeat():
    """``repeat`` on a string array matches ``np.char.multiply``."""
    letters = np.array(('a', 'b', 'c'))
    s = symbol('s', discover(letters))
    result = compute(s.repeat(3), letters)
    assert all(result == np.char.multiply(letters, 3))
Exemplo n.º 50
0
Arquivo: sql.py Projeto: user32000/odo
def discover(engine):
    """Discover the metadata obtained from ``engine``."""
    # NOTE(review): as written this calls the function named ``discover``
    # itself; presumably the original is registered on a dispatcher --
    # confirm against the full file.
    metadata = metadata_of_engine(engine)
    return discover(metadata)
Exemplo n.º 51
0
def test_subexpr_datetime():
    """Truncating to two days and taking ``.day`` gives the expected days."""
    stamps = pd.date_range(
        start='01/01/2010', end='01/04/2010', freq='D').values
    s = symbol('s', discover(stamps))
    result = compute(s.truncate(days=2).day, stamps)
    np.testing.assert_array_equal(result, np.array([31, 2, 2, 4]))
Exemplo n.º 52
0
def discover_postgresql_interval(t):
    """Discover ``t`` as an ``sa.Interval`` with the same second precision."""
    equivalent = sa.Interval(day_precision=0, second_precision=t.precision)
    return discover(equivalent)
Exemplo n.º 53
0
def dataset():
    """Return the discovered datashape of ``_get_data()`` as a string."""
    shape = discover(_get_data())
    return str(shape)
Exemplo n.º 54
0
def numpy_to_dynd(x, **kwargs):
    """Build an ``nd.array`` from ``x`` typed by its discovered datashape."""
    type_string = str(discover(x))
    return nd.array(x, type=type_string)
Exemplo n.º 55
0
def discover_oracle_interval(t):
    """Discover ``t`` after adapting it to ``sa.Interval``."""
    adapted = t.adapt(sa.Interval)
    return discover(adapted)
Exemplo n.º 56
0
def test_discovery_metadata():
    """Discovering the metadata agrees with discovering a dict of tables."""
    engine, table = single_table_engine()
    actual = str(discover(table.metadata))
    expected = str(discover({'accounts': table}))
    assert actual == expected
Exemplo n.º 57
0
Arquivo: core.py Projeto: syonoki/odo
def _transform(graph,
               target,
               source,
               excluded_edges=None,
               ooc_types=ooc_types,
               **kwargs):
    """ Transform source to target type using graph of transformations

    Walks the conversion path from ``type(source)`` to ``target`` and applies
    each edge's function in turn.  When a step raises
    ``NotImplementedError`` the edge is excluded and a new path is chosen,
    either from ``source`` again or from the current intermediate type,
    whichever has the lower ``path_cost``.

    ``kwargs['dshape']`` is filled in with ``discover(source)`` when it is
    missing or ``None``.  If ``kwargs['raise_on_errors']`` is truthy a
    failing step re-raises instead of being worked around.

    Returns the converted object.
    """
    # take a copy so we can mutate without affecting the input
    excluded_edges = (excluded_edges.copy()
                      if excluded_edges is not None else set())

    # discovery is best effort: a NotImplementedError leaves dshape unset
    with ignoring(NotImplementedError):
        if 'dshape' not in kwargs or kwargs['dshape'] is None:
            kwargs['dshape'] = discover(source)

    pth = path(graph,
               type(source),
               target,
               excluded_edges=excluded_edges,
               ooc_types=ooc_types)

    x = source
    # presumably IterProxy lets the loop below switch to a new path by
    # assigning to ``.it`` -- confirm against IterProxy
    path_proxy = IterProxy(pth)
    for convert_from, convert_to, f, cost in path_proxy:
        try:
            x = f(x, excluded_edges=excluded_edges, **kwargs)
        except NotImplementedError as e:
            if kwargs.get('raise_on_errors'):
                raise
            warn(FailedConversionWarning(convert_from, convert_to, e))

            # exclude the broken edge
            excluded_edges |= {(convert_from, convert_to)}

            # compute the path from `source` to `target` excluding
            # the edge that broke
            # NOTE(review): unlike the greedy path below, this call is not
            # guarded against nx.exception.NetworkXNoPath.
            fresh_path = list(
                path(graph,
                     type(source),
                     target,
                     excluded_edges=excluded_edges,
                     ooc_types=ooc_types))
            fresh_path_cost = path_cost(fresh_path)

            # compute the path from the current `convert_from` type
            # to the `target`
            try:
                greedy_path = list(
                    path(graph,
                         convert_from,
                         target,
                         excluded_edges=excluded_edges,
                         ooc_types=ooc_types))
            except nx.exception.NetworkXNoPath:
                # no way forward from here: make restarting always win
                greedy_path_cost = np.inf
            else:
                greedy_path_cost = path_cost(greedy_path)

            if fresh_path_cost < greedy_path_cost:
                # it is faster to start over from `source` with a new path
                x = source
                pth = fresh_path
            else:
                # it is faster to work around our broken edge from our
                # current location
                pth = greedy_path

            path_proxy.it = pth

    return x
Exemplo n.º 58
0
def dataframe_to_numpy(df, dshape=None, **kwargs):
    """Convert ``df`` to a record array with the dtype implied by ``dshape``.

    ``dshape`` is discovered from ``df`` when falsy.  The records are cast
    only if their dtype differs from the target dtype.
    """
    target_dtype = dshape_to_numpy(dshape or discover(df))
    records = df.to_records(index=False)
    if records.dtype != target_dtype:
        return records.astype(target_dtype)
    return records
Exemplo n.º 59
0
def test_timedelta_sql_discovery(freq):
    """A timedelta column keeps its unit through table creation."""
    schema = '{name: string, amount: int, duration: timedelta[unit="%s"]}' % freq
    table = dshape_to_table('td_bank', schema)
    duration = discover(table).measure['duration']
    assert duration == datashape.TimeDelta(freq)
Exemplo n.º 60
0
def test_discover_fixed_length_string():
    """A ``string[30]`` column keeps its fixed length on discovery."""
    table = resource('sqlite:///:memory:::mytable',
                     dshape='var * {x: string[30]}')
    actual = discover(table)
    assert actual == dshape('var * {x: string[30]}')