def test_csv_to_compressed_csv(): with tmpfile(".csv") as fn: with open(fn, "w") as f: f.write("a,1\nb,2\nc,3") with tmpfile(".csv.gz") as gfn: result = odo(fn, gfn) assert odo(result, list) == odo(fn, list)
def test_csv_to_compressed_csv():
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write('a,1\nb,2\nc,3')
        with tmpfile('.csv.gz') as gfn:
            result = odo(fn, gfn)
            assert odo(result, list) == odo(fn, list)

def test_resource_gzip():
    with tmpfile('json.gz') as fn:
        assert isinstance(resource(fn), (JSON, JSONLines))
        assert isinstance(resource('json://' + fn), (JSON, JSONLines))
        assert isinstance(resource('jsonlines://' + fn), (JSON, JSONLines))

    # rebind `fn` so the jsonlines check runs against the new tmpfile
    with tmpfile('jsonlines.gz') as fn:
        assert isinstance(resource('jsonlines://' + fn), (JSON, JSONLines))

def test_pytables_to_csv():
    ndim = 2
    with tmpfile('.h5') as fn:
        # use the snake_case PyTables API (openFile/createArray were
        # removed in PyTables 3.0)
        h5file = tb.open_file(fn, mode='w', title='Test Array')
        h5file.create_array('/', 'test', np.zeros((ndim, ndim), dtype=float))
        h5file.close()
        with tmpfile('csv') as csv:
            t = odo('pytables://%s::/test' % fn, csv)
            assert odo(t, list) == [(0.0, 0.0), (0.0, 0.0)]

def test_copy_one_table_to_a_foreign_engine():
    data = [(1, 1), (2, 4), (3, 9)]
    ds = dshape("var * {x: int, y: int}")
    with tmpfile("db") as fn1:
        with tmpfile("db") as fn2:
            src = into("sqlite:///%s::points" % fn1, data, dshape=ds)
            tgt = into("sqlite:///%s::points" % fn2,
                       sa.select([src]), dshape=ds)
            assert into(set, src) == into(set, tgt)
            assert into(set, data) == into(set, tgt)

def test_resource_on_file():
    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = resource(uri, 'foo', dshape='var * {x: int, y: int}')
        assert isinstance(sql, sa.Table)

    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        sql = resource(uri + '::' + 'foo', dshape='var * {x: int, y: int}')
        assert isinstance(sql, sa.Table)

def test_resource_on_file(): with tmpfile(".db") as fn: uri = "sqlite:///" + fn sql = resource(uri, "foo", dshape="var * {x: int, y: int}") assert isinstance(sql, sa.Table) with tmpfile(".db") as fn: uri = "sqlite:///" + fn sql = resource(uri + "::" + "foo", dshape="var * {x: int, y: int}") assert isinstance(sql, sa.Table)
def test_into_sqlite_with_header_and_different_sep():
    df = pd.DataFrame([('Alice', 100), ('Bob', 200)],
                      columns=['name', 'amount'])
    with tmpfile('.csv') as fn:
        csv = into(fn, df, delimiter='|')
        with tmpfile('.db') as sql:
            db = resource('sqlite:///%s::df' % sql, dshape=discover(csv))
            result = into(db, csv)
            assert into(list, result) == into(list, df)

def test_send_parameterized_query_to_csv():
    with tmpfile('db') as dbfilename:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csvfilename:
            t = odo(csvfilename, 'sqlite:///%s::mytable' % dbfilename)
            with tmpfile('.csv') as fn:
                q = t.select(t.c.a == 1)
                r = odo(q, fn)
                assert sorted(odo(q, list)) == sorted(odo(r, list))

def test_into_sqlite():
    data = [('Alice', 100), ('Bob', 200)]
    ds = datashape.dshape('var * {name: string, amount: int}')
    with tmpfile('.db') as dbpath:
        with tmpfile('.csv') as csvpath:
            csv = into(csvpath, data, dshape=ds, has_header=False)
            sql = resource('sqlite:///%s::mytable' % dbpath, dshape=ds)
            with ignoring(NotImplementedError):
                append_csv_to_sql_table(sql, csv)
                assert into(list, sql) == data

def test_different_encoding_to_csv():
    with tmpfile('db') as dbfilename:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csvfilename:
            t = odo(csvfilename, 'sqlite:///%s::mytable' % dbfilename,
                    encoding='latin1')
            with tmpfile('.csv') as fn:
                with pytest.raises(ValueError):
                    odo(t, fn, encoding='latin1')

def test_sqlite_to_csv(sep, header):
    # `sep` and `header` are supplied by pytest parametrization/fixtures
    # defined elsewhere in the suite
    with tmpfile("db") as dbfilename:
        with filetext("a,b\n1,2\n3,4", extension="csv") as csvfilename:
            t = odo(csvfilename, "sqlite:///%s::mytable" % dbfilename)
            with tmpfile(".csv") as fn:
                odo(t, fn, header=header, delimiter=sep)
                with open(fn, "rt") as f:
                    lines = f.readlines()
                # skip the header row when one was written (header is a bool,
                # so it doubles as a 0/1 slice offset)
                expected = [tuple(map(int, row))
                            for row in map(lambda x: x.split(sep),
                                           lines[header:])]
                assert odo(fn, list, delimiter=sep, has_header=header,
                           dshape=discover(t)) == expected

def test_missing_to_csv():
    data = [dict(a=1, b=2), dict(a=2, c=4)]
    with tmpfile('.json') as fn:
        js = JSON(fn)
        js = odo(data, js)
        with tmpfile('.csv') as csvf:
            csv = odo(js, csvf)
            with open(csv.path, 'rt') as f:
                result = f.read()
            # columns with missing values are promoted to float,
            # hence '2.0' and '4.0'
            expected = 'a,b,c\n1,2.0,\n2,,4.0\n'
            assert result == expected

def test_into_sqlite_with_different_sep():
    df = pd.DataFrame([('Alice', 100), ('Bob', 200)],
                      columns=['name', 'amount'])
    with tmpfile('.csv') as fn:
        # TODO: get the header argument to work in into(CSV, other)
        df.to_csv(fn, sep='|', header=False, index=False)
        csv = CSV(fn, delimiter='|', has_header=False)
        with tmpfile('.db') as sql:
            db = resource('sqlite:///%s::df' % sql, dshape=discover(csv))
            result = into(db, csv)
            assert into(list, result) == into(list, df)

def test_resource_shape():
    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='10 * int')
        assert r.shape == (10,)
        r.file.close()
    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='10 * 10 * int')
        assert r.shape == (10, 10)
        r.file.close()
    with tmpfile('.hdf5') as fn:
        r = resource(fn + '::/data', dshape='var * 10 * int')
        assert r.shape == (0, 10)
        r.file.close()

def test_tuples_to_json():
    ds = dshape('var * {a: int, b: int}')
    with tmpfile('json') as fn:
        j = JSON(fn)
        append(j, [(1, 2), (10, 20)], dshape=ds)
        with open(fn) as f:
            assert '"a": 1' in f.read()
    with tmpfile('json') as fn:
        j = JSONLines(fn)
        append(j, [(1, 2), (10, 20)], dshape=ds)
        with open(fn) as f:
            assert '"a": 1' in f.read()

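# A complementary sketch, not from the original suite: the test above only
# checks that the records land somewhere in the file; this one spells out the
# JSONLines framing (one JSON object per line, unlike the single-document
# JSON target). Assumes the same append/JSONLines API and that `json` is
# imported at module level, as in the json_file fixture below.
def test_jsonlines_one_object_per_line_sketch():
    ds = dshape('var * {a: int, b: int}')
    with tmpfile('json') as fn:
        append(JSONLines(fn), [(1, 2), (10, 20)], dshape=ds)
        with open(fn) as f:
            lines = [line for line in f.read().splitlines() if line.strip()]
        # one record per non-empty line, each independently parseable
        assert len(lines) == 2
        assert [json.loads(line)['a'] for line in lines] == [1, 10]
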
def test_compound_primary_key_with_single_reference():
    with tmpfile('db') as fn:
        products = resource('sqlite:///%s::products' % fn,
                            dshape="""
                            var * {
                                product_no: int32,
                                product_sku: string,
                                name: ?string,
                                price: ?float64
                            }
                            """,
                            primary_key=['product_no', 'product_sku'])
        # TODO: should this fail everywhere? e.g., this fails in postgres,
        # but not in sqlite, because postgres doesn't allow partial foreign
        # keys; might be best to let the backend handle this
        ds = dshape("""var * {
            order_id: int32,
            product_no: map[int32, T],
            quantity: ?int32
        }""")
        orders = resource('sqlite:///%s::orders' % fn, dshape=ds,
                          foreign_keys=dict(product_no=products.c.product_no),
                          primary_key=['order_id'])
        assert discover(orders) == dshape("""var * {
            order_id: int32,
            product_no: map[int32, {product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64}],
            quantity: ?int32
        }""")

def test_string_dshape_doc_example():
    x = np.zeros((10, 2))
    with tmpfile('.db') as fn:
        t = odo(x, 'sqlite:///%s::x' % fn,
                dshape='var * {a: float64, b: float64}')
        assert all(row == (0, 0) for row in t.select().execute().fetchall())

def tbfile():
    with tmpfile('.h5') as filename:
        f = tb.open_file(filename, mode='w')
        d = f.create_table('/', 'title', x)
        d.close()
        f.close()
        yield filename

def test_discover_foreign_keys():
    with tmpfile('db') as fn:
        products = resource('sqlite:///%s::products' % fn,
                            dshape="""
                            var * {
                                product_no: int32,
                                name: ?string,
                                price: ?float64
                            }
                            """,
                            primary_key=['product_no'])
        expected = dshape("""var * {
            order_id: int32,
            product_no: map[int32, {
                product_no: int32,
                name: ?string,
                price: ?float64
            }],
            quantity: ?int32
        }""")
        orders = resource('sqlite:///%s::orders' % fn, dshape=expected,
                          foreign_keys=dict(product_no=products.c.product_no))
        result = discover(orders)
        assert result == expected

def test_compound_primary_key_with_fkey():
    with tmpfile('db') as fn:
        products = resource('sqlite:///%s::products' % fn,
                            dshape="""
                            var * {
                                product_no: int32,
                                product_sku: string,
                                name: ?string,
                                price: ?float64
                            }
                            """,
                            primary_key=['product_no', 'product_sku'])
        ds = dshape("""var * {
            order_id: int32,
            product_no: map[int32, T],
            product_sku: map[int32, U],
            quantity: ?int32
        }""")
        orders = resource('sqlite:///%s::orders' % fn, dshape=ds,
                          primary_key=['order_id'],
                          foreign_keys={
                              'product_no': products.c.product_no,
                              'product_sku': products.c.product_sku
                          })
        assert discover(orders) == dshape("""var * {
            order_id: int32,
            product_no: map[int32, {product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64}],
            product_sku: map[int32, {product_no: int32,
                                     product_sku: string,
                                     name: ?string,
                                     price: ?float64}],
            quantity: ?int32
        }""")

def test_na_value(sql, csv):
    sql = odo(null_data, sql)
    with tmpfile('.csv') as fn:
        csv = odo(sql, fn, na_value='NA')
        with open(csv.path, 'rt') as f:
            raw = f.read()
        assert raw == 'a,b\n1,NA\n10,20\n100,200\n'

def test_sql_to_csv(sql, csv, tmpdir):
    sql, bind = sql
    sql = odo(csv, sql, bind=bind)
    with tmpfile('.csv', dir=tmpdir) as fn:
        csv = odo(sql, fn, bind=bind)
        assert odo(csv, list) == data
        assert discover(csv).measure.names == discover(sql).measure.names

def test_foreign_keys_auto_construct():
    with tmpfile('db') as fn:
        products = resource('sqlite:///%s::products' % fn,
                            dshape="""
                            var * {
                                product_no: int32,
                                name: ?string,
                                price: ?float64
                            }
                            """,
                            primary_key=['product_no'])
        ds = dshape("""var * {
            order_id: int32,
            product_no: map[int32, T],
            quantity: ?int32
        }""")
        orders = resource('sqlite:///%s::orders' % fn, dshape=ds,
                          foreign_keys=dict(product_no=products.c.product_no),
                          primary_key=['order_id'])
        assert discover(orders) == dshape("""var * {
            order_id: int32,
            product_no: map[int32, {
                product_no: int32,
                name: ?string,
                price: ?float64
            }],
            quantity: ?int32
        }""")

def test_transaction():
    with tmpfile('.db') as fn:
        rsc = resource('sqlite:///%s::table' % fn, dshape='var * {a: int}')
        data = [(1,), (2,), (3,)]
        conn_1 = rsc.bind.connect()
        conn_2 = rsc.bind.connect()
        trans_1 = conn_1.begin()
        conn_2.begin()
        odo(data, rsc, bind=conn_1)
        # inside the transaction the write should be there
        assert odo(rsc, list, bind=conn_1) == data
        # outside of a transaction or in a different transaction the write
        # is not there
        assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == []
        trans_1.commit()
        # now the data should appear outside the transaction
        assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == data

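# A complementary sketch, not from the original suite: the rollback side of
# the isolation behavior exercised in test_transaction above. It reuses the
# same odo/resource/bind API shown there; Transaction.rollback() is standard
# SQLAlchemy.
def test_transaction_rollback_sketch():
    with tmpfile('.db') as fn:
        rsc = resource('sqlite:///%s::table' % fn, dshape='var * {a: int}')
        data = [(1,), (2,), (3,)]
        conn = rsc.bind.connect()
        trans = conn.begin()
        odo(data, rsc, bind=conn)
        # visible inside the open transaction
        assert odo(rsc, list, bind=conn) == data
        trans.rollback()
        # the write is discarded after rollback
        assert odo(rsc, list) == []
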
def test_foreign_keys_as_compound_primary_key():
    with tmpfile('db') as fn:
        suppliers = resource(
            'sqlite:///%s::suppliers' % fn,
            dshape='var * {id: int64, name: string}',
            primary_key=['id']
        )
        parts = resource(
            'sqlite:///%s::parts' % fn,
            dshape='var * {id: int64, name: string, region: string}',
            primary_key=['id']
        )
        suppart = resource(
            'sqlite:///%s::suppart' % fn,
            dshape='var * {supp_id: map[int64, T], part_id: map[int64, U]}',
            foreign_keys={
                'supp_id': suppliers.c.id,
                'part_id': parts.c.id
            },
            primary_key=['supp_id', 'part_id']
        )
        expected = dshape("""var * {
            supp_id: map[int64, {id: int64, name: string}],
            part_id: map[int64, {id: int64, name: string, region: string}]
        }""")
        result = discover(suppart)
        assert result == expected

def test_csv_with_header():
    with tmpfile('db') as dbfilename:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csvfilename:
            t = into('sqlite:///%s::mytable' % dbfilename, csvfilename,
                     has_header=True)
            assert discover(t) == dshape('var * {a: int64, b: int64}')
            assert into(set, t) == set([(1, 2), (3, 4)])

def test_discover_with_dotted_names():
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write('a.b,c.d\n1,2\n3,4')
        dshape = discover(resource(fn))
        assert dshape == datashape.dshape('var * {"a.b": int64, "c.d": int64}')
        assert dshape.measure.names == [u'a.b', u'c.d']

def test_fixed_shape():
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://' + fn + '::/foo')
        assert isinstance(r.shape, list)
        assert discover(r).shape == (len(df),)
        r.parent.close()

def test_sql_select_to_csv(sql, csv, tmpdir):
    sql, bind = sql
    sql = odo(csv, sql, bind=bind)
    query = sa.select([sql.c.a])
    with tmpfile('.csv', dir=tmpdir) as fn:
        csv = odo(query, fn, bind=bind)
        assert odo(csv, list) == [(x,) for x, _ in data]

def complex_csv():
    path = os.path.join(os.path.dirname(__file__), 'dummydata.csv')
    with tmpfile('.csv') as fn:
        shutil.copy(path, fn)
        yield CSV(fn, has_header=True)

def test_load_from_jsonlines(ctx):
    with tmpfile('.json') as fn:
        js = odo(df, 'jsonlines://%s' % fn)
        result = odo(js, ctx, name='r')
        assert (list(map(set, odo(result, list))) ==
                list(map(set, odo(df, list))))

def test_encoding_is_none():
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write('a,1\nb,2\nc,3'.encode('utf-8').decode('utf-8'))
        assert CSV(fn, encoding=None).encoding == 'utf-8'

def csv():
    with tmpfile('.csv') as fn:
        create_csv(data, fn)
        yield CSV(fn)

def fcsv():
    with tmpfile('.csv') as fn:
        create_csv(data_floats, fn)
        yield CSV(fn, columns=list('ab'))

def test_csv_infer_header():
    with tmpfile('db') as dbfilename:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csvfilename:
            t = odo(csvfilename, 'sqlite:///%s::mytable' % dbfilename)
            assert discover(t) == dshape('var * {a: int64, b: int64}')
            assert odo(t, set) == set([(1, 2), (3, 4)])

def csv():
    with tmpfile('csv') as filename:
        yield odo(data, filename, dshape=ds, has_header=False)

def csv(tmpdir):
    s = '\n'.join(','.join(map(str, row)) for row in data).encode('utf8')
    with tmpfile('.csv', dir=tmpdir) as fn:
        with open(fn, 'wb') as f:
            f.write(s)
        yield CSV(fn)

def tmpcsv():
    with tmpfile('.csv') as fn:
        with open(fn, mode='w') as f:
            df.to_csv(f, index=False)
        yield fn

def test_ftp_to_local_txt():
    with tmpfile('.txt') as fn:
        txt = odo(ftp_url, fn)
        path = os.path.abspath(txt.path)
        assert os.path.exists(path)

def test_failed_url():
    # exercises the download-failure path for a nonexistent remote file
    failed_url = "http://foo.com/myfile.csv"
    with tmpfile('.csv') as fn:
        odo(failed_url, fn)

def test_sql_select_to_csv(sql, csv):
    sql = odo(csv, sql)
    query = sa.select([sql.c.a])
    with tmpfile('.csv') as fn:
        csv = odo(query, fn)
        assert odo(csv, list) == [(x,) for x, _ in data]

def test_has_header_on_tsv():
    with tmpfile('.csv') as fn:
        with open(fn, 'wb') as f:
            f.write(b'a\tb\n1\t2\n3\t4')
        csv = CSV(fn)
        assert csv.has_header

def test_url_to_local_csv():
    with tmpfile('.csv') as fn:
        csv = odo(iris_url, fn)
        path = os.path.abspath(csv.path)
        assert os.path.exists(path)

def test_decimal_conversion():
    data = [(1.0,), (2.0,)]
    with tmpfile('.db') as fn:
        t = odo(data, 'sqlite:///%s::x' % fn,
                dshape='var * {x: decimal[11, 2]}')
        result = odo(sa.select([sa.func.sum(t.c.x)]), Decimal)
        assert result == sum(Decimal(r[0]) for r in data)

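# A sketch, not from the original suite: the same scalar-select path should
# handle other aggregates. Assumes odo(select, Decimal) works exactly as in
# test_decimal_conversion above; sa.func.avg is standard SQLAlchemy.
def test_decimal_avg_sketch():
    data = [(1.0,), (2.0,)]
    with tmpfile('.db') as fn:
        t = odo(data, 'sqlite:///%s::avg_x' % fn,
                dshape='var * {x: decimal[11, 2]}')
        result = odo(sa.select([sa.func.avg(t.c.x)]), Decimal)
        # 1.5 is exactly representable in binary, so the comparison is exact
        assert result == Decimal('1.5')
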
def test_s3_to_local_csv():
    with tmpfile('.csv') as fn:
        csv = into(fn, tips_uri)
        path = os.path.abspath(csv.path)
        assert os.path.exists(path)

def complex_csv(tmpdir):
    path = os.path.join(os.path.dirname(__file__), 'dummydata.csv')
    with tmpfile('.csv', dir=tmpdir) as fn:
        shutil.copy(path, fn)
        os.chmod(fn, 0o777)
        yield CSV(fn, has_header=True)

def test_resource_to_engine():
    with tmpfile('.db') as fn:
        uri = 'sqlite:///' + fn
        r = resource(uri)
        assert isinstance(r, sa.engine.Engine)
        assert r.dialect.name == 'sqlite'

def test_append_empty_iterator_returns_table():
    with tmpfile('.db') as fn:
        t = resource('sqlite:///%s::x' % fn, dshape='var * {a: int32}')
        assert odo(iter([]), t) is t

def csv():
    with tmpfile('csv') as filename:
        csv = into(filename, data, dshape=ds, has_header=False)
        yield csv

def cities(sc):
    with tmpfile('.txt') as fn:
        cities_df.to_csv(fn, header=False, index=False)
        raw = sc.textFile(fn)
        parts = raw.map(lambda line: line.split(','))
        yield parts.map(lambda person: Row(name=person[0], city=person[1]))

def quoted_sql():
    with tmpfile('.db') as db:
        try:
            yield resource('sqlite:///%s::foo bar' % db, dshape=ds)
        except sa.exc.OperationalError as e:
            pytest.skip(str(e))

def encoding_csv(tmpdir):
    path = os.path.join(os.path.dirname(__file__), 'encoding.csv')
    with tmpfile('.csv', dir=tmpdir) as fn:
        with open(fn, 'wb') as f, open(path, 'r') as g:
            f.write(g.read().encode('latin1'))
        yield CSV(fn)

def test_quoted_name(csv, quoted_sql):
    with tmpfile('csv') as filename:
        csv = odo(data, filename, dshape=ds, has_header=True)
        s = odo(csv, quoted_sql)
        t = odo(csv, list)
        assert sorted(odo(s, list)) == sorted(t)

def json_file(data):
    with tmpfile('.json') as fn:
        with open(fn, 'w') as f:
            json.dump(data, f, default=json_dumps)
        yield fn