import os

import sqlalchemy as sa
from datashape import dshape
from pywebhdfs.webhdfs import PyWebHdfsClient

# NOTE: these imports assume the standard ``into`` package layout.
from into import resource, discover, CSV, JSONLines
from into.backends.hdfs import HDFS
from into.directory import Directory

host = os.environ.get('HDFS_TEST_HOST')

if not host:
    # Skip the entire module when no HDFS test host is configured.
    import pytest
    pytest.importorskip('does_not_exist')

hdfs = PyWebHdfsClient(host=host, port='14000', user_name='hdfs')
hdfs_csv = HDFS(CSV)('/user/hive/mrocklin/accounts/accounts.csv', hdfs=hdfs)
hdfs_directory = HDFS(Directory(CSV))('/user/hive/mrocklin/accounts/',
                                      hdfs=hdfs)

ds = dshape('var * {id: ?int64, name: ?string, amount: ?int64}')

engine = resource('hive://hdfs@%s:10000/default' % host)


def test_hive_resource():
    db = resource('hive://hdfs@%s:10000/default' % host)
    assert isinstance(db, sa.engine.Engine)

    # A bare host should fill in the default user, port, and database.
    db = resource('hive://%s/' % host)
    assert isinstance(db, sa.engine.Engine)
    assert str(db.url) == 'hive://hdfs@%s:10000/default' % host


def test_hdfs_resource():
    r = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(r, HDFS(JSONLines))
    assert r.hdfs.user_name == 'user'
    assert r.hdfs.host == 'hostname'
    assert r.hdfs.port == '1234'
    assert r.path == '/path/to/myfile.json'

    assert isinstance(resource('hdfs://path/to/myfile.csv',
                               host='host', user='******', port=1234),
                      HDFS(CSV))
    assert isinstance(resource('hdfs://path/to/*.csv',
                               host='host', user='******', port=1234),
                      HDFS(Directory(CSV)))


def test_discover():
    assert discover(hdfs_csv) == \
        dshape('var * {id: int64, name: string, amount: int64}')


def test_discover_hdfs_directory():
    assert discover(hdfs_directory) == \
        dshape('var * {id: int64, name: string, amount: int64}')


def normalize(s):
    return ' '.join(s.split())