def test_discover():
    """Check the datashape discovered for one accounts item and for the directory.

    ``accounts_data`` yields a directory plus three items; both the first
    item and the directory itself must discover to the same record
    datashape once ``'?'`` characters are removed from its string form.
    """
    expected = 'var * {id: int64, name: string, amount: int64}'
    with accounts_data() as (directory, (first, _second, _third)):
        # Same order as before: the single item first, then the directory.
        for resource in (first, directory):
            discovered = str(discover(resource))
            assert discovered.replace('?', '') == expected
def test_copy_local_files_to_hdfs():
    """Copy a local CSV into an HDFS-wrapped CSV target and compare datashapes.

    The transfer is done with ``blocksize=10`` and the test passes when the
    target discovers to the same datashape as the local source.
    """
    with tmpfile_hdfs() as target, \
            filetext('name,amount\nAlice,100\nBob,200') as source:
        local_csv = CSV(source)
        hdfs_csv_type = HDFS(CSV)
        remote_csv = hdfs_csv_type(target, hdfs=hdfs)

        # 10 bytes per message
        into(remote_csv, local_csv, blocksize=10)

        assert discover(remote_csv) == discover(local_csv)
def test_ssh_directory_hive_creation():
    """Load the directory from ``accounts_ssh`` into a Hive table URI via ``odo``.

    The result must be a SQLAlchemy ``Table`` whose discovered datashape
    equals ``ds`` and which yields at least one row when read into a list.
    """
    with hive_table(host) as uri, accounts_ssh() as (directory, _):
        table = odo(directory, uri, **auth)

        assert isinstance(table, sa.Table)
        assert discover(table) == ds

        rows = into(list, table)
        assert len(rows) > 0
def test_hdfs_directory_hive_creation():
    """Load the ``accounts_data`` directory into a Hive table, then append a file.

    Steps checked:

    * ``into(uri, hdfs_directory)`` returns a SQLAlchemy ``Table``.
    * Reading that table into a set yields at least one row.
    * The table's discovered datashape equals ``ds``.
    * After appending the single item ``c`` to the same URI, the table read
      as a list has more entries than the original set of rows.
    """
    with accounts_data() as (hdfs_directory, (a, b, c)):
        with hive_table(host) as uri:
            t = into(uri, hdfs_directory)
            assert isinstance(t, sa.Table)

            result = into(set, t)
            # Compare the *size* of the set: ``set > int`` is not a valid
            # ordering comparison and raises TypeError instead of checking
            # that any rows were loaded.
            assert len(result) > 0
            assert discover(t) == ds

            t2 = into(uri, c)  # append new singleton file
            assert len(into(list, t2)) > len(result)