示例#1
0
def test_fn_chain():
    """Check that a list given as ``fn_transform`` is applied to each field.

    The transforms are: strip the field, convert it with ``int``, multiply
    by 10.  The three asserted sums are for the first three records of
    ``nums.csv``.
    """
    transforms = [
        lambda field: field.strip(),
        int,
        lambda number: number * 10,
    ]
    # Use a context manager so the fixture handle is closed even when an
    # assertion below fails (the bare ``open`` call leaked it).
    with open(parent / 'fixtures' / 'nums.csv', 'rb') as infile:
        # NOTE(review): 10 and 44 look like the byte values of LF and ','
        # -- confirm against the AlphaReader signature.
        reader = AlphaReader(infile,
                             terminator=10,
                             delimiter=44,
                             encoding='UTF-8',
                             fn_transform=transforms)
        assert sum(next(reader)) == 60
        assert sum(next(reader)) == 600
        assert sum(next(reader)) == 6000
示例#2
0
def test_list():
    """Check that exhausting the reader with ``list`` yields three records."""
    # Context manager closes the fixture handle once the reader has been
    # fully consumed (the bare ``open`` call leaked it).
    with open(parent / 'fixtures' / 'nums.csv', 'rb') as infile:
        reader = AlphaReader(infile,
                             terminator=10,
                             delimiter=44,
                             encoding='UTF-8',
                             fn_transform=[lambda x: x.strip(), int])
        records = list(reader)
    assert len(records) == 3
示例#3
0
def test_fn_transform():
    """Check that a single callable given as ``fn_transform`` is applied.

    Every field of the first record must come back as an ``int``.
    """
    # The transform must actually use its argument: the previous
    # ``lambda x: int()`` discarded the field and always produced 0, so the
    # test passed without proving the field value reached the transform.
    to_int = lambda x: int(x)  # noqa: E731 - kept as a lambda, as before
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'nums.csv', 'rb') as infile:
        reader = AlphaReader(infile,
                             terminator=10,
                             delimiter=44,
                             encoding='UTF-8',
                             fn_transform=to_int)
        first_record = next(reader)
    assert all(isinstance(value, int) for value in first_record)
示例#4
0
def test_multibyte():
    """Check that reading with ``delimiter=198`` raises ``ValueError``.

    NOTE(review): 198 (0xC6) is presumably chosen because it is not a
    single-byte character in UTF-8 -- confirm against AlphaReader.
    """
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'nums.csv', 'rb') as infile:
        reader = AlphaReader(infile,
                             terminator=10,
                             delimiter=198,
                             encoding='UTF-8',
                             fn_transform=[lambda x: x.strip(), int])
        # The error is expected when the first record is pulled, not when
        # the reader is constructed.
        with pytest.raises(ValueError):
            next(reader)
示例#5
0
def convert_dat_to_parquet(spark, path, url, entity='user_en', version='latest'):
    """Load a file through ``AlphaReader`` into a typed Spark DataFrame.

    The schema is looked up with ``get_registry(url, entity, version)`` and
    ``get_schema``; ``get_types(schema)`` supplies ``(name, type)`` pairs.
    Columns are renamed positionally to those names and then cast to the
    paired types.

    NOTE: despite the name, nothing is written here; the DataFrame is
    returned to the caller.

    Args:
        spark: Spark session used to build the DataFrame.
        path: Path of the input file, opened through ``pa.hdfs``.
        url: Registry location passed to ``get_registry``.
        entity: Entity name passed to ``get_registry``.
        version: Schema version passed to ``get_registry``.

    Returns:
        The renamed and cast DataFrame.
    """
    fs = pa.hdfs.connect()
    schema = get_schema(get_registry(url, entity, version))
    # Close the file handle once the rows have been loaded (it was leaked
    # before).  NOTE(review): this relies on ``createDataFrame`` consuming
    # a local, non-RDD iterable before it returns -- confirm for the
    # PySpark version in use.
    with fs.open(path, 'rb') as infile:
        df = spark.createDataFrame(AlphaReader(infile))
    # Materialise the pairs: they are iterated twice below, which would
    # silently yield nothing the second time for a one-shot iterator.
    data_types = list(get_types(schema))
    new_names = [pair[0] for pair in data_types]
    old_names = df.columns

    renamed = df.select(
        [F.col(old).alias(new) for old, new in zip(old_names, new_names)]
    )
    return renamed.select([F.col(c).cast(t) for c, t in data_types])
示例#6
0
def test_alphareader_with_encoding():
    """Profile a full read of ``large.dat`` with cp1252 decoding.

    The reader is exhausted under ``cProfile`` and the statistics, sorted by
    cumulative time, are written to the log.  There are no assertions on the
    parsed content; the test passes as long as reading does not raise.
    """
    profiler = cProfile.Profile()
    with open(parent / 'fixtures' / 'large.dat', 'rb') as infile:
        profiler.enable()
        reader = AlphaReader(
            infile,
            delimiter=171,
            terminator=172,
            encoding='cp1252',
        )
        list(reader)
        profiler.disable()

        # Render the profile into an in-memory buffer and log it.
        report = io.StringIO()
        stats = pstats.Stats(profiler, stream=report)
        stats.sort_stats(SortKey.CUMULATIVE)
        stats.print_stats()
        logger.info(report.getvalue())
示例#7
0
def test_iterator():
    """Check that an ``AlphaReader`` instance exposes ``__iter__``."""
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'file.csv', 'rb') as infile:
        reader = AlphaReader(infile, terminator=10, delimiter=44, encoding='UTF-8')
        assert hasattr(reader, '__iter__')
示例#8
0
def test_writer():
    """Check that ``AlphaWriter`` copies ``alpha.dat`` to ``alpha-copy.dat``.

    The copy must exist afterwards and the value returned by ``AlphaWriter``
    must equal the size in bytes of the source file.
    """
    source = parent / 'fixtures' / 'alpha.dat'
    target = parent / 'fixtures' / 'alpha-copy.dat'
    # Context manager closes the source handle once the writer has returned
    # (the bare ``open`` call leaked it).
    with open(source, "rb") as infile:
        reader = AlphaReader(infile)
        total_size = AlphaWriter(str(target), reader)
    assert os.path.exists(str(target))
    assert total_size == os.path.getsize(source)
示例#9
0
def test_custom_delimiter():
    """Check that ``file.xsv`` read with ``delimiter=124`` gives 4 fields.

    NOTE(review): 124 looks like the byte value of '|' -- confirm against
    the fixture.
    """
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'file.xsv', 'rb') as infile:
        reader = AlphaReader(infile, terminator=10, delimiter=124, encoding='UTF-8')
        assert len(next(reader)) == 4
示例#10
0
def test_content():
    """Check the exact field values of the first record of ``file.csv``."""
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'file.csv', 'rb') as infile:
        reader = AlphaReader(infile, terminator=10, delimiter=44, encoding='UTF-8')
        assert next(reader) == ['1', 'John', 'Doe', '2020']
示例#11
0
def test_records():
    """Check that the first record of ``file.csv`` has four fields."""
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'file.csv', 'rb') as infile:
        reader = AlphaReader(infile, terminator=10, delimiter=44, encoding='UTF-8')
        assert len(next(reader)) == 4
示例#12
0
def test_instance():
    """Check that each record produced by the reader is a ``list``."""
    # Context manager closes the fixture handle (the bare ``open`` leaked it).
    with open(parent / 'fixtures' / 'file.csv', 'rb') as infile:
        reader = AlphaReader(infile, terminator=10, delimiter=44, encoding='UTF-8')
        assert isinstance(next(reader), list)