def test_empty_tz():
    """Creating a UTC tz-aware column with ``empty`` must not warn.

    Any ``DeprecationWarning`` is escalated to an exception, and every other
    warning is recorded so the test can assert that none were emitted.
    """
    # catch_warnings restores the global filter list on exit, so the "error"
    # filter installed below cannot leak into tests that run afterwards.
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already shown once at that location.
        warnings.simplefilter("always")
        # Inserted in front of "always": deprecations fail the test outright.
        warnings.simplefilter("error", DeprecationWarning)
        empty([DatetimeTZDtype(unit="ns", tz="UTC")], 10, cols=['a'],
              timezones={'a': 'UTC'})
    assert len(caught) == 0, caught
def test_timestamps():
    """Check timezone handling of datetime columns created by ``empty``.

    Covers a single naive or aware column, a datetime column next to an
    integer column, a ``timezones`` entry given for a non-datetime column,
    and two datetime columns whose zones are equal, missing, or different.
    The dtype-kind checks on ``views`` are real assertions; a bare
    comparison expression would be evaluated and silently discarded.
    """
    z = 'US/Eastern'

    # single column
    df, views = empty('M8', 100, cols=['t'])
    assert df.t.dt.tz is None
    assert views['t'].dtype.kind == "M"

    df, views = empty('M8', 100, cols=['t'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"

    # one time column, one normal
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # no effect of timezones= on non-time column
    df, views = empty('M8,i', 100, cols=['t', 'i'],
                      timezones={'t': z, 'i': z})
    assert df.t.dt.tz.zone == z
    assert df.i.dtype.kind == 'i'
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # multi-timezones
    z2 = 'US/Central'
    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z

    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz is None

    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': 'UTC'})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == 'UTC'

    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': z2})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z2
def read_avro_bytes(URL, open_with, start_byte, length, header, nrows=None):
    """Pass a specific file/bytechunk and convert to dataframe with cyavro

    Both a python dict version of the header, and the original bytes that
    define it, are required. The bytes are prepended to the data, so that the
    C avro reader can interpret them.

    Parameters
    ----------
    URL :
        Location handed unchanged to ``open_with``.
    open_with : callable
        Called as ``open_with(URL, 'rb')``; must return a context manager
        yielding a seekable binary file object.
    start_byte : int
        Offset to seek to before reading. When it is 0 the header is re-read
        from the file and the read starts right after it.
    length : int
        Number of bytes to read after the starting position.
    header : dict
        Parsed header; the keys ``'head_bytes'``, ``'dtypes'`` and
        ``'schema'`` (with a ``'fields'`` list) are used. Its ``'blocks'``
        entry is overwritten when ``nrows`` has to be computed.
    nrows : int, optional
        Number of rows contained in the byte range. When ``None`` it is
        obtained by scanning the blocks of the data that was read.

    Returns
    -------
    The dataframe allocated by ``empty`` and filled chunk by chunk.

    Raises
    ------
    KeyError
        If the reader yields a column that has no field of the same name in
        ``header['schema']['fields']``.
    """
    with open_with(URL, 'rb') as infile:
        infile.seek(start_byte)
        if start_byte == 0:
            # The chunk starts at the top of the file: parse the header from
            # the file itself and skip past it before reading the payload.
            header = read_header(infile)
            infile.seek(header['header_size'])
        data = header['head_bytes'] + infile.read(length)

    if nrows is None:
        buf = io.BytesIO(data)
        header['blocks'] = []
        scan_blocks(buf, header, len(data))
        nrows = sum(block['nrows'] for block in header['blocks'])

    # Rows decoded per read_chunk() call; the reader buffers and the
    # destination slices below must agree on this size.
    chunk_rows = 10000

    # Build the name -> schema-field map once instead of scanning the field
    # list for every column of every chunk. setdefault keeps the first
    # definition should a name ever be repeated.
    fields_by_name = {}
    for field in header['schema']['fields']:
        fields_by_name.setdefault(field['name'], field)

    reader = cyavro.AvroReader()
    reader.init_bytes(data)
    df, _ = empty(header['dtypes'].values(), nrows, cols=header['dtypes'])
    reader.init_reader()
    reader.init_buffers(chunk_rows)
    for start in range(0, nrows, chunk_rows):
        chunk = reader.read_chunk()
        for name in chunk:
            field = fields_by_name[name]
            if 'logicalType' in field:
                # Fields carrying a logical type go through time_convert
                # before being stored.
                df[name].values[start:start + chunk_rows] = time_convert(
                    chunk[name], field)
            else:
                df[name].values[start:start + chunk_rows] = chunk[name]
    return df
def test_empty():
    """Exercise ``empty`` for categorical columns and for mixed plain dtypes.

    The categorical cases check the frame shape, the reported dtype and the
    integer width of the codes view; the mixed case checks that one view is
    produced per column.
    """
    rows = 100

    # Without ``cats`` the codes view is int16; asking for 2 ** 20 categories
    # gives an int32 view.
    cases = (
        ({}, "int16"),
        ({"cats": {"c": 2 ** 20}}, "int32"),
    )
    for extra_kwargs, codes_dtype in cases:
        frame, arrays = empty("category", size=rows, cols=["c"],
                              **extra_kwargs)
        assert frame.shape == (rows, 1)
        assert frame.dtypes.tolist() == ["category"]
        assert arrays["c"].dtype == codes_dtype

    # Writing a code through the view must show up in the frame.
    labels = ["one", "two"]
    frame, arrays = empty("category", size=rows, cols=["c"],
                          cats={"c": labels})
    arrays["c"][0] = 1
    assert frame.c[:2].tolist() == ["two", "one"]

    # Plain numeric / object columns: one view per column.
    names = ["i4", "i8", "f8_1", "f8_2", "O"]
    frame, arrays = empty("i4,i8,f8,f8,O", size=rows, cols=names)
    assert frame.shape == (rows, 5)
    assert len(arrays) == 5
def test_empty():
    """Check shapes, dtypes and views returned by ``empty``.

    Covers a categorical column with default, numeric and explicit-label
    category specifications, followed by a frame of five plain columns.
    """
    size = 100
    expected_shape = (size, 1)

    # Categorical column, no category specification.
    frame, arrays = empty('category', size=size, cols=['c'])
    assert frame.shape == expected_shape
    assert frame.dtypes.tolist() == ['category']
    assert arrays['c'].dtype == 'int16'

    # Categorical column sized for 2**20 categories.
    many_cats = {'c': 2**20}
    frame, arrays = empty('category', size=size, cols=['c'], cats=many_cats)
    assert frame.shape == expected_shape
    assert frame.dtypes.tolist() == ['category']
    assert arrays['c'].dtype == 'int32'

    # Explicit labels: a code written into the view is visible in the frame.
    frame, arrays = empty('category', size=size, cols=['c'],
                          cats={'c': ['one', 'two']})
    codes = arrays['c']
    codes[0] = 1
    first_two = frame.c[:2].tolist()
    assert first_two == ['two', 'one']

    # Five plain columns give five views.
    column_names = ['i4', 'i8', 'f8_1', 'f8_2', 'O']
    frame, arrays = empty('i4,i8,f8,f8,O', size=size, cols=column_names)
    assert frame.shape == (size, 5)
    assert len(arrays) == 5
def test_empty_tz_nonutc():
    """A non-UTC zone must be applied to both the column and the index.

    Builds a frame with one tz-aware column and a datetime index, both
    declared as CET, and checks the zone reported by each.
    """
    zone = "CET"
    column_type = DatetimeTZDtype(unit="ns", tz=zone)
    frame, _ = empty(
        types=[column_type],
        size=8784,
        cols=['a'],
        timezones={'a': zone, 'index': zone},
        index_types=["datetime64[ns]"],
        index_names=["index"],
    )
    assert frame.index.tz.zone == zone
    assert frame.a.dtype.tz.zone == zone
def test_timestamps():
    """Check timezone handling of datetime columns created by ``empty``.

    Covers a single naive or aware column, a datetime column next to an
    integer column, a ``timezones`` entry given for a non-datetime column,
    and two datetime columns whose zones are equal, missing, or different.
    The dtype-kind checks on ``views`` are real assertions; a bare
    comparison expression would be evaluated and silently discarded.
    """
    z = 'US/Eastern'

    # single column
    df, views = empty('M8', 100, cols=['t'])
    assert df.t.dt.tz is None
    assert views['t'].dtype.kind == "M"

    df, views = empty('M8', 100, cols=['t'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"

    # one time column, one normal
    df, views = empty('M8,i', 100, cols=['t', 'i'], timezones={'t': z})
    assert df.t.dt.tz.zone == z
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # no effect of timezones= on non-time column
    df, views = empty('M8,i', 100, cols=['t', 'i'],
                      timezones={'t': z, 'i': z})
    assert df.t.dt.tz.zone == z
    assert df.i.dtype.kind == 'i'
    assert views['t'].dtype.kind == "M"
    assert views['i'].dtype.kind == 'i'

    # multi-timezones
    z2 = 'US/Central'
    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z

    df, views = empty('M8,M8', 100, cols=['t1', 't2'], timezones={'t1': z})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz is None

    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': 'UTC'})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == 'UTC'

    df, views = empty('M8,M8', 100, cols=['t1', 't2'],
                      timezones={'t1': z, 't2': z2})
    assert df.t1.dt.tz.zone == z
    assert df.t2.dt.tz.zone == z2