def _normalized_dshape(input_dshape, utc=False):
    """Rebuild ``input_dshape`` with its date columns forced to ``DateTime``.

    The columns named ``AD_FIELD_NAME`` and ``TS_FIELD_NAME`` are replaced
    by a ``DateTime`` measure; when ``utc`` is truthy that measure carries
    ``tz='UTC'`` (per the original note, this restores timezone
    information that a ctable loses). Every other column keeps the type it
    had in ``input_dshape`` -- in particular ``Option`` wrappers are passed
    through unchanged.

    Parameters
    ----------
    input_dshape : DataShape
        Datashape whose ``measure.fields`` lists the ``(name, type)`` pairs.
    utc : bool, optional
        Whether the date columns are tagged as UTC. Defaults to ``False``.

    Returns
    -------
    DataShape
        ``var * Record(...)`` with the columns in their original order.
    """
    date_columns = (AD_FIELD_NAME, TS_FIELD_NAME)
    columns = OrderedDict(input_dshape.measure.fields)

    normalized = []
    for column, column_type in columns.items():
        if column not in date_columns:
            # Non-date columns are copied over as-is (no Option unwrapping).
            normalized.append([column, column_type])
            continue
        normalized.append([column, DateTime(tz='UTC') if utc else DateTime()])
    return var * Record(normalized)
def gen_odo_kwargs(expr, utc=False):
    """Build the keyword arguments used for an odo conversion of ``expr``.

    The datashape of ``expr`` is rewritten column by column:

    * the ``AD_FIELD_NAME`` and ``TS_FIELD_NAME`` columns become
      ``DateTime`` (with ``tz='UTC'`` when ``utc`` is truthy);
    * any other column wrapped in ``Option`` is replaced by the type it
      wraps (one level only);
    * all remaining columns keep their type.

    Arguments:
        expr {Expr} -- blaze expression whose ``dshape`` is normalized

    Keyword Arguments:
        utc {bool} -- whether date columns are tagged as UTC
            (default: {False})

    Returns:
        dict -- ``{'dshape': var * Record(...)}`` with the columns in their
            original order
    """
    record_fields = []
    for column, column_type in OrderedDict(expr.dshape.measure.fields).items():
        if column in (AD_FIELD_NAME, TS_FIELD_NAME):
            column_type = DateTime(tz='UTC') if utc else DateTime()
        elif isinstance(column_type, Option):
            # Drop the optional wrapper and keep the underlying type.
            column_type = column_type.ty
        record_fields.append([column, column_type])
    return {'dshape': var * Record(record_fields)}
def dshape_from_pandas(col):
    """Return the datashape measure that describes the pandas column ``col``.

    * A column whose dtype is an instance of ``categorical`` maps to
      ``Categorical`` built from the list of its categories.
    * A column whose dtype kind is ``'M'`` maps to ``Option(DateTime(tz=...))``,
      where ``tz`` is the string form of the dtype's ``tz`` attribute, or
      ``None`` when the dtype has no such attribute or it is ``None``.
    * Anything else is converted with ``datashape.CType.from_numpy_dtype``;
      a result equal to ``object_`` is replaced by ``string``, and the
      result is wrapped in ``Option`` when it is a member of
      ``possibly_missing``.
    """
    dtype = col.dtype

    if isinstance(dtype, categorical):
        return Categorical(col.cat.categories.tolist())

    if dtype.kind == 'M':
        zone = getattr(dtype, 'tz', None)
        # Pandas stores the timezone as a pytz.tzinfo, but DataShape wants
        # a string.
        return Option(DateTime(tz=None if zone is None else str(zone)))

    measure = datashape.CType.from_numpy_dtype(dtype)
    if measure == object_:
        measure = string
    if measure in possibly_missing:
        return Option(measure)
    return measure
def _check_datetime_field(name, measure):
    """Ensure that field ``name`` of ``measure`` is a date or datetime.

    Parameters
    ----------
    name : str
        The name of the field to check.
    measure : Record
        The record in which the field is looked up.

    Raises
    ------
    TypeError
        If ``measure[name]`` is neither a ``Date`` nor a ``DateTime``.
    """
    field_type = measure[name]
    if isinstance(field_type, (Date, DateTime)):
        return

    message = "'{name}' field must be a '{dt}', not: '{dshape}'".format(
        name=name,
        dt=DateTime(),
        dshape=field_type,
    )
    raise TypeError(message)
def test_datetimetz_pandas():
    """``discover`` reports the timezone of naive and tz-aware columns.

    Builds a five-row frame with one naive datetime column and one column
    per timezone, then checks the discovered datashape of the whole frame
    and of each individual column.
    """
    zones = ('Europe/Moscow', 'UTC', 'US/Eastern')

    columns = OrderedDict()
    columns['naive'] = pd.date_range('2014', periods=5)
    for zone in zones:
        # Each tz-aware column is named after its timezone.
        columns[zone] = pd.date_range('2014', periods=5, tz=zone)
    df = pd.DataFrame(columns)

    frame_shape = discover(df)
    expected_frame_shape = 5 * Record[
        'naive':Option(DateTime(tz=None)),
        'Europe/Moscow':Option(DateTime(tz='Europe/Moscow')),
        'UTC':Option(DateTime(tz='UTC')),
        'US/Eastern':Option(DateTime(tz='US/Eastern')),
    ]
    assert_dshape_equal(frame_shape, expected_frame_shape)

    # Individual columns: the naive one first, then every tz-aware one.
    assert_dshape_equal(discover(df.naive), 5 * Option(DateTime(tz=None)))
    for zone in zones:
        assert_dshape_equal(
            discover(df[zone]),
            5 * Option(DateTime(tz=zone)),
        )
def test_datetime_index(tz):
    """``discover`` on a three-element ``DatetimeIndex`` keeps its timezone.

    ``tz`` is presumably supplied by a fixture or parametrization defined
    outside this block -- confirm against the surrounding test module.
    """
    dates = ['2014-01-01', '2014-01-02', '2014-01-03']
    index = pd.DatetimeIndex(dates, tz=tz)
    assert_dshape_equal(discover(index), 3 * Option(DateTime(tz=tz)))