def get_torate(universe, start_time, end_time):
    """
    Turnover rate, computed as the day's traded volume divided by float shares.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: result of the element-wise division, restricted to the universe
        (presumably a pd.DataFrame indexed by time - confirm against query()).
    """
    period = (start_time, end_time)
    # Numerator is queried first, then the denominator, as in the division.
    turnover = query('TO_VOLUME', period) / query('FLOAT_SHARE', period)
    turnover = turnover.loc[:, sorted(universe)]
    assert checkdata_completeness(turnover, start_time, end_time), "Error, data missed!"
    return turnover
def inner(universe, start_time, end_time):
    """
    Adjusted price: the queried `price_type` series multiplied by 'ADJ_FACTOR'.

    `price_type` is a free variable supplied by the enclosing scope.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: element-wise product of the adjustment factor and the price data,
        restricted to the universe.

    Raises
    ------
    AssertionError
        If the two queried series differ in length, or if the completeness
        check on the result fails.
    """
    period = (start_time, end_time)
    adj_factor = query('ADJ_FACTOR', period)
    price = query(price_type, period)
    factor_len, price_len = len(adj_factor), len(price)
    # The message names the actual price series instead of always saying
    # "close", which was misleading for any other price_type.
    assert factor_len == price_len, (
        "Error, basic data length does not match! "
        + "adj_factor data = {af_len}, while {ptype} data = {pd_len}".format(
            af_len=factor_len, ptype=price_type, pd_len=price_len)
    )
    res = adj_factor * price
    res = res.loc[:, sorted(universe)]
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def _inner(universe, start_time, end_time):
    """
    Market value: the queried share-count series multiplied by 'CLOSE'.

    `share_factor_name` is a free variable supplied by the enclosing scope and
    selects which share series is queried.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: element-wise product of share data and close data, restricted to the
        universe.
    """
    period = (start_time, end_time)
    shares = query(share_factor_name, period)
    closes = query('CLOSE', period)
    n_shares, n_closes = len(shares), len(closes)
    # Both inputs must cover the same number of rows before multiplying.
    assert n_shares == n_closes, (
        "Error, basic data length does not match! "
        + "share data = {sd_len}, while close data = {cd_len}".format(
            sd_len=n_shares, cd_len=n_closes)
    )
    mkt_value = (shares * closes).loc[:, sorted(universe)]
    assert checkdata_completeness(mkt_value, start_time, end_time), "Error, data missed!"
    return mkt_value
def get_valid_mask(start_time, end_time):
    """
    Mask telling whether data is valid over the given period (start inclusive).

    Parameter
    ---------
    start_time: datetime like
        Start of the period.
    end_time: datetime like
        End of the period.

    Return
    ------
    out: pd.DataFrame
        Index is time, columns are stock codes.

    Notes
    -----
    Validity is derived from 'LIST_STATUS': an entry is True only when the
    status equals 1 or 2. Stocks that are delisted or whose listing has been
    terminated (statuses 3 and 4) are treated as invalid, i.e. False.
    """
    from fmanager.factors.query import query
    status = query('LIST_STATUS', (start_time, end_time))
    is_status_one = status == 1
    is_status_two = status == 2
    return np.logical_or(is_status_one, is_status_two)
def get_lntotalmktv(universe, start_time, end_time):
    """
    Natural logarithm of total market value.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: np.log of the 'TOTAL_MKTVALUE' data, restricted to the universe.
    """
    log_mktv = np.log(query('TOTAL_MKTVALUE', (start_time, end_time)))
    log_mktv = log_mktv.loc[:, sorted(universe)]
    assert checkdata_completeness(log_mktv, start_time, end_time), "Error, data missed!"
    return log_mktv
def get_lnfloatmktv(universe, start_time, end_time):
    """
    Natural logarithm of float (tradable) market value.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: np.log of the 'FLOAT_MKTVALUE' data, restricted to the universe.
    """
    log_mktv = np.log(query('FLOAT_MKTVALUE', (start_time, end_time)))
    log_mktv = log_mktv.loc[:, sorted(universe)]
    assert checkdata_completeness(log_mktv, start_time, end_time), "Error, data missed!"
    return log_mktv
def get_dailyret(universe, start_time, end_time):
    """
    Daily return, computed from the backward-adjusted close price.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        First date to include in the result.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: row-over-row percentage change of 'ADJ_CLOSE', restricted to rows at
        or after start_time and to the universe's columns.
    """
    # Normalize once so the lookback arithmetic and the index comparison use
    # the same Timestamp, matching how get_avgtorate treats start_time.
    start_time = pd.to_datetime(start_time)
    # Query 30 calendar days earlier so the first requested row has a prior
    # observation for pct_change.
    lookback_start = start_time - pd.Timedelta('30 day')
    adj_close = query('ADJ_CLOSE', (lookback_start, end_time))
    returns = adj_close.pct_change()
    in_window = returns.index >= start_time
    returns = returns.loc[in_window, sorted(universe)]
    assert checkdata_completeness(returns, start_time, end_time), "Error, data missed!"
    return returns
def get_avgtorate(universe, start_time, end_time):
    """
    Average turnover rate over the past 20 trading days.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        First date to include in the result.
    end_time: datetime like
        Last date to include in the result.

    Return
    ------
    out: 20-row rolling mean of 'TO_RATE' (a full window is required), with
        all-NaN rows dropped, restricted to [start_time, end_time] and to the
        universe's columns.
    """
    start_time = pd.to_datetime(start_time)
    # Query 60 calendar days earlier to feed the 20-row rolling window.
    lookback_start = start_time - pd.Timedelta('60 day')
    turnover = query('TO_RATE', (lookback_start, end_time))
    rolling_mean = turnover.rolling(20, min_periods=20).mean()
    rolling_mean = rolling_mean.dropna(how='all')
    dates = rolling_mean.index
    in_window = (dates >= start_time) & (dates <= end_time)
    result = rolling_mean.loc[in_window, sorted(universe)]
    # The first update starts from START_TIME and inevitably has missing data,
    # so completeness is only checked for later start times.
    if start_time > pd.to_datetime(START_TIME):
        assert checkdata_completeness(result, start_time, end_time), "Error, data missed!"
    return result
def inner(universe, start_time, end_time):
    """
    Log of the rolling turnover sum over `offset` rows, divided by `month_num`.

    `offset` and `month_num` are free variables supplied by the enclosing
    scope.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        First date to include in the result.
    end_time: datetime like
        Last date to include in the result.

    Return
    ------
    out: np.log(rolling_sum / month_num), restricted to [start_time, end_time]
        and to the universe's columns. Rolling sums at or below the threshold
        become NaN.
    """
    start_time = pd.to_datetime(start_time)
    # Note: 10e-6 equals 1e-5. Sums at or below it are blanked so the log is
    # not taken of (near-)zero turnover.
    threshold = 10e-6
    # Presumably moves start_time back by `offset` trading days so the first
    # requested row has a full window - confirm against dateshandle.tds_shift.
    new_start = dateshandle.tds_shift(start_time, offset)
    daily_torate = query('TO_RATE', (new_start, end_time))
    data = daily_torate.rolling(offset, min_periods=offset).sum().dropna(how='all')
    # np.nan rather than np.NaN: the capitalized alias was removed in NumPy 2.0.
    data[data <= threshold] = np.nan
    data = data / month_num
    data = np.log(data)
    mask = (data.index >= start_time) & (data.index <= end_time)
    data = data.loc[mask, sorted(universe)]
    # The first update starts from START_TIME and inevitably has missing data,
    # so completeness is only checked for later start times.
    if start_time > pd.to_datetime(START_TIME):
        assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
    return data
def get_nonlinearmktv(universe, start_time, end_time):
    """
    Non-linear market value: the cube of 'LN_TMKV'.

    The result is not orthogonalized against the market-value data itself.

    Parameter
    ---------
    universe: iterable
        Column labels to keep; the result's columns follow sorted(universe).
    start_time: datetime like
        Start of the query period.
    end_time: datetime like
        End of the query period.

    Return
    ------
    out: np.power of the 'LN_TMKV' data to the third, restricted to the
        universe.
    """
    # NOTE: a variant that took the per-row OLS residual of the cubed value on
    # the value itself (i.e. orthogonalized) was previously sketched here but
    # is not used; the plain cube is returned.
    ln_size = query('LN_TMKV', (start_time, end_time))
    cubed = np.power(ln_size, 3).loc[:, sorted(universe)]
    assert checkdata_completeness(cubed, start_time, end_time), "Error, data missed!"
    return cubed