def update_factor_details(factor, entity_type, entity, levels, columns, trader_index, schema_name):
    if factor and entity_type and entity and levels:
        sub_df = None
        # add sub graph
        if columns:
            if type(columns) == str:
                columns = [columns]
            columns = columns + ['entity_id', 'timestamp']
            schema: Mixin = get_schema_by_name(name=schema_name)
            sub_df = schema.query_data(entity_id=entity, columns=columns)

        # add trading signals as annotation
        annotation_df = None
        if trader_index is not None:
            order_reader = order_readers[trader_index]
            annotation_df = order_reader.data_df.copy()
            annotation_df = annotation_df[annotation_df.entity_id == entity].copy()
            if pd_is_not_null(annotation_df):
                annotation_df['value'] = annotation_df['order_price']
                annotation_df['flag'] = annotation_df['order_type'].apply(lambda x: order_type_flag(x))
                annotation_df['color'] = annotation_df['order_type'].apply(lambda x: order_type_color(x))
                print(annotation_df.tail())

        if type(levels) is list and len(levels) >= 2:
            levels.sort()
            drawers = []
            for level in levels:
                drawers.append(
                    zvt_context.factor_cls_registry[factor](
                        entity_schema=zvt_context.entity_schema_map[entity_type],
                        level=level,
                        entity_ids=[entity]).drawer())
            stacked = StackedDrawer(*drawers)

            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=stacked.draw_kline(show=False, height=900))
        else:
            if type(levels) is list:
                level = levels[0]
            else:
                level = levels
            drawer = zvt_context.factor_cls_registry[factor](
                entity_schema=zvt_context.entity_schema_map[entity_type],
                level=level,
                entity_ids=[entity],
                need_persist=False).drawer()
            if pd_is_not_null(sub_df):
                drawer.add_sub_df(sub_df)
            if pd_is_not_null(annotation_df):
                drawer.annotation_df = annotation_df

            return dcc.Graph(id=f'{factor}-{entity_type}-{entity}',
                             figure=drawer.draw_kline(show=False, height=800))
    raise dash.PreventUpdate()
def on_time(self, timestamp: pd.Timestamp):
    recent_report_date = to_pd_timestamp(get_recent_report_date(timestamp))
    if self.finish_date and is_same_date(recent_report_date, self.finish_date):
        return
    filters = [
        StockActorSummary.actor_type == ActorType.raised_fund.value,
        StockActorSummary.report_date == recent_report_date,
    ]

    if self.entity_ids:
        filters = filters + [StockActorSummary.entity_id.in_(self.entity_ids)]

    df = StockActorSummary.query_data(filters=filters)

    if pd_is_not_null(df):
        self.logger.info(f'{df}')
        self.finish_date = recent_report_date

        long_df = df[df['change_ratio'] > 0.05]
        short_df = df[df['change_ratio'] < -0.5]
        try:
            self.trade_the_targets(
                due_timestamp=timestamp,
                happen_timestamp=timestamp,
                long_selected=set(long_df['entity_id'].to_list()),
                short_selected=set(short_df['entity_id'].to_list()))
        except Exception as e:
            self.logger.error(e)
def init_entities(self):
    """
    init the entities which we would record data for
    """
    if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
        self.entity_session = self.session
    else:
        self.entity_session = get_db_session(provider=self.entity_provider, data_schema=self.entity_schema)

    filters = None
    if self.day_data:
        df = self.data_schema.query_data(
            start_timestamp=now_time_str(),
            columns=['entity_id', 'timestamp'],
            provider=self.provider)
        if pd_is_not_null(df):
            entity_ids = df['entity_id'].tolist()
            self.logger.info(f'ignore entity_ids:{entity_ids}')
            filters = [self.entity_schema.entity_id.notin_(entity_ids)]

    # init the entity list
    self.entities = get_entities(
        session=self.entity_session,
        entity_schema=self.entity_schema,
        entity_type=self.entity_type,
        exchanges=self.exchanges,
        entity_ids=self.entity_ids,
        codes=self.codes,
        return_type='domain',
        provider=self.entity_provider,
        filters=filters)
def tag(self, timestamp):
    df = get_recent_report(
        data_schema=StockActorSummary,
        timestamp=timestamp,
        filters=[StockActorSummary.actor_type == ActorType.raised_fund.value],
    )
    if not pd_is_not_null(df):
        logger.error(f"no StockActorSummary data at {timestamp}")
        return
    df = df.set_index("entity_id")

    fund_love_ids = df[(df["holding_ratio"] >= 0.05) & (df["change_ratio"] >= -0.3)].index.tolist()
    fund_not_care_ids = df[(df["holding_ratio"] < 0.05) | (df["change_ratio"] < -0.3)].index.tolist()

    fund_love_domains = self.get_tag_domains(
        entity_ids=fund_love_ids, timestamp=timestamp, actor_tag=ActorTag.fund_love.value)
    fund_not_care_domains = self.get_tag_domains(
        entity_ids=fund_not_care_ids, timestamp=timestamp, actor_tag=ActorTag.fund_not_care.value)

    self.session.add_all(fund_love_domains)
    self.session.add_all(fund_not_care_domains)
    self.session.commit()
def get_performance_stats(
    entity_type="stock",
    start_timestamp=None,
    end_timestamp=None,
    adjust_type: Union[AdjustType, str] = None,
    data_provider=None,
    changes=((-1, -0.5), (-0.5, -0.2), (-0.2, 0), (0, 0.2), (0.2, 0.5), (0.5, 1), (1, 1000)),
):
    if not adjust_type:
        adjust_type = default_adjust_type(entity_type=entity_type)
    data_schema = get_kdata_schema(entity_type=entity_type, adjust_type=adjust_type)

    score_df, _ = get_top_entities(
        data_schema=data_schema,
        column="close",
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        pct=1,
        method=WindowMethod.change,
        return_type=TopType.positive,
        data_provider=data_provider,
    )

    if pd_is_not_null(score_df):
        result = {}
        for change in changes:
            range_start = change[0]
            range_end = change[1]
            key = f"pct_{range_start}_{range_end}"
            df = score_df[(score_df["score"] >= range_start) & (score_df["score"] < range_end)]
            result[key] = len(df)
        return result
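# Hedged usage sketch (illustrative, not from the original source): bucket one year of
# per-stock close-price changes with get_performance_stats defined above. The date range
# and the interpretation in the comment are assumptions for demonstration only.
def _example_performance_stats():
    stats = get_performance_stats(
        entity_type="stock",
        start_timestamp="2020-01-01",
        end_timestamp="2020-12-31",
    )
    # stats maps a bucket key such as "pct_0_0.2" to the number of entities whose
    # close-price change over the window fell inside [0, 0.2)
    print(stats)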
def record(self, entity, start, end, size, timestamps):
    all_df = None
    # Shanghai Composite Index
    if entity.code == '000001':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like('stock_sh%')])
    # Shenzhen Component Index
    elif entity.code == '399001':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like('stock_sz%')])
    # ChiNext Index
    elif entity.code == '399006':
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.code.like('300%')])

    if pd_is_not_null(all_df):
        g = all_df.groupby('timestamp')
        for timestamp, df in g:
            se = pd.Series({
                'id': "{}_{}".format(entity.id, to_time_str(timestamp)),
                'entity_id': entity.id,
                'timestamp': timestamp,
                'code': entity.code,
                'name': entity.name
            })
            for col in ['net_main_inflows', 'net_huge_inflows', 'net_big_inflows',
                        'net_medium_inflows', 'net_small_inflows']:
                se[col] = df[col].sum()
            for col in ['net_main_inflow_rate', 'net_huge_inflow_rate', 'net_big_inflow_rate',
                        'net_medium_inflow_rate', 'net_small_inflow_rate']:
                se[col] = df[col].sum() / len(df)

            index_df = se.to_frame().T

            self.logger.info(index_df)

            df_to_db(df=index_df, data_schema=self.data_schema, provider=self.provider,
                     force_update=self.force_update)

    return None
def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame, state: dict) -> (pd.DataFrame, dict):
    self.logger.info(f'acc_one:{entity_id}')
    if pd_is_not_null(acc_df):
        df = df[df.index > acc_df.index[-1]]
        if pd_is_not_null(df):
            self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
            acc_df = pd.concat([acc_df, df])
        else:
            self.logger.info('no need to compute')
            return acc_df, state
    else:
        acc_df = df

    for window in self.windows:
        col = 'ma{}'.format(window)
        self.indicators.append(col)

        ma_df = acc_df['close'].rolling(window=window, min_periods=window).mean()
        acc_df[col] = ma_df

    acc_df['live'] = (acc_df['ma5'] > acc_df['ma10']).apply(lambda x: live_or_dead(x))
    acc_df['distance'] = (acc_df['ma5'] - acc_df['ma10']) / acc_df['close']

    live = acc_df['live']
    acc_df['count'] = live * (live.groupby((live != live.shift()).cumsum()).cumcount() + 1)

    acc_df['bulk'] = (live != live.shift()).cumsum()
    area_df = acc_df[['distance', 'bulk']]
    acc_df['area'] = area_df.groupby('bulk').cumsum()

    for vol_window in self.vol_windows:
        col = 'vol_ma{}'.format(vol_window)
        self.indicators.append(col)

        vol_ma_df = acc_df['turnover'].rolling(window=vol_window, min_periods=vol_window).mean()
        acc_df[col] = vol_ma_df

    acc_df = acc_df.set_index('timestamp', drop=False)
    return acc_df, state
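# Hedged illustration (not part of the original factor): the count/bulk idiom used in
# acc_one above measures how long the current "live" state (ma5 > ma10) has persisted.
# A toy Series makes the grouping trick explicit; the values here are made up.
def _example_live_count():
    import pandas as pd

    live = pd.Series([1, 1, 0, 0, 0, 1, 1, 1])
    # bulk increments every time the state flips, so each consecutive run gets its own group id
    bulk = (live != live.shift()).cumsum()
    # cumcount() + 1 numbers the bars inside each run; multiplying by live zeroes out
    # the "dead" runs, which matches acc_df['count'] in acc_one
    count = live * (live.groupby(bulk).cumcount() + 1)
    print(pd.DataFrame({"live": live, "bulk": bulk, "count": count}))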
def record(self, entity, start, end, size, timestamps):
    df = get_kdata(entity_id=entity.id, limit=size, adjust_type=self.adjust_type)
    if pd_is_not_null(df):
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    else:
        self.logger.info(f'no kdata for {entity.id}')
def get_recent_report(data_schema: Type[Mixin], timestamp, entity_id=None, filters=None, max_step=2):
    i = 0
    while i < max_step:
        report_date = get_recent_report_date(the_date=timestamp, step=i)
        # build the report_date condition from the caller's filters on every round;
        # mutating filters itself would accumulate contradictory report_date conditions
        # and the fallback steps could never match
        if filters:
            current_filters = filters + [data_schema.report_date == to_pd_timestamp(report_date)]
        else:
            current_filters = [data_schema.report_date == to_pd_timestamp(report_date)]
        df = data_schema.query_data(entity_id=entity_id, filters=current_filters)
        if pd_is_not_null(df):
            return df
        i = i + 1
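# Hedged usage sketch (illustrative): fetch the latest available StockActorSummary report
# at a point in time, stepping back up to two report dates when the most recent one has
# not been published yet. StockActorSummary and the actor_type filter appear elsewhere in
# this code; the timestamp is an assumption for demonstration.
def _example_recent_report():
    df = get_recent_report(
        data_schema=StockActorSummary,
        timestamp="2021-06-30",
        filters=[StockActorSummary.actor_type == ActorType.raised_fund.value],
        max_step=2,
    )
    if pd_is_not_null(df):
        print(df.head())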
def drawer_annotation_df(self) -> Optional[pd.DataFrame]:
    def order_type_flag(df):
        return "<br>".join(df.tolist())

    if pd_is_not_null(self.player_df):
        annotation_df = self.player_df.copy()
        annotation_df["value"] = self.factor_df.loc[annotation_df.index]["close"]
        annotation_df["flag"] = annotation_df[["dep1", "dep2", "dep3", "dep4", "dep5"]].apply(
            lambda x: order_type_flag(x), axis=1)
        annotation_df["color"] = "#ff7f0e"
        return annotation_df
def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame, state: dict) -> (pd.DataFrame, dict):
    self.logger.info(f"acc_one:{entity_id}")
    if pd_is_not_null(acc_df):
        df = df[df.index > acc_df.index[-1]]
        if pd_is_not_null(df):
            self.logger.info(f'compute from {df.iloc[0]["timestamp"]}')
            acc_df = pd.concat([acc_df, df])
        else:
            self.logger.info("no need to compute")
            return acc_df, state
    else:
        acc_df = df

    for window in self.windows:
        col = "ma{}".format(window)
        self.indicators.append(col)

        ma_df = acc_df["close"].rolling(window=window, min_periods=window).mean()
        acc_df[col] = ma_df

    acc_df["live"] = (acc_df["ma5"] > acc_df["ma10"]).apply(lambda x: live_or_dead(x))
    acc_df["distance"] = (acc_df["ma5"] - acc_df["ma10"]) / acc_df["close"]

    live = acc_df["live"]
    acc_df["count"] = live * (live.groupby((live != live.shift()).cumsum()).cumcount() + 1)

    acc_df["bulk"] = (live != live.shift()).cumsum()
    area_df = acc_df[["distance", "bulk"]]
    acc_df["area"] = area_df.groupby("bulk").cumsum()

    for vol_window in self.vol_windows:
        col = "vol_ma{}".format(vol_window)
        self.indicators.append(col)

        vol_ma_df = acc_df["turnover"].rolling(window=vol_window, min_periods=vol_window).mean()
        acc_df[col] = vol_ma_df

    acc_df = acc_df.set_index("timestamp", drop=False)
    return acc_df, state
def show_month_performance():
    dfs = []
    for timestamp, df in got_top_performance_by_month(start_timestamp='2005-01-01', list_days=250):
        if pd_is_not_null(df):
            df = df.reset_index(drop=True)
            df['entity_id'] = 'stock_cn_performance'
            df['timestamp'] = timestamp
            dfs.append(df)

    all_df = pd.concat(dfs)
    print(all_df)
    drawer = Drawer(main_df=all_df)
    drawer.draw_scatter(show=True)
def tag(self, timestamp):
    for index_id in index_map_market_value:
        df = IndexStock.query_data(
            entity_id=index_id,
            start_timestamp=month_start_date(timestamp),
            end_timestamp=month_end_date(timestamp))
        if not pd_is_not_null(df):
            logger.error(f'no IndexStock data at {timestamp} for {index_id}')
            continue
        stock_tags = [
            self.get_tag_domain(entity_id=stock_id, timestamp=timestamp)
            for stock_id in df['stock_id'].tolist()
        ]
        for stock_tag in stock_tags:
            stock_tag.market_value_tag = index_map_market_value.get(index_id).value
        self.session.add_all(stock_tags)
        self.session.commit()
def get_entity_list_by_cap(timestamp, cap_start, cap_end, entity_type="stock", provider=None,
                           adjust_type=None, retry_times=20):
    if not adjust_type:
        adjust_type = default_adjust_type(entity_type=entity_type)

    kdata_schema = get_kdata_schema(entity_type, level=IntervalLevel.LEVEL_1DAY, adjust_type=adjust_type)
    df = kdata_schema.query_data(
        provider=provider,
        filters=[kdata_schema.timestamp == to_pd_timestamp(timestamp)],
        index="entity_id",
    )
    if pd_is_not_null(df):
        df["cap"] = df["turnover"] / df["turnover_rate"]
        df_result = df.copy()
        if cap_start:
            df_result = df_result.loc[(df["cap"] >= cap_start)]
        if cap_end:
            df_result = df_result.loc[(df["cap"] <= cap_end)]
        return df_result.index.tolist()
    else:
        if retry_times == 0:
            return []
        return get_entity_list_by_cap(
            timestamp=next_date(timestamp, 1),
            cap_start=cap_start,
            cap_end=cap_end,
            entity_type=entity_type,
            provider=provider,
            adjust_type=adjust_type,
            retry_times=retry_times - 1,
        )
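# Hedged usage sketch (illustrative): list stock entity ids whose estimated market cap
# (turnover / turnover_rate on that day, as computed above) lies between two bounds.
# The timestamp and the cap bounds, and their currency unit, are assumptions.
def _example_entities_by_cap():
    entity_ids = get_entity_list_by_cap(
        timestamp="2021-06-30",
        cap_start=10_000_000_000,
        cap_end=100_000_000_000,
    )
    print(len(entity_ids), entity_ids[:5])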
def init_entities(self):
    """
    init the entities which we would record data for
    """
    if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
        self.entity_session = self.session
    else:
        self.entity_session = get_db_session(provider=self.entity_provider, data_schema=self.entity_schema)

    if self.day_data:
        df = self.data_schema.query_data(
            start_timestamp=now_time_str(), columns=["entity_id", "timestamp"], provider=self.provider)
        if pd_is_not_null(df):
            entity_ids = df["entity_id"].tolist()
            self.logger.info(f"ignore entity_ids:{entity_ids}")
            if self.entity_filters:
                self.entity_filters.append(self.entity_schema.entity_id.notin_(entity_ids))
            else:
                self.entity_filters = [self.entity_schema.entity_id.notin_(entity_ids)]

    #: init the entity list
    self.entities = get_entities(
        session=self.entity_session,
        entity_schema=self.entity_schema,
        exchanges=self.exchanges,
        entity_ids=self.entity_ids,
        codes=self.codes,
        return_type="domain",
        provider=self.entity_provider,
        filters=self.entity_filters,
    )
def update_factor_details(factor, entity_type, code, levels, columns, schema_name):
    if factor and entity_type and code and levels:
        sub_df = None
        if columns:
            if type(columns) == str:
                columns = [columns]
            columns = columns + ['entity_id', 'timestamp']
            schema: Mixin = get_schema_by_name(name=schema_name)
            sub_df = schema.query_data(code=code, columns=columns)

        if type(levels) is list and len(levels) >= 2:
            levels.sort()
            drawers = []
            for level in levels:
                drawers.append(
                    zvt_context.factor_cls_registry[factor](
                        entity_schema=zvt_context.entity_schema_map[entity_type],
                        level=level,
                        codes=[code]).drawer())
            stacked = StackedDrawer(*drawers)

            return dcc.Graph(id=f'{factor}-{entity_type}-{code}',
                             figure=stacked.draw_kline(show=False, height=900))
        else:
            if type(levels) is list:
                level = levels[0]
            else:
                level = levels
            drawer = zvt_context.factor_cls_registry[factor](
                entity_schema=zvt_context.entity_schema_map[entity_type],
                level=level,
                codes=[code],
                need_persist=False).drawer()
            if pd_is_not_null(sub_df):
                drawer.add_sub_df(sub_df)

            return dcc.Graph(id=f'{factor}-{entity_type}-{code}',
                             figure=drawer.draw_kline(show=False, height=800))
    raise dash.PreventUpdate()
s = block_df["name"].value_counts() cycle_df = pd.DataFrame(columns=s.index, data=[s.tolist()]) cycle_df["entity_id"] = "stock_cn_industry" cycle_df["timestamp"] = timestamp drawer = Drawer(main_df=cycle_df) drawer.draw_pie(show=True) if __name__ == "__main__": df = get_performance_stats_by_month() print(df) dfs = [] for timestamp, _, df in get_top_performance_by_month( start_timestamp="2012-01-01", list_days=250): if pd_is_not_null(df): entity_ids = df.index.tolist() the_date = pre_month_end_date(timestamp) show_industry_composition(entity_ids=entity_ids, timestamp=timestamp) for entity_id in df.index: from zvt.utils.time_utils import month_end_date, pre_month_start_date end_date = month_end_date(pre_month_start_date(timestamp)) TechnicalFactor(entity_ids=[entity_id], end_timestamp=end_date).draw(show=True) # the __all__ is generated __all__ = [ "WindowMethod", "TopType",
def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame, state: dict) -> (pd.DataFrame, dict): self.logger.info(f"acc_one:{entity_id}") if pd_is_not_null(acc_df): df = df[df.index > acc_df.index[-1]] if pd_is_not_null(df): self.logger.info(f'compute from {df.iloc[0]["timestamp"]}') # 遍历的开始位置 start_index = len(acc_df) acc_df = pd.concat([acc_df, df]) zen_state = ZState(state) acc_df = acc_df.reset_index(drop=True) current_interval = acc_df.iloc[start_index - 1]["current_interval"] else: self.logger.info("no need to compute") return acc_df, state else: acc_df = df # 笔的底 acc_df["bi_di"] = False # 笔的顶 acc_df["bi_ding"] = False # 记录笔顶/底分型的值,bi_di取low,bi_ding取high,其他为None,绘图时取有值的连线即为 笔 acc_df["bi_value"] = np.NAN # 笔的变化 acc_df["bi_change"] = np.NAN # 笔的斜率 acc_df["bi_slope"] = np.NAN # 持续的周期 acc_df["bi_interval"] = np.NAN # 记录临时分型,不变 acc_df["tmp_ding"] = False acc_df["tmp_di"] = False # 分型的力度 acc_df["fenxing_power"] = np.NAN # 目前分型确定的方向 acc_df["current_direction"] = None acc_df["current_change"] = np.NAN acc_df["current_interval"] = np.NAN acc_df["current_slope"] = np.NAN # 最近的一个笔中枢 # acc_df['current_zhongshu'] = np.NAN acc_df["current_zhongshu_change"] = np.NAN acc_df["current_zhongshu_y0"] = np.NAN acc_df["current_zhongshu_y1"] = np.NAN # 目前走势的临时方向 其跟direction的的关系 确定了下一个分型 acc_df["tmp_direction"] = None acc_df["opposite_change"] = np.NAN acc_df["opposite_interval"] = np.NAN acc_df["opposite_slope"] = np.NAN acc_df["duan_state"] = "yi" # 段的底 acc_df["duan_di"] = False # 段的顶 acc_df["duan_ding"] = False # 记录段顶/底的值,为duan_di时取low,为duan_ding时取high,其他为None,绘图时取有值的连线即为 段 acc_df["duan_value"] = np.NAN # 段的变化 acc_df["duan_change"] = np.NAN # 段的斜率 acc_df["duan_slope"] = np.NAN # 持续的周期 acc_df["duan_interval"] = np.NAN # 记录在确定中枢的最后一个段的终点x1,值为Rect(x0,y0,x1,y1) acc_df["zhongshu"] = None acc_df["zhongshu_change"] = np.NAN acc_df["bi_zhongshu"] = None acc_df["bi_zhongshu_change"] = np.NAN acc_df = acc_df.reset_index(drop=True) zen_state = ZState( dict( fenxing_list=[], direction=None, can_fenxing=None, can_fenxing_index=None, opposite_count=0, current_duan_state="yi", duans=[], pre_bi=None, pre_duan=None, )) zen_state.fenxing_list: List[Fenxing] = [] # 取前11条k线,至多出现一个顶分型+底分型 # 注:只是一种方便的确定第一个分型的办法,有了第一个分型,后面的处理就比较统一 # start_index 为遍历开始的位置 # direction为一个确定分型后的方向,即顶分型后为:down,底分型后为:up fenxing, start_index, direction, current_interval = handle_first_fenxing( acc_df, step=11) if not fenxing: return None, None zen_state.fenxing_list.append(fenxing) zen_state.direction = direction # list of (timestamp,value) zen_state.duans = [] zen_state.bis = [] pre_kdata = acc_df.iloc[start_index - 1] pre_index = start_index - 1 tmp_direction = zen_state.direction current_zhongshu = None current_zhongshu_change = None for index, kdata in acc_df.iloc[start_index:].iterrows(): # print(f'timestamp: {kdata.timestamp}') # 临时方向 tmp_direction = get_direction(kdata, pre_kdata, current=tmp_direction) # current states current_interval = current_interval + 1 if zen_state.direction == Direction.up: pre_value = acc_df.loc[zen_state.fenxing_list[0].index, "low"] current_value = kdata["high"] else: pre_value = acc_df.loc[zen_state.fenxing_list[0].index, "high"] current_value = kdata["low"] acc_df.loc[index, "current_direction"] = zen_state.direction.value acc_df.loc[index, "current_interval"] = current_interval change = (current_value - pre_value) / pre_value acc_df.loc[index, "current_change"] = change acc_df.loc[index, "current_slope"] = change / current_interval if current_zhongshu: # acc_df.loc[index, 'current_zhongshu'] = 
current_zhongshu acc_df.loc[index, "current_zhongshu_y0"] = current_zhongshu.y0 acc_df.loc[index, "current_zhongshu_y1"] = current_zhongshu.y1 acc_df.loc[index, "current_zhongshu_change"] = current_zhongshu_change else: # acc_df.loc[index, 'current_zhongshu'] = acc_df.loc[index - 1, 'current_zhongshu'] acc_df.loc[index, "current_zhongshu_y0"] = acc_df.loc[ index - 1, "current_zhongshu_y0"] acc_df.loc[index, "current_zhongshu_y1"] = acc_df.loc[ index - 1, "current_zhongshu_y1"] acc_df.loc[index, "current_zhongshu_change"] = acc_df.loc[ index - 1, "current_zhongshu_change"] # 处理包含关系 handle_including( one_df=acc_df, index=index, kdata=kdata, pre_index=pre_index, pre_kdata=pre_kdata, tmp_direction=tmp_direction, ) # 根据方向,寻找对应的分型 和 段 if zen_state.direction == Direction.up: tmp_fenxing_col = "tmp_ding" fenxing_col = "bi_ding" else: tmp_fenxing_col = "tmp_di" fenxing_col = "bi_di" # 方向一致,延续中 if tmp_direction == zen_state.direction: zen_state.opposite_count = 0 # 反向,寻找反 分型 else: zen_state.opposite_count = zen_state.opposite_count + 1 # opposite states current_interval = zen_state.opposite_count if tmp_direction == Direction.up: pre_value = acc_df.loc[index - zen_state.opposite_count, "low"] current_value = kdata["high"] else: pre_value = acc_df.loc[index - zen_state.opposite_count, "high"] current_value = kdata["low"] acc_df.loc[index, "tmp_direction"] = tmp_direction.value acc_df.loc[index, "opposite_interval"] = current_interval change = (current_value - pre_value) / pre_value acc_df.loc[index, "opposite_change"] = change acc_df.loc[index, "opposite_slope"] = change / current_interval # 第一次反向 if zen_state.opposite_count == 1: acc_df.loc[pre_index, tmp_fenxing_col] = True acc_df.loc[pre_index, "fenxing_power"] = fenxing_power( acc_df.loc[pre_index - 1], pre_kdata, kdata, fenxing=tmp_fenxing_col) if zen_state.can_fenxing is not None: # 候选底分型 if tmp_direction == Direction.up: # 取小的 if pre_kdata["low"] <= zen_state.can_fenxing["low"]: zen_state.can_fenxing = pre_kdata[[ "low", "high" ]] zen_state.can_fenxing_index = pre_index # 候选顶分型 else: # 取大的 if pre_kdata["high"] >= zen_state.can_fenxing[ "high"]: zen_state.can_fenxing = pre_kdata[[ "low", "high" ]] zen_state.can_fenxing_index = pre_index else: zen_state.can_fenxing = pre_kdata[["low", "high"]] zen_state.can_fenxing_index = pre_index # 分型确立 if zen_state.can_fenxing is not None: if zen_state.opposite_count >= 4 or ( index - zen_state.can_fenxing_index >= 8): acc_df.loc[zen_state.can_fenxing_index, fenxing_col] = True # 记录笔的值 if fenxing_col == "bi_ding": bi_value = acc_df.loc[zen_state.can_fenxing_index, "high"] else: bi_value = acc_df.loc[zen_state.can_fenxing_index, "low"] acc_df.loc[zen_state.can_fenxing_index, "bi_value"] = bi_value # 计算笔斜率 if zen_state.pre_bi: change = (bi_value - zen_state.pre_bi[1] ) / zen_state.pre_bi[1] interval = zen_state.can_fenxing_index - zen_state.pre_bi[ 0] bi_slope = change / interval acc_df.loc[zen_state.can_fenxing_index, "bi_change"] = change acc_df.loc[zen_state.can_fenxing_index, "bi_slope"] = bi_slope acc_df.loc[zen_state.can_fenxing_index, "bi_interval"] = interval # 记录用于计算笔中枢的笔 zen_state.bis.append(( acc_df.loc[zen_state.can_fenxing_index, "timestamp"], bi_value, zen_state.can_fenxing_index, )) # 计算笔中枢,当下来说这个 中枢 是确定的,并且是不可变的 # 但标记的点为 过去,注意在回测时最近的一个中枢可能用到未来函数,前一个才是 已知的 # 所以记了一个 current_zhongshu_y0 current_zhongshu_y1 这个是可直接使用的 end_index = zen_state.can_fenxing_index ( zen_state.bis, current_zhongshu, current_zhongshu_change, current_zhongshu_interval, ) = handle_zhongshu( points=zen_state.bis, acc_df=acc_df, 
end_index=end_index, zhongshu_col="bi_zhongshu", zhongshu_change_col="bi_zhongshu_change", ) zen_state.pre_bi = (zen_state.can_fenxing_index, bi_value) zen_state.opposite_count = 0 zen_state.direction = zen_state.direction.opposite() zen_state.can_fenxing = None # 确定第一个段 if zen_state.fenxing_list != None: zen_state.fenxing_list.append( Fenxing( state=fenxing_col, kdata={ "low": float(acc_df.loc[ zen_state.can_fenxing_index] ["low"]), "high": float(acc_df.loc[ zen_state.can_fenxing_index] ["high"]), }, index=zen_state.can_fenxing_index, )) if len(zen_state.fenxing_list) == 4: duan_state = handle_duan( fenxing_list=zen_state.fenxing_list, pre_duan_state=zen_state.current_duan_state ) change = duan_state != zen_state.current_duan_state if change: zen_state.current_duan_state = duan_state # 确定状态 acc_df.loc[ zen_state.fenxing_list[0]. index:zen_state.fenxing_list[-1].index, "duan_state"] = zen_state.current_duan_state duan_index = zen_state.fenxing_list[ 0].index if zen_state.current_duan_state == "up": acc_df.loc[duan_index, "duan_di"] = True duan_value = acc_df.loc[duan_index, "low"] else: duan_index = zen_state.fenxing_list[ 0].index acc_df.loc[duan_index, "duan_ding"] = True duan_value = acc_df.loc[duan_index, "high"] # 记录段的值 acc_df.loc[duan_index, "duan_value"] = duan_value # 计算段斜率 if zen_state.pre_duan: change = (duan_value - zen_state.pre_duan[1] ) / zen_state.pre_duan[1] interval = duan_index - zen_state.pre_duan[ 0] duan_slope = change / interval acc_df.loc[duan_index, "duan_change"] = change acc_df.loc[duan_index, "duan_slope"] = duan_slope acc_df.loc[duan_index, "duan_interval"] = interval zen_state.pre_duan = (duan_index, duan_value) # 记录用于计算中枢的段 zen_state.duans.append( (acc_df.loc[duan_index, "timestamp"], duan_value, duan_index)) # 计算中枢 zen_state.duans, _, _, _ = handle_zhongshu( points=zen_state.duans, acc_df=acc_df, end_index=duan_index, zhongshu_col="zhongshu", zhongshu_change_col="zhongshu_change", ) # 只留最后一个 zen_state.fenxing_list = zen_state.fenxing_list[ -1:] else: # 保持之前的状态并踢出候选 acc_df.loc[ zen_state.fenxing_list[0].index, "duan_state"] = zen_state.current_duan_state zen_state.fenxing_list = zen_state.fenxing_list[ 1:] pre_kdata = kdata pre_index = index acc_df = acc_df.set_index("timestamp", drop=False) return acc_df, zen_state
def transform_one(self, entity_id, df: pd.DataFrame) -> pd.DataFrame:
    # track duan (segment) intervals per entity
    if entity_id not in self.entity_duan_intervals:
        self.entity_duan_intervals[entity_id] = []

    df = df.reset_index(drop=True)
    # bottom of a bi (stroke)
    df['bi_di'] = False
    # top of a bi (stroke)
    df['bi_ding'] = False

    # temporary fenxing (fractal) markers, never rewritten afterwards
    df['tmp_ding'] = False
    df['tmp_di'] = False

    df['duan_state'] = 'yi'
    # bottom of a duan (segment)
    df['duan_di'] = False
    # top of a duan (segment)
    df['duan_ding'] = False

    fenxing_list: List[Fenxing] = []

    # Use the first 11 k-lines, which contain at most one top fractal plus one bottom fractal.
    # Note: this is only a convenient way to pin down the first fractal; once it exists the
    # rest of the processing is uniform.
    # start_index is the position where iteration starts.
    # direction is the direction after a fractal is confirmed: down after a top, up after a bottom.
    fenxing, start_index, direction = handle_first_fenxing(df, step=11)
    fenxing_list.append(fenxing)
    # temporary direction
    tmp_direction = direction
    # candidate fenxing
    can_fenxing = None
    can_fenxing_index = None
    # count in the current direction
    count = 0
    # count in the opposite direction
    opposite_count = 0
    # current duan (segment) state
    current_duan_state = 'yi'

    pre_kdata = df.iloc[start_index - 1]
    pre_index = start_index - 1
    for index, kdata in df.iloc[start_index:].iterrows():
        # print(f'timestamp: {kdata.timestamp}')
        # temporary direction
        tmp_direction = get_direction(kdata, pre_kdata, current=tmp_direction)

        # handle the containing relationship between k-lines
        handle_including(one_df=df, index=index, kdata=kdata, pre_index=pre_index,
                         pre_kdata=pre_kdata, tmp_direction=tmp_direction)

        # depending on the direction, look for the matching fractal and segment
        if direction == Direction.up:
            tmp_fenxing_col = 'tmp_ding'
            fenxing_col = 'bi_ding'
        else:
            tmp_fenxing_col = 'tmp_di'
            fenxing_col = 'bi_di'

        # same direction, trend continues
        if tmp_direction == direction:
            opposite_count = 0
        # reversed, look for the opposite fractal
        else:
            opposite_count = opposite_count + 1
            # first reversal
            if opposite_count == 1:
                df.loc[pre_index, tmp_fenxing_col] = True

                if pd_is_not_null(can_fenxing):
                    # candidate bottom fractal
                    if tmp_direction == Direction.up:
                        # keep the lower one
                        if pre_kdata['low'] <= can_fenxing['low']:
                            can_fenxing = pre_kdata
                            can_fenxing_index = pre_index
                    # candidate top fractal
                    else:
                        # keep the higher one
                        if pre_kdata['high'] >= can_fenxing['high']:
                            can_fenxing = pre_kdata
                            can_fenxing_index = pre_index
                else:
                    can_fenxing = pre_kdata
                    can_fenxing_index = pre_index

            # fractal confirmed
            if pd_is_not_null(can_fenxing):
                if opposite_count >= 4 or (index - can_fenxing_index >= 8):
                    df.loc[can_fenxing_index, fenxing_col] = True
                    opposite_count = 0
                    direction = direction.opposite()
                    can_fenxing = None

                    # determine the first duan (segment)
                    if fenxing_list != None:
                        fenxing_list.append(
                            Fenxing(state=fenxing_col,
                                    kdata=df.loc[can_fenxing_index],
                                    index=can_fenxing_index))

                        if len(fenxing_list) == 4:
                            duan_state = handle_duan(fenxing_list=fenxing_list,
                                                     pre_duan_state=current_duan_state)

                            change = duan_state != current_duan_state

                            if change:
                                current_duan_state = duan_state

                                # mark the confirmed state
                                df.loc[fenxing_list[0].index:fenxing_list[-1].index,
                                       'duan_state'] = current_duan_state
                                if current_duan_state == 'up':
                                    df.loc[fenxing_list[0].index, 'duan_di'] = True
                                else:
                                    df.loc[fenxing_list[0].index, 'duan_ding'] = True
                                # only keep the last one
                                fenxing_list = fenxing_list[-1:]
                            else:
                                # keep the previous state and drop the candidate
                                df.loc[fenxing_list[0].index, 'duan_state'] = current_duan_state
                                fenxing_list = fenxing_list[1:]

        pre_kdata = kdata
        pre_index = index

    return df
def record(self, entity, start, end, size, timestamps):
    all_df = None
    # Shanghai Composite Index
    if entity.code == "000001":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like("stock_sh%")]
        )
    # Shenzhen Component Index
    elif entity.code == "399001":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.entity_id.like("stock_sz%")]
        )
    # ChiNext Index
    elif entity.code == "399006":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.code.like("300%")]
        )
    # STAR Market
    elif entity.code == "000688":
        all_df = StockMoneyFlow.query_data(
            provider=self.provider, start_timestamp=start,
            filters=[StockMoneyFlow.code.like("688%")]
        )

    if pd_is_not_null(all_df):
        g = all_df.groupby("timestamp")
        for timestamp, df in g:
            se = pd.Series(
                {
                    "id": "{}_{}".format(entity.id, to_time_str(timestamp)),
                    "entity_id": entity.id,
                    "timestamp": timestamp,
                    "code": entity.code,
                    "name": entity.name,
                }
            )
            for col in [
                "net_main_inflows",
                "net_huge_inflows",
                "net_big_inflows",
                "net_medium_inflows",
                "net_small_inflows",
            ]:
                se[col] = df[col].sum()
            for col in [
                "net_main_inflow_rate",
                "net_huge_inflow_rate",
                "net_big_inflow_rate",
                "net_medium_inflow_rate",
                "net_small_inflow_rate",
            ]:
                se[col] = df[col].sum() / len(df)

            index_df = se.to_frame().T

            self.logger.info(index_df)

            df_to_db(
                df=index_df, data_schema=self.data_schema, provider=self.provider,
                force_update=self.force_update
            )

    return None
def acc_one(self, entity_id, df: pd.DataFrame, acc_df: pd.DataFrame, state: dict) -> (pd.DataFrame, dict): self.logger.info(f'acc_one:{entity_id}') if pd_is_not_null(acc_df): df = df[df.index > acc_df.index[-1]] if pd_is_not_null(df): self.logger.info(f'compute from {df.iloc[0]["timestamp"]}') # 遍历的开始位置 start_index = len(acc_df) acc_df = pd.concat([acc_df, df]) zen_state = state acc_df = acc_df.reset_index(drop=True) else: self.logger.info('no need to compute') return acc_df, state else: acc_df = df # 笔的底 acc_df['bi_di'] = False # 笔的顶 acc_df['bi_ding'] = False # 记录笔顶/底分型的值,bi_di取low,bi_ding取high,其他为None,绘图时取有值的连线即为 笔 acc_df['bi_value'] = np.NAN # 记录临时分型,不变 acc_df['tmp_ding'] = False acc_df['tmp_di'] = False # 分型的力度 acc_df['fenxing_power'] = np.NAN acc_df['duan_state'] = 'yi' # 段的底 acc_df['duan_di'] = False # 段的顶 acc_df['duan_ding'] = False # 记录段顶/底的值,为duan_di时取low,为duan_ding时取high,其他为None,绘图时取有值的连线即为 段 acc_df['duan_value'] = np.NAN # 记录在确定中枢的最后一个段的终点x1,值为Rect(x0,y0,x1,y1) acc_df['zhongshu'] = np.NAN acc_df = acc_df.reset_index(drop=True) zen_state = ZenState( dict(fenxing_list=[], direction=None, can_fenxing=None, can_fenxing_index=None, opposite_count=0, current_duan_state='yi', duans=[], pre_bi=None, pre_duan=None)) zen_state.fenxing_list: List[Fenxing] = [] # 取前11条k线,至多出现一个顶分型+底分型 # 注:只是一种方便的确定第一个分型的办法,有了第一个分型,后面的处理就比较统一 # start_index 为遍历开始的位置 # direction为一个确定分型后的方向,即顶分型后为:down,底分型后为:up fenxing, start_index, direction = handle_first_fenxing(acc_df, step=11) if not fenxing: return None, None zen_state.fenxing_list.append(fenxing) zen_state.direction = direction # list of (timestamp,value) zen_state.duans = [] pre_kdata = acc_df.iloc[start_index - 1] pre_index = start_index - 1 tmp_direction = zen_state.direction for index, kdata in acc_df.iloc[start_index:].iterrows(): # print(f'timestamp: {kdata.timestamp}') # 临时方向 tmp_direction = get_direction(kdata, pre_kdata, current=tmp_direction) # 处理包含关系 handle_including(one_df=acc_df, index=index, kdata=kdata, pre_index=pre_index, pre_kdata=pre_kdata, tmp_direction=tmp_direction) # 根据方向,寻找对应的分型 和 段 if zen_state.direction == Direction.up: tmp_fenxing_col = 'tmp_ding' fenxing_col = 'bi_ding' else: tmp_fenxing_col = 'tmp_di' fenxing_col = 'bi_di' # 方向一致,延续中 if tmp_direction == zen_state.direction: zen_state.opposite_count = 0 # 反向,寻找反 分型 else: zen_state.opposite_count = zen_state.opposite_count + 1 # 第一次反向 if zen_state.opposite_count == 1: acc_df.loc[pre_index, tmp_fenxing_col] = True acc_df.loc[pre_index, 'fenxing_power'] = fenxing_power( acc_df.loc[pre_index - 1], pre_kdata, kdata, fenxing=tmp_fenxing_col) if pd_is_not_null(zen_state.can_fenxing): # 候选底分型 if tmp_direction == Direction.up: # 取小的 if pre_kdata['low'] <= zen_state.can_fenxing['low']: zen_state.can_fenxing = pre_kdata zen_state.can_fenxing_index = pre_index # 候选顶分型 else: # 取大的 if pre_kdata['high'] >= zen_state.can_fenxing[ 'high']: zen_state.can_fenxing = pre_kdata zen_state.can_fenxing_index = pre_index else: zen_state.can_fenxing = pre_kdata zen_state.can_fenxing_index = pre_index # 分型确立 if pd_is_not_null(zen_state.can_fenxing): if zen_state.opposite_count >= 4 or ( index - zen_state.can_fenxing_index >= 8): acc_df.loc[zen_state.can_fenxing_index, fenxing_col] = True # 记录笔的值 if fenxing_col == 'bi_ding': bi_value = acc_df.loc[zen_state.can_fenxing_index, 'high'] else: bi_value = acc_df.loc[zen_state.can_fenxing_index, 'low'] acc_df.loc[zen_state.can_fenxing_index, 'bi_value'] = bi_value zen_state.pre_bi = (zen_state.can_fenxing_index, bi_value) zen_state.opposite_count = 0 
zen_state.direction = zen_state.direction.opposite() zen_state.can_fenxing = None # 确定第一个段 if zen_state.fenxing_list != None: zen_state.fenxing_list.append( Fenxing(state=fenxing_col, kdata=acc_df.loc[ zen_state.can_fenxing_index, ['open', 'close', 'high', 'low']], index=zen_state.can_fenxing_index)) if len(zen_state.fenxing_list) == 4: duan_state = handle_duan( fenxing_list=zen_state.fenxing_list, pre_duan_state=zen_state.current_duan_state ) change = duan_state != zen_state.current_duan_state if change: zen_state.current_duan_state = duan_state # 确定状态 acc_df.loc[ zen_state.fenxing_list[0]. index:zen_state.fenxing_list[-1].index, 'duan_state'] = zen_state.current_duan_state duan_index = zen_state.fenxing_list[ 0].index if zen_state.current_duan_state == 'up': acc_df.loc[duan_index, 'duan_di'] = True duan_value = acc_df.loc[duan_index, 'low'] else: duan_index = zen_state.fenxing_list[ 0].index acc_df.loc[duan_index, 'duan_ding'] = True duan_value = acc_df.loc[duan_index, 'high'] # 记录段的值 acc_df.loc[duan_index, 'duan_value'] = duan_value # 记录用于计算中枢的段 zen_state.duans.append( (acc_df.loc[duan_index, 'timestamp'], duan_value)) # 计算中枢 if len(zen_state.duans) == 4: x1 = zen_state.duans[0][0] x2 = zen_state.duans[3][0] if zen_state.duans[0][ 1] < zen_state.duans[1][1]: # 向下段 range = intersect( (zen_state.duans[0][1], zen_state.duans[1][1]), (zen_state.duans[2][1], zen_state.duans[3][1])) if range: y1, y2 = range # 记录中枢 acc_df.loc[duan_index, 'zhongshu'] = Rect( x0=x1, x1=x2, y0=y1, y1=y2) zen_state.duans = zen_state.duans[ -1:] else: zen_state.duans = zen_state.duans[ 1:] else: # 向上段 range = intersect( (zen_state.duans[1][1], zen_state.duans[0][1]), (zen_state.duans[3][1], zen_state.duans[2][1])) if range: y1, y2 = range # 记录中枢 acc_df.loc[duan_index, 'zhongshu'] = Rect( x0=x1, x1=x2, y0=y1, y1=y2) zen_state.duans = zen_state.duans[ -1:] else: zen_state.duans = zen_state.duans[ 1:] # 只留最后一个 zen_state.fenxing_list = zen_state.fenxing_list[ -1:] else: # 保持之前的状态并踢出候选 acc_df.loc[ zen_state.fenxing_list[0].index, 'duan_state'] = zen_state.current_duan_state zen_state.fenxing_list = zen_state.fenxing_list[ 1:] pre_kdata = kdata pre_index = index acc_df = acc_df.set_index('timestamp', drop=False) return acc_df, zen_state
def record(self, entity, start, end, size, timestamps):
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        df = get_money_flow(code=to_jq_entity_id(entity), date=start, end_date=to_time_str(self.end_timestamp))

    df = df.dropna()

    if pd_is_not_null(df):
        df["name"] = entity.name
        df.rename(
            columns={
                "date": "timestamp",
                "net_amount_main": "net_main_inflows",
                "net_pct_main": "net_main_inflow_rate",
                "net_amount_xl": "net_huge_inflows",
                "net_pct_xl": "net_huge_inflow_rate",
                "net_amount_l": "net_big_inflows",
                "net_pct_l": "net_big_inflow_rate",
                "net_amount_m": "net_medium_inflows",
                "net_pct_m": "net_medium_inflow_rate",
                "net_amount_s": "net_small_inflows",
                "net_pct_s": "net_small_inflow_rate",
            },
            inplace=True,
        )

        # convert to plain float
        inflows_cols = [
            "net_main_inflows",
            "net_huge_inflows",
            "net_big_inflows",
            "net_medium_inflows",
            "net_small_inflows",
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna()

        if not pd_is_not_null(df):
            return None

        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)

        inflow_rate_cols = [
            "net_main_inflow_rate",
            "net_huge_inflow_rate",
            "net_big_inflow_rate",
            "net_medium_inflow_rate",
            "net_small_inflow_rate",
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna()

        if not pd_is_not_null(df):
            return None

        df[inflow_rate_cols] = df[inflow_rate_cols].apply(lambda x: x / 100)

        # total net inflow
        df["net_inflows"] = (
            df["net_huge_inflows"] + df["net_big_inflows"] + df["net_medium_inflows"] + df["net_small_inflows"]
        )
        # total net inflow rate
        amount = df["net_main_inflows"] / df["net_main_inflow_rate"]
        df["net_inflow_rate"] = df["net_inflows"] / amount

        df["entity_id"] = entity.id
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["provider"] = "joinquant"
        df["code"] = entity.code

        def generate_kdata_id(se):
            return "{}_{}".format(se["entity_id"], to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))

        df["id"] = df[["entity_id", "timestamp"]].apply(generate_kdata_id, axis=1)

        df = df.drop_duplicates(subset="id", keep="last")

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

    return None
def get_top_entities(
    data_schema: Mixin,
    column: str,
    start_timestamp=None,
    end_timestamp=None,
    pct=0.1,
    method: WindowMethod = WindowMethod.change,
    return_type: TopType = None,
    kdata_filters=None,
    show_name=False,
    data_provider=None,
):
    """
    get top entities in specific domain between time range

    :param data_schema: schema in domain
    :param column: schema column
    :param start_timestamp:
    :param end_timestamp:
    :param pct: range (0,1]
    :param method:
    :param return_type:
    :param kdata_filters:
    :param show_name: show entity name
    :param data_provider:
    :return:
    """
    if type(method) == str:
        method = WindowMethod(method)

    if type(return_type) == str:
        return_type = TopType(return_type)

    if show_name:
        columns = ["entity_id", column, "name"]
    else:
        columns = ["entity_id", column]

    all_df = data_schema.query_data(
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        columns=columns,
        filters=kdata_filters,
        provider=data_provider,
    )
    if not pd_is_not_null(all_df):
        return None, None
    g = all_df.groupby("entity_id")
    tops = {}
    names = {}
    for entity_id, df in g:
        if method == WindowMethod.change:
            start = df[column].iloc[0]
            end = df[column].iloc[-1]
            if start != 0:
                change = (end - start) / abs(start)
            else:
                change = 0
            tops[entity_id] = change
        elif method == WindowMethod.avg:
            tops[entity_id] = df[column].mean()
        elif method == WindowMethod.sum:
            tops[entity_id] = df[column].sum()

        if show_name:
            names[entity_id] = df["name"].iloc[0]

    positive_df = None
    negative_df = None
    top_index = int(len(tops) * pct)

    if return_type is None or return_type == TopType.positive:
        # from big to small
        positive_tops = {k: v for k, v in sorted(tops.items(), key=lambda item: item[1], reverse=True)}
        positive_tops = dict(itertools.islice(positive_tops.items(), top_index))
        positive_df = pd.DataFrame.from_dict(positive_tops, orient="index")

        col = "score"
        positive_df.columns = [col]
        # keep the strongest performers on top (assign the sorted result back)
        positive_df = positive_df.sort_values(by=col, ascending=False)
    if return_type is None or return_type == TopType.negative:
        # from small to big
        negative_tops = {k: v for k, v in sorted(tops.items(), key=lambda item: item[1])}
        negative_tops = dict(itertools.islice(negative_tops.items(), top_index))
        negative_df = pd.DataFrame.from_dict(negative_tops, orient="index")

        col = "score"
        negative_df.columns = [col]
        # keep the weakest performers on top (assign the sorted result back)
        negative_df = negative_df.sort_values(by=col)

    if names:
        if pd_is_not_null(positive_df):
            positive_df["name"] = positive_df.index.map(lambda x: names[x])
        if pd_is_not_null(negative_df):
            negative_df["name"] = negative_df.index.map(lambda x: names[x])
    return positive_df, negative_df
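# Hedged usage sketch (illustrative): rank stocks by close-price change over a window and
# keep the top/bottom 5%. Stock1dHfqKdata is an assumed daily kdata schema; any Mixin-style
# schema with a "close" column would work the same way with get_top_entities above.
def _example_top_entities():
    positive_df, negative_df = get_top_entities(
        data_schema=Stock1dHfqKdata,
        column="close",
        start_timestamp="2021-01-01",
        end_timestamp="2021-06-30",
        pct=0.05,
        method=WindowMethod.change,
        return_type=None,
        show_name=True,
    )
    if pd_is_not_null(positive_df):
        print(positive_df.head())
    if pd_is_not_null(negative_df):
        print(negative_df.head())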
def record(self, entity, start, end, size, timestamps):
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        df = get_money_flow(code=to_jq_entity_id(entity),
                            date=start,
                            end_date=to_time_str(self.end_timestamp))

    df = df.dropna()

    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={
            'date': 'timestamp',
            'net_amount_main': 'net_main_inflows',
            'net_pct_main': 'net_main_inflow_rate',
            'net_amount_xl': 'net_huge_inflows',
            'net_pct_xl': 'net_huge_inflow_rate',
            'net_amount_l': 'net_big_inflows',
            'net_pct_l': 'net_big_inflow_rate',
            'net_amount_m': 'net_medium_inflows',
            'net_pct_m': 'net_medium_inflow_rate',
            'net_amount_s': 'net_small_inflows',
            'net_pct_s': 'net_small_inflow_rate'
        }, inplace=True)

        # convert to plain float
        inflows_cols = [
            'net_main_inflows', 'net_huge_inflows', 'net_big_inflows',
            'net_medium_inflows', 'net_small_inflows'
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df = df.dropna()

        if not pd_is_not_null(df):
            return None

        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)

        inflow_rate_cols = [
            'net_main_inflow_rate', 'net_huge_inflow_rate', 'net_big_inflow_rate',
            'net_medium_inflow_rate', 'net_small_inflow_rate'
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df = df.dropna()

        if not pd_is_not_null(df):
            return None

        df[inflow_rate_cols] = df[inflow_rate_cols].apply(lambda x: x / 100)

        # total net inflow
        df['net_inflows'] = (df['net_huge_inflows'] + df['net_big_inflows']
                             + df['net_medium_inflows'] + df['net_small_inflows'])
        # total net inflow rate
        amount = df['net_main_inflows'] / df['net_main_inflow_rate']
        df['net_inflow_rate'] = df['net_inflows'] / amount

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_kdata_id(se):
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        df = df.drop_duplicates(subset='id', keep='last')

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def transform_one(self, entity_id, df: pd.DataFrame) -> pd.DataFrame: # 记录段区间 if entity_id not in self.entity_duan_intervals: self.entity_duan_intervals[entity_id] = [] df = df.reset_index(drop=True) # 笔的底 df['bi_di'] = False # 笔的顶 df['bi_ding'] = False # 记录临时分型,不变 df['tmp_ding'] = False df['tmp_di'] = False df['duan_state'] = 'yi' # 段的底 df['duan_di'] = False # 段的顶 df['duan_ding'] = False # 记录段顶/底的值,为duan_di时取low,为duan_ding时取high,其他为None,绘图时取有值的连线即为 段 df['duan_value'] = np.NAN # 记录在确定中枢的最后一个段的终点x1,值为Rect(x0,y0,x1,y1) df['zhongshu'] = None fenxing_list: List[Fenxing] = [] # 取前11条k线,至多出现一个顶分型+底分型 # 注:只是一种方便的确定第一个分型的办法,有了第一个分型,后面的处理就比较统一 # start_index 为遍历开始的位置 # direction为一个确定分型后的方向,即顶分型后为:down,底分型后为:up fenxing, start_index, direction = handle_first_fenxing(df, step=11) fenxing_list.append(fenxing) # 临时方向 tmp_direction = direction # 候选分型(candidate) can_fenxing = None can_fenxing_index = None # 正向count count = 0 # 反方向count opposite_count = 0 # 目前段的方向 current_duan_state = 'yi' pre_kdata = df.iloc[start_index - 1] pre_index = start_index - 1 # list of (timestamp,value) duans = [] for index, kdata in df.iloc[start_index:].iterrows(): # print(f'timestamp: {kdata.timestamp}') # 临时方向 tmp_direction = get_direction(kdata, pre_kdata, current=tmp_direction) # 处理包含关系 handle_including(one_df=df, index=index, kdata=kdata, pre_index=pre_index, pre_kdata=pre_kdata, tmp_direction=tmp_direction) # 根据方向,寻找对应的分型 和 段 if direction == Direction.up: tmp_fenxing_col = 'tmp_ding' fenxing_col = 'bi_ding' else: tmp_fenxing_col = 'tmp_di' fenxing_col = 'bi_di' # 方向一致,延续中 if tmp_direction == direction: opposite_count = 0 # 反向,寻找反 分型 else: opposite_count = opposite_count + 1 # 第一次反向 if opposite_count == 1: df.loc[pre_index, tmp_fenxing_col] = True if pd_is_not_null(can_fenxing): # 候选底分型 if tmp_direction == Direction.up: # 取小的 if pre_kdata['low'] <= can_fenxing['low']: can_fenxing = pre_kdata can_fenxing_index = pre_index # 候选顶分型 else: # 取大的 if pre_kdata['high'] >= can_fenxing['high']: can_fenxing = pre_kdata can_fenxing_index = pre_index else: can_fenxing = pre_kdata can_fenxing_index = pre_index # 分型确立 if pd_is_not_null(can_fenxing): if opposite_count >= 4 or (index - can_fenxing_index >= 8): df.loc[can_fenxing_index, fenxing_col] = True # 记录笔的值 if fenxing_col == 'bi_ding': df.loc[can_fenxing_index, 'bi_value'] = df.loc[can_fenxing_index, 'high'] else: df.loc[can_fenxing_index, 'bi_value'] = df.loc[can_fenxing_index, 'low'] opposite_count = 0 direction = direction.opposite() can_fenxing = None # 确定第一个段 if fenxing_list != None: fenxing_list.append( Fenxing(state=fenxing_col, kdata=df.loc[can_fenxing_index], index=can_fenxing_index)) if len(fenxing_list) == 4: duan_state = handle_duan( fenxing_list=fenxing_list, pre_duan_state=current_duan_state) change = duan_state != current_duan_state if change: current_duan_state = duan_state # 确定状态 df.loc[fenxing_list[0]. 
index:fenxing_list[-1].index, 'duan_state'] = current_duan_state duan_index = fenxing_list[0].index if current_duan_state == 'up': df.loc[duan_index, 'duan_di'] = True duan_value = df.loc[duan_index, 'low'] else: duan_index = fenxing_list[0].index df.loc[duan_index, 'duan_ding'] = True duan_value = df.loc[duan_index, 'high'] # 记录段的值 df.loc[duan_index, 'duan_value'] = duan_value # 记录用于计算中枢的段 duans.append( (df.loc[duan_index, 'timestamp'], duan_value)) # 计算中枢 if len(duans) == 4: x1 = duans[0][0] x2 = duans[3][0] if duans[0][1] < duans[1][1]: # 向下段 range = intersect( (duans[0][1], duans[1][1]), (duans[2][1], duans[3][1])) if range: y1, y2 = range # 记录中枢 df.loc[duan_index, 'zhongshu'] = Rect( x0=x1, x1=x2, y0=y1, y1=y2) duans = duans[-1:] else: duans = duans[1:] else: # 向上段 range = intersect( (duans[1][1], duans[0][1]), (duans[3][1], duans[2][1])) if range: y1, y2 = range # 记录中枢 df.loc[duan_index, 'zhongshu'] = Rect( x0=x1, x1=x2, y0=y1, y1=y2) duans = duans[-1:] else: duans = duans[1:] # 只留最后一个 fenxing_list = fenxing_list[-1:] else: # 保持之前的状态并踢出候选 df.loc[fenxing_list[0].index, 'duan_state'] = current_duan_state fenxing_list = fenxing_list[1:] pre_kdata = kdata pre_index = index df = df.set_index('timestamp') return df
def on_data_loaded(self, data: pd.DataFrame):
    if pd_is_not_null(self.factor_df):
        self.factor_df['zhongshu'] = self.factor_df['zhongshu'].apply(
            lambda x: json.loads(x, object_hook=decode_rect))

    return super().on_data_loaded(data)
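# Hedged sketch (an assumption, not the library's actual implementation): one way an
# object_hook such as decode_rect could rebuild the Rect(x0, y0, x1, y1) values that
# on_data_loaded above parses out of the persisted 'zhongshu' column.
def _example_decode_rect(dct):
    # only convert dicts that carry the four rectangle coordinates; pass everything else through
    if {"x0", "y0", "x1", "y1"} <= set(dct):
        return Rect(x0=dct["x0"], y0=dct["y0"], x1=dct["x1"], y1=dct["y1"])
    return dct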