def __init__(self, entity_ids=None, entity_schema=Stock, exchanges=None, codes=None, the_timestamp=None,
             start_timestamp=None, end_timestamp=None, long_threshold=0.8, short_threshold=0.2,
             level=IntervalLevel.LEVEL_1DAY, provider=None, portfolio_selector=None) -> None:
    """Initialize the selector's universe, time window, thresholds and factor lists.

    :param entity_ids: explicit entity ids forming the universe
    :param entity_schema: entity type of the universe (default Stock)
    :param exchanges: restrict the universe to these exchanges
    :param codes: restrict the universe to these codes
    :param the_timestamp: if truthy, collapse the window to this single day
    :param start_timestamp: window start (used only when the_timestamp is falsy)
    :param end_timestamp: window end; defaults to now when omitted
    :param long_threshold: score at/above which a target is treated as long
    :param short_threshold: score at/below which a target is treated as short
    :param level: interval level of the underlying data
    :param provider: data provider name
    :param portfolio_selector: optional upstream selector; must select one of
        Etf/Block/Index (asserted below)
    """
    self.entity_ids = entity_ids
    self.entity_schema = entity_schema
    self.exchanges = exchanges
    self.codes = codes
    self.provider = provider
    self.portfolio_selector: TargetSelector = portfolio_selector
    if self.portfolio_selector:
        # only portfolio-like entities may drive a portfolio selector
        assert self.portfolio_selector.entity_schema in [Etf, Block, Index]
    if the_timestamp:
        # single-day mode: start == end == the_timestamp
        self.the_timestamp = to_pd_timestamp(the_timestamp)
        self.start_timestamp = self.the_timestamp
        self.end_timestamp = self.the_timestamp
    else:
        # NOTE(review): if neither the_timestamp nor start_timestamp is given,
        # self.start_timestamp is never set — confirm callers always pass one.
        if start_timestamp:
            self.start_timestamp = to_pd_timestamp(start_timestamp)
        if end_timestamp:
            self.end_timestamp = to_pd_timestamp(end_timestamp)
        else:
            self.end_timestamp = now_pd_timestamp()
    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level
    # factor pipelines populated by init_factors
    self.filter_factors: List[FilterFactor] = []
    self.score_factors: List[ScoreFactor] = []
    self.state_factors: List[StateFactor] = []
    self.filter_result = None
    self.score_result = None
    self.open_long_df: DataFrame = None
    self.open_short_df: DataFrame = None
    self.init_factors(entity_ids=entity_ids, entity_schema=entity_schema, exchanges=exchanges, codes=codes,
                      the_timestamp=the_timestamp, start_timestamp=start_timestamp,
                      end_timestamp=end_timestamp, level=self.level)
def get_etf_stocks(code=None, codes=None, ids=None, timestamp=None, provider=None):
    """Return the constituent stocks of ETF(s) as of ``timestamp``.

    When the latest disclosed report is an annual/semi-annual report it
    contains the full holdings and is returned directly; a quarterly report
    only discloses top holdings, so older annual/semi-annual reports are
    merged in to approximate the full portfolio.

    Fixes: the old default ``timestamp=now_pd_timestamp()`` was evaluated once
    at import time (stale clock); ``DataFrame.append`` (removed in pandas 2.0)
    is replaced with ``pd.concat``.

    :param code: single ETF code
    :param codes: list of ETF codes
    :param ids: list of entity ids
    :param timestamp: point in time; defaults to now, resolved per call
    :param provider: data provider name
    :return: DataFrame of constituents, or None when nothing is found
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    latests: List[EtfStock] = EtfStock.query_data(provider=provider, code=code, end_timestamp=timestamp,
                                                  order=EtfStock.timestamp.desc(), limit=1,
                                                  return_type='domain')
    if latests:
        latest_record = latests[0]
        # fetch the latest report
        df = EtfStock.query_data(provider=provider, code=code, codes=codes, ids=ids, end_timestamp=timestamp,
                                 filters=[EtfStock.report_date == latest_record.report_date])
        if latest_record.report_period == ReportPeriod.year or latest_record.report_period == ReportPeriod.half_year:
            # annual / semi-annual report already holds the full portfolio
            return df
        else:
            # quarterly report: walk back through earlier report dates until an
            # annual or semi-annual report is found, then dedupe per stock
            step = 0
            while True:
                report_date = get_recent_report_date(latest_record.report_date, step=step)
                pre_df = EtfStock.query_data(provider=provider, code=code, codes=codes, ids=ids,
                                             end_timestamp=timestamp,
                                             filters=[EtfStock.report_date == to_pd_timestamp(report_date)])
                df = pd.concat([df, pre_df])
                if (ReportPeriod.half_year.value in pre_df['report_period'].tolist()) or (
                        ReportPeriod.year.value in pre_df['report_period'].tolist()):
                    # keep the newest position per stock
                    df = df.drop_duplicates(subset=['stock_code'], keep='first')
                    return df
                step = step + 1
                if step >= 20:
                    # give up after 20 look-backs (returns None, as before)
                    break
def record(self, entity, start, end, size, timestamps):
    """Fetch rights-issue (配股) data for *entity* from the EM quant API and
    persist it via ``df_to_db``.

    One ``c.css`` query is issued per calendar year in [start, end]; rows
    without an ex-rights date are dropped and the rest are renamed through the
    schema's data map before saving.

    Fix: ``DataFrame.append`` (removed in pandas 2.0) replaced by ``pd.concat``.

    :return: None (persistence is the side effect)
    """
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    # distinct years covered by the requested range
    reportdate_list = list({to_time_str(i)[:4] for i in pd.date_range(start, end)})
    em_code = to_em_entity_id(entity)
    df = pd.DataFrame()
    # EM field -> meaning (reference only; actual renaming uses the schema map)
    columns_dict = {
        "RTISSANNCDATE": "配股公告日",
        "RTISSREGISTDATE": "股权登记日",
        "RTISSEXDIVDATE": "配股除权日",
        "RTISSLISTDATE": "配股上市日",
        "RTISSPAYSDATE": "缴款起始日",
        "RTISSPAYEDATE": "缴款终止日",
        "RTISSPERTISSHARE": "每股配股数",
        "RTISSBASESHARES": "基准股本",
        "RTISSPLANNEDVOL": "计划配股数",
        "RTISSACTVOL": "实际配股数",
        "RTISSPRICE": "配股价格",
        "RTISSCOLLECTION": "配股募集资金",
        "RTISSNETCOLLECTION": "配股募集资金净额",
        "RTISSEXPENSE": "配股费用",
    }
    div_columns_list = list(columns_dict.keys())
    for reportdate in reportdate_list:
        # one query per year for the rights-issue plan
        div_df = c.css(em_code, div_columns_list, "Year =" + reportdate + ",ispandas=1")
        df = pd.concat([df, div_df])
    # only keep rows that actually went ex-rights, oldest first
    df = df.dropna(subset=["RTISSEXDIVDATE"])
    df = df.sort_values("RTISSEXDIVDATE", ascending=True)
    if pd_is_not_null(df):
        df.reset_index(drop=True, inplace=True)
        df.rename(columns=self.data_schema.get_data_map(self), inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.rtiss_date)
        df['provider'] = 'emquantapi'
        df['code'] = entity.code

        def generate_id(se):
            # id = "<entity_id>_<yyyy-mm-dd>"
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def in_trading_time(cls, timestamp=None):
    """Return True if *timestamp* (default: now) falls strictly inside the
    exchange's daily session, i.e. between the first interval's open and the
    last interval's close on that date."""
    ts = now_pd_timestamp() if not timestamp else pd.Timestamp(timestamp)
    intervals = cls.get_trading_intervals()
    day = ts.date()
    session_open = date_and_time(the_date=day, the_time=intervals[0][0])
    session_close = date_and_time(the_date=day, the_time=intervals[-1][1])
    return session_open < ts < session_close
def is_in_trading(security_type, exchange, timestamp):
    """Return True when *timestamp* is today and the current wall-clock time
    lies within any trading interval of the given security type/exchange."""
    current = now_pd_timestamp()
    if not is_same_date(current, to_pd_timestamp(timestamp)):
        return False
    for begin, finish in get_trading_intervals(security_type=security_type, exchange=exchange):
        if date_and_time(current, begin) < current < date_and_time(current, finish):
            return True
    return False
def record(self, entity, start, end, size, timestamps, http_session):
    """Scrape sina quarterly kdata pages for *entity* from *start* to now and
    return the rows as a list of dicts.

    One HTTP request is made per (year, quarter); the kdata table is expected
    to be the 5th table on the page.
    """
    the_quarters = get_year_quarters(start, now_pd_timestamp(Region.CHN))
    # skip the first quarter when start is not the entity's own first timestamp
    if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
        the_quarters = the_quarters[1:]
    param = {
        'security_item': entity,
        'quarters': the_quarters,
        'level': self.level.value
    }
    security_item = param['security_item']
    quarters = param['quarters']
    level = param['level']
    result_df = pd.DataFrame()
    for year, quarter in quarters:
        query_url = self.url.format(security_item.code, year, quarter)
        response = request_get(http_session, query_url)
        # sina pages are GBK-encoded
        response.encoding = 'gbk'
        try:
            dfs = pd.read_html(response.text)
        except ValueError as error:
            # page had no parseable tables — skip this quarter
            self.logger.error(
                f'skip ({year}-{quarter:02d}){security_item.code}{security_item.name}({error})'
            )
            self.sleep()
            continue
        if len(dfs) < 5:
            # kdata table (index 4) missing — nothing to record this quarter
            self.sleep()
            continue
        df = dfs[4].copy()
        # drop the header row
        df = df.iloc[1:]
        df.columns = [
            'timestamp', 'open', 'high', 'close', 'low', 'volume', 'turnover'
        ]
        df['name'] = security_item.name
        df['level'] = level
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = Provider.Sina
        result_df = pd.concat([result_df, df])
        self.logger.info(
            f'({security_item.code}{security_item.name})({year}-{quarter:02d})'
        )
        self.sleep()
    result_df = result_df.sort_values(by='timestamp')
    return result_df.to_dict(orient='records')
def download_sh_etf_component(self, df: pd.DataFrame, http_session):
    """Fetch and persist constituent stocks for Shanghai ETFs.

    ETF_CLASS codes: 1 = single-market ETF, 2 = cross-market ETF,
    3 = cross-border ETF, 5 = bond ETF, 6 = gold ETF.  Only classes 1 and 2
    are processed here.

    :param df: ETF list frame (expects ETF_CLASS / FUND_ID / FUND_NAME columns)
    :param http_session: session used for the SSE queries
    :return: None (rows are written via df_to_db)
    """
    query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                'isPagination=false&type={}&etfClass={}'
    etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
    etf_df = self.populate_sh_etf_type(etf_df, http_session)
    for _, etf in etf_df.iterrows():
        url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
        text = sync_get(http_session, url, headers=DEFAULT_SH_ETF_LIST_HEADER, return_type='text')
        if text is None:
            continue
        # response is not strict JSON; demjson tolerates it
        response_dict = demjson.decode(text)
        response_df = pd.DataFrame(response_dict.get('result', []))
        etf_code = etf['FUND_ID']
        etf_id = f'etf_sh_{etf_code}'
        response_df = response_df[['instrumentId', 'instrumentName']].copy()
        response_df.rename(columns={
            'instrumentId': 'stock_code',
            'instrumentName': 'stock_name'
        }, inplace=True)
        response_df['entity_id'] = etf_id
        response_df['entity_type'] = EntityType.ETF.value
        response_df['exchange'] = 'sh'
        response_df['code'] = etf_code
        response_df['name'] = etf['FUND_NAME']
        response_df['timestamp'] = now_pd_timestamp(Region.CHN)
        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda code: china_stock_code_to_id(code))
        # one row id per (etf, stock) pair
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{etf_id}_{x}')
        df_to_db(df=response_df, ref_df=None, region=Region.CHN, data_schema=self.data_schema,
                 provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')
        self.sleep()
def get_fund_stocks(code=None, codes=None, ids=None, timestamp=None, provider=None):
    """Return the constituent stocks of fund(s) as of ``timestamp``.

    Fix: the old default ``timestamp=now_pd_timestamp()`` was evaluated once
    at import time, so long-running processes used a stale clock; it is now
    resolved per call.

    :param code: single fund code
    :param codes: list of fund codes
    :param ids: list of entity ids
    :param timestamp: point in time; defaults to now
    :param provider: data provider name
    :return: DataFrame of fund constituents
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    return get_portfolio_stocks(portfolio_entity=Fund, code=code, codes=codes, ids=ids,
                                timestamp=timestamp, provider=provider)
def record(self, entity, start, end, size, timestamps):
    """Fetch daily SW-industry-index kdata for *entity* from the EM ``csd``
    endpoint and persist it; returns None (persistence is the side effect)."""
    # if entity.exchange == "swl1":
    #     return None
    if not end:
        # bound a missing end at one year after start to keep requests small
        if (now_pd_timestamp() - start).days > 365:
            from datetime import timedelta
            end = to_time_str(start + timedelta(days=365))
        else:
            end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    df = c.csd(f"{entity.code}.SWI", "OPEN,CLOSE,HIGH,LOW,VOLUME,AMOUNT", start, end,
               "period=1,adjustflag=1,curtype=1,order=1,ispandas=1")
    if type(df) != pd.DataFrame:
        # the EM client returns an error object instead of a frame on failure
        return None
    df.rename(columns={
        'DATES': 'timestamp',
        'OPEN': 'open',
        'CLOSE': 'close',
        'HIGH': 'high',
        'LOW': 'low',
        'VOLUME': 'volume',
        'AMOUNT': 'turnover',
    }, inplace=True)
    if pd_is_not_null(df):
        df['name'] = entity.name
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = '1d'
        df['code'] = entity.code

        def generate_kdata_id(se):
            # id = "<entity_id>_<yyyy-mm-dd>"
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def get_stocks(cls, code=None, codes=None, ids=None, timestamp=None, provider=None):
    """Return the ETF's constituent stocks as of ``timestamp``.

    Fix: the old default ``timestamp=now_pd_timestamp()`` was evaluated once
    at import time (a stale clock for long-running processes); it is now
    resolved per call.

    :param code: single ETF code
    :param codes: list of ETF codes
    :param ids: list of entity ids
    :param timestamp: point in time; defaults to now
    :param provider: data provider name
    """
    # local import to avoid a circular dependency with zvt.api.portfolio
    from zvt.api.portfolio import get_etf_stocks
    if timestamp is None:
        timestamp = now_pd_timestamp()
    return get_etf_stocks(code=code, codes=codes, ids=ids, timestamp=timestamp, provider=provider)
def record(self, entity, start, end, size, timestamps):
    """Fetch dividend (分红) records for *entity* from the EM quant API and
    persist them via ``df_to_db``.

    One ``c.css`` query is issued per year-end report date in [start, end];
    rows without an ex-dividend date are dropped, values are normalized, and
    columns are renamed through the schema's data map before saving.

    Fix: ``DataFrame.append`` (removed in pandas 2.0) replaced by ``pd.concat``.

    :return: None (persistence is the side effect)
    """
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    # year-end report dates covering the requested range
    reportdate_list = sorted(list({to_time_str(i)[:4] + '-12-31' for i in pd.date_range(start, end)}))
    em_code = to_em_entity_id(entity)
    df = pd.DataFrame()
    # EM field -> meaning (reference only; actual renaming uses the schema map)
    div_columns_dict = {
        "DIVAGMANNCDATE": "股东大会公告日",
        "DIVEXDATE": "除权除息日",
        "DIVRECORDDATE": "股权登记日",
        "DIVIMPLANNCDATE": "分红实施公告日",
        "DIVLASTTRDDATESHAREB": "B股最后交易日",
        "DIVCASHPSAFTAX": "每股股利(税后)",
        "DIVCASHPSBFTAX": "每股股利(税前)",
        "DIVPROGRESS": "分红方案进度",
        "DIVPAYDATE": "派息日",
        # "DIVCASHDATE": "最新现金分红报告期",
        "DIVSTOCKPS": "每股送股比例",
        "DIVCAPITALIZATIONPS": "每股转增比例",
        "DIVCASHANDSTOCKPS": "分红送转方案",
    }
    div_columns_list = [i for i in div_columns_dict.keys()]
    for reportdate in reportdate_list:
        # one query per report date for the dividend plan
        div_df = c.css(em_code, div_columns_list,
                       "ReportDate =" + reportdate + ",ispandas=1,AssignFeature=1,YesNo=1")
        div_df['report_date'] = reportdate
        df = pd.concat([df, div_df])
    # only keep rows that actually went ex-dividend, oldest first
    df = df.dropna(subset=["DIVEXDATE"])
    df = df.sort_values("DIVEXDATE", ascending=True)
    # strip alternative-plan suffixes ("或…") and trailing annotations ("(…")
    df['DIVCASHPSAFTAX'] = df['DIVCASHPSAFTAX'].apply(lambda x: str(x).split('或')[0])
    df['DIVCASHANDSTOCKPS'] = df['DIVCASHANDSTOCKPS'].apply(lambda x: str(x).split('(')[0])
    if pd_is_not_null(df):
        df.reset_index(drop=True, inplace=True)
        df.rename(columns=self.data_schema.get_data_map(self), inplace=True)
        df['dividend'] = df['dividend'].apply(lambda x: str(x).split('(')[0])
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.dividend_date)
        df['provider'] = 'emquantapi'
        df['code'] = entity.code

        def generate_id(se):
            # id = "<entity_id>_<yyyy-mm-dd>"
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df.replace('None', pd.NaT, inplace=True)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def get_top_fund_holding_stocks(timestamp=None, pct=0.3, by=None):
    """Return the top ``pct`` fraction of stocks by fund-holding weight.

    Fix: the FundStock query result was assigned to ``df`` while the next
    statement read the still-undefined ``fund_cap_df`` (NameError); the query
    result is now assigned to ``fund_cap_df`` directly.

    :param timestamp: point in time; defaults to now
    :param pct: fraction of the ranked universe to keep
    :param by: None = rank by raw held market cap; 'trading' = held cap as a
        share of circulating cap; 'all' = held cap as a share of total cap
    :return: single-column DataFrame indexed by stock id
    """
    if not timestamp:
        timestamp = now_pd_timestamp()
    # Quarterly reports are published within ~1 month of report_date, annual
    # reports within 2-4 months, so take the most recent published point.
    # The data is therefore lagged — good enough for a rough ranking
    # ("vaguely right beats precisely wrong").
    report_date = get_recent_report_date(timestamp, 1)
    fund_cap_df = FundStock.query_data(region=Region.CHN,
                                       filters=[FundStock.report_date >= report_date,
                                                FundStock.timestamp <= timestamp],
                                       columns=['stock_id', 'market_cap'])
    # total held market cap per stock, largest first
    fund_cap_df = fund_cap_df.groupby('stock_id')['market_cap'].sum().sort_values(ascending=False)
    # rank directly by held market cap
    if not by:
        s = fund_cap_df.iloc[:int(len(fund_cap_df) * pct)]
        return s.to_frame()
    # as a share of circulating cap
    if by == 'trading':
        columns = ['entity_id', 'circulating_market_cap']
    # as a share of total cap
    elif by == 'all':
        columns = ['entity_id', 'market_cap']
    entity_ids = fund_cap_df.index.tolist()
    # average valuation over the 30 days before timestamp
    start_timestamp = next_date(timestamp, -30)
    cap_df = StockValuation.query_data(entity_ids=entity_ids,
                                       filters=[StockValuation.timestamp >= start_timestamp,
                                                StockValuation.timestamp <= timestamp],
                                       columns=columns)
    if by == 'trading':
        cap_df = cap_df.rename(columns={'circulating_market_cap': 'cap'})
    elif by == 'all':
        cap_df = cap_df.rename(columns={'market_cap': 'cap'})
    cap_df = cap_df.groupby('entity_id').mean()
    result_df = pd.concat([cap_df, fund_cap_df], axis=1, join='inner')
    result_df['pct'] = result_df['market_cap'] / result_df['cap']
    pct_df = result_df['pct'].sort_values(ascending=False)
    s = pct_df.iloc[:int(len(pct_df) * pct)]
    return s.to_frame()
def __init__(self,
             region: Region,
             entity_ids=None,
             entity_schema=Stock,
             exchanges=None,
             codes=None,
             the_timestamp=None,
             start_timestamp=None,
             end_timestamp=None,
             long_threshold=0.8,
             short_threshold=0.2,
             level=IntervalLevel.LEVEL_1DAY,
             provider: Provider = Provider.Default) -> None:
    """Initialize the selector's universe, region, time window and thresholds.

    :param region: market region; also used to resolve "now" for the window end
    :param entity_ids: explicit entity ids forming the universe
    :param entity_schema: entity type of the universe (default Stock)
    :param exchanges: restrict the universe to these exchanges
    :param codes: restrict the universe to these codes
    :param the_timestamp: if truthy, collapse the window to this single day
    :param start_timestamp: window start (used only when the_timestamp is falsy)
    :param end_timestamp: window end; defaults to now in *region*
    :param long_threshold: score at/above which a target is treated as long
    :param short_threshold: score at/below which a target is treated as short
    :param level: interval level of the underlying data
    :param provider: data provider
    """
    self.entity_ids = entity_ids
    self.entity_schema = entity_schema
    self.exchanges = exchanges
    self.codes = codes
    self.region = region
    self.provider = provider
    if the_timestamp:
        # single-day mode: start == end == the_timestamp
        self.the_timestamp = to_pd_timestamp(the_timestamp)
        self.start_timestamp = self.the_timestamp
        self.end_timestamp = self.the_timestamp
    else:
        # NOTE(review): if neither the_timestamp nor start_timestamp is given,
        # self.start_timestamp is never set — confirm callers always pass one.
        if start_timestamp:
            self.start_timestamp = to_pd_timestamp(start_timestamp)
        if end_timestamp:
            self.end_timestamp = to_pd_timestamp(end_timestamp)
        else:
            self.end_timestamp = now_pd_timestamp(self.region)
    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level
    # factor pipelines populated by init_factors
    self.filter_factors: List[FilterFactor] = []
    self.score_factors: List[ScoreFactor] = []
    self.filter_result = None
    self.score_result = None
    self.open_long_df: DataFrame = None
    self.open_short_df: DataFrame = None
    self.init_factors(entity_ids=entity_ids, entity_schema=entity_schema, exchanges=exchanges,
                      codes=codes, the_timestamp=the_timestamp, start_timestamp=start_timestamp,
                      end_timestamp=end_timestamp, level=self.level)
def record(self, entity, start, end, size, timestamps, http_session):
    """Scrape sina quarterly kdata pages for *entity* from *start* to now and
    return the concatenated frame (or None when nothing parsed).

    One HTTP request is made per (year, quarter); the kdata table is expected
    to be the 5th table on the page.
    """
    the_quarters = get_year_quarters(start, now_pd_timestamp(Region.CHN))
    # skip the first quarter when start is not the entity's own first timestamp
    if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
        the_quarters = the_quarters[1:]
    param = {
        'security_item': entity,
        'quarters': the_quarters,
        'level': self.level.value
    }
    security_item = param['security_item']
    quarters = param['quarters']
    level = param['level']
    result_df = pd.DataFrame()
    for year, quarter in quarters:
        query_url = self.url.format(security_item.code, year, quarter)
        # sina pages are GBK-encoded
        text = sync_get(http_session, query_url, encoding='gbk', return_type='text')
        if text is None:
            continue
        try:
            dfs = pd.read_html(text)
        except ValueError as error:
            # page had no parseable tables — skip this quarter
            self.logger.error(
                f'skip ({year}-{quarter:02d}){security_item.code}{security_item.name}({error})'
            )
            self.sleep()
            continue
        if len(dfs) < 5:
            # kdata table (index 4) missing — nothing to record this quarter
            self.sleep()
            continue
        df = dfs[4].copy()
        # drop the header row
        df = df.iloc[1:]
        df.columns = [
            'timestamp', 'open', 'high', 'close', 'low', 'volume', 'turnover'
        ]
        result_df = pd.concat([result_df, df])
        self.sleep()
    if pd_is_not_null(result_df):
        result_df['level'] = level
        return result_df
    return None
def format(self, entity, df):
    """Stamp entity/provider metadata columns onto *df* and assign row ids."""
    metadata = {
        'timestamp': now_pd_timestamp(Region.CHN),
        'entity_id': entity.id,
        'provider': self.provider.value,
        'code': entity.code,
        'name': entity.name,
        'level': self.level.value,
        'exchange': entity.exchange,
        'entity_type': EntityType.Block.value,
    }
    for column, value in metadata.items():
        df[column] = value
    df['id'] = self.generate_domain_id(entity, df)
    return df
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch joinquant 'valuation' fundamentals for *entity*, with the request
    window capped at 500 days past *start* (and never beyond now)."""
    window_end = min(now_pd_timestamp(self.region), start + Timedelta(days=500))
    span: Timedelta = window_end - start
    df = jq_get_fundamentals(table='valuation', code=to_jq_entity_id(entity),
                             date=to_time_str(window_end), count=min(span.days, 500))
    return df if pd_is_not_null(df) else None
def generate_request_param(self, security_item, start, end, size, timestamp):
    """Build the kdata request parameters: clamp *start* to the recorder's
    configured start and extend the end one day past now."""
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))
    request_end = now_pd_timestamp() + timedelta(days=1)
    return dict(security_item=security_item,
                start_timestamp=to_time_str(start),
                end_timestamp=to_time_str(request_end),
                level=self.level.value,
                jq_level=self.jq_trading_level)
def record(self, entity, start, end, size, timestamps):
    """Fetch shareholder trade-detail records (HoldTradeDetailInfo) for
    *entity* from the EM quant API and persist them; returns None."""
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    em_code = to_em_entity_id(entity)
    columns_list = list(self.data_schema.get_data_map(self))
    data = c.ctr(
        "HoldTradeDetailInfo", columns_list,
        "secucode=" + em_code + ",StartDate=" + start + ",EndDate=" + end + ",HoldType=0")
    if data.Data == {}:
        # no records in the window
        return None
    df = pd.DataFrame(data.Data).T
    df.columns = data.Indicators
    df = df.sort_values("NOTICEDATE", ascending=True)
    # total share capital as of each notice date (one css call per row)
    df['TOTALSHARE'] = df.NOTICEDATE.apply(
        lambda x: c.css(em_code, "TOTALSHARE", "EndDate=" + x + ",ispandas=1").TOTALSHARE[0])
    # change ratio (per mille)
    # NOTE(review): `h` below is computed but never used — looks like dead code;
    # confirm before removing.
    h = (df['变动_流通股数量(万股)'] / (df['变动后_持股总数(万股)'] / (df['变动后_占总股本比例(%)'] / 100)))
    # values are reported in units of 10,000 shares — scale to shares
    df['CHANGENUM'] = df['CHANGENUM'] * 10000
    df['BDHCGZS'] = df['BDHCGZS'] * 10000  # holdings total after the change
    df['change_pct'] = abs(
        df['CHANGENUM'] / df['TOTALSHARE']).astype(float) * 1000
    df['change_pct'] = df['change_pct'].round(5)
    if pd_is_not_null(df):
        df.reset_index(drop=True, inplace=True)
        df.rename(columns=self.data_schema.get_data_map(self), inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.holder_end_date)
        df['provider'] = 'emquantapi'
        df['code'] = entity.code

        def generate_id(se):
            # id = "<entity_id>_<yyyy-mm-dd>_<row position>", so multiple
            # records on the same day get distinct ids
            return "{}_{}_{}".format(
                se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY), se.name)

        # re-index each timestamp group from 1 so se.name above is the
        # position within the day
        df_res = pd.concat([
            i.reset_index(drop=True) for i in dict(list(df.groupby('timestamp'))).values()
        ])
        df_res.index += 1
        df_res['id'] = df_res[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df_res, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def report_core_company():
    """Select core companies with FundamentalSelector, push them to the
    eastmoney 'core' group and email the result; retries every 3 minutes on
    failure.

    Fix: ``error_count`` was (re)initialized to 0 inside the retry loop, so it
    was reset on every attempt and the ``error_count == 10`` alert could never
    fire; it now persists across retries.
    """
    email_action = EmailInformer()
    error_count = 0
    while True:
        try:
            target_date = to_time_str(now_pd_timestamp())
            my_selector: TargetSelector = FundamentalSelector(start_timestamp='2016-01-01',
                                                             end_timestamp=target_date)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant', entity_schema=Stock,
                                      entity_ids=long_targets, return_type='domain')
                # add them to eastmoney; failure here is reported but non-fatal
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except:
                        pass
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message(zvt_config['email_username'], f'report_core_company error',
                                              'report_core_company error:{}'.format(e))
                infos = stocks_with_info(stocks)
                msg = '\n'.join(infos)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action.send_message(get_subscriber_emails(), f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                # alert once after 10 consecutive failures (keeps retrying)
                email_action.send_message(zvt_config['email_username'], f'report_core_company error',
                                          'report_core_company error:{}'.format(e))
def report_core_company(region: Region, provider: Provider):
    """Select core companies for *region* with FundamentalSelector, push them
    to the eastmoney 'core' group and email the result; retries every 3
    minutes on failure.

    Fix: ``error_count`` was (re)initialized to 0 inside the retry loop, so it
    was reset on every attempt and the ``error_count == 10`` alert could never
    fire; it now persists across retries.
    """
    email_action = EmailInformer()
    error_count = 0
    while True:
        try:
            target_date = to_time_str(now_pd_timestamp(region))
            my_selector: TargetSelector = FundamentalSelector(region=region, start_timestamp='2016-01-01',
                                                             end_timestamp=target_date)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(region=region, provider=provider, entity_schema=Stock,
                                      entity_ids=long_targets, return_type='domain')
                # add them to eastmoney; failure here is reported but non-fatal
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except:
                        pass
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message("*****@*****.**", 'report_core_company error',
                                              'report_core_company error:{}'.format(e))
                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action.send_message(get_subscriber_emails(), f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                # alert once after 10 consecutive failures (keeps retrying)
                email_action.send_message("*****@*****.**", f'report_core_company error',
                                          'report_core_company error:{}'.format(e))
def report_core_company():
    """Select core companies with FundamentalSelector, add them to the
    eastmoney 'core' group and email the result; retries every 3 minutes on
    failure.

    Fix: ``error_count`` was (re)initialized to 0 inside the retry loop, so it
    was reset on every attempt and the ``error_count == 10`` alert could never
    fire; it now persists across retries.
    """
    email_action = EmailInformer()
    error_count = 0
    while True:
        try:
            target_date = to_time_str(now_pd_timestamp())
            my_selector: TargetSelector = FundamentalSelector(start_timestamp="2016-01-01",
                                                             end_timestamp=target_date)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(
                    provider="joinquant", entity_schema=Stock, entity_ids=long_targets, return_type="domain"
                )
                # add them to eastmoney; failure here is reported but non-fatal
                try:
                    codes = [stock.code for stock in stocks]
                    add_to_eastmoney(codes=codes, entity_type="stock", group="core")
                except Exception as e:
                    email_action.send_message(
                        zvt_config["email_username"],
                        f"report_core_company error",
                        "report_core_company error:{}".format(e),
                    )
                infos = stocks_with_info(stocks)
                msg = "\n".join(infos)
            else:
                msg = "no targets"
            logger.info(msg)
            email_action.send_message(get_subscriber_emails(), f"{to_time_str(target_date)} 核心资产选股结果", msg)
            break
        except Exception as e:
            logger.exception("report_core_company error:{}".format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                # alert once after 10 consecutive failures (keeps retrying)
                email_action.send_message(
                    zvt_config["email_username"], f"report_core_company error",
                    "report_core_company error:{}".format(e)
                )
def run(self):
    """Build one Block row per configured GICS category mapping and persist it.

    NOTE(review): ``category, name_ch = category_map_dict.items()`` assumes
    each dict in self.category_map holds exactly two (key, value) entries —
    the code item and the name item — and the '...板块代码' membership tests
    require the tuple to contain that exact string as an element; confirm
    against how category_map is constructed.
    """
    for category_map_dict in self.category_map:
        # df = get_industries(name=category, date=None)
        category, name_ch = category_map_dict.items()
        df = pd.DataFrame(index=[0])
        # exchange prefix convention: 003* => cn, 204* => us, 402* => hk;
        # the four branches only differ in the GICS level tag (gicsl1..gicsl4)
        if '一级板块代码' in category:
            df['code'] = category[1]
            if category[1].startswith('003'):
                df['exchange'] = 'cn'
            elif category[1].startswith('204'):
                df['exchange'] = 'us'
            elif category[1].startswith('402'):
                df['exchange'] = 'hk'
            df['block_type'] = 'gicsl1'
        elif '二级板块代码' in category:
            df['code'] = category[1]
            if category[1].startswith('003'):
                df['exchange'] = 'cn'
            elif category[1].startswith('204'):
                df['exchange'] = 'us'
            elif category[1].startswith('402'):
                df['exchange'] = 'hk'
            df['block_type'] = 'gicsl2'
        elif '三级板块代码' in category:
            df['code'] = category[1]
            if category[1].startswith('003'):
                df['exchange'] = 'cn'
            elif category[1].startswith('204'):
                df['exchange'] = 'us'
            elif category[1].startswith('402'):
                df['exchange'] = 'hk'
            df['block_type'] = 'gicsl3'
        elif '四级板块代码' in category:
            df['code'] = category[1]
            if category[1].startswith('003'):
                df['exchange'] = 'cn'
            elif category[1].startswith('204'):
                df['exchange'] = 'us'
            elif category[1].startswith('402'):
                df['exchange'] = 'hk'
            df['block_type'] = 'gicsl4'
        df['timestamp'] = now_pd_timestamp()
        df['name'] = name_ch[1]
        df['entity_type'] = 'block'
        df['category'] = "industry"
        df['id'] = df['entity_id'] = df.apply(lambda x: "block_" + x.exchange + "_" + x.code, axis=1)
        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)
        self.logger.info(f"完成choice数据行业数据保存:{category[1],name_ch[1]}")
def fetch_csi_index_component(self, df: pd.DataFrame, http_session):
    """Fetch and persist constituent stocks of SSE/CSI indices.

    Downloads the per-index constituents XLS from csindex.com.cn for each row
    of *df* and writes the rows via df_to_db.

    :param df: index list frame (expects 'code' and 'name' columns)
    :param http_session: session used for the downloads
    :return: None
    """
    query_url = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'
    for _, index in df.iterrows():
        index_code = index['code']
        url = query_url.format(index_code)
        try:
            response = request_get(http_session, url)
            response.raise_for_status()
        except requests.HTTPError as error:
            # skip indices whose constituents file is missing/unreachable
            self.logger.error(
                f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
            continue
        response_df = pd.read_excel(io.BytesIO(response.content))
        response_df = response_df[[
            '成分券代码Constituent Code', '成分券名称Constituent Name'
        ]].rename(
            columns={
                '成分券代码Constituent Code': 'stock_code',
                '成分券名称Constituent Name': 'stock_name'
            })
        index_id = f'index_cn_{index_code}'
        response_df['entity_id'] = index_id
        response_df['entity_type'] = EntityType.Index.value
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index['name']
        response_df['timestamp'] = now_pd_timestamp(Region.CHN)
        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        # one row id per (index, stock) pair
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')
        df_to_db(df=response_df, region=Region.CHN, data_schema=self.data_schema,
                 provider=self.provider, force_update=True)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')
        self.sleep()
def fetch_cni_index_component(self, df: pd.DataFrame, http_session):
    """Fetch and persist constituent stocks of CNI (国证) indices.

    Downloads the per-index constituents XLS from cnindex.com.cn for each row
    of *df* and writes the rows via df_to_db.

    :param df: index list frame (expects 'code' and 'name' columns)
    :param http_session: session used for the downloads
    :return: None
    """
    query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'
    for _, index in df.iterrows():
        index_code = index['code']
        url = query_url.format(index_code)
        try:
            response = request_get(http_session, url)
            response.raise_for_status()
        except requests.HTTPError as error:
            # skip indices whose constituents file is missing/unreachable
            self.logger.error(
                f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
            continue
        response_df = pd.read_excel(io.BytesIO(response.content), dtype='str')
        index_id = f'index_cn_{index_code}'
        # the code column is titled differently across files
        try:
            response_df = response_df[['样本股代码']]
        except KeyError:
            response_df = response_df[['证券代码']]
        response_df['entity_id'] = index_id
        response_df['entity_type'] = EntityType.Index.value
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index['name']
        response_df['timestamp'] = now_pd_timestamp(Region.CHN)
        # normalize the (single) code column name
        response_df.columns = ['stock_code']
        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        # one row id per (index, stock) pair
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')
        df_to_db(df=response_df, region=Region.CHN, data_schema=self.data_schema,
                 provider=self.provider)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')
        self.sleep()
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch kdata bars from joinquant and, for qfq (forward-adjusted) data,
    detect whether the adjust factor changed — which invalidates previously
    saved history and triggers a recompute."""
    if self.adjust_type == AdjustType.hfq:
        # hfq (backward-adjust) is anchored at a fixed early date
        fq_ref_date = '2000-01-01'
    else:
        # qfq (forward-adjust) is anchored at today
        fq_ref_date = to_time_str(now_pd_timestamp(Region.CHN))
    if not self.end_timestamp:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         # fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                         fq_ref_date=fq_ref_date)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         # fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                         end_date=end_timestamp,
                         fq_ref_date=fq_ref_date)
    if pd_is_not_null(df):
        # decide whether previously saved qfq data must be recomputed
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(region=self.region, entity_id=entity.id, provider=self.provider,
                                   start_timestamp=check_date, end_timestamp=check_date, limit=1,
                                   level=self.level, adjust_type=self.adjust_type)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # a differing close at the same timestamp means the qfq factor
                # changed and saved history must be re-adjusted
                if round(old, 2) != round(new, 2):
                    qfq_factor = new / old
                    last_timestamp = pd.Timestamp(check_date)
                    self.recompute_qfq(entity, qfq_factor=qfq_factor, last_timestamp=last_timestamp)
        return df
    return None
def finance_score(data_schema, security_id=None, codes=None, provider='eastmoney', fields=None,
                  timestamp=None, report_count=20):
    """Bucket securities into quantile scores (0/0.1/0.3/0.5/0.7/0.9) on each
    finance field, averaged over the last ``report_count`` report dates.

    Fix: the old default ``timestamp=now_pd_timestamp()`` was evaluated once
    at import time; it is now resolved per call.

    :param data_schema: finance data schema to query
    :param security_id: single security id
    :param codes: list of codes
    :param provider: data provider name
    :param fields: finance fields to score (index fields are appended)
    :param timestamp: data cut-off; defaults to now
    :param report_count: number of recent report dates to average over
    :return: None (currently prints the intermediate and scored frames)
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    fields = fields + ['security_id', 'timestamp', 'report_date']
    data_df = get_data(data_schema=data_schema, security_id=security_id, codes=codes,
                       provider=provider, columns=fields, end_timestamp=timestamp)
    # last `report_count` distinct report dates
    time_series = data_df['report_date'].drop_duplicates()
    time_series = time_series[-report_count:]
    data_df = index_df_with_security_time(data_df)
    idx = pd.IndexSlice
    df = data_df.loc[idx[:, time_series], ]
    print(df)
    # per-security mean over the selected report dates
    df = df.groupby(df['security_id']).mean()
    print(df)
    quantile = df.quantile([0.1, 0.3, 0.5, 0.7, 0.9])

    def evaluate_score(s, column):
        # map a value to the highest quantile threshold it exceeds
        the_column = column
        if s > quantile.loc[0.9, the_column]:
            return 0.9
        if s > quantile.loc[0.7, the_column]:
            return 0.7
        if s > quantile.loc[0.5, the_column]:
            return 0.5
        if s > quantile.loc[0.3, the_column]:
            return 0.3
        if s > quantile.loc[0.1, the_column]:
            return 0.1
        return 0

    for item in quantile.columns:
        df[item] = df[item].apply(lambda x: evaluate_score(x, item))
    print(df)
def __init__(
    self,
    entity_ids=None,
    entity_schema=Stock,
    exchanges=None,
    codes=None,
    start_timestamp=None,
    end_timestamp=None,
    long_threshold=0.8,
    short_threshold=0.2,
    level=IntervalLevel.LEVEL_1DAY,
    provider=None,
    select_mode: SelectMode = SelectMode.condition_and,
) -> None:
    """Set up the selector: universe, time window, thresholds, factor list and
    result containers, then delegate to ``init_factors``."""
    # universe definition
    self.entity_ids = entity_ids
    self.entity_schema = entity_schema
    self.exchanges = exchanges
    self.codes = codes
    self.provider = provider
    self.select_mode = select_mode
    # time window; end defaults to now when not supplied
    if start_timestamp:
        self.start_timestamp = to_pd_timestamp(start_timestamp)
    self.end_timestamp = to_pd_timestamp(end_timestamp) if end_timestamp else now_pd_timestamp()
    # scoring thresholds and data granularity
    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level
    # factor pipeline and result holders
    self.factors: List[Factor] = []
    self.filter_result = None
    self.score_result = None
    self.open_long_df: Optional[DataFrame] = None
    self.open_short_df: Optional[DataFrame] = None
    self.keep_df: Optional[DataFrame] = None
    self.init_factors(
        entity_ids=entity_ids,
        entity_schema=entity_schema,
        exchanges=exchanges,
        codes=codes,
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        level=self.level,
    )
def process_entity(self, entity_item, trade_day, stock_detail, http_session):
    """Evaluate what still needs recording for one entity, fetch it, and
    post-process duplicate/realtime items.

    :return: True when the entity is finished (nothing to do, or the fetched
        batch completed it); False when more recording is still pending.
    """
    step1 = time.time()
    now = now_pd_timestamp(self.region)
    start_timestamp, end_timestamp, end_date, size, timestamps = \
        self.evaluate_start_end_size_timestamps(now, entity_item, trade_day, stock_detail, http_session)
    size = int(size)
    # no more to record — mark the entity finished
    if size == 0:
        start = start_timestamp.strftime('%Y-%m-%d') if start_timestamp else None
        self.on_finish_entity(entity_item, http_session)
        return True
    # fetch and save
    start = start_timestamp.strftime('%Y-%m-%d') if start_timestamp else None
    trade_day = trade_day[0].strftime('%Y-%m-%d') if trade_day else None
    end = end_date.strftime('%Y-%m-%d') if end_date else None
    self.logger.info('request {}, {}, {}, {}, {}, {}'.format(entity_item.id, size, jq_get_query_count(),
                                                             trade_day, start, end))
    original_list = self.record(entity_item, start=start_timestamp, end=end_timestamp, size=size,
                                timestamps=timestamps, http_session=http_session)
    # drop items already persisted; an all-duplicate batch means we're done
    entity_finished, all_duplicated = self.process_duplicate(original_list, entity_item)
    if entity_finished:
        return True
    # handle realtime (intraday / not-yet-final) items
    entity_finished = self.process_realtime(entity_item, original_list, all_duplicated, now, http_session)
    if entity_finished:
        # (debug-only "finish recording" logging removed; re-enable via
        # self.get_latest_saved_record if needed)
        return True
    self.logger.info("update recording {} id: {}, time cost: {}".format(
        self.data_schema.__name__, entity_item.id, time.time()-step1))
    return False
def download_sh_etf_component(self, df: pd.DataFrame):
    """Fetch and persist constituent stocks for Shanghai ETFs.

    ETF_CLASS codes: 1 = single-market ETF, 2 = cross-market ETF,
    3 = cross-border ETF, 5 = bond ETF, 6 = gold ETF.  Only classes 1 and 2
    are processed here.

    :param df: ETF list frame (expects ETF_CLASS / FUND_ID / FUND_NAME columns)
    :return: None (rows are written via df_to_db)
    """
    query_url = (
        "http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?"
        "isPagination=false&type={}&etfClass={}")
    etf_df = df[(df["ETF_CLASS"] == "1") | (df["ETF_CLASS"] == "2")]
    etf_df = self.populate_sh_etf_type(etf_df)
    for _, etf in etf_df.iterrows():
        url = query_url.format(etf["ETF_TYPE"], etf["ETF_CLASS"])
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        # response is not strict JSON; demjson3 tolerates it
        response_dict = demjson3.decode(response.text)
        response_df = pd.DataFrame(response_dict.get("result", []))
        etf_code = etf["FUND_ID"]
        etf_id = f"etf_sh_{etf_code}"
        response_df = response_df[["instrumentId", "instrumentName"]].copy()
        response_df.rename(columns={
            "instrumentId": "stock_code",
            "instrumentName": "stock_name"
        }, inplace=True)
        response_df["entity_id"] = etf_id
        response_df["entity_type"] = "etf"
        response_df["exchange"] = "sh"
        response_df["code"] = etf_code
        response_df["name"] = etf["FUND_NAME"]
        response_df["timestamp"] = now_pd_timestamp()
        response_df["stock_id"] = response_df["stock_code"].apply(
            lambda code: china_stock_code_to_id(code))
        # one row id per (etf, stock) pair
        response_df["id"] = response_df["stock_id"].apply(
            lambda x: f"{etf_id}_{x}")
        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')
        self.sleep()
def on_trading_signals(self, trading_signals: List[TradingSignal]):
    """Email a summary (current positions, buys, sells, account P&L) for
    signals that happened within the last 20 days, then delegate to the base
    handler."""
    target_date = trading_signals[0].happen_timestamp
    # only notify for reasonably fresh signals (within 20 days of now)
    if target_date + datetime.timedelta(20) > now_pd_timestamp(
            self.region):
        email_action = EmailInformer()
        msg = ''
        # current positions
        positions = self.get_current_positions()
        if positions:
            current_stocks = [position.entity_id for position in positions]
            msg = msg + '目前持仓: ' + entity_ids_to_msg(
                self.region, current_stocks) + '\n'
        # split the batch into long (open) and short (close) signals
        long_stocks = []
        short_stocks = []
        for trading_signal in trading_signals:
            if trading_signal.trading_signal_type == TradingSignalType.open_long:
                long_stocks.append(trading_signal.entity_id)
            elif trading_signal.trading_signal_type == TradingSignalType.close_long:
                short_stocks.append(trading_signal.entity_id)
        if long_stocks:
            msg = msg + '买入: ' + entity_ids_to_msg(self.region, long_stocks) + '\n'
        if short_stocks:
            msg = msg + '卖出: ' + entity_ids_to_msg(self.region, short_stocks) + '\n'
        # account status: return as a percentage of invested money
        account = self.get_current_account()
        pct = round((account.all_value - account.input_money) / account.input_money * 100, 4)
        msg = msg + f'投入金额:{account.input_money},目前总市值:{account.all_value},收益率:{pct}%'
        email_action.send_message("*****@*****.**", f'{target_date} 交易信号', msg)
    super().on_trading_signals(trading_signals)