def record(self, entity, start, end, size, timestamps):
    """Fetch the stocks belonging to one sina block (up to 4 pages) and persist them.

    :param entity: the block entity whose constituent stocks are recorded
    """
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            # an empty page marks the end of the pagination
            if resp.text == 'null' or resp.text is None:
                break
            category_jsons = demjson.decode(resp.text)
            the_list = []
            for category in category_jsons:
                stock_code = category['code']
                stock_id = china_stock_code_to_id(stock_code)
                block_id = entity.id
                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': category['name'],
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)
            self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
        except Exception as e:
            # fix: the old call passed the exception and response body as extra
            # positional args to logger.error with no matching format placeholders
            self.logger.error('error: %s, resp.text: %s', e, resp.text, exc_info=True)
        self.sleep()
def fetch_szse_index_component(self, df: pd.DataFrame):
    """
    Fetch the constituent stocks of SZSE indices.
    """
    query_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1747_zs&TABKEY=tab1&ZSDM={}'

    for _, row in df.iterrows():
        code = row['code']
        resp = requests.get(query_url.format(code))

        sheet = pd.read_excel(io.BytesIO(resp.content), dtype='str')
        index_id = f'index_cn_{code}'

        # keep only the stock-code column, then derive the relation columns
        sheet = sheet[['证券代码']]
        sheet['id'] = sheet['证券代码'].apply(
            lambda x: f'{index_id}_{china_stock_code_to_id(str(x))}')
        sheet['entity_id'] = sheet['id']
        sheet['stock_id'] = sheet['证券代码'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        sheet['index_id'] = index_id
        sheet.drop('证券代码', axis=1, inplace=True)

        df_to_db(data_schema=self.data_schema, df=sheet, provider=self.provider)
        self.logger.info(f'{row["name"]} - {code} 成分股抓取完成...')

        self.sleep()
def run(self):
    """Record sina block definitions for every configured category."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        records = []
        for raw in json_callback_param(resp.text):
            fields = raw.split(',')
            code, name = fields[1], fields[2]
            entity_id = f'block_cn_{code}'
            records.append({
                'id': entity_id,
                'entity_id': entity_id,
                'entity_type': 'block',
                'exchange': 'cn',
                'code': code,
                'name': name,
                'category': category.value,
            })
        if records:
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(records),
                     provider=self.provider,
                     force_update=True)
        self.logger.info(f"finish record sina blocks:{category.value}")
def record(self, entity, start, end, size, timestamps):
    """Record daily valuation data (pe/pb/ps/pcf, caps) for one stock from jqdata.

    Units from jqdata: market_cap is in 亿元 (1e8 CNY); capitalization and
    circulating_cap are in 万股 (1e4 shares) — TODO confirm against the jqdata docs.
    """
    q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
    count: pd.Timedelta = now_pd_timestamp() - start
    df = get_fundamentals_continuously(q, end_date=now_time_str(), count=count.days + 1, panel=False)
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(
        lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df = df.rename(
        {
            'pe_ratio_lyr': 'pe',
            'pe_ratio': 'pe_ttm',
            'pb_ratio': 'pb',
            'ps_ratio': 'ps',
            'pcf_ratio': 'pcf'
        },
        axis='columns')
    # convert to base units
    df['market_cap'] = df['market_cap'] * 100000000
    # fix: circulating_cap used to be multiplied by BOTH 1e8 and 1e4 (double
    # scaling); it is denominated in 万股, so a single 1e4 factor is correct
    df['capitalization'] = df['capitalization'] * 10000
    df['circulating_cap'] = df['circulating_cap'] * 10000
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def run(self):
    """Fetch stock blocks from sina and persist them."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        resp.encoding = 'GBK'

        text = resp.text
        # the payload embeds a single JSON object inside a JS snippet
        payload = text[text.index('{'):text.index('}') + 1]
        code_to_info = json.loads(payload)

        records = []
        for code, info in code_to_info.items():
            entity_id = 'index_cn_{}'.format(code)
            records.append({
                'id': entity_id,
                'entity_id': entity_id,
                'entity_type': 'block',
                'exchange': 'cn',
                'code': code,
                'name': info.split(',')[1],
                'category': category.value,
            })

        if records:
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(records),
                     provider=self.provider,
                     force_update=True)
        self.logger.info(f"finish record sina block:{category.value}")
def record(self, entity, start, end, size, timestamps):
    """Record the stock portfolio of one fund published since `start`."""
    q = query(finance.FUND_PORTFOLIO_STOCK) \
        .filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start) \
        .filter(finance.FUND_PORTFOLIO_STOCK.code == entity.code)
    df = finance.run_query(q)
    if not pd_is_not_null(df):
        return None

    # sample rows:
    # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion
    # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09
    # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50
    df['timestamp'] = pd.to_datetime(df['pub_date'])
    df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
    # proportion arrives as a percentage
    df['proportion'] = df['proportion'] * 0.01
    df = portfolio_relate_stock(df, entity)
    df['stock_id'] = df['stock_code'].apply(china_stock_code_to_id)
    df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(
        lambda x: '_'.join(x.astype(str)), axis=1)
    df['report_date'] = pd.to_datetime(df['period_end'])
    df['report_period'] = df['report_type'].apply(jq_to_report_period)
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    self.logger.info(f"persist etf {entity.code} portfolio success")
    return None
def record(self, entity_item, start, end, size, timestamps):
    """Record github users for the single day in `timestamps`.

    Persists all but the last fetched item and returns the remainder so the
    caller can continue from it.
    """
    self.seed += 1
    timestamp = timestamps[0]
    the_url = self.url.format(to_time_str(timestamp), to_time_str(timestamp))

    items = get_all_results(url=the_url, token=GithubAccount.get_token(seed=self.seed))
    current_time = now_pd_timestamp()

    results = [{
        'id': f'user_github_{item["login"]}',
        'entity_id': f'user_github_{item["login"]}',
        'timestamp': timestamp,
        'exchange': 'github',
        'entity_type': 'user',
        'code': item['login'],
        'node_id': item['node_id'],
        'created_timestamp': current_time,
        'updated_timestamp': None
    } for item in items]

    # for save faster
    df = pd.DataFrame(data=results[:-1])
    # fix: keyword is force_update, consistent with every other df_to_db call site
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=True)

    return results[-1:]
def run(self):
    """Persist sina index definitions, then record the constituent stocks of
    concept/industry indices."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        results = json_callback_param(resp.text)
        for result in results:
            items = result.split(',')
            code = items[1]
            name = items[2]
            id = 'index_cn_{}'.format(code)
            if id in self.index_ids:
                continue
            self.session.add(
                Index(id=id,
                      entity_id=id,
                      entity_type='index',
                      exchange='cn',
                      code=code,
                      name=name,
                      category=category.value))
    self.session.commit()

    indices = get_entities(session=self.session,
                           entity_type='index',
                           return_type='domain',
                           filters=[
                               Index.category.in_([
                                   StockCategory.concept.value,
                                   StockCategory.industry.value
                               ])
                           ],
                           provider=self.provider)

    for index_item in indices:
        resp = requests.get(
            self.category_stocks_url.format(index_item.code, '1'))
        try:
            results = json_callback_param(resp.text)
            the_list = []
            for result in results:
                items = result.split(',')
                stock_code = items[1]
                stock_id = china_stock_code_to_id(stock_code)
                index_id = index_item.id
                the_list.append({
                    'id': '{}_{}'.format(index_id, stock_id),
                    'index_id': index_id,
                    'stock_id': stock_id
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider)
            self.logger.info('finish recording index:{},{}'.format(
                index_item.category, index_item.name))
        except Exception as e:
            # fix: the old logger.error call passed the exception and response
            # body as positional args with no matching format placeholders
            self.logger.error('error: %s, resp.text: %s', e, resp.text, exc_info=True)
        self.sleep()
def run(self):
    """Fetch the etf list from joinquant and persist it."""
    etf_df = self.to_zvt_entity(get_all_securities(['etf']),
                                entity_type='etf',
                                category='etf')
    df_to_db(etf_df, data_schema=Etf, provider=self.provider)
    self.logger.info("persist etf list success")
    logout()
def record(self, entity, start, end, size, timestamps):
    """Record all trade days since `start`, one row per trading day."""
    trade_dates = get_trade_days(start_date=start)
    df = pd.DataFrame()
    df['timestamp'] = pd.to_datetime(trade_dates)
    df['id'] = [to_time_str(day) for day in trade_dates]
    # the trading calendar is anchored to a single well-known entity
    df['entity_id'] = 'stock_sz_000001'
    df_to_db(df=df,
             data_schema=self.data_schema,
             provider=self.provider,
             force_update=self.force_update)
def download_sz_etf_component(self, df: pd.DataFrame):
    """Fetch the constituent stocks of SZ ETFs via the sina page of each
    etf's underlying index.

    :param df: SZ etf list dataframe with '证券代码' and '证券简称' columns;
               parse_sz_etf_underlying_index adds the '拟合指数' column
    """
    query_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml'

    # annotate each etf row with the index it tracks
    self.parse_sz_etf_underlying_index(df)
    for _, etf in df.iterrows():
        underlying_index = etf['拟合指数']
        etf_code = etf['证券代码']

        if len(underlying_index) == 0:
            # no underlying A-share index resolved for this etf — skip it
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
            continue

        url = query_url.format(underlying_index)
        response = requests.get(url)
        response.encoding = 'gbk'

        try:
            dfs = pd.read_html(response.text, header=1)
        except ValueError as error:
            self.logger.error(
                f'HTML parse error: {error}, response: {response.text}')
            continue

        if len(dfs) < 4:
            continue

        # the fourth table on the page holds the constituent list
        # (presumably stable page layout — TODO confirm)
        response_df = dfs[3].copy()
        response_df = response_df.dropna(axis=1, how='any')
        # zero-pad stock codes to 6 digits
        response_df['品种代码'] = response_df['品种代码'].apply(
            lambda x: f'{x:06d}')

        etf_id = f'etf_sz_{etf_code}'
        response_df = response_df[['品种代码', '品种名称']].copy()
        response_df.rename(columns={
            '品种代码': 'stock_code',
            '品种名称': 'stock_name'
        }, inplace=True)

        response_df['entity_id'] = etf_id
        response_df['entity_type'] = 'etf'
        response_df['exchange'] = 'sz'
        response_df['code'] = etf_code
        response_df['name'] = etf['证券简称']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda code: china_stock_code_to_id(code))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{etf_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df,
                 provider=self.provider)
        self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
def run(self):
    """Fetch the stock list from joinquant, persisting both Stock and StockDetail."""
    entity_df = self.to_zvt_entity(get_all_securities(['stock']), entity_type='stock')
    df_to_db(entity_df, data_schema=Stock, provider=self.provider)

    # persist StockDetail too
    df_to_db(df=entity_df, data_schema=StockDetail, provider=self.provider,
             force_update=False)

    self.logger.info("persist stock list success")
    logout()
def record(self, entity, start, end, size, timestamps):
    """Record unadjusted kdata for one entity via jqdata's get_price."""
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))
    end = now_pd_timestamp()

    start_timestamp = to_time_str(start)
    # jq get_price requires an explicit end date, otherwise future data may leak in
    end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)
    # unadjusted prices (fq=None)
    df = get_price(
        to_jq_entity_id(entity),
        start_date=to_time_str(start_timestamp),
        end_date=end_timestamp,
        frequency=self.jq_trading_level,
        fields=['open', 'close', 'low', 'high', 'volume', 'money'],
        skip_paused=True,
        fq=None)
    if df_is_not_null(df):
        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        def generate_kdata_id(se):
            # daily-and-above bars key on the date; intraday bars on the full timestamp
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        # fix: keyword is force_update, consistent with the sibling recorders
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def record(self, entity, start, end, size, timestamps):
    """Record forward-adjusted (前复权) kdata via jq get_bars, and detect when
    previously stored adjusted history must be rescaled."""
    # only forward-adjusted data is recorded
    if not self.end_timestamp:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      fq_ref_date=to_time_str(now_pd_timestamp()),
                      include_now=True)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      end_dt=end_timestamp,
                      fq_ref_date=to_time_str(now_pd_timestamp()),
                      include_now=False)
    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        # check whether the previously saved forward-adjusted data needs recomputation:
        # compare the earliest fetched bar with what is already stored for that timestamp
        check_df = df.head(1)
        check_date = check_df['timestamp'][0]
        current_df = get_kdata(entity_id=entity.id, provider=self.provider,
                               start_timestamp=check_date, end_timestamp=check_date,
                               limit=1, level=self.level)
        if pd_is_not_null(current_df):
            old = current_df.iloc[0, :]['close']
            new = check_df['close'][0]
            # differing close at the same timestamp means the adjustment factor
            # changed, so the stored history must be rescaled by new/old
            if round(old, 2) != round(new, 2):
                self.factor = new / old
                self.last_timestamp = pd.Timestamp(check_date)

        def generate_kdata_id(se):
            # daily-and-above bars key on the date; intraday bars on the full timestamp
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def run(self):
    """Record the coin-pair list for every configured ccxt exchange."""
    for exchange_str in self.exchanges:
        exchange = CCXTAccount.get_ccxt_exchange(exchange_str)
        try:
            markets = exchange.fetch_markets()
            # markets is either a dict keyed by symbol or a list of market dicts
            markets_type = type(markets)
            if markets_type != dict and markets_type != list:
                self.logger.exception(
                    "unknown return markets type {}".format(markets_type))
                return

            records = []
            for market in markets:
                if markets_type == dict:
                    code = market
                    name = market
                else:
                    code = market['symbol']
                    name = market['symbol']

                if name not in COIN_PAIRS:
                    continue

                records.append({
                    'id': '{}_{}_{}'.format('coin', exchange_str, code),
                    'entity_id': '{}_{}_{}'.format('coin', exchange_str, code),
                    'exchange': exchange_str,
                    'entity_type': 'coin',
                    'code': code,
                    'name': name
                })

            # persist this exchange's coin pairs
            # fix: build the frame in one shot — DataFrame.append in a loop was
            # quadratic and is removed in pandas>=2.0; also dropped the dead `aa` list
            if records:
                df = pd.DataFrame.from_records(records)
                df_to_db(df=df, data_schema=self.data_schema,
                         provider=self.provider, force_update=True)
                self.logger.info(
                    "init_markets for {} success".format(exchange_str))
        except Exception:
            # fix: logger.exception took the exception as a stray positional
            # format arg; the traceback is attached automatically
            self.logger.exception(f"init_markets for {exchange_str} failed")
def download_stock_list(self, response, exchange):
    """Parse the downloaded stock list of one exchange ('sh' text / 'sz' xlsx)
    and persist it into both the recorder schema and StockDetail.

    :param response: HTTP response holding the raw list file
    :param exchange: 'sh' or 'sz'
    """
    df = None
    if exchange == 'sh':
        # fix: raw string for the regex separator (avoids invalid-escape warning)
        df = pd.read_csv(io.BytesIO(response.content), sep=r'\s+',
                         encoding='GB2312', dtype=str,
                         parse_dates=['上市日期'])
        if df is not None:
            df = df.loc[:, ['公司代码', '公司简称', '上市日期']]
    elif exchange == 'sz':
        df = pd.read_excel(io.BytesIO(response.content), sheet_name='A股列表',
                           dtype=str, parse_dates=['A股上市日期'])
        if df is not None:
            df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]

    if df is not None:
        df.columns = ['code', 'name', 'list_date']
        df = df.dropna(subset=['code'])

        # handle the dirty data
        # 600996,贵广网络,2016-12-26,2016-12-26,sh,stock,stock_sh_600996,,次新股,贵州,,
        df.loc[df['code'] == '600996', 'list_date'] = '2016-12-26'
        # fix: removed leftover debug print of rows with list_date == '-'
        df['list_date'] = df['list_date'].apply(
            lambda x: to_pd_timestamp(x))
        df['exchange'] = exchange
        df['entity_type'] = 'stock'
        df['id'] = df[['entity_type', 'exchange', 'code']].apply(
            lambda x: '_'.join(x.astype(str)), axis=1)
        df['entity_id'] = df['id']
        df['timestamp'] = df['list_date']
        df = df.dropna(axis=0, how='any')
        # fix: subset=('id') was just the string 'id' in disguise; use a list
        df = df.drop_duplicates(subset=['id'], keep='last')
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=False)
        # persist StockDetail too
        df_to_db(df=df, data_schema=StockDetail, provider=self.provider,
                 force_update=False)
        self.logger.info(df.tail())
        # fix: typo "successs"
        self.logger.info("persist stock list success")
def run(self):
    """Record industry/concept indices from sina and then their constituent
    stocks (up to 4 pages per index)."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        resp.encoding = 'GBK'

        tmp_str = resp.text
        # the payload embeds a single JSON object inside a JS snippet
        json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
        tmp_json = json.loads(json_str)
        for code in tmp_json:
            name = tmp_json[code].split(',')[1]
            id = 'index_cn_{}'.format(code)
            if id in self.index_ids:
                continue
            # fix: also set entity_id, consistent with the sibling sina index recorder
            self.session.add(Index(id=id, entity_id=id, entity_type='index',
                                   exchange='cn', code=code, name=name,
                                   category=category.value))
    self.session.commit()

    indices = get_entities(session=self.session, entity_type='index',
                           return_type='domain',
                           filters=[
                               Index.category.in_([StockCategory.industry.value,
                                                   StockCategory.concept.value])],
                           provider=self.provider)

    for index_item in indices:
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, index_item.code))
            try:
                # an empty page marks the end of the pagination
                if resp.text == 'null' or resp.text is None:
                    break
                category_jsons = demjson.decode(resp.text)
                the_list = []
                for category in category_jsons:
                    stock_code = category['code']
                    stock_id = china_stock_code_to_id(stock_code)
                    index_id = index_item.id
                    the_list.append({
                        'id': '{}_{}'.format(index_id, stock_id),
                        'index_id': index_id,
                        'stock_id': stock_id
                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider)
                self.logger.info('finish recording index:{},{}'.format(
                    index_item.category, index_item.name))
            except Exception as e:
                # fix: the old logger.error call passed the exception and response
                # body as positional args with no matching format placeholders
                self.logger.error('error: %s, resp.text: %s', e, resp.text, exc_info=True)
            self.sleep()
def fetch_csi_index_component(self, df: pd.DataFrame):
    """
    Fetch the constituent stocks of SSE / CSI indices.
    """
    query_url = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'

    for _, row in df.iterrows():
        code = row['code']
        try:
            response = requests.get(query_url.format(code))
            response.raise_for_status()
        except requests.HTTPError as error:
            self.logger.error(f'{row["name"]} - {code} 成分股抓取错误 ({error})')
            continue

        cons_df = pd.read_excel(io.BytesIO(response.content))
        cons_df = cons_df[['成分券代码Constituent Code',
                           '成分券名称Constituent Name']].rename(
            columns={'成分券代码Constituent Code': 'stock_code',
                     '成分券名称Constituent Name': 'stock_name'})

        index_id = f'index_cn_{code}'
        cons_df['entity_id'] = index_id
        cons_df['entity_type'] = 'index'
        cons_df['exchange'] = 'cn'
        cons_df['code'] = code
        cons_df['name'] = row['name']
        cons_df['timestamp'] = now_pd_timestamp()

        cons_df['stock_id'] = cons_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        cons_df['id'] = cons_df['stock_id'].apply(lambda x: f'{index_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=cons_df,
                 provider=self.provider, force_update=True)
        self.logger.info(f'{row["name"]} - {code} 成分股抓取完成...')

        self.sleep()
def persist_index(self, df) -> None:
    """Normalize an index dataframe (timestamps, ids, entity columns) and store it."""
    df['timestamp'] = df['timestamp'].apply(to_pd_timestamp)
    df['list_date'] = df['list_date'].apply(to_pd_timestamp)
    df['id'] = df['code'].apply(lambda c: f'index_cn_{c}')
    df['entity_id'] = df['id']
    df['exchange'] = 'cn'
    df['entity_type'] = 'index'

    # drop incomplete rows, keep only the latest row per id
    cleaned = df.dropna(axis=0, how='any').drop_duplicates(subset='id', keep='last')

    df_to_db(df=cleaned, data_schema=Index, provider=self.provider,
             force_update=False)
def fetch_cni_index_component(self, df: pd.DataFrame):
    """
    Fetch the constituent stocks of CNI (国证) indices.
    """
    query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

    for _, index in df.iterrows():
        index_code = index['code']
        url = query_url.format(index_code)
        try:
            response = requests.get(url)
            response.raise_for_status()
        except requests.HTTPError as error:
            self.logger.error(
                f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
            continue

        response_df = pd.read_excel(io.BytesIO(response.content), dtype='str')

        index_id = f'index_cn_{index_code}'
        # the code column name differs between downloaded files
        try:
            response_df = response_df[['样本股代码']]
        except KeyError:
            response_df = response_df[['证券代码']]
        # fix: rename the single column BEFORE adding the other columns — the
        # old code assigned a 1-element column list to a frame that already had
        # 7 columns, raising a length-mismatch ValueError
        response_df.columns = ['stock_code']

        response_df['entity_id'] = index_id
        response_df['entity_type'] = 'index'
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index['name']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df,
                 provider=self.provider)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

        self.sleep()
def download_sh_etf_component(self, df: pd.DataFrame):
    """Fetch the constituent stocks of SSE ETFs.

    ETF_CLASS codes: 1. single-market ETF  2. cross-market ETF
    3. cross-border ETF  5. bond ETF  6. gold ETF
    :param df: ETF list dataframe
    :return: None
    """
    query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                'isPagination=false&type={}&etfClass={}'

    # only single-market (1) and cross-market (2) ETFs are handled here
    etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
    etf_df = self.populate_sh_etf_type(etf_df)

    for _, etf in etf_df.iterrows():
        url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        response_dict = demjson.decode(response.text)
        response_df = pd.DataFrame(response_dict.get('result', []))

        etf_code = etf['FUND_ID']
        etf_id = f'etf_sh_{etf_code}'
        response_df = response_df[['instrumentId', 'instrumentName']].copy()
        response_df.rename(columns={
            'instrumentId': 'stock_code',
            'instrumentName': 'stock_name'
        }, inplace=True)

        response_df['entity_id'] = etf_id
        response_df['entity_type'] = 'etf'
        response_df['exchange'] = 'sh'
        response_df['code'] = etf_code
        response_df['name'] = etf['FUND_NAME']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda code: china_stock_code_to_id(code))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{etf_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df,
                 provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
def persist_etf_list(self, df: pd.DataFrame, exchange: str):
    """Normalize and persist the etf list downloaded from one exchange."""
    if df is None:
        return

    etf_df = df.copy()
    # each exchange file carries different column names for code/name
    if exchange == 'sh':
        etf_df = etf_df[['FUND_ID', 'FUND_NAME']]
    elif exchange == 'sz':
        etf_df = etf_df[['证券代码', '证券简称']]
    etf_df.columns = ['code', 'name']

    etf_df['id'] = etf_df['code'].apply(lambda code: f'etf_{exchange}_{code}')
    etf_df['entity_id'] = etf_df['id']
    etf_df['exchange'] = exchange
    etf_df['entity_type'] = 'etf'
    etf_df['category'] = BlockCategory.etf.value

    etf_df = etf_df.dropna(axis=0, how='any')
    etf_df = etf_df.drop_duplicates(subset='id', keep='last')

    df_to_db(df=etf_df, data_schema=Etf, provider=self.provider,
             force_update=False)
def record(self, entity, start, end, size, timestamps):
    """Record the constituent stocks of one block from sina.

    :param entity: the block entity whose constituent stocks are recorded
    """
    resp = requests.get(self.category_stocks_url.format(entity.code, '1'))
    try:
        results = json_callback_param(resp.text)
        the_list = []
        for result in results:
            items = result.split(',')
            stock_code = items[1]
            stock_id = china_stock_code_to_id(stock_code)
            block_id = entity.id
            the_list.append({
                'id': '{}_{}'.format(block_id, stock_id),
                'entity_id': block_id,
                'entity_type': 'block',
                'exchange': entity.exchange,
                'code': entity.code,
                'name': entity.name,
                'timestamp': now_pd_timestamp(),
                'stock_id': stock_id,
                'stock_code': stock_code,
                'stock_name': items[2],
            })
        if the_list:
            df = pd.DataFrame.from_records(the_list)
            df_to_db(data_schema=self.data_schema, df=df,
                     provider=self.provider, force_update=True)
        self.logger.info('finish recording block:{},{}'.format(
            entity.category, entity.name))
    except Exception as e:
        # fix: the old logger.error call passed the exception and response
        # body as positional args with no matching format placeholders
        self.logger.error('error: %s, resp.text: %s', e, resp.text, exc_info=True)
    self.sleep()
def persist_result(self):
    """Write the computed factor dataframe to the 'zvt' provider's store."""
    df_to_db(provider='zvt',
             data_schema=self.factor_schema,
             df=self.factor_df)
def record(self, entity, start, end, size, timestamps):
    """Compute daily etf valuations (pe/pe_ttm/pb/ps/pcf) from the valuations of
    the etf's constituent stocks: a proportion-weighted value stored as
    '<col>1' and a simple arithmetic average stored as '<col>'."""
    if not end:
        end = now_pd_timestamp()
    date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
    for date in date_range:
        # the stocks contained in the etf and their proportions
        etf_stock_df = get_etf_stocks(code=entity.code, timestamp=date,
                                      provider=self.provider)
        if pd_is_not_null(etf_stock_df):
            all_pct = etf_stock_df['proportion'].sum()
            # proportions far from 100% indicate bad data — give up on this etf
            if all_pct >= 1.2 or all_pct <= 0.8:
                self.logger.error(
                    f'ignore etf:{entity.id} date:{date} proportion sum:{all_pct}'
                )
                break
            etf_stock_df.set_index('stock_id', inplace=True)
            # valuation data of the individual stocks
            stock_valuation_df = StockValuation.query_data(
                entity_ids=etf_stock_df.index.to_list(),
                filters=[StockValuation.timestamp == date],
                index='entity_id')
            if pd_is_not_null(stock_valuation_df):
                stock_count = len(etf_stock_df)
                valuation_count = len(stock_valuation_df)
                self.logger.info(
                    f'etf:{entity.id} date:{date} stock count: {stock_count},'
                    f'valuation count:{valuation_count}')
                # too many constituents lack valuations — result would be unreliable
                pct = abs(stock_count - valuation_count) / stock_count
                if pct >= 0.2:
                    self.logger.error(
                        f'ignore etf:{entity.id} date:{date} pct:{pct}')
                    break
                se = pd.Series({
                    'id': "{}_{}".format(entity.id, date),
                    'entity_id': entity.id,
                    'timestamp': date,
                    'code': entity.code,
                    'name': entity.name
                })
                for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                    # PE = P/E
                    # idea: treat each price as the PE itself, so each Earning
                    # is 1 (or -1 when losing money); the result is
                    # total price (PE) / total Earning
                    value = 0
                    price = 0

                    # proportion-weighted valuation
                    positive_df = stock_valuation_df[[
                        col
                    ]][stock_valuation_df[col] > 0]
                    positive_df['count'] = 1
                    positive_df = positive_df.multiply(
                        etf_stock_df["proportion"], axis="index")
                    if pd_is_not_null(positive_df):
                        value = positive_df['count'].sum()
                        price = positive_df[col].sum()

                    negative_df = stock_valuation_df[[
                        col
                    ]][stock_valuation_df[col] < 0]
                    if pd_is_not_null(negative_df):
                        negative_df['count'] = 1
                        negative_df = negative_df.multiply(
                            etf_stock_df["proportion"], axis="index")
                        value = value - negative_df['count'].sum()
                        price = price + negative_df[col].sum()

                    # NOTE(review): value can be 0 when no valuation rows match,
                    # which would raise ZeroDivisionError — confirm upstream guards
                    se[f'{col}1'] = price / value

                    # simple arithmetic-average valuation
                    positive_df = stock_valuation_df[col][
                        stock_valuation_df[col] > 0]
                    positive_count = len(positive_df)

                    negative_df = stock_valuation_df[col][
                        stock_valuation_df[col] < 0]
                    negative_count = len(negative_df)

                    value = positive_count - negative_count
                    price = positive_df.sum() + abs(negative_df.sum())
                    se[col] = price / value
                df = se.to_frame().T

                self.logger.info(df)
                df_to_db(df=df, data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)

    return None
def record(self, entity, start, end, size, timestamps):
    """Compute a simple arithmetic-average valuation (pe/pe_ttm/pb/ps/pcf)
    for an etf on each day between start and end, derived from its
    constituent stocks' valuations."""
    if not end:
        end = now_pd_timestamp()
    date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
    for date in date_range:
        # the stocks contained in the etf and their proportions
        etf_stock_df = get_etf_stocks(code=entity.code, timestamp=date,
                                      provider=self.provider)
        if pd_is_not_null(etf_stock_df):
            # fix: only inspect proportions when there is data; previously the
            # sum ran before the null check and could crash on a None result
            all_pct = etf_stock_df['proportion'].sum()

            if all_pct >= 1.1 or all_pct <= 0.9:
                self.logger.info(
                    f'etf:{entity.id} date:{date} proportion sum:{all_pct}')

            etf_stock_df.set_index('stock_id', inplace=True)

            # valuation data of the individual stocks
            stock_valuation_df = StockValuation.query_data(
                entity_ids=etf_stock_df.index.to_list(),
                filters=[StockValuation.timestamp == date],
                index='entity_id')

            if pd_is_not_null(stock_valuation_df):
                # Only the simple arithmetic average is supported for now.
                # Rationale: "vaguely right" beats "precisely wrong" — A-share
                # market caps differ so widely that a cap-weighted valuation
                # hardly reflects the whole basket.
                self.logger.info(
                    f'etf:{entity.id} date:{date} stock count: {len(etf_stock_df)},valuation count:{len(stock_valuation_df)}'
                )

                se = pd.Series({
                    'id': "{}_{}".format(entity.id, date),
                    'entity_id': entity.id,
                    'timestamp': date,
                    'code': entity.code,
                    'name': entity.name
                })
                for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                    # PE = P/E
                    # idea: set every price to 1, derive the total earnings,
                    # then divide count by it
                    positive_df = stock_valuation_df[col][
                        stock_valuation_df[col] > 0]
                    positive_count = len(positive_df)

                    negative_df = stock_valuation_df[col][
                        stock_valuation_df[col] < 0]
                    negative_count = len(negative_df)

                    # NOTE(review): when one side is empty its mean() is NaN and
                    # the whole result becomes NaN — confirm this is intended
                    result = (positive_count + negative_count) / (
                        positive_count / positive_df.mean() +
                        negative_count / negative_df.mean())

                    se[col] = result
                df = se.to_frame().T

                self.logger.info(df)
                df_to_db(df=df, data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=self.force_update)

    return None
def init_main_index(provider='exchange'):
    """Seed the Index table with the built-in main Chinese stock indices.

    :param provider: data provider key used for persistence
    """
    for entry in CHINA_STOCK_MAIN_INDEX:
        entry['timestamp'] = to_pd_timestamp(entry['timestamp'])
    index_df = pd.DataFrame(CHINA_STOCK_MAIN_INDEX)
    df_to_db(df=index_df, data_schema=Index, provider=provider,
             force_update=False)
def persist_result(self):
    """Persist the factor dataframe to 'zvt' without overwriting existing rows."""
    df_to_db(provider='zvt',
             data_schema=self.factor_schema,
             df=self.factor_df,
             force_update=False)