def run(self):
    """Record the Sina blocks of every category in ``self.category_map_url``.

    For each ``(category, url)`` pair the response is decoded as GBK, the
    text between the first ``{`` and the first ``}`` is parsed as JSON, and
    one block entity per key is written through ``df_to_db``.
    """
    session = get_http_session()

    for category, url in self.category_map_url.items():
        response = request_get(session, url)
        response.encoding = 'GBK'

        # The JSON object is embedded in surrounding text: keep only the span
        # from the first '{' up to and including the first '}'.
        body = response.text
        start = body.index('{')
        end = body.index('}')
        payload = json.loads(body[start:end + 1])

        records = []
        for code, raw_value in payload.items():
            # The block name is the second comma-separated field of the value.
            block_name = raw_value.split(',')[1]
            block_id = f'block_cn_{code}'
            records.append({
                'id': block_id,
                'entity_id': block_id,
                'entity_type': EntityType.Block.value,
                'exchange': 'cn',
                'code': code,
                'name': block_name,
                'category': category.value,
            })

        if records:
            frame = pd.DataFrame.from_records(records)
            df_to_db(df=frame,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)

        self.logger.info(f"finish record sina blocks:{category.value}")
def run(self):
    """Record the blocks of every category in ``self.category_map_url``.

    Each response body is handed to ``json_callback_param``; every returned
    entry is split on commas, with field 1 used as the block code and field 2
    as the block name. The resulting rows are written through ``df_to_db``.
    """
    session = get_http_session()

    for category, url in self.category_map_url.items():
        response = request_get(session, url)
        entries = json_callback_param(response.text)

        records = []
        for entry in entries:
            fields = entry.split(',')
            code, block_name = fields[1], fields[2]
            block_id = f'block_cn_{code}'
            records.append({
                'id': block_id,
                'entity_id': block_id,
                'entity_type': EntityType.Block.value,
                'exchange': 'cn',
                'code': code,
                'name': block_name,
                'category': category.value,
            })

        if records:
            frame = pd.DataFrame.from_records(records)
            df_to_db(df=frame,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)

        self.logger.info(f"finish record sina blocks:{category.value}")
def run(self):
    """Download and persist the ETF lists and ETF components for SH and SZ.

    The Shanghai list is decoded from the query response with ``demjson``;
    the Shenzhen list is read from a downloaded Excel payload. Each list is
    passed to ``persist_etf_list`` and then to the matching
    ``download_*_etf_component`` method.
    """
    session = get_http_session()

    # Fetch the Shanghai ETF list.
    sh_url = 'http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW'
    sh_response = request_get(session, sh_url, headers=DEFAULT_SH_ETF_LIST_HEADER)
    sh_payload = demjson.decode(sh_response.text)
    sh_etfs = pd.DataFrame(sh_payload.get('result', []))
    self.persist_etf_list(sh_etfs, exchange='sh')
    self.logger.info('沪市 ETF 列表抓取完成...')

    # Fetch the Shanghai ETF constituent stocks.
    self.download_sh_etf_component(sh_etfs, session)
    self.logger.info('沪市 ETF 成分股抓取完成...')

    # Fetch the Shenzhen ETF list (served as an xlsx download).
    sz_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945'
    sz_response = request_get(session, sz_url)
    sz_workbook = io.BytesIO(sz_response.content)
    sz_etfs = pd.read_excel(sz_workbook, dtype=str)
    self.persist_etf_list(sz_etfs, exchange='sz')
    self.logger.info('深市 ETF 列表抓取完成...')

    # Fetch the Shenzhen ETF constituent stocks.
    self.download_sz_etf_component(sz_etfs, session)
    self.logger.info('深市 ETF 成分股抓取完成...')
def run(self):
    """Fetch index data using one shared HTTP session.

    Calls ``fetch_csi_index`` first and ``fetch_szse_index`` second.
    """
    session = get_http_session()

    # SSE and CSI indices.
    self.fetch_csi_index(session)

    # SZSE indices.
    self.fetch_szse_index(session)
def run(self):
    """Download the company list of NYSE, NASDAQ and AMEX, in that order.

    Each response is handed to ``download_stock_list`` together with the
    exchange name it was requested for.
    """
    session = get_http_session()
    url_template = 'https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&render=download&exchange={}'

    for exchange in ('NYSE', 'NASDAQ', 'AMEX'):
        response = request_get(session,
                               url_template.format(exchange),
                               headers=YAHOO_STOCK_LIST_HEADER)
        self.download_stock_list(response=response, exchange=exchange)
def run(self):
    """Download the SH stock list, then the SZ stock list.

    Each response is passed to ``download_stock_list`` with its exchange
    code (``'sh'`` or ``'sz'``).
    """
    session = get_http_session()

    # (exchange code, download URL, request headers), processed in order.
    sources = (
        ('sh',
         'http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType=1',
         DEFAULT_SH_HEADER),
        ('sz',
         'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1110&TABKEY=tab1&random=0.20932135244582617',
         DEFAULT_SZ_HEADER),
    )

    for exchange, url, headers in sources:
        response = request_get(session, url, headers=headers)
        self.download_stock_list(response=response, exchange=exchange)
def run(self):
    """Process every entity while showing a per-worker tqdm progress bar.

    Loads the trade-day timestamps and the stock-detail frame once, calls
    ``self.process_loop`` for each item of ``self.entities``, and finally
    calls ``self.on_finish()``.

    ``self.share_para`` is read positionally: ``[0]`` is the label shown in
    the bar description, ``[1]`` bounds the random start-up delay and the
    worker-id modulo, ``[2]`` is acquired/released around each bar update,
    and ``[3]`` is forwarded to tqdm as ``leave``.
    """
    http_session = get_http_session()

    trade_days = StockTradeDay.query_data(region=self.region,
                                          order=StockTradeDay.timestamp.desc(),
                                          return_type='domain')
    trade_day = [day.timestamp for day in trade_days]

    stock_detail = StockDetail.query_data(region=self.region,
                                          columns=['entity_id', 'end_date'],
                                          index=['entity_id'],
                                          return_type='df')

    # Random delay of 0..share_para[1] seconds before starting
    # (presumably to stagger the workers -- confirm with the caller).
    time.sleep(random.randint(0, self.share_para[1]))

    # NOTE(review): `_identity` is a private multiprocessing attribute.
    process_identity = multiprocessing.current_process()._identity
    if process_identity:
        # The worker process tqdm bar shall start at position 1.
        worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
    else:
        worker_id = 0
    desc = "{:02d}: {}".format(worker_id, self.share_para[0])

    with tqdm(total=len(self.entities), ncols=80, position=worker_id,
              desc=desc, leave=self.share_para[3]) as pbar:
        for entity_item in self.entities:
            self.process_loop(entity_item, trade_day, stock_detail, http_session)

            # Release the lock even if the bar update raises; otherwise it
            # would stay held and block anything else waiting on it.
            progress_lock = self.share_para[2]
            progress_lock.acquire()
            try:
                pbar.update()
            finally:
                progress_lock.release()

    self.on_finish()
def run(self):
    """Fetch company profile and issuance data from eastmoney per entity.

    For each item of ``self.entities`` two POST requests are made
    (``GetJiBenZiLiao`` and ``GetFaXingXiangGuan``); the returned fields are
    copied onto the entity and ``self.session`` is committed. Progress is
    shown in a per-worker tqdm bar.

    Entities whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` are skipped
    with a warning: no request code can be built for them, and reusing the
    code of the previously processed entity would store another stock's data.

    ``self.share_para`` is read positionally: ``[0]`` is the label shown in
    the bar description, ``[1]`` bounds the random start-up delay and the
    worker-id modulo, ``[2]`` is acquired/released around each bar update,
    and ``[3]`` is forwarded to tqdm as ``leave``.
    """
    # Random delay of 0..share_para[1] seconds before starting
    # (presumably to stagger the workers -- confirm with the caller).
    time.sleep(random.randint(0, self.share_para[1]))

    # NOTE(review): `_identity` is a private multiprocessing attribute.
    process_identity = multiprocessing.current_process()._identity
    if process_identity:
        # The worker process tqdm bar shall start at position 1.
        worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
    else:
        worker_id = 0
    desc = "{:02d} : {}".format(worker_id, self.share_para[0])

    # Suffix appended to the stock code to build the `fc` request parameter.
    exchange_suffixes = {'sh': '01', 'sz': '02'}

    with tqdm(total=len(self.entities), ncols=80, position=worker_id,
              desc=desc, leave=self.share_para[3]) as pbar:
        http_session = get_http_session()

        def advance_progress():
            # Release the lock even if the bar update raises; otherwise it
            # would stay held and block anything else waiting on it.
            progress_lock = self.share_para[2]
            progress_lock.acquire()
            try:
                pbar.update()
            finally:
                progress_lock.release()

        for security_item in self.entities:
            assert isinstance(security_item, StockDetail)

            suffix = exchange_suffixes.get(security_item.exchange)
            if suffix is None:
                self.logger.warning(
                    'skip stock meta for: {}, unsupported exchange: {}'.format(
                        security_item.code, security_item.exchange))
                advance_progress()
                continue
            fc = "{}{}".format(security_item.code, suffix)

            # Basic company information.
            # NOTE(review): "SecurityCode" is hard-coded to SZ300059 for every
            # entity; only `fc` varies -- confirm the API ignores this field.
            param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
            resp = request_post(
                http_session,
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                json=param)
            resp.encoding = 'utf8'
            resp_json = resp.json()['Result']['JiBenZiLiao']

            security_item.profile = resp_json['CompRofile']
            security_item.main_business = resp_json['MainBusiness']
            security_item.date_of_establishment = to_pd_timestamp(
                resp_json['FoundDate'])

            # Related industry: '-' separated in the response, stored ',' separated.
            industry = ','.join(resp_json['Industry'].split('-'))
            security_item.industry = industry

            # Related concepts.
            security_item.concept_indices = resp_json['Block']

            # Related region.
            security_item.area_indices = resp_json['Provice']

            # Issuance information.
            param = {"color": "w", "fc": fc}
            resp = request_post(
                http_session,
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                json=param)
            resp.encoding = 'utf8'
            resp_json = resp.json()['Result']['FaXingXiangGuan']

            security_item.issue_pe = to_float(resp_json['PEIssued'])
            security_item.price = to_float(resp_json['IssuePrice'])
            security_item.issues = to_float(resp_json['ShareIssued'])
            security_item.raising_fund = to_float(resp_json['NetCollection'])
            security_item.net_winning_rate = pct_to_float(
                resp_json['LotRateOn'])

            self.session.commit()

            self.logger.info('finish recording stock meta for: {}'.format(
                security_item.code))

            advance_progress()

            self.sleep()
def __init__(self) -> None:
    """Create the HTTP session, refresh the token, and register a repeat job.

    ``self.http_session`` is assigned before the first ``refresh_token()``
    call. The same method is then registered with ``schedule`` at a
    10-minute interval.
    """
    self.http_session = get_http_session()

    # Refresh once immediately.
    self.refresh_token()

    # Register the periodic refresh with the `schedule` library.
    every_ten_minutes = schedule.every(10).minutes
    every_ten_minutes.do(self.refresh_token)