def run(self):
        """Record the sina block list for every configured category.

        For each ``(category, url)`` pair in ``self.category_map_url`` the
        response is decoded as GBK, the JSON object embedded in the body is
        parsed, and one record per code is written through ``df_to_db`` with
        ``force_update=True``. A log line is emitted per category whether or
        not any record was produced.
        """
        session = get_http_session()
        for category, url in self.category_map_url.items():
            response = request_get(session, url)
            response.encoding = 'GBK'

            body = response.text
            # Keep only the span from the first '{' through the first '}';
            # whatever surrounds it in the payload is discarded.
            start = body.index('{')
            end = body.index('}')
            blocks_by_code = json.loads(body[start:end + 1])

            records = []
            for code, raw_value in blocks_by_code.items():
                # The block name is the second comma-separated field.
                block_name = raw_value.split(',')[1]
                block_id = f'block_cn_{code}'
                record = {
                    'id': block_id,
                    'entity_id': block_id,
                    'entity_type': EntityType.Block.value,
                    'exchange': 'cn',
                    'code': code,
                    'name': block_name,
                    'category': category.value
                }
                records.append(record)

            # Nothing is written for a category that yielded no records.
            if records:
                df_to_db(df=pd.DataFrame.from_records(records),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)

            self.logger.info(f"finish record sina blocks:{category.value}")
Exemplo n.º 2
0
    def run(self):
        """Record the block list for every configured category.

        Each URL in ``self.category_map_url`` is fetched and its body handed
        to ``json_callback_param``; every resulting entry is treated as a
        comma-separated string whose fields 1 and 2 are the block code and
        name. Records are written through ``df_to_db`` with
        ``force_update=True``.
        """
        session = get_http_session()

        for category, url in self.category_map_url.items():
            response = request_get(session, url)
            # presumably unwraps a JSONP-style callback payload -- confirm
            # against json_callback_param's definition.
            entries = json_callback_param(response.text)

            records = []
            for entry in entries:
                fields = entry.split(',')
                code, block_name = fields[1], fields[2]
                block_id = f'block_cn_{code}'
                record = {
                    'id': block_id,
                    'entity_id': block_id,
                    'entity_type': EntityType.Block.value,
                    'exchange': 'cn',
                    'code': code,
                    'name': block_name,
                    'category': category.value
                }
                records.append(record)

            # Nothing is written for a category that yielded no records.
            if records:
                df_to_db(df=pd.DataFrame.from_records(records),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)
            self.logger.info(f"finish record sina blocks:{category.value}")
Exemplo n.º 3
0
    def run(self):
        """Record ETF lists and ETF components for the 'sh' and 'sz' exchanges.

        The 'sh' list comes from a JSON endpoint on query.sse.com.cn and the
        'sz' list from an xlsx report on www.szse.cn. Each list is persisted
        with ``persist_etf_list`` and then passed to the matching
        ``download_*_etf_component`` method, reusing one HTTP session.
        """
        session = get_http_session()

        # Fetch the Shanghai ETF list.
        sh_list_url = 'http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW'
        sh_response = request_get(session,
                                  sh_list_url,
                                  headers=DEFAULT_SH_ETF_LIST_HEADER)
        sh_payload = demjson.decode(sh_response.text)

        # A payload without a 'result' key produces an empty frame.
        sh_etf_df = pd.DataFrame(sh_payload.get('result', []))
        self.persist_etf_list(sh_etf_df, exchange='sh')
        self.logger.info('沪市 ETF 列表抓取完成...')

        # Fetch the Shanghai ETF component stocks.
        self.download_sh_etf_component(sh_etf_df, session)
        self.logger.info('沪市 ETF 成分股抓取完成...')

        # Fetch the Shenzhen ETF list.
        sz_list_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945'
        sz_response = request_get(session, sz_list_url)

        # The report is a spreadsheet; every column is read as a string.
        sz_workbook = io.BytesIO(sz_response.content)
        sz_etf_df = pd.read_excel(sz_workbook, dtype=str)
        self.persist_etf_list(sz_etf_df, exchange='sz')
        self.logger.info('深市 ETF 列表抓取完成...')

        # Fetch the Shenzhen ETF component stocks.
        self.download_sz_etf_component(sz_etf_df, session)
        self.logger.info('深市 ETF 成分股抓取完成...')
    def run(self):
        """Fetch index data, sharing one HTTP session across both fetchers.

        ``fetch_csi_index`` runs first, then ``fetch_szse_index``.
        """
        session = get_http_session()

        # SSE (Shanghai) and CSI indices.
        self.fetch_csi_index(session)

        # SZSE (Shenzhen) indices.
        self.fetch_szse_index(session)
    def run(self):
        """Download the company list of each US exchange handled here.

        One request per exchange ('NYSE', 'NASDAQ', 'AMEX', in that order) is
        sent to old.nasdaq.com and the response is handed to
        ``download_stock_list`` together with the exchange name.
        """
        session = get_http_session()

        url_template = 'https://old.nasdaq.com/screening/companies-by-name.aspx?letter=0&render=download&exchange={}'

        for exchange in ('NYSE', 'NASDAQ', 'AMEX'):
            listing_url = url_template.format(exchange)
            listing = request_get(session, listing_url, headers=YAHOO_STOCK_LIST_HEADER)
            self.download_stock_list(response=listing, exchange=exchange)
    def run(self):
        """Download the 'sh' and 'sz' stock lists and pass each on for processing.

        The 'sh' list is requested from query.sse.com.cn and the 'sz' list
        from www.szse.cn, each with its own header set; both responses go to
        ``download_stock_list`` with the matching exchange code.
        """
        session = get_http_session()

        # 'sh' exchange listing.
        sh_url = 'http://query.sse.com.cn/security/stock/downloadStockListFile.do?csrcCode=&stockCode=&areaName=&stockType=1'
        sh_listing = request_get(session, sh_url, headers=DEFAULT_SH_HEADER)
        self.download_stock_list(response=sh_listing, exchange='sh')

        # 'sz' exchange listing.
        sz_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1110&TABKEY=tab1&random=0.20932135244582617'
        sz_listing = request_get(session, sz_url, headers=DEFAULT_SZ_HEADER)
        self.download_stock_list(response=sz_listing, exchange='sz')
Exemplo n.º 7
0
    def run(self):
        """Process every entity in ``self.entities`` behind a tqdm progress bar.

        Trade-day timestamps and a stock-detail frame are queried once up
        front and passed to ``process_loop`` for each entity, along with a
        shared HTTP session. ``on_finish`` is called after the loop.

        ``self.share_para`` is used positionally here:
          [0] text shown in the bar description,
          [1] integer used as the upper bound of the random start-up delay
              and as the modulus when deriving the bar position,
          [2] an object with ``acquire()``/``release()`` held around each
              bar update,
          [3] value forwarded to tqdm's ``leave`` argument.
        """
        http_session = get_http_session()
        trade_days = StockTradeDay.query_data(region=self.region,
                                              order=StockTradeDay.timestamp.desc(),
                                              return_type='domain')
        trade_day = [day.timestamp for day in trade_days]
        stock_detail = StockDetail.query_data(region=self.region,
                                              columns=['entity_id', 'end_date'],
                                              index=['entity_id'],
                                              return_type='df')

        # Random start-up delay, in seconds, between 0 and share_para[1].
        time.sleep(random.randint(0, self.share_para[1]))
        process_identity = multiprocessing.current_process()._identity
        if process_identity:
            #  The worker process tqdm bar shall start at Position 1
            worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
        else:
            # Empty identity tuple: use position 0.
            worker_id = 0
        desc = "{:02d}: {}".format(worker_id, self.share_para[0])

        with tqdm(total=len(self.entities), ncols=80, position=worker_id, desc=desc, leave=self.share_para[3]) as pbar:
            for entity_item in self.entities:
                self.process_loop(entity_item, trade_day, stock_detail, http_session)
                progress_lock = self.share_para[2]
                progress_lock.acquire()
                try:
                    pbar.update()
                finally:
                    # Always release, so a failing bar update cannot leave
                    # the lock held.
                    progress_lock.release()
        self.on_finish()
    def run(self):
        """Fill in profile and issuance fields for every stock in ``self.entities``.

        For each ``StockDetail`` two POST requests are sent to
        emh5.eastmoney.com (``GetJiBenZiLiao`` and ``GetFaXingXiangGuan``),
        the returned fields are copied onto the entity, and the DB session
        is committed. Progress is shown on a tqdm bar.

        Entities whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` are
        skipped with a warning: no request code can be built for them.

        ``self.share_para`` is used positionally here:
          [0] text shown in the bar description,
          [1] integer used as the upper bound of the random start-up delay
              and as the modulus when deriving the bar position,
          [2] an object with ``acquire()``/``release()`` held around each
              bar update,
          [3] value forwarded to tqdm's ``leave`` argument.
        """
        # Random start-up delay, in seconds, between 0 and share_para[1].
        time.sleep(random.randint(0, self.share_para[1]))
        process_identity = multiprocessing.current_process()._identity
        if process_identity:
            #  The worker process tqdm bar shall start at Position 1
            worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
        else:
            # Empty identity tuple: use position 0.
            worker_id = 0
        desc = "{:02d} : {}".format(worker_id, self.share_para[0])

        # Suffix appended to the stock code to form the "fc" request field.
        fc_suffix_by_exchange = {'sh': '01', 'sz': '02'}

        with tqdm(total=len(self.entities),
                  ncols=80,
                  position=worker_id,
                  desc=desc,
                  leave=self.share_para[3]) as pbar:
            http_session = get_http_session()

            def advance_progress():
                # Release in `finally` so a failing bar update cannot leave
                # the lock held.
                progress_lock = self.share_para[2]
                progress_lock.acquire()
                try:
                    pbar.update()
                finally:
                    progress_lock.release()

            for security_item in self.entities:
                assert isinstance(security_item, StockDetail)

                fc_suffix = fc_suffix_by_exchange.get(security_item.exchange)
                if fc_suffix is None:
                    # Without this guard `fc` would be undefined on the first
                    # item, or would still hold the previous stock's value and
                    # attach that stock's data to this entity.
                    self.logger.warning(
                        'skip stock meta for: {}, unsupported exchange: {}'.format(
                            security_item.code, security_item.exchange))
                    advance_progress()
                    continue
                fc = "{}{}".format(security_item.code, fc_suffix)

                # Basic company information
                # NOTE(review): "SecurityCode" is the same hard-coded value for
                # every stock; presumably the API keys on "fc" -- confirm.
                param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
                resp = request_post(
                    http_session,
                    'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                    json=param)
                resp.encoding = 'utf8'

                resp_json = resp.json()['Result']['JiBenZiLiao']

                security_item.profile = resp_json['CompRofile']
                security_item.main_business = resp_json['MainBusiness']
                security_item.date_of_establishment = to_pd_timestamp(
                    resp_json['FoundDate'])

                # Related industry: '-' separators become ','
                industry = ','.join(resp_json['Industry'].split('-'))
                security_item.industry = industry

                # Related concepts
                security_item.concept_indices = resp_json['Block']

                # Related region
                security_item.area_indices = resp_json['Provice']

                # Issuance information
                param = {"color": "w", "fc": fc}
                resp = request_post(
                    http_session,
                    'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                    json=param)
                resp.encoding = 'utf8'

                resp_json = resp.json()['Result']['FaXingXiangGuan']

                security_item.issue_pe = to_float(resp_json['PEIssued'])
                security_item.price = to_float(resp_json['IssuePrice'])
                security_item.issues = to_float(resp_json['ShareIssued'])
                security_item.raising_fund = to_float(
                    resp_json['NetCollection'])
                security_item.net_winning_rate = pct_to_float(
                    resp_json['LotRateOn'])

                self.session.commit()
                self.logger.info('finish recording stock meta for: {}'.format(
                    security_item.code))

                advance_progress()

                self.sleep()
Exemplo n.º 9
0
 def __init__(self) -> None:
     """Create the HTTP session, call ``refresh_token`` once, and register it to repeat."""
     self.http_session = get_http_session()
     self.refresh_token()
     # Register refresh_token with `schedule` at a 10-minute interval.
     # NOTE(review): registered jobs presumably only fire when something
     # calls schedule.run_pending() -- confirm a runner loop exists.
     ten_minute_job = schedule.every(10).minutes
     ten_minute_job.do(self.refresh_token)