def format(self, entity, df):
    """Turn a raw dividend/financing frame into the stored layout.

    Derives four numeric columns from the raw provider columns, zero-fills
    missing numeric values, makes sure a ``timestamp`` column exists and adds
    the entity bookkeeping columns.

    :param entity: object whose ``id`` and ``code`` are copied onto every row
    :param df: raw frame; it is modified in place
    :return: the same ``df`` object
    """
    # (target column, raw column); the value is taken from index 1 of each cell
    column_pairs = (
        ('dividend_money', 'FenHongZongE'),  # total dividend amount
        ('ipo_issues', 'XinGu'),             # IPO
        ('spo_issues', 'ZengFa'),            # secondary offering
        ('rights_issues', 'PeiFa'),          # rights issue
    )
    for target, raw in column_pairs:
        df[target] = df[raw].apply(lambda cell: to_float(cell[1]))

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def run(self):
    """Fetch company profile and issuance figures for every entity and commit them.

    Two eastmoney endpoints are posted to per entity: ``GetJiBenZiLiao``
    (basic information) and ``GetFaXingXiangGuan`` (issuance data).  The
    returned fields are written onto the entity, the session is committed,
    and ``self.sleep()`` is called after each request.

    Entities whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` are skipped
    with a warning.  Before this change ``fc`` stayed unassigned for them:
    the first such entity raised ``UnboundLocalError`` and later ones
    silently reused the previous entity's ``fc``, storing another company's
    data on them.
    """
    exchange_suffix = {'sh': '01', 'sz': '02'}
    profile_url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao'
    issue_url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan'

    for security_item in self.entities:
        assert isinstance(security_item, StockDetail)

        suffix = exchange_suffix.get(security_item.exchange)
        if suffix is None:
            self.logger.warning(
                'skip stock meta for:{}, unsupported exchange:{}'.format(
                    security_item.code, security_item.exchange))
            continue
        fc = "{}{}".format(security_item.code, suffix)

        # basic information
        param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
        resp = requests.post(profile_url, json=param)
        resp.encoding = 'utf8'
        resp_json = resp.json()['Result']['JiBenZiLiao']

        security_item.profile = resp_json['CompRofile']
        security_item.main_business = resp_json['MainBusiness']
        security_item.date_of_establishment = to_pd_timestamp(
            resp_json['FoundDate'])

        # related industries: '-' separated in the response, stored ',' separated
        security_item.industries = ','.join(resp_json['Industry'].split('-'))
        # related concepts
        security_item.concept_indices = resp_json['Block']
        # related region
        security_item.area_indices = resp_json['Provice']

        self.sleep()

        # issuance data
        param = {"color": "w", "fc": fc}
        resp = requests.post(issue_url, json=param)
        resp.encoding = 'utf8'
        resp_json = resp.json()['Result']['FaXingXiangGuan']

        security_item.issue_pe = to_float(resp_json['PEIssued'])
        security_item.price = to_float(resp_json['IssuePrice'])
        security_item.issues = to_float(resp_json['ShareIssued'])
        security_item.raising_fund = to_float(resp_json['NetCollection'])
        security_item.net_winning_rate = pct_to_float(resp_json['LotRateOn'])

        self.session.commit()
        self.logger.info('finish recording stock meta for:{}'.format(
            security_item.code))

        self.sleep()
def run(self):
    """Record company profile and issuance data for each entity in ``self.entities``.

    Per entity, ``GetJiBenZiLiao`` (basic information) and
    ``GetFaXingXiangGuan`` (issuance data) are requested from eastmoney, the
    parsed values are assigned to the entity and the session is committed.
    ``self.sleep()`` is called after each of the two requests.

    An entity whose ``exchange`` is not ``"sh"`` or ``"sz"`` is now skipped
    with a warning.  Previously no ``fc`` was assigned in that case, so the
    loop either raised ``UnboundLocalError`` or reused the ``fc`` left over
    from the previous entity and saved the wrong company's data.
    """
    suffix_by_exchange = {"sh": "01", "sz": "02"}

    for security_item in self.entities:
        assert isinstance(security_item, StockDetail)

        suffix = suffix_by_exchange.get(security_item.exchange)
        if suffix is None:
            self.logger.warning(
                "skip stock meta for:{}, unsupported exchange:{}".format(
                    security_item.code, security_item.exchange))
            continue
        fc = "{}{}".format(security_item.code, suffix)

        # basic information
        param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
        resp = requests.post(
            "https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao",
            json=param)
        resp.encoding = "utf8"
        resp_json = resp.json()["Result"]["JiBenZiLiao"]

        security_item.profile = resp_json["CompRofile"]
        security_item.main_business = resp_json["MainBusiness"]
        security_item.date_of_establishment = to_pd_timestamp(
            resp_json["FoundDate"])

        # related industries: "-" separated in the response, stored "," separated
        security_item.industries = ",".join(resp_json["Industry"].split("-"))
        # related concepts
        security_item.concept_indices = resp_json["Block"]
        # related region
        security_item.area_indices = resp_json["Provice"]

        self.sleep()

        # issuance data
        param = {"color": "w", "fc": fc}
        resp = requests.post(
            "https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan",
            json=param)
        resp.encoding = "utf8"
        resp_json = resp.json()["Result"]["FaXingXiangGuan"]

        security_item.issue_pe = to_float(resp_json["PEIssued"])
        security_item.price = to_float(resp_json["IssuePrice"])
        security_item.issues = to_float(resp_json["ShareIssued"])
        security_item.raising_fund = to_float(resp_json["NetCollection"])
        security_item.net_winning_rate = pct_to_float(resp_json["LotRateOn"])

        self.session.commit()
        self.logger.info("finish recording stock meta for:{}".format(
            security_item.code))

        self.sleep()
def run(self):
    """Fill profile and issuance fields of every entity from a ``c.css`` query.

    For each entity one ``c.css`` call is made with a fixed indicator list;
    the first row of the result is copied onto the entity, the session is
    committed and ``self.sleep()`` is called.
    """
    indicators = ("COMPANYPROFILE,FOUNDDATE,AREA,CAPITAL,IPOSHARESVOL,IPOPRICE,"
                  "IPONETCOLLECTION,IPOPE,HKSE,IPOPLANISSUEVOL")
    options = "CurType=1,ClassiFication=4,ispandas=1"

    for security_item in self.entities:
        assert isinstance(security_item, StockDetail)

        security = to_jq_entity_id(security_item)

        # basic information
        data = c.css(security, indicators, options)

        security_item.profile = data.COMPANYPROFILE[0]
        established = data.FOUNDDATE.values[0]
        security_item.date_of_establishment = to_pd_timestamp(established)

        # related industries
        security_item.industries = data.HKSE.values[0]
        # related concepts are not filled in by this recorder
        # related region
        security_item.area_indices = data.AREA.values[0]

        # issuance data
        security_item.price = data.IPOPRICE.values[0]
        security_item.issues = data.IPOPLANISSUEVOL.values[0]
        security_item.raising_fund = data.IPONETCOLLECTION.values[0]

        try:
            # first space-separated token of CAPITAL, divided by 10000
            capital_text = data.CAPITAL.values[0].split(' ')[0]
            security_item.register_capital = to_float(capital_text) / 10000
        except AttributeError:
            # CAPITAL value has no ``split`` (not a string): leave the field untouched
            pass

        self.session.commit()
        self.logger.info('finish recording stock meta for:{}'.format(
            security_item.code))

        self.sleep()
def format(self, entity, df):
    """Convert a raw cash-flow frame into the stored layout.

    Every column except ``'Title'`` and the original time field is parsed
    with ``to_float`` from index 0 of each cell.  Columns are then renamed
    through ``cash_flow_map``, numeric gaps are zero-filled, and timestamp,
    report and entity columns are added.

    :param entity: object whose ``id``, ``code`` and ``name`` are copied onto every row
    :param df: raw frame; it is modified in place
    :return: the same ``df`` object
    """
    non_numeric = {'Title', self.get_original_time_field()}
    for column in set(df.columns).difference(non_numeric):
        df[column] = df[column].apply(lambda cell: to_float(cell[0]))

    df.rename(columns=cash_flow_map, inplace=True)

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['report_period'] = df['timestamp'].apply(to_report_period_type)
    df['report_date'] = pd.to_datetime(df['timestamp'])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = self.generate_domain_id(entity, df)
    return df
def numba_boost_up(json_list):
    """Map raw money-flow items to record dicts.

    ``entity`` is not a parameter; it is read from the surrounding scope.

    :param json_list: iterable of mappings with the raw keys read below
    :return: list with one dict per item, in input order
    """
    return [
        {
            'name': entity.name,
            'timestamp': to_pd_timestamp(row['opendate']),
            'close': to_float(row['avg_price']),
            'change_pct': to_float(row['avg_changeratio']),
            # raw turnover is divided by 10000
            'turnover_rate': to_float(row['turnover']) / 10000,
            'net_inflows': to_float(row['netamount']),
            'net_inflow_rate': to_float(row['ratioamount']),
            'net_main_inflows': to_float(row['r0_net']),
            'net_main_inflow_rate': to_float(row['r0_ratio']),
        }
        for row in json_list
    ]
def format(self, entity, df):
    """Convert a raw shareholder frame into the stored layout.

    Adds report columns from the existing ``timestamp`` column, cleans the
    holder code/name, parses the numeric holding columns, fills gaps and
    adds the entity bookkeeping columns.

    :param entity: object whose ``id`` and ``code`` are copied onto every row
    :param df: raw frame; it must already contain ``timestamp``, which is
        read on the first line.  Modified in place.
    :return: the same ``df`` object
    """
    def drop_line_breaks(text):
        return text.replace('\n', '').replace('\r', '')

    df['report_period'] = df['timestamp'].apply(to_report_period_type)
    df['report_date'] = pd.to_datetime(df['timestamp'])

    # holder code
    df['holder_code'] = df['GuDongDaiMa'].astype(str)
    df['holder_code'] = df['holder_code'].apply(drop_line_breaks)
    # holder name
    df['holder_name'] = df['GuDongMingCheng'].astype(str)
    df['holder_name'] = df['holder_name'].apply(drop_line_breaks)

    # (target column, raw column), all parsed with to_float
    numeric_pairs = (
        ('shareholding_numbers', 'ChiGuShu'),   # number of shares held
        ('shareholding_ratio', 'ChiGuBiLi'),    # shareholding ratio
        ('change', 'ZengJian'),                 # change
        ('change_ratio', 'BianDongBiLi'),       # change ratio
    )
    for target, raw in numeric_pairs:
        df[target] = df[raw].apply(to_float)

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    df.fillna(
        value={
            'report_period': "未知",
            'report_date': pd.to_datetime("1900-01-01"),
            'holder_name': "未知",
            'holder_code': "未知",
        },
        inplace=True,
    )

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def format(self, entity, df):
    """Convert a raw rights-issue frame into the stored layout.

    :param entity: object whose ``id`` and ``code`` are copied onto every row
    :param df: raw frame; it is modified in place
    :return: the same ``df`` object
    """
    # (target column, raw column), all parsed with to_float
    numeric_pairs = (
        ('rights_issues', 'ShiJiPeiGu'),
        ('rights_issue_price', 'PeiGuJiaGe'),
        ('rights_raising_fund', 'ShiJiMuJi'),
    )
    for target, raw in numeric_pairs:
        df[target] = df[raw].apply(to_float)

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def request(self, url=None, method='get', param=None, path_fields=None):
    """Fetch one summary record for the timestamp given in ``param``.

    ``param`` is substituted into ``url``; the text between the first ``(``
    and the first ``)`` of the response is decoded with ``demjson``.

    :param url: format string with one placeholder
    :param method: not used; the request is always a GET
    :param param: value formatted into ``url`` and stored as ``timestamp``
    :param path_fields: not used
    :return: a one-element list when exactly one result row has
        ``productType == '1'``, otherwise ``None``
    """
    response = requests.get(url=url.format(param), headers=DEFAULT_SH_SUMMARY_HEADER)
    body = response.text
    payload = body[body.index("(") + 1:body.index(")")]
    rows = demjson.decode(payload)['result']

    matches = [row for row in rows if row['productType'] == '1']
    if len(matches) != 1:
        return None

    summary = matches[0]
    # some older data is missing, so every field defaults to 0.0
    return [{
        'provider': Provider.EXCHANGE.value,
        'timestamp': param,
        'name': '上证指数',
        'pe': to_float(summary['profitRate'], 0.0),
        'total_value': to_float(summary['marketValue1'] + '亿', 0.0),
        'total_tradable_vaule': to_float(summary['negotiableValue1'] + '亿', 0.0),
        'volume': to_float(summary['trdVol1'] + '万', 0.0),
        'turnover': to_float(summary['trdAmt1'] + '亿', 0.0),
        'turnover_rate': to_float(summary['exchangeRate'], 0.0),
    }]
def record(self, entity, start, end, size, timestamps):
    """Collect summary records for the given timestamps.

    One GET request is made per timestamp.  Collection stops early once more
    than 30 records have been gathered.

    :param entity: not used
    :param start: not used
    :param end: not used
    :param size: not used
    :param timestamps: iterable of timestamps to query
    :return: list of record dicts (possibly empty)
    """
    collected = []

    for timestamp in timestamps:
        url = self.url.format(to_time_str(timestamp))
        response = requests.get(url=url, headers=DEFAULT_SH_SUMMARY_HEADER)

        body = response.text
        # keep the text between the first "(" and the first ")"
        payload = body[body.index("(") + 1 : body.index(")")]
        rows = demjson3.decode(payload)["result"]

        matches = [row for row in rows if row["productType"] == "1"]
        if len(matches) != 1:
            continue

        summary = matches[0]
        # some older data is missing, so every field defaults to 0.0
        collected.append(
            {
                "provider": "exchange",
                "timestamp": timestamp,
                "name": "上证指数",
                "pe": to_float(summary["profitRate"], 0.0),
                "total_value": to_float(summary["marketValue1"] + "亿", 0.0),
                "total_tradable_vaule": to_float(summary["negotiableValue1"] + "亿", 0.0),
                "volume": to_float(summary["trdVol1"] + "万", 0.0),
                "turnover": to_float(summary["trdAmt1"] + "亿", 0.0),
                "turnover_rate": to_float(summary["exchangeRate"], 0.0),
            }
        )

        if len(collected) > 30:
            break

    return collected
def format(self, entity, df):
    """Convert a raw manager-trading frame into the stored layout.

    Parses the numeric columns, zero-fills numeric gaps, copies the text
    columns as line-break-free strings and adds timestamp and entity columns.

    :param entity: object whose ``id`` and ``code`` are copied onto every row
    :param df: raw frame; it is modified in place
    :return: the same ``df`` object
    """
    def drop_line_breaks(text):
        return text.replace('\n', '').replace('\r', '')

    # (target column, raw column), all parsed with to_float
    numeric_pairs = (
        ('volume', 'BianDongShuLiang'),
        ('price', 'JiaoYiJunJia'),
        ('holding', 'BianDongHouShuLiang'),
    )
    for target, raw in numeric_pairs:
        df[target] = df[raw].apply(to_float)

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    # (target column, raw column), converted to str and stripped of \n / \r
    text_pairs = (
        ('trading_person', 'BianDongRen'),
        ('trading_way', 'JiaoYiTuJing'),
        ('manager', 'GaoGuanMingCheng'),
        ('manager_position', 'GaoGuanZhiWei'),
        ('relationship_with_manager', 'GaoGuanGuanXi'),
    )
    for target, raw in text_pairs:
        df[target] = df[raw].astype(str)
        df[target] = df[target].apply(drop_line_breaks)

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def record(self, entity, start, end, size, timestamps, http_session):
    """Collect summary records for the given timestamps.

    One ``request_get`` call is made per timestamp.  Collection stops early
    once more than ``self.batch_size`` records have been gathered.

    :param entity: not used
    :param start: not used
    :param end: not used
    :param size: not used
    :param timestamps: iterable of timestamps to query
    :param http_session: session object handed to ``request_get``
    :return: list of record dicts (possibly empty)
    """
    collected = []

    for timestamp in timestamps:
        url = self.url.format(to_time_str(timestamp))
        response = request_get(http_session, url, headers=DEFAULT_SH_SUMMARY_HEADER)

        body = response.text
        # keep the text between the first "(" and the first ")"
        payload = body[body.index("(") + 1:body.index(")")]
        rows = demjson.decode(payload)['result']

        matches = [row for row in rows if row['productType'] == '1']
        if len(matches) != 1:
            continue

        summary = matches[0]
        # some older data is missing, so every field defaults to 0.0
        collected.append({
            'provider': Provider.Exchange.value,
            'timestamp': timestamp,
            'name': '上证指数',
            'pe': to_float(summary['profitRate'], 0.0),
            'total_value': to_float(summary['marketValue1'] + '亿', 0.0),
            'total_tradable_vaule': to_float(summary['negotiableValue1'] + '亿', 0.0),
            'volume': to_float(summary['trdVol1'] + '万', 0.0),
            'turnover': to_float(summary['trdAmt1'] + '亿', 0.0),
            'turnover_rate': to_float(summary['exchangeRate'], 0.0),
        })

        if len(collected) > self.batch_size:
            break

    return collected
def format(self, entity, df):
    """Convert a raw holder-trading frame into the stored layout.

    :param entity: object whose ``id`` and ``code`` are copied onto every row
    :param df: raw frame; it is modified in place
    :return: the same ``df`` object
    """
    # (target column, raw column), all parsed with to_float
    numeric_pairs = (
        ('volume', 'BianDongShuLiang'),
        ('change_pct', 'BianDongBiLi'),
        ('holding_pct', 'BianDongHouChiGuBiLi'),
    )
    for target, raw in numeric_pairs:
        df[target] = df[raw].apply(to_float)

    numeric_part = df.select_dtypes(include=[np.number])
    df.update(numeric_part.fillna(0))

    # holder name as a string without \n / \r
    df['holder_name'] = df['GuDongMingCheng'].astype(str)
    df['holder_name'] = df['holder_name'].apply(
        lambda text: text.replace('\n', '').replace('\r', ''))

    if 'timestamp' in df.columns:
        # NOTE(review): a dtype object is presumably never a ``datetime``
        # instance, so this conversion likely always runs - confirm.
        if not isinstance(df['timestamp'].dtypes, datetime):
            df['timestamp'] = pd.to_datetime(df['timestamp'])
    else:
        df['timestamp'] = pd.to_datetime(df[self.get_original_time_field()])

    df['entity_id'] = entity.id
    df['provider'] = self.provider.value
    df['code'] = entity.code
    df['id'] = self.generate_domain_id(entity, df)
    return df
def record(self, entity, start, end, size, timestamps):
    """Download and parse money-flow rows for ``entity``.

    :param entity: ``category``, ``code`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: passed to ``generate_url`` as ``number``
    :param timestamps: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    url = self.generate_url(category=entity.category, code=entity.code, number=size)
    resp = requests.get(url)

    # These locals must keep their exact names: eval() below runs in this
    # frame, and presumably the response uses these words as bare
    # (unquoted) keys that resolve to the strings assigned here - confirm.
    opendate, avg_price, avg_changeratio = "opendate", "avg_price", 'avg_changeratio'
    turnover, netamount, ratioamount = 'turnover', 'netamount', 'ratioamount'
    r0_net, r0_ratio = 'r0_net', 'r0_ratio'
    r0x_ratio, cnt_r0x_ratio = 'r0x_ratio', 'cnt_r0x_ratio'

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = 'GBK'
        self.logger.error(resp.text)
        time.sleep(60 * 5)

    return [
        {
            'name': entity.name,
            'timestamp': to_pd_timestamp(row['opendate']),
            'close': to_float(row['avg_price']),
            'change_pct': to_float(row['avg_changeratio']),
            # raw turnover is divided by 10000
            'turnover_rate': to_float(row['turnover']) / 10000,
            'net_inflows': to_float(row['netamount']),
            'net_inflow_rate': to_float(row['ratioamount']),
            'net_main_inflows': to_float(row['r0_net']),
            'net_main_inflow_rate': to_float(row['r0_ratio']),
        }
        for row in json_list
    ]
def request(self, url=None, method='post', param=None, path_fields=None):
    """Download and parse money-flow rows from ``param['url']``.

    :param url: not used; the address comes from ``param['url']``
    :param method: not used; the request is always a GET
    :param param: mapping whose ``'url'`` entry is requested
    :param path_fields: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    resp = requests.get(param['url'])

    # These locals must keep their exact names: eval() below runs in this
    # frame, and presumably the response uses these words as bare
    # (unquoted) keys that resolve to the strings assigned here - confirm.
    opendate, avg_price, avg_changeratio = "opendate", "avg_price", 'avg_changeratio'
    turnover, netamount, ratioamount = 'turnover', 'netamount', 'ratioamount'
    r0_net, r0_ratio = 'r0_net', 'r0_ratio'
    r0x_ratio, cnt_r0x_ratio = 'r0x_ratio', 'cnt_r0x_ratio'

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = 'GBK'
        logger.error(resp.text)
        time.sleep(60 * 5)

    return [
        {
            'timestamp': to_pd_timestamp(row['opendate']),
            'close': to_float(row['avg_price']),
            'change_pct': to_float(row['avg_changeratio']),
            # raw turnover is divided by 10000
            'turnover_rate': to_float(row['turnover']) / 10000,
            'net_inflows': to_float(row['netamount']),
            'net_inflow_rate': to_float(row['ratioamount']),
            'net_main_inflows': to_float(row['r0_net']),
            'net_main_inflow_rate': to_float(row['r0_ratio']),
        }
        for row in json_list
    ]
def record(self, entity, start, end, size, timestamps, http_session):
    """Collect summary records for the given timestamps as a DataFrame.

    One ``sync_get`` call is made per timestamp; timestamps for which it
    returns ``None`` are skipped.  Collection stops early once more than
    ``self.batch_size`` records have been gathered.

    Fix: the frame returned for a trailing partial batch used to lack the
    ``entity_id``, ``provider`` and ``name`` columns and the timestamp
    conversion that the full-batch path applied.  Both paths now build the
    frame the same way.

    :param entity: ``id`` is copied onto every row
    :param start: not used
    :param end: not used
    :param size: not used
    :param timestamps: iterable of timestamps to query
    :param http_session: session object handed to ``sync_get``
    :return: DataFrame of records, or ``None`` when nothing was collected
    """
    def to_frame(records):
        # single place that decorates the records, shared by both return paths
        frame = pd.DataFrame.from_records(records)
        frame['entity_id'] = entity.id
        frame['provider'] = Provider.Exchange.value
        frame['timestamp'] = pd.to_datetime(frame['timestamp'])
        frame['name'] = '上证指数'
        return frame

    json_results = []

    for timestamp in timestamps:
        timestamp_str = to_time_str(timestamp)
        url = self.url.format(timestamp_str)
        text = sync_get(http_session, url=url, headers=DEFAULT_SH_SUMMARY_HEADER, return_type='text')
        if text is None:
            continue

        # keep the text between the first "(" and the first ")"
        results = demjson.decode(text[text.index("(") + 1:text.index(")")])['result']
        matches = [row for row in results if row['productType'] == '1']
        if len(matches) != 1:
            continue

        result_json = matches[0]
        # some older data is missing, so every field defaults to 0.0
        json_results.append({
            'timestamp': timestamp,
            'pe': to_float(result_json['profitRate'], 0.0),
            'total_value': to_float(result_json['marketValue1'] + '亿', 0.0),
            'total_tradable_vaule': to_float(result_json['negotiableValue1'] + '亿', 0.0),
            'volume': to_float(result_json['trdVol1'] + '万', 0.0),
            'turnover': to_float(result_json['trdAmt1'] + '亿', 0.0),
            'turnover_rate': to_float(result_json['exchangeRate'], 0.0),
        })

        if len(json_results) > self.batch_size:
            return to_frame(json_results)

    if json_results:
        return to_frame(json_results)

    return None
def numba_boost_up(klines):
    """Parse comma-separated kline strings into dicts.

    Each entry looks like
    ``"2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"``
    with the field order time, open, close, high, low, volume, turnover.

    :param klines: sequence of kline strings
    :return: list of dicts for every entry except the last one
    """
    def parse(line):
        parts = line.split(',')
        return dict(timestamp=parts[0],
                    open=to_float(parts[1]),
                    close=to_float(parts[2]),
                    high=to_float(parts[3]),
                    low=to_float(parts[4]),
                    volume=to_float(parts[5]),
                    turnover=to_float(parts[6]))

    # TODO: the last (unfinished) kdata is ignored for now; could be handled better if needed
    return [parse(line) for line in klines[:-1]]
def record(self, entity, start, end, size, timestamps):
    """Download and parse money-flow rows for ``entity``.

    :param entity: ``category``, ``code`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: passed to ``generate_url`` as ``number``
    :param timestamps: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    url = self.generate_url(category=entity.category, code=entity.code, number=size)
    resp = requests.get(url)

    # These locals must keep their exact names: eval() below runs in this
    # frame, and presumably the response uses these words as bare
    # (unquoted) keys that resolve to the strings assigned here - confirm.
    opendate, avg_price, avg_changeratio = "opendate", "avg_price", "avg_changeratio"
    turnover, netamount, ratioamount = "turnover", "netamount", "ratioamount"
    r0_net, r0_ratio = "r0_net", "r0_ratio"
    r0x_ratio, cnt_r0x_ratio = "r0x_ratio", "cnt_r0x_ratio"

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = "GBK"
        self.logger.error(resp.text)
        time.sleep(60 * 5)

    def to_record(row):
        return {
            "name": entity.name,
            "timestamp": to_pd_timestamp(row["opendate"]),
            "close": to_float(row["avg_price"]),
            "change_pct": to_float(row["avg_changeratio"]),
            # raw turnover is divided by 10000
            "turnover_rate": to_float(row["turnover"]) / 10000,
            "net_inflows": to_float(row["netamount"]),
            "net_inflow_rate": to_float(row["ratioamount"]),
            "net_main_inflows": to_float(row["r0_net"]),
            "net_main_inflow_rate": to_float(row["r0_ratio"]),
        }

    return [to_record(row) for row in json_list]
def record(self, entity, start, end, size, time_array):
    """Fetch klines for ``entity`` and return those not stored yet.

    :param entity: ``type``, ``exchange``, ``code``, ``id`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: formatted into the request URL
    :param time_array: not used
    :return: list of ``self.kdata_schema`` objects whose id is not reported
        by ``data_exist``
    """
    # Suffix 1 for indexes and 'sh' stocks, 2 for 'sz' stocks.  Any other
    # combination leaves ``id_flag`` unassigned, as in the original flow.
    kind = entity.type
    if kind == 'index' or (kind == 'stock' and entity.exchange == 'sh'):
        id_flag = "{}1".format(entity.code)
    elif kind == 'stock' and entity.exchange == 'sz':
        id_flag = "{}2".format(entity.code)

    the_url = self.url.format("{}".format(id_flag),
                              eastmoney_map_zvt_trading_level(self.level),
                              now_time_str(fmt=TIME_FORMAT_MINUTE),
                              size)
    resp = requests.get(the_url)
    results = json_callback_param(resp.text)

    kdatas = []
    for row in results:
        the_timestamp = to_pd_timestamp(row['time'])
        the_id = generate_kdata_id(entity_id=entity.id,
                                   timestamp=the_timestamp,
                                   level=self.level)
        if data_exist(self.session, self.kdata_schema, the_id):
            continue

        kdatas.append(
            self.kdata_schema(id=the_id,
                              timestamp=the_timestamp,
                              entity_id=entity.id,
                              code=entity.code,
                              name=entity.name,
                              level=self.level,
                              open=to_float(row['open']),
                              close=to_float(row['close']),
                              high=to_float(row['high']),
                              low=to_float(row['low']),
                              volume=to_float(row['volume']),
                              turnover=to_float(row['amount']),
                              turnover_rate=to_float(row['turnoverrate'])))
    return kdatas
def record(self, entity, start, end, size, timestamps):
    """Fetch klines for ``entity`` and return them as dicts.

    Each kline is a string such as
    ``"2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"``
    with the field order time, open, close, high, low, volume, turnover.

    :param entity: ``code``, ``id`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: formatted into the request URL
    :param timestamps: not used
    :return: list of kdata dicts; empty when the parsed response is falsy
    """
    the_url = self.url.format("{}".format(entity.code),
                              level_flag(self.level),
                              size,
                              now_time_str(fmt=TIME_FORMAT_DAY1))
    resp = requests.get(the_url)
    results = json_callback_param(resp.text)

    if not results:
        return []

    kdatas = []
    # TODO: the last (unfinished) kdata is ignored for now; could be handled better if needed
    for line in results['data']['klines'][:-1]:
        parts = line.split(',')
        the_timestamp = to_pd_timestamp(parts[0])
        the_id = generate_kdata_id(entity_id=entity.id,
                                   timestamp=the_timestamp,
                                   level=self.level)
        kdatas.append({
            'id': the_id,
            'timestamp': the_timestamp,
            'entity_id': entity.id,
            'code': entity.code,
            'name': entity.name,
            'level': self.level.value,
            'open': to_float(parts[1]),
            'close': to_float(parts[2]),
            'high': to_float(parts[3]),
            'low': to_float(parts[4]),
            'volume': to_float(parts[5]),
            'turnover': to_float(parts[6]),
        })
    return kdatas
def request(self, url=None, method='post', param=None, path_fields=None):
    """Download and parse stock money-flow rows from ``param['url']``.

    A response item looks like::

        {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",
         turnover:"74.924",netamount:"-2903349.8500",ratioamount:"-0.155177",
         r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",
         r0_net:"0.0000",r1_net:"2064153.0000",r2_net:"-1463770.0000",
         r3_net:"-3503732.8500"}

    Fix: the per-size rate keys were written as ``net_huge_inflows_rate``,
    ``net_big_inflows_rate``, ``net_medium_inflows_rate`` and
    ``net_small_inflows_rate``.  The schema columns listed below are named
    ``net_*_inflow_rate``, so the keys now use that spelling.

    :param url: not used; the address comes from ``param['url']``
    :param method: not used; the request is always a GET
    :param param: mapping whose ``'url'`` entry is requested
    :param path_fields: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    resp = requests.get(param['url'])

    # These locals must keep their exact names: the response uses bare
    # (unquoted) keys, and eval() below resolves them through this frame.
    opendate = "opendate"
    trade = "trade"
    changeratio = 'changeratio'
    turnover = 'turnover'
    netamount = 'netamount'
    ratioamount = 'ratioamount'
    r0 = 'r0'
    r1 = 'r1'
    r2 = 'r2'
    r3 = 'r3'
    r0_net = 'r0_net'
    r1_net = 'r1_net'
    r2_net = 'r2_net'
    r3_net = 'r3_net'

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = 'GBK'
        logger.error(resp.text)
        time.sleep(60 * 5)

    result_list = []
    for item in json_list:
        # total amount over the four order-size buckets; denominator of the rates
        amount = to_float(item['r0']) + to_float(item['r1']) + to_float(
            item['r2']) + to_float(item['r3'])

        # Schema columns, per the original notes:
        #   main = huge + big: net_main_inflows / net_main_inflow_rate
        #   huge orders:       net_huge_inflows / net_huge_inflow_rate
        #   big orders:        net_big_inflows / net_big_inflow_rate
        #   medium orders:     net_medium_inflows / net_medium_inflow_rate
        #   small orders:      net_small_inflows / net_small_inflow_rate
        result = {
            'timestamp': to_pd_timestamp(item['opendate']),
            'close': to_float(item['trade']),
            'change_pct': to_float(item['changeratio']),
            'turnover_rate': to_float(item['turnover']) / 10000,
            'net_inflows': to_float(item['netamount']),
            'net_inflow_rate': to_float(item['ratioamount']),
            'net_main_inflows': to_float(item['r0_net']) + to_float(item['r1_net']),
            'net_huge_inflows': to_float(item['r0_net']),
            'net_big_inflows': to_float(item['r1_net']),
            'net_medium_inflows': to_float(item['r2_net']),
            'net_small_inflows': to_float(item['r3_net']),
        }

        # rates are only defined when the total amount is non-zero
        if amount != 0:
            result['net_main_inflow_rate'] = (to_float(
                item['r0_net']) + to_float(item['r1_net'])) / amount
            result['net_huge_inflow_rate'] = to_float(item['r0_net']) / amount
            result['net_big_inflow_rate'] = to_float(item['r1_net']) / amount
            result['net_medium_inflow_rate'] = to_float(item['r2_net']) / amount
            result['net_small_inflow_rate'] = to_float(item['r3_net']) / amount

        result_list.append(result)

    return result_list
def record(self, entity, start, end, size, timestamps):
    """Download and parse stock money-flow rows for ``entity``.

    A response item looks like::

        {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",
         turnover:"74.924",netamount:"-2903349.8500",ratioamount:"-0.155177",
         r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",
         r0_net:"0.0000",r1_net:"2064153.0000",r2_net:"-1463770.0000",
         r3_net:"-3503732.8500"}

    :param entity: ``exchange``, ``code`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: passed to ``generate_url`` as ``number``
    :param timestamps: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    param = {
        'url': self.generate_url(code='{}{}'.format(entity.exchange, entity.code), number=size),
        'security_item': entity
    }
    resp = requests.get(param['url'])

    # These locals must keep their exact names: the response uses bare
    # (unquoted) keys, and eval() below resolves them through this frame.
    opendate, trade, changeratio = "opendate", "trade", 'changeratio'
    turnover, netamount, ratioamount = 'turnover', 'netamount', 'ratioamount'
    r0, r1, r2, r3 = 'r0', 'r1', 'r2', 'r3'
    r0_net, r1_net, r2_net, r3_net = 'r0_net', 'r1_net', 'r2_net', 'r3_net'

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = 'GBK'
        self.logger.error(resp.text)
        time.sleep(60 * 5)

    result_list = []
    for item in json_list:
        # total amount over the four order-size buckets; denominator of the rates
        amount = to_float(item['r0']) + to_float(item['r1']) + to_float(item['r2']) + to_float(item['r3'])

        record = {
            'timestamp': to_pd_timestamp(item['opendate']),
            'name': entity.name,
            'close': to_float(item['trade']),
            'change_pct': to_float(item['changeratio']),
            'turnover_rate': to_float(item['turnover']) / 10000,
            'net_inflows': to_float(item['netamount']),
            'net_inflow_rate': to_float(item['ratioamount']),
        }

        # net inflow per order size: huge, big, medium, small; main = huge + big
        huge_net = to_float(item['r0_net'])
        big_net = to_float(item['r1_net'])
        main_net = huge_net + big_net
        medium_net = to_float(item['r2_net'])
        small_net = to_float(item['r3_net'])

        record['net_main_inflows'] = main_net
        record['net_huge_inflows'] = huge_net
        record['net_big_inflows'] = big_net
        record['net_medium_inflows'] = medium_net
        record['net_small_inflows'] = small_net

        # rates are only added when the total amount is non-zero
        if amount != 0:
            record['net_main_inflow_rate'] = main_net / amount
            record['net_huge_inflow_rate'] = huge_net / amount
            record['net_big_inflow_rate'] = big_net / amount
            record['net_medium_inflow_rate'] = medium_net / amount
            record['net_small_inflow_rate'] = small_net / amount

        result_list.append(record)

    return result_list
def record(self, security_item, start, end, size, timestamps):
    # Scrape quarterly kdata for ``security_item``: for each quarter the
    # unadjusted ('bfq') table is fetched first, then the 'hfq' table, whose
    # values are merged into the rows already collected for the same id.
    # ``end``, ``size`` and ``timestamps`` are not used here.
    #
    # NOTE(review): the nesting of this block (in particular the position of
    # ``return kdatas`` and of ``kdatas.append(kdata)``) was reconstructed
    # from a flattened view of the source - confirm against the real file.
    the_quarters = get_year_quarters(start)
    # Drop the first quarter when the entity's timestamp is not the same date
    # as ``start`` and more than one quarter is pending (presumably that
    # quarter counts as already recorded - confirm).
    if not is_same_date(security_item.timestamp, start) and len(the_quarters) > 1:
        the_quarters = the_quarters[1:]
    for year, quarter in the_quarters:
        kdatas = []
        for fuquan in ['bfq', 'hfq']:
            the_url = self.get_kdata_url(security_item.code, year, quarter, fuquan)
            resp = requests.get(the_url)
            # every table row except the first one
            trs = Selector(text=resp.text).xpath(
                '//*[@id="FundHoldSharesTable"]/tr[position()>1 and position()<=last()]'
            ).extract()
            for idx, tr in enumerate(trs):
                tds = Selector(text=tr).xpath('//td//text()').extract()
                # keep only non-blank cell texts
                tds = [x.strip() for x in tds if x.strip()]
                # cell order: date, open, high, close, low, volume, turnover[, factor]
                open = tds[1]
                high = tds[2]
                close = tds[3]
                low = tds[4]
                volume = tds[5]
                turnover = tds[6]
                if fuquan == 'hfq':
                    factor = tds[7]
                the_timestamp = to_pd_timestamp(tds[0])
                the_id = generate_kdata_id(security_id=security_item.id, timestamp=the_timestamp, level=self.level)
                if fuquan == 'hfq':
                    # bfq rows were collected first; the hfq values are now added to them
                    existed = [
                        item for item in kdatas if item['id'] == the_id
                    ]
                    if existed:
                        kdata = existed[0]
                    else:
                        # no bfq row for this id: derive the unadjusted prices
                        # by dividing the hfq prices by the factor
                        self.logger.error(
                            "bfq not got for:{}".format(the_id))
                        kdata = {
                            'id': the_id,
                            'timestamp': the_timestamp,
                            'name': security_item.name,
                            'level': self.level.value,
                            'open': to_float(open) / to_float(factor),
                            'close': to_float(close) / to_float(factor),
                            'high': to_float(high) / to_float(factor),
                            'low': to_float(low) / to_float(factor),
                            'volume': to_float(volume),
                            'turnover': to_float(turnover)
                        }
                        kdatas.append(kdata)
                    kdata['hfq_open'] = to_float(open)
                    kdata['hfq_high'] = to_float(high)
                    kdata['hfq_close'] = to_float(close)
                    kdata['hfq_low'] = to_float(low)
                    kdata['factor'] = to_float(factor)
                    # remember the most recently parsed factor for this security
                    self.latest_factors[security_item.id] = to_float(
                        factor)
                else:
                    kdatas.append({
                        'id': the_id,
                        'timestamp': the_timestamp,
                        'name': security_item.name,
                        'level': self.level.value,
                        'open': to_float(open),
                        'close': to_float(close),
                        'high': to_float(high),
                        'low': to_float(low),
                        'volume': to_float(volume),
                        'turnover': to_float(turnover)
                    })
        # NOTE(review): as laid out here, only the first quarter is processed per call - confirm
        return kdatas
def record(self, entity, start, end, size, timestamps):
    """Download and parse stock money-flow rows for ``entity``.

    A response item looks like::

        {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",
         turnover:"74.924",netamount:"-2903349.8500",ratioamount:"-0.155177",
         r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",
         r0_net:"0.0000",r1_net:"2064153.0000",r2_net:"-1463770.0000",
         r3_net:"-3503732.8500"}

    :param entity: ``exchange``, ``code`` and ``name`` are read
    :param start: not used
    :param end: not used
    :param size: passed to ``generate_url`` as ``number``
    :param timestamps: not used
    :return: list of record dicts; empty when the response could not be parsed
    """
    param = {
        "url": self.generate_url(code="{}{}".format(entity.exchange, entity.code), number=size),
        "security_item": entity,
    }
    resp = requests.get(param["url"])

    # These locals must keep their exact names: the response uses bare
    # (unquoted) keys, and eval() below resolves them through this frame.
    opendate, trade, changeratio = "opendate", "trade", "changeratio"
    turnover, netamount, ratioamount = "turnover", "netamount", "ratioamount"
    r0, r1, r2, r3 = "r0", "r1", "r2", "r3"
    r0_net, r1_net, r2_net, r3_net = "r0_net", "r1_net", "r2_net", "r3_net"

    json_list = []
    try:
        # NOTE(review): eval() executes text received over the network.
        json_list = eval(resp.text)
    except Exception:
        # switch the decoding to GBK before logging the body, then wait 5 minutes
        resp.encoding = "GBK"
        self.logger.error(resp.text)
        time.sleep(60 * 5)

    result_list = []
    for item in json_list:
        # total amount over the four order-size buckets; denominator of the rates
        amount = to_float(item["r0"]) + to_float(item["r1"]) + to_float(item["r2"]) + to_float(item["r3"])

        row = {
            "timestamp": to_pd_timestamp(item["opendate"]),
            "name": entity.name,
            "close": to_float(item["trade"]),
            "change_pct": to_float(item["changeratio"]),
            "turnover_rate": to_float(item["turnover"]) / 10000,
            "net_inflows": to_float(item["netamount"]),
            "net_inflow_rate": to_float(item["ratioamount"]),
        }

        # net inflow per order size: huge, big, medium, small; main = huge + big
        net_huge = to_float(item["r0_net"])
        net_big = to_float(item["r1_net"])
        net_main = net_huge + net_big
        net_medium = to_float(item["r2_net"])
        net_small = to_float(item["r3_net"])

        row["net_main_inflows"] = net_main
        row["net_huge_inflows"] = net_huge
        row["net_big_inflows"] = net_big
        row["net_medium_inflows"] = net_medium
        row["net_small_inflows"] = net_small

        # rates are only added when the total amount is non-zero
        if amount != 0:
            row["net_main_inflow_rate"] = net_main / amount
            row["net_huge_inflow_rate"] = net_huge / amount
            row["net_big_inflow_rate"] = net_big / amount
            row["net_medium_inflow_rate"] = net_medium / amount
            row["net_small_inflow_rate"] = net_small / amount

        result_list.append(row)

    return result_list
def run(self):
    """Worker loop: fetch profile and issuance data for every entity with a progress bar.

    ``self.share_para`` is read by index: ``[0]`` is the bar description,
    ``[1]`` bounds the random start delay and the worker-id modulo, ``[2]``
    is an object with ``acquire``/``release`` held around bar updates, and
    ``[3]`` is passed as tqdm's ``leave``.

    Fixes:

    * Entities whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` are now
      skipped with a warning (the bar still advances).  Previously ``fc``
      stayed unassigned for them, so the loop raised ``UnboundLocalError``
      or reused the previous entity's ``fc`` and stored the wrong company's
      data.
    * The lock is released in a ``finally`` so a failing bar update cannot
      leave it held.
    """
    # random start delay of 0..share_para[1] seconds
    time.sleep(random.randint(0, self.share_para[1]))

    process_identity = multiprocessing.current_process()._identity
    if len(process_identity) > 0:
        # worker-process bars start at position 1
        worker_id = (process_identity[0] - 1) % self.share_para[1] + 1
    else:
        worker_id = 0
    desc = "{:02d} : {}".format(worker_id, self.share_para[0])

    exchange_suffix = {'sh': '01', 'sz': '02'}

    with tqdm(total=len(self.entities), ncols=80, position=worker_id, desc=desc,
              leave=self.share_para[3]) as pbar:

        def advance():
            # bar updates are serialized through the shared lock
            self.share_para[2].acquire()
            try:
                pbar.update()
            finally:
                self.share_para[2].release()

        http_session = get_http_session()

        for security_item in self.entities:
            assert isinstance(security_item, StockDetail)

            suffix = exchange_suffix.get(security_item.exchange)
            if suffix is None:
                self.logger.warning(
                    'skip stock meta for: {}, unsupported exchange: {}'.format(
                        security_item.code, security_item.exchange))
                advance()
                continue
            fc = "{}{}".format(security_item.code, suffix)

            # basic information
            param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
            resp = request_post(
                http_session,
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                json=param)
            resp.encoding = 'utf8'
            resp_json = resp.json()['Result']['JiBenZiLiao']

            security_item.profile = resp_json['CompRofile']
            security_item.main_business = resp_json['MainBusiness']
            security_item.date_of_establishment = to_pd_timestamp(
                resp_json['FoundDate'])

            # related industries: '-' separated in the response, stored ',' separated
            security_item.industry = ','.join(resp_json['Industry'].split('-'))
            # related concepts
            security_item.concept_indices = resp_json['Block']
            # related region
            security_item.area_indices = resp_json['Provice']

            # issuance data
            param = {"color": "w", "fc": fc}
            resp = request_post(
                http_session,
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                json=param)
            resp.encoding = 'utf8'
            resp_json = resp.json()['Result']['FaXingXiangGuan']

            security_item.issue_pe = to_float(resp_json['PEIssued'])
            security_item.price = to_float(resp_json['IssuePrice'])
            security_item.issues = to_float(resp_json['ShareIssued'])
            security_item.raising_fund = to_float(resp_json['NetCollection'])
            security_item.net_winning_rate = pct_to_float(
                resp_json['LotRateOn'])

            self.session.commit()
            self.logger.info('finish recording stock meta for: {}'.format(
                security_item.code))

            advance()
            self.sleep()
def process_loop(self, entity, http_session):
    """Fetch profile and issuance data for one entity, commit, and log the elapsed time.

    The method returns early, without committing, when either ``sync_post``
    call returns ``None``.

    Fix: an entity whose ``exchange`` is neither ``'sh'`` nor ``'sz'`` used
    to leave ``fc`` unassigned and raise ``UnboundLocalError`` when the
    request parameters were built.  It is now logged and skipped.

    :param entity: ``StockDetail`` whose fields are filled in
    :param http_session: session object handed to ``sync_post``
    :return: ``None``
    """
    assert isinstance(entity, StockDetail)

    step1 = time.time()
    precision_str = '{:>8,.4f}'
    self.result = None

    suffix = {'sh': '01', 'sz': '02'}.get(entity.exchange)
    if suffix is None:
        self.logger.warning("skip {}, unsupported exchange: {}".format(
            entity.id, entity.exchange))
        return
    fc = "{}{}".format(entity.code, suffix)

    # basic information
    param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
    url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao'
    json_result = sync_post(http_session, url, json=param)
    if json_result is None:
        return

    resp_json = json_result['JiBenZiLiao']
    entity.profile = resp_json['CompRofile']
    entity.main_business = resp_json['MainBusiness']
    entity.date_of_establishment = to_pd_timestamp(resp_json['FoundDate'])

    # related industries: '-' separated in the response, stored ',' separated
    entity.industry = ','.join(resp_json['Industry'].split('-'))
    # related concepts
    entity.sector = resp_json['Block']
    # related region
    entity.area = resp_json['Provice']

    self.sleep()

    # issuance data
    param = {"color": "w", "fc": fc}
    url = 'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan'
    json_result = sync_post(http_session, url, json=param)
    if json_result is None:
        return

    resp_json = json_result['FaXingXiangGuan']
    entity.issue_pe = to_float(resp_json['PEIssued'])
    entity.price = to_float(resp_json['IssuePrice'])
    entity.issues = to_float(resp_json['ShareIssued'])
    entity.raising_fund = to_float(resp_json['NetCollection'])
    entity.net_winning_rate = pct_to_float(resp_json['LotRateOn'])

    session = get_db_session(region=self.region,
                             provider=self.provider,
                             data_schema=self.data_schema)
    session.commit()

    cost = precision_str.format(time.time() - step1)

    prefix = "finish~ " if zvt_config['debug'] else ""
    postfix = "\n" if zvt_config['debug'] else ""

    # ``self.result`` is reset to None above and not reassigned in this
    # method, so the first branch only runs if it is set from elsewhere.
    if self.result is not None:
        self.logger.info(
            "{}{}, {}, time: {}, size: {:>7,}, date: [ {}, {} ]{}".format(
                prefix, self.data_schema.__name__, entity.id, cost,
                self.result[0], self.result[1], self.result[2], postfix))
    else:
        self.logger.info("{}{}, {}, time: {}{}".format(
            prefix, self.data_schema.__name__, entity.id, cost, postfix))
def numba_boost_up(json_list):
    """Map raw stock money-flow items to record dicts.

    An input item looks like::

        {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",
         turnover:"74.924",netamount:"-2903349.8500",ratioamount:"-0.155177",
         r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",
         r0_net:"0.0000",r1_net:"2064153.0000",r2_net:"-1463770.0000",
         r3_net:"-3503732.8500"}

    :param json_list: iterable of mappings with the raw keys read below
    :return: list with one dict per item, in input order
    """
    def convert(item):
        row = {
            'timestamp': to_pd_timestamp(item['opendate']),
            'close': to_float(item['trade']),
            'change_pct': to_float(item['changeratio']),
            'turnover_rate': to_float(item['turnover']) / 10000,
            'net_inflows': to_float(item['netamount']),
            'net_inflow_rate': to_float(item['ratioamount']),
        }

        # net inflow per order size: huge, big, medium, small; main = huge + big
        net_huge = to_float(item['r0_net'])
        net_big = to_float(item['r1_net'])
        net_main = net_huge + net_big
        net_medium = to_float(item['r2_net'])
        net_small = to_float(item['r3_net'])

        row['net_main_inflows'] = net_main
        row['net_huge_inflows'] = net_huge
        row['net_big_inflows'] = net_big
        row['net_medium_inflows'] = net_medium
        row['net_small_inflows'] = net_small

        # total amount over the four order-size buckets; denominator of the rates
        amount = to_float(item['r0']) + to_float(item['r1']) + to_float(item['r2']) + to_float(item['r3'])

        # rates are only added when the total amount is non-zero
        if amount != 0:
            row['net_main_inflow_rate'] = net_main / amount
            row['net_huge_inflow_rate'] = net_huge / amount
            row['net_big_inflow_rate'] = net_big / amount
            row['net_medium_inflow_rate'] = net_medium / amount
            row['net_small_inflow_rate'] = net_small / amount

        return row

    return [convert(item) for item in json_list]