def run(self):
    """Fetch the fund list from ``finance.FUND_MAIN_INFO`` and persist it.

    Funds are fetched separately for each operate mode, in successive
    date windows on ``start_date``. Each window starts at the timestamp
    of the newest ``Fund`` row already stored for that operate mode
    (or ``'2000-01-01'`` when nothing is stored) and is capped at the
    current time. The loop for an operate mode ends once the window's
    end falls on the current date.
    """
    # Fetch category by category.
    # Code     Fund operate mode
    # 401001   open-end fund
    # 401002   closed-end fund
    # 401003   QDII
    # 401004   FOF
    # 401005   ETF
    # 401006   LOF
    def _fund_entity_id(jq_code):
        return to_entity_id(entity_type='fund', jq_code=jq_code)

    for operate_mode_id in (401001, 401002, 401005):
        window_years = 2
        while True:
            newest = Fund.query_data(
                region=self.region,
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type='domain')
            start_timestamp = newest[0].timestamp if newest else '2000-01-01'
            end_timestamp = min(
                next_date(start_timestamp, 365 * window_years),
                now_pd_timestamp(self.region))

            window_filter = f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}'
            df = jq_run_query(
                table='finance.FUND_MAIN_INFO',
                conditions=window_filter,
                parse_dates=['start_date', 'end_date'],
                dtype={'main_code': str})
            has_rows = pd_is_not_null(df)

            # Widen the next window when this one returned nothing, or
            # when its newest start_date is in an earlier year than the
            # window's end.
            if not has_rows or df['start_date'].max().year < end_timestamp.year:
                window_years += 1

            if has_rows:
                df.rename(columns={'start_date': 'timestamp'}, inplace=True)
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df['list_date'] = df['timestamp']
                df['end_date'] = pd.to_datetime(df['end_date'])

                df['code'] = df['main_code']
                df['entity_id'] = df['code'].apply(_fund_entity_id)
                df['id'] = df['entity_id']
                df['entity_type'] = 'fund'
                df['exchange'] = 'sz'

                df_to_db(df,
                         ref_df=None,
                         region=self.region,
                         data_schema=Fund,
                         provider=self.provider)
                self.logger.info(
                    f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                )

            # The window has caught up with today: this mode is done.
            if is_same_date(end_timestamp, now_pd_timestamp(self.region)):
                break
def record(self, entity, start, end, size, timestamps, http_session):
    """Query ``finance.FUND_PORTFOLIO_STOCK`` for one entity.

    Selects rows whose ``pub_date`` is on or after ``start`` and whose
    ``code`` equals ``entity.code``. ``end``, ``size``, ``timestamps``
    and ``http_session`` are not used by this implementation.

    Returns:
        The query result when ``pd_is_not_null`` accepts it,
        otherwise ``None``.
    """
    query = f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}'
    holdings = jq_run_query(table='finance.FUND_PORTFOLIO_STOCK',
                            conditions=query,
                            parse_dates=None)
    return holdings if pd_is_not_null(holdings) else None
def record(self, entity, start, end, size, timestamps, http_session):
    """Query ``finance.STK_ML_QUOTA`` for one entity.

    Selects rows whose ``link_id`` equals ``entity.code`` and whose
    ``day`` is on or after ``start``. ``end``, ``size``, ``timestamps``
    and ``http_session`` are not used by this implementation.

    Side effect:
        Sets ``self.one_shot = True`` when fewer than 100 rows come back
        (presumably telling the caller there is nothing more to fetch —
        confirm against the base recorder).

    Returns:
        The query result when ``pd_is_not_null`` accepts it,
        otherwise ``None``.
    """
    quota_filter = f'link_id#=#{entity.code}&day#>=#{to_time_str(start)}'
    quota_df = jq_run_query(table='finance.STK_ML_QUOTA',
                            conditions=quota_filter,
                            parse_dates=None)
    if not pd_is_not_null(quota_df):
        return None
    if len(quota_df) < 100:
        self.one_shot = True
    return quota_df
def record(self, entity, start, end, size, timestamps, http_session):
    """Query ``finance.STK_HK_HOLD_INFO`` for one entity, day by day.

    Issues one query per element of ``timestamps``, selecting rows whose
    ``link_id`` equals ``entity.code`` and whose ``day`` equals that
    timestamp, and stacks the results in iteration order. ``start``,
    ``end``, ``size`` and ``http_session`` are not used by this
    implementation.

    Returns:
        A single DataFrame with the rows of every non-empty query
        result (original row indexes are kept), or ``None`` when no
        query returned data.
    """
    # Collect the per-day results and concatenate once at the end.
    # Concatenating inside the loop re-copies the accumulated rows on
    # every iteration, and seeding the accumulator with an empty
    # DataFrame makes pandas warn about empty entries in pd.concat.
    frames = []
    for timestamp in timestamps:
        df = jq_run_query(
            table='finance.STK_HK_HOLD_INFO',
            conditions=f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
        # NOTE(review): assumes pd_is_not_null rejects None and empty
        # frames, which contribute no rows to the result anyway.
        if pd_is_not_null(df):
            frames.append(df)

    # pd.concat raises ValueError on an empty list, so handle it here.
    if not frames:
        return None
    return pd.concat(frames)
def record(self, entity, start, end, size, timestamps, http_session):
    """Query ``finance.STK_EXCHANGE_TRADE_INFO`` for one entity.

    ``entity.code`` is looked up in ``code_map_jq`` to get the value
    matched against ``exchange_code``; rows are further restricted to
    ``date`` on or after ``start``. ``end``, ``size``, ``timestamps``
    and ``http_session`` are not used by this implementation.

    Side effect:
        Sets ``self.one_shot = True`` when fewer than 100 rows come back
        (presumably telling the caller there is nothing more to fetch —
        confirm against the base recorder).

    Returns:
        The query result when ``pd_is_not_null`` accepts it,
        otherwise ``None``.
    """
    exchange_code = code_map_jq.get(entity.code)
    trade_filter = f'exchange_code#=#{exchange_code}&date#>=#{to_time_str(start)}'
    trade_df = jq_run_query(table='finance.STK_EXCHANGE_TRADE_INFO',
                            conditions=trade_filter,
                            parse_dates=['date'])
    if not pd_is_not_null(trade_df):
        return None
    if len(trade_df) < 100:
        self.one_shot = True
    return trade_df