Пример #1
0
    def run(self):
        """Fetch the fund list from ``finance.FUND_MAIN_INFO`` and persist it.

        For each of the operate modes 401001, 401002 and 401005 the method
        loops over these steps:

        1. Look up the newest stored ``Fund`` row for that mode and use its
           timestamp as the window start ('2000-01-01' when nothing is
           stored yet).
        2. Query funds whose ``start_date`` lies between the window start and
           ``next_date(start, 365 * window_years)``, capped at
           ``now_pd_timestamp(self.region)``.
        3. Persist any returned rows through ``df_to_db``.
        4. Stop once ``is_same_date`` reports that the window end matches
           ``now_pd_timestamp(self.region)``.

        The window is widened by one year whenever a query returns nothing,
        or when its newest ``start_date`` lies in an earlier year than the
        window end.
        """

        def _fund_entity_id(jq_code):
            # Build the entity id for a single fund code.
            return to_entity_id(entity_type='fund', jq_code=jq_code)

        # Crawl category by category.
        # Code      Fund operate mode
        # 401001    open-ended fund
        # 401002    closed-ended fund
        # 401003    QDII
        # 401004    FOF
        # 401005    ETF
        # 401006    LOF
        for operate_mode_id in (401001, 401002, 401005):
            window_years = 2
            caught_up = False
            while not caught_up:
                # Re-read the newest stored row on every pass: the previous
                # pass may have persisted newer funds.
                newest_rows = Fund.query_data(
                    region=self.region,
                    filters=[Fund.operate_mode_id == operate_mode_id],
                    order=Fund.timestamp.desc(),
                    limit=1,
                    return_type='domain')
                start_timestamp = (newest_rows[0].timestamp
                                   if newest_rows else '2000-01-01')

                window_end = next_date(start_timestamp, 365 * window_years)
                end_timestamp = min(window_end, now_pd_timestamp(self.region))

                query_conditions = f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}'
                fund_df = jq_run_query(
                    table='finance.FUND_MAIN_INFO',
                    conditions=query_conditions,
                    parse_dates=['start_date', 'end_date'],
                    dtype={'main_code': str})

                if pd_is_not_null(fund_df):
                    # The year check must read 'start_date' before the column
                    # is renamed below.
                    if fund_df['start_date'].max().year < end_timestamp.year:
                        window_years += 1

                    fund_df.rename(columns={'start_date': 'timestamp'},
                                   inplace=True)
                    fund_df['timestamp'] = pd.to_datetime(fund_df['timestamp'])
                    fund_df['list_date'] = fund_df['timestamp']
                    fund_df['end_date'] = pd.to_datetime(fund_df['end_date'])

                    fund_df['code'] = fund_df['main_code']
                    fund_df['entity_id'] = fund_df['code'].apply(
                        _fund_entity_id)
                    fund_df['id'] = fund_df['entity_id']
                    fund_df['entity_type'] = 'fund'
                    fund_df['exchange'] = 'sz'
                    df_to_db(fund_df,
                             ref_df=None,
                             region=self.region,
                             data_schema=Fund,
                             provider=self.provider)
                    self.logger.info(
                        f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                    )
                else:
                    # Nothing in this window: widen it for the next pass.
                    window_years += 1

                caught_up = is_same_date(end_timestamp,
                                         now_pd_timestamp(self.region))
    def record(self, entity, start, end, size, timestamps, http_session):
        """Query ``finance.FUND_PORTFOLIO_STOCK`` rows for one entity.

        The condition string selects rows with ``pub_date#>=#`` the formatted
        ``start`` and ``code#=#`` ``entity.code``.

        :param entity: object exposing a ``code`` attribute.
        :param start: lower bound for ``pub_date``, formatted by
            ``to_time_str``.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :param http_session: not used by this method.
        :return: the query result when ``pd_is_not_null`` accepts it,
            otherwise ``None``.
        """
        query_conditions = f'pub_date#>=#{to_time_str(start)}&code#=#{entity.code}'
        holdings = jq_run_query(table='finance.FUND_PORTFOLIO_STOCK',
                                conditions=query_conditions,
                                parse_dates=None)
        return holdings if pd_is_not_null(holdings) else None
Пример #3
0
 def record(self, entity, start, end, size, timestamps, http_session):
     """Query ``finance.STK_ML_QUOTA`` rows for one entity.

     The condition string selects rows with ``link_id#=#`` ``entity.code``
     and ``day#>=#`` the formatted ``start``.

     :param entity: object exposing a ``code`` attribute.
     :param start: lower bound for ``day``, formatted by ``to_time_str``.
     :param end: not used by this method.
     :param size: not used by this method.
     :param timestamps: not used by this method.
     :param http_session: not used by this method.
     :return: the query result when ``pd_is_not_null`` accepts it,
         otherwise ``None``.
     """
     query_conditions = f'link_id#=#{entity.code}&day#>=#{to_time_str(start)}'
     quota_df = jq_run_query(table='finance.STK_ML_QUOTA',
                             conditions=query_conditions,
                             parse_dates=None)
     if not pd_is_not_null(quota_df):
         return None

     # NOTE(review): fewer than 100 rows sets one_shot; presumably this tells
     # the surrounding recorder that no further fetch is needed - confirm
     # against the base class.
     if len(quota_df) < 100:
         self.one_shot = True
     return quota_df
    def record(self, entity, start, end, size, timestamps, http_session):
        """Query ``finance.STK_HK_HOLD_INFO`` once per timestamp and merge.

        For every item in ``timestamps`` a query is issued with ``link_id#=#``
        ``entity.code`` and ``day#=#`` the formatted timestamp. The per-day
        frames are collected and concatenated in a single ``pd.concat`` call,
        in the order of ``timestamps``.

        :param entity: object exposing a ``code`` attribute.
        :param start: not used by this method.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: iterable of timestamps to query, one request each.
        :param http_session: not used by this method.
        :return: the merged frame when ``pd_is_not_null`` accepts it,
            otherwise ``None``.
        """
        daily_frames = []
        for timestamp in timestamps:
            day_df = jq_run_query(
                table='finance.STK_HK_HOLD_INFO',
                conditions=
                f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
            # pd.concat silently drops None entries but raises when every
            # entry is None, so filter them out here.
            if day_df is not None:
                daily_frames.append(day_df)

        # Concatenate once: growing a DataFrame with pd.concat inside the
        # loop re-copies all earlier rows on every iteration.
        result_df = pd.concat(daily_frames) if daily_frames else pd.DataFrame()

        if pd_is_not_null(result_df):
            return result_df
        return None
    def record(self, entity, start, end, size, timestamps, http_session):
        """Query ``finance.STK_EXCHANGE_TRADE_INFO`` rows for one entity.

        ``entity.code`` is translated through ``code_map_jq`` and the result
        is used as ``exchange_code`` in the query, together with ``date#>=#``
        the formatted ``start``.

        :param entity: object exposing a ``code`` attribute.
        :param start: lower bound for ``date``, formatted by ``to_time_str``.
        :param end: not used by this method.
        :param size: not used by this method.
        :param timestamps: not used by this method.
        :param http_session: not used by this method.
        :return: the query result when ``pd_is_not_null`` accepts it,
            otherwise ``None``.
        """
        # NOTE(review): assuming code_map_jq is a dict-like mapping, a code
        # missing from it yields None, which is interpolated into the
        # condition string as the text 'None'.
        exchange_code = code_map_jq.get(entity.code)
        query_conditions = f'exchange_code#=#{exchange_code}&date#>=#{to_time_str(start)}'

        trade_df = jq_run_query(table='finance.STK_EXCHANGE_TRADE_INFO',
                                conditions=query_conditions,
                                parse_dates=['date'])
        if not pd_is_not_null(trade_df):
            return None

        # NOTE(review): fewer than 100 rows sets one_shot; presumably this
        # tells the surrounding recorder that no further fetch is needed -
        # confirm against the base class.
        if len(trade_df) < 100:
            self.one_shot = True
        return trade_df