Exemplo n.º 1
0
def cal_ma_states(start='000001', end='002000'):
    """Build the daily and weekly MA-state objects for a range of stock codes.

    Stocks from the 'eastmoney' provider whose code lies in ``[start, end)``
    are looked up, and the index values of the returned frame are handed as
    ``codes`` to a daily ``MaStateStas``, a daily ``MaFactor`` and a weekly
    ``MaStateStas``, each covering 2005-01-01 up to the current time.

    :param start: lowest stock code to include (inclusive)
    :param end: upper bound of the stock codes (exclusive)
    """
    logger.info(f'start cal day ma stats {start}:{end}')

    stock_df = get_entities(provider='eastmoney',
                            entity_type='stock',
                            columns=[Stock.entity_id, Stock.code],
                            filters=[Stock.code >= start, Stock.code < end])
    codes = stock_df.index.to_list()

    def _window(level):
        # The end of the window is re-read for every object that is built.
        return dict(codes=codes,
                    start_timestamp='2005-01-01',
                    end_timestamp=now_pd_timestamp(),
                    level=level)

    # The objects are only constructed here; they are not used afterwards.
    daily_stats = MaStateStas(**_window(IntervalLevel.LEVEL_1DAY))
    daily_factor = MaFactor(**_window(IntervalLevel.LEVEL_1DAY))

    logger.info(f'finish cal day ma stats {start}:{end}')

    weekly_stats = MaStateStas(**_window(IntervalLevel.LEVEL_1WEEK))

    logger.info(f'finish cal week ma stats {start}:{end}')
Exemplo n.º 2
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch forward-adjusted bars for ``entity`` and write them to the db.

        ``size`` bars are requested through ``get_bars``.  When
        ``self.end_timestamp`` is set the request ends there and excludes the
        current bar; otherwise the current bar is included.  If the close of
        the first fetched bar differs (to 2 decimals) from the one already
        stored for the same timestamp, ``self.factor`` and
        ``self.last_timestamp`` are set so the stored history can be
        re-adjusted.

        :return: always ``None``; the rows are saved directly via ``df_to_db``
        """
        # Only forward-adjusted data is wanted.
        bar_kwargs = dict(count=size,
                          unit=self.jq_trading_level,
                          fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'])
        if self.end_timestamp:
            bar_kwargs['end_dt'] = to_time_str(self.end_timestamp)
            bar_kwargs['include_now'] = False
        else:
            bar_kwargs['include_now'] = True

        df = get_bars(to_jq_entity_id(entity),
                      fq_ref_date=to_time_str(now_pd_timestamp()),
                      **bar_kwargs)

        if not pd_is_not_null(df):
            return None

        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        # Decide whether the forward-adjusted data saved earlier has to be
        # recomputed, by comparing the first fetched bar with the stored one.
        first_bar = df.head(1)
        first_timestamp = first_bar['timestamp'][0]
        saved_df = get_kdata(entity_id=entity.id, provider=self.provider, start_timestamp=first_timestamp,
                             end_timestamp=first_timestamp, limit=1, level=self.level)
        if pd_is_not_null(saved_df):
            saved_close = saved_df.iloc[0, :]['close']
            fetched_close = first_bar['close'][0]
            # A different close at the same timestamp means the adjustment
            # has changed and must be recomputed.
            if round(saved_close, 2) != round(fetched_close, 2):
                self.factor = fetched_close / saved_close
                self.last_timestamp = pd.Timestamp(first_timestamp)

        def kdata_id(row):
            # Day-level (and coarser) ids use the day format, finer levels
            # the ISO-8601 format.
            fmt = TIME_FORMAT_DAY if self.level >= IntervalLevel.LEVEL_1DAY else TIME_FORMAT_ISO8601
            return "{}_{}".format(row['entity_id'], to_time_str(row['timestamp'], fmt=fmt))

        df['id'] = df[['entity_id', 'timestamp']].apply(kdata_id, axis=1)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None
Exemplo n.º 3
0
def every_day_report():
    """Run the finance-based selection for the current time and mail the result.

    The whole job is retried, with a three-minute pause, until one pass
    completes without raising.
    """
    while True:
        try:
            today = now_pd_timestamp()
            long_targets = select_by_finance(today)

            logger.info(f'selected:{len(long_targets)}')

            if not long_targets:
                msg = 'no targets'
            else:
                # De-duplicate the ids before looking up code and name.
                long_targets = list(set(long_targets))
                stock_df = get_entities(provider='eastmoney', entity_schema=Stock, entity_ids=long_targets,
                                        columns=['code', 'name'])
                labels = []
                for idx in stock_df.index:
                    labels.append(stock_df.loc[idx, 'code'] + ' ' + stock_df.loc[idx, 'name'])
                msg = ' '.join(labels)

            logger.info(msg)

            informer = EmailInformer()
            informer.send_message("*****@*****.**", f'{today} 基本面选股结果', msg)
        except Exception as e:
            logger.exception('report2 sched error:{}'.format(e))
            time.sleep(60 * 3)
        else:
            # A pass without an exception ends the retry loop.
            break
Exemplo n.º 4
0
    def on_finish(self):
        """Back-fill ``rights_raising_fund`` on dividend-financing rows, then finish.

        Every ``DividendFinancing`` row of the recorded entities whose
        ``rights_raising_fund`` is NULL gets the sum of the rights-issue
        details found between the row's timestamp and the end of that year.
        Each filled row is committed on its own before the parent hook runs.
        """
        # Despite being the upper bound, this is the *current* year.
        year_bound = str(now_pd_timestamp().year)
        pending_rows = get_dividend_financing(
            provider=self.provider,
            codes=[entity.code for entity in self.entities],
            return_type='domain',
            session=self.session,
            filters=[DividendFinancing.rights_raising_fund.is_(None)],
            end_timestamp=year_bound)

        for row in pending_rows:
            year_end = "{}-12-31".format(row.timestamp.year)
            detail_df = get_rights_issue_detail(
                provider=self.provider,
                entity_id=row.entity_id,
                columns=[
                    RightsIssueDetail.timestamp,
                    RightsIssueDetail.rights_raising_fund
                ],
                start_timestamp=row.timestamp,
                end_timestamp=year_end)
            if not df_is_not_null(detail_df):
                continue
            row.rights_raising_fund = detail_df['rights_raising_fund'].sum()
            self.session.commit()

        super().on_finish()
    def record(self, entity, start, end, size, timestamps):
        """Download the member stocks of a block and store them.

        Pages 1 to 4 of ``self.category_stocks_url`` are requested for
        ``entity.code``.  Each page is decoded with ``demjson`` and one row
        per listed stock is written via ``df_to_db``.  Paging stops at the
        first page whose body is ``'null'``.  A failure while handling one
        page is logged and the next page is tried.

        :param entity: the block entity (``id``, ``code``, ``name``,
            ``exchange`` and ``category`` are read)
        :param start: unused here
        :param end: unused here
        :param size: unused here
        :param timestamps: unused here
        """
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                # A missing or 'null' body means there is nothing (more) to read.
                if resp.text is None or resp.text == 'null':
                    break
                category_jsons = demjson.decode(resp.text)
                block_id = entity.id
                the_list = []
                for category in category_jsons:
                    stock_code = category['code']
                    stock_id = china_stock_code_to_id(stock_code)
                    the_list.append({
                        'id': '{}_{}'.format(block_id, stock_id),
                        'entity_id': block_id,
                        'entity_type': 'block',
                        'exchange': entity.exchange,
                        'code': entity.code,
                        'name': entity.name,
                        'timestamp': now_pd_timestamp(),
                        'stock_id': stock_id,
                        'stock_code': stock_code,
                        'stock_name': category['name'],
                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                             force_update=True)

                self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))

            except Exception as e:
                # The logging API formats lazily with %-style placeholders;
                # without them the extra arguments break the log record and
                # the error details never reach the log.
                self.logger.error("error:%s,resp.text:%s", e, resp.text)
            self.sleep()
Exemplo n.º 6
0
def select_by_finance(timestamp=None, entity_ids=None):
    """Select long targets using two ``GoodCompanyFactor`` filters.

    A ``TargetSelector`` over 2015-01-01 .. ``timestamp`` is run with a
    default ``GoodCompanyFactor`` and a second one built on
    ``CashFlowStatement`` (positive ``net_op_cash_flows`` with a column
    threshold of 100000000).

    :param timestamp: the selection date.  ``None`` (the default) means "now"
        and is resolved on every call; a default evaluated in the signature
        would be frozen at import time and go stale in a long-running process.
    :param entity_ids: optional restriction of the candidate entities
    :return: the open-long targets reported by the selector for that date
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()

    if timestamp.dayofweek in (5, 6):
        # NOTE(review): the message says "just ignore", but nothing returns
        # here, so the selection still runs on weekends - confirm the intent.
        logger.info(f'today:{timestamp} is {timestamp.day_name()},just ignore')

    today = to_time_str(timestamp)

    my_selector = TargetSelector(start_timestamp='2015-01-01',
                                 end_timestamp=today,
                                 entity_ids=entity_ids)
    # add the factors
    good_factor1 = GoodCompanyFactor(start_timestamp='2015-01-01',
                                     end_timestamp=today,
                                     entity_ids=entity_ids)
    good_factor2 = GoodCompanyFactor(
        start_timestamp='2015-01-01',
        end_timestamp=today,
        entity_ids=entity_ids,
        data_schema=CashFlowStatement,
        columns=[
            CashFlowStatement.report_period,
            CashFlowStatement.net_op_cash_flows
        ],
        filters=[CashFlowStatement.net_op_cash_flows > 0],
        col_threshold={'net_op_cash_flows': 100000000})

    my_selector.add_filter_factor(good_factor1)
    my_selector.add_filter_factor(good_factor2)
    my_selector.run()

    long_targets = my_selector.get_open_long_targets(today)

    logger.info(f'selected:{len(long_targets)}')

    return long_targets
Exemplo n.º 7
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch the daily valuation rows of ``entity`` and write them to the db.

        One row per day from ``start`` up to now is requested through
        ``get_fundamentals_continuously``.  The ratio columns are renamed to
        the short names used by the schema and the cap columns are scaled to
        base units before saving.

        :param entity: the stock entity (``id``, ``code`` and ``name`` are read)
        :param start: first day to fetch; the number of requested days is
            derived from ``now - start``
        :param end: unused here
        :param size: unused here
        :param timestamps: unused here
        :return: always ``None``; the rows are saved directly via ``df_to_db``
        """
        q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
        count: pd.Timedelta = now_pd_timestamp() - start
        df = get_fundamentals_continuously(q,
                                           end_date=now_time_str(),
                                           count=count.days + 1,
                                           panel=False)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['day'])
        df['code'] = entity.code
        df['name'] = entity.name
        df['id'] = df['timestamp'].apply(
            lambda x: "{}_{}".format(entity.id, to_time_str(x)))
        df = df.rename(
            {
                'pe_ratio_lyr': 'pe',
                'pe_ratio': 'pe_ttm',
                'pb_ratio': 'pb',
                'ps_ratio': 'ps',
                'pcf_ratio': 'pcf'
            },
            axis='columns')

        # Market values are scaled by 1e8 and share counts by 1e4
        # (presumably the provider reports them in those units - see the
        # JoinQuant valuation table documentation).
        # The original scaled `circulating_cap` twice and never touched
        # `circulating_market_cap`; each column is now scaled exactly once.
        df['market_cap'] = df['market_cap'] * 100000000
        df['circulating_market_cap'] = df['circulating_market_cap'] * 100000000
        df['capitalization'] = df['capitalization'] * 10000
        df['circulating_cap'] = df['circulating_cap'] * 10000
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)

        return None
    def record(self, entity_item, start, end, size, timestamps):
        """Fetch the GitHub users for the first timestamp and store them.

        The url template is filled with the day string of ``timestamps[0]``
        (used for both placeholders) and queried with a token picked by the
        incremented ``self.seed``.  All rows but the last are written
        straight to the db; the last one is returned to the caller.

        :return: a list holding only the last built row (empty when nothing
            was found)
        """
        self.seed += 1

        timestamp = timestamps[0]
        day = to_time_str(timestamp)

        items = get_all_results(url=self.url.format(day, day),
                                token=GithubAccount.get_token(seed=self.seed))

        current_time = now_pd_timestamp()

        results = []
        for item in items:
            user_id = f'user_github_{item["login"]}'
            results.append({
                'id': user_id,
                'entity_id': user_id,
                'timestamp': timestamp,
                'exchange': 'github',
                'entity_type': 'user',
                'code': item['login'],
                'node_id': item['node_id'],
                'created_timestamp': current_time,
                'updated_timestamp': None
            })

        # Saving the bulk in one go is faster; only the tail row is returned.
        head_rows, tail_rows = results[:-1], results[-1:]
        df_to_db(df=pd.DataFrame(data=head_rows),
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force=True)

        return tail_rows
Exemplo n.º 9
0
    def persist(self, entity, domain_list):
        """Add ``domain_list`` to the session and commit it.

        When the newest record is dated today and
        ``self.contain_unfinished_data`` is set, the last record is dropped
        unless the current time is at least two minutes past the closing
        time (``self.close_hour``:``self.close_minute``).

        :param entity: the entity the records belong to (only ``id`` is read)
        :param domain_list: records to save; nothing happens when it is empty
        """
        if not domain_list:
            return

        # The list may be in ascending or descending time order.
        first_timestamp, last_timestamp = sorted(
            (domain_list[0].timestamp, domain_list[-1].timestamp))

        self.logger.info(
            "persist {} for entity_id:{},time interval:[{},{}]".format(
                self.data_schema, entity.id, first_timestamp,
                last_timestamp))

        current_timestamp = now_pd_timestamp()

        saving_datas = domain_list

        # FIXME:remove this logic
        # FIXME:should remove unfinished data when recording,always set it to False now
        if is_same_date(current_timestamp,
                        last_timestamp) and self.contain_unfinished_data:
            # Compare as minutes since midnight.  Checking hour and minute
            # independently wrongly treats e.g. 16:00 as "before 15:02".
            now_minutes = current_timestamp.hour * 60 + current_timestamp.minute
            finished_minutes = self.close_hour * 60 + self.close_minute + 2
            if now_minutes < finished_minutes:
                # ignore unfinished kdata
                saving_datas = domain_list[:-1]
                self.logger.info(
                    "ignore kdata for entity_id:{},level:{},timestamp:{},current_timestamp:{}"
                    .format(entity.id, self.level, last_timestamp,
                            current_timestamp))
            # otherwise the day has closed and the last record counts as finished

        self.session.add_all(saving_datas)
        self.session.commit()
Exemplo n.º 10
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch unadjusted bars for ``entity`` and return them as records.

        The request runs from ``start`` (not earlier than
        ``self.start_timestamp`` when that is set) to tomorrow.  The last bar
        is dropped when its timestamp is still inside a trading interval.

        :param entity: the stock entity (``name`` is read)
        :param start: first timestamp to fetch
        :param end: ignored; it is replaced by "now + 1 day"
        :param size: unused here
        :param timestamps: unused here
        :return: list of row dicts; empty when no bar was returned
        """
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        end = now_pd_timestamp() + timedelta(days=1)

        start_timestamp = to_time_str(start)
        end_timestamp = to_time_str(end)
        # unadjusted prices
        df = get_price(to_jq_entity_id(entity), start_date=to_time_str(start_timestamp),
                       end_date=end_timestamp,
                       frequency=self.jq_trading_level,
                       fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                       skip_paused=True, fq=None)

        # Nothing came back: return early, because `df.iloc[-1]` below
        # raises IndexError on an empty frame.
        if df is None or df.empty:
            return []

        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value

        # remove the unfinished kdata
        if is_in_trading(entity_type='stock', exchange='sh', timestamp=df.iloc[-1, :]['timestamp']):
            df = df.iloc[:-1, :]

        return df.to_dict(orient='records')
Exemplo n.º 11
0
 def init_entities(self):
     """Load the entities through the parent class and drop the delisted ones.

     An entity is kept when it has no ``end_date`` or when its ``end_date``
     is later than the current time.
     """
     super().init_entities()

     still_listed = []
     for entity in self.entities:
         delisted_at = entity.end_date
         if delisted_at is None or delisted_at > now_pd_timestamp():
             still_listed.append(entity)
     self.entities = still_listed
Exemplo n.º 12
0
def is_in_trading(entity_type, exchange, timestamp):
    """Tell whether ``timestamp`` is today and the market is trading right now.

    Note that it is the *current* time, not ``timestamp`` itself, that is
    checked against the trading intervals; ``timestamp`` only has to fall on
    the current date.

    :return: ``True`` when the current time lies strictly inside one of the
        trading intervals of the given entity type and exchange
    """
    current = now_pd_timestamp()
    timestamp = to_pd_timestamp(timestamp)

    if not is_same_date(current, timestamp):
        return False

    intervals = get_trading_intervals(entity_type=entity_type, exchange=exchange)
    return any(
        current > date_and_time(current, opens) and current < date_and_time(current, closes)
        for opens, closes in intervals)
Exemplo n.º 13
0
    def __init__(
        self,
        data_schema=FinanceFactor,
        entity_ids: List[str] = None,
        entity_type: str = 'stock',
        # NOTE(review): this list default and the `columns`, `filters` and
        # `col_threshold` defaults below are mutable objects shared between
        # calls - safe only while nothing mutates them.
        exchanges: List[str] = ['sh', 'sz'],
        codes: List[str] = None,
        the_timestamp: Union[str, pd.Timestamp] = None,
        start_timestamp: Union[str, pd.Timestamp] = '2005-01-01',
        # NOTE(review): this default is evaluated once, when the function is
        # defined, so it is the import time rather than the call time.
        end_timestamp: Union[str, pd.Timestamp] = now_pd_timestamp(),
        # high ROE, high cash flow, low financial leverage, with growth
        columns: List = [
            FinanceFactor.roe, FinanceFactor.op_income_growth_yoy,
            FinanceFactor.net_profit_growth_yoy, FinanceFactor.report_period,
            FinanceFactor.op_net_cash_flow_per_op_income,
            FinanceFactor.sales_net_cash_flow_per_op_income,
            FinanceFactor.current_ratio, FinanceFactor.debt_asset_ratio
        ],
        filters: List = [
            FinanceFactor.roe >= 0.02,
            FinanceFactor.op_income_growth_yoy >= 0.05,
            FinanceFactor.net_profit_growth_yoy >= 0.05,
            FinanceFactor.op_net_cash_flow_per_op_income >= 0.1,
            FinanceFactor.sales_net_cash_flow_per_op_income >= 0.3,
            FinanceFactor.current_ratio >= 1,
            FinanceFactor.debt_asset_ratio <= 0.5
        ],
        order: object = None,
        limit: int = None,
        provider: str = 'eastmoney',
        level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
        category_field: str = 'entity_id',
        time_field: str = 'timestamp',
        computing_window: int = None,
        keep_all_timestamp: bool = True,
        fill_method: str = 'ffill',
        effective_number: int = None,
        transformer: Transformer = None,
        accumulator: Accumulator = None,
        persist_factor: bool = False,
        dry_run: bool = False,
        # 3 years
        window='1095d',
        count=8,
        col_threshold={'roe': 0.02},
        handling_on_period=('roe', )) -> None:
        """Set up a factor over ``FinanceFactor`` data with growth/quality filters.

        ``window``, ``count``, ``col_threshold`` and ``handling_on_period``
        are stored on the instance; every other argument is forwarded
        positionally, in declaration order, to the parent constructor.

        :param window: stored as ``self.window`` (default '1095d', i.e. 3 years)
        :param count: stored as ``self.count``
        :param col_threshold: stored as ``self.col_threshold``
        :param handling_on_period: stored as ``self.handling_on_period``
        """
        self.window = window
        self.count = count
        self.col_threshold = col_threshold
        # for indicators that are only meaningful on an annual basis (e.g.
        # roe), values from different quarters are treated differently
        self.handling_on_period = handling_on_period

        # These attributes are set before the parent constructor runs.
        super().__init__(data_schema, entity_ids, entity_type, exchanges,
                         codes, the_timestamp, start_timestamp, end_timestamp,
                         columns, filters, order, limit, provider, level,
                         category_field, time_field, computing_window,
                         keep_all_timestamp, fill_method, effective_number,
                         transformer, accumulator, persist_factor, dry_run)
Exemplo n.º 14
0
def report_core_company():
    """Select today's "core company" targets, sync them to eastmoney and mail them.

    A ``FundamentalSelector`` covering 2015-01-01 .. today is run.  Its
    open-long targets are put into the eastmoney group 'core' (best effort:
    a failure there is reported by mail but does not abort the report) and
    the resulting list is mailed to the subscribers.

    The whole job is retried every three minutes until it succeeds; after
    the tenth failure an error mail is sent once.
    """
    # The failure counter must live outside the retry loop.  When it was
    # reset at the top of every pass it could never reach 10, so the alert
    # mail below was unreachable.
    error_count = 0

    while True:
        email_action = EmailInformer()

        try:
            # Data refresh is currently disabled:
            # StockTradeDay.record_data(provider='joinquant')
            # Stock.record_data(provider='joinquant')
            # FinanceFactor.record_data(provider='eastmoney')
            # BalanceSheet.record_data(provider='eastmoney')

            target_date = to_time_str(now_pd_timestamp())

            my_selector: TargetSelector = FundamentalSelector(
                start_timestamp='2015-01-01', end_timestamp=target_date)
            my_selector.run()

            long_targets = my_selector.get_open_long_targets(
                timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')

                # add them to eastmoney
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except Exception:
                        # Best effort: a failed delete must not stop the
                        # re-creation.  Only `Exception` is swallowed so
                        # KeyboardInterrupt/SystemExit still propagate.
                        logger.debug('del_group core failed', exc_info=True)
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message(
                        "*****@*****.**", 'report_core_company error',
                        'report_core_company error:{}'.format(e))

                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action.send_message(get_subscriber_emails(),
                                      f'{to_time_str(target_date)} 核心资产选股结果',
                                      msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message(
                    "*****@*****.**", 'report_core_company error',
                    'report_core_company error:{}'.format(e))
Exemplo n.º 15
0
def get_report_period(the_date=None):
    """Return the end date of the quarter preceding the quarter of ``the_date``.

    :param the_date: any object with ``year`` and ``month`` attributes.
        ``None`` (the default) means "now" and is resolved on every call; a
        default evaluated in the signature would be frozen at import time.
    :return: the date as a ``'YYYY-MM-DD'`` string, one of ``-03-31``,
        ``-06-30``, ``-09-30`` of the same year or ``-12-31`` of the
        previous year
    """
    if the_date is None:
        the_date = now_pd_timestamp()

    if the_date.month >= 10:
        return "{}{}".format(the_date.year, '-09-30')
    if the_date.month >= 7:
        return "{}{}".format(the_date.year, '-06-30')
    if the_date.month >= 4:
        return "{}{}".format(the_date.year, '-03-31')
    # January to March: the last finished quarter belongs to the previous year.
    return "{}{}".format(the_date.year - 1, '-12-31')
Exemplo n.º 16
0
    def download_sz_etf_component(self, df: pd.DataFrame):
        """Download and store the component stocks of each ETF row in ``df``.

        After ``parse_sz_etf_underlying_index`` has run on ``df``, the Sina
        component page of each ETF's underlying index is fetched.  The 4th
        table on that page is taken as the component list, turned into one
        row per stock and saved via ``df_to_db``.  ETFs without an underlying
        index, pages that cannot be parsed and pages with fewer than four
        tables are skipped.

        :param df: ETF list; the columns '拟合指数', '证券代码' and '证券简称'
            are read
        """
        query_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml'

        self.parse_sz_etf_underlying_index(df)
        for _, etf in df.iterrows():
            underlying_index = etf['拟合指数']
            etf_code = etf['证券代码']

            if len(underlying_index) == 0:
                self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
                continue

            response = requests.get(query_url.format(underlying_index))
            response.encoding = 'gbk'

            try:
                tables = pd.read_html(response.text, header=1)
            except ValueError as error:
                self.logger.error(
                    f'HTML parse error: {error}, response: {response.text}')
                continue

            if len(tables) < 4:
                continue

            # Drop every column that contains a missing value, then format
            # the code column as a zero-padded 6-digit string.
            components = tables[3].copy().dropna(axis=1, how='any')
            components['品种代码'] = components['品种代码'].apply(
                lambda x: f'{x:06d}')

            etf_id = f'etf_sz_{etf_code}'
            components = components[['品种代码', '品种名称']].copy()
            components = components.rename(columns={
                '品种代码': 'stock_code',
                '品种名称': 'stock_name'
            })

            components['entity_id'] = etf_id
            components['entity_type'] = 'etf'
            components['exchange'] = 'sz'
            components['code'] = etf_code
            components['name'] = etf['证券简称']
            components['timestamp'] = now_pd_timestamp()

            components['stock_id'] = components['stock_code'].apply(
                china_stock_code_to_id)
            components['id'] = components['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=components,
                     provider=self.provider)
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
Exemplo n.º 17
0
def report_cross_ma():
    """Run the cross-MA selection for the latest trade day and mail the result.

    The target date is the newest stored ``StockTradeDay`` (or the current
    time when none is stored).  A ``TargetSelector`` with a
    ``CrossMaFactor`` over 2018-01-01 .. target date is run and its
    open-long targets are mailed.

    The whole job is retried every three minutes until it succeeds; after
    the tenth failure an error mail is sent once.
    """
    # The failure counter must live outside the retry loop.  When it was
    # reset at the top of every pass it could never reach 10, so the alert
    # mail below was unreachable.
    error_count = 0

    while True:
        email_action = EmailInformer()

        try:
            # Fetching the k-line data is currently disabled:
            # StockTradeDay.record_data(provider='joinquant')
            # Stock1dKdata.record_data(provider='joinquant')

            # With return_type='domain' the result is indexed like a list.
            latest_day = StockTradeDay.query_data(
                order=StockTradeDay.timestamp.desc(),
                limit=1,
                return_type='domain')
            if latest_day:
                target_date = latest_day[0].timestamp
            else:
                target_date = now_pd_timestamp()

            # compute the moving averages
            my_selector = TargetSelector(start_timestamp='2018-01-01',
                                         end_timestamp=target_date)
            # add the factors
            ma_factor = CrossMaFactor(start_timestamp='2018-01-01',
                                      end_timestamp=target_date)

            my_selector.add_filter_factor(ma_factor)

            my_selector.run()

            long_targets = my_selector.get_open_long_targets(
                timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')
                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action.send_message("*****@*****.**",
                                      f'{target_date} 均线选股结果', msg)

            break
        except Exception as e:
            logger.exception('report_cross_ma error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message("*****@*****.**",
                                          'report_cross_ma error',
                                          'report_cross_ma error:{}'.format(e))
Exemplo n.º 18
0
def get_etf_stocks(code=None,
                   codes=None,
                   ids=None,
                   timestamp=None,
                   provider=None):
    """Return the stock holdings of an ETF as known at ``timestamp``.

    The newest ``EtfStock`` record not later than ``timestamp`` decides which
    report is used.  If that report is an annual or half-year report, its
    rows are returned as they are.  Otherwise earlier reports are added one
    at a time until an annual or half-year report has been included; only
    the first-seen row per ``stock_code`` is then kept.

    :param code: ETF code, passed to ``EtfStock.query_data``
    :param codes: passed to ``EtfStock.query_data``
    :param ids: passed to ``EtfStock.query_data``
    :param timestamp: upper time bound of the query.  ``None`` (the default)
        means "now" and is resolved on every call; a default evaluated in
        the signature would be frozen at import time.
    :param provider: data provider, passed to ``EtfStock.query_data``
    :return: a DataFrame of holdings, or ``None`` when no record exists or
        no annual/half-year report was found within 20 look-back steps
    """
    # Local import so the block does not depend on a module-level alias.
    import pandas as pd

    if timestamp is None:
        timestamp = now_pd_timestamp()

    latests: List[EtfStock] = EtfStock.query_data(
        provider=provider,
        code=code,
        end_timestamp=timestamp,
        order=EtfStock.timestamp.desc(),
        limit=1,
        return_type='domain')
    if not latests:
        return None

    latest_record = latests[0]
    # rows of the most recent report
    df = EtfStock.query_data(
        provider=provider,
        code=code,
        codes=codes,
        ids=ids,
        end_timestamp=timestamp,
        filters=[EtfStock.report_date == latest_record.report_date])

    # the most recent report is an annual or half-year report
    if latest_record.report_period in (ReportPeriod.year, ReportPeriod.half_year):
        return df

    # quarterly report: it has to be combined with an annual or half-year
    # report to get the holdings
    for step in range(20):
        report_date = get_recent_report_date(latest_record.report_date,
                                             step=step)

        pre_df = EtfStock.query_data(
            provider=provider,
            code=code,
            codes=codes,
            ids=ids,
            end_timestamp=timestamp,
            filters=[
                EtfStock.report_date == to_pd_timestamp(report_date)
            ])
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent that works on old and new versions.
        df = pd.concat([df, pre_df])

        # stop once a half-year or annual report has been added
        periods = pre_df['report_period'].tolist()
        if (ReportPeriod.half_year.value in periods) or (
                ReportPeriod.year.value in periods):
            # keep the most recent holding per stock
            return df.drop_duplicates(subset=['stock_code'], keep='first')

    # No annual/half-year report within the look-back limit.
    return None
Exemplo n.º 19
0
    def record(self, entity_item, start, end, size, timestamps):
        """Fetch the GitHub info for one user and stamp it with the current time.

        The url template is filled with ``entity_item.code`` and queried with
        a token picked by the incremented ``self.seed``.

        :return: a one-element list with the fetched info, or an empty list
            when nothing was returned
        """
        self.seed += 1

        user_info = get_result(url=self.url.format(entity_item.code),
                               token=GithubAccount.get_token(seed=self.seed))
        if not user_info:
            return []

        user_info['updated_timestamp'] = now_pd_timestamp()
        return [user_info]
Exemplo n.º 20
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch unadjusted bars plus forward-adjusted prices for ``entity``.

        ``size`` unadjusted bars are requested first; if that request raises,
        the error is logged and ``None`` is returned.  The same number of
        forward-adjusted bars is then fetched and copied into the ``qfq_*``
        columns.  If the forward-adjusted close of the first bar differs from
        the stored ``qfq_close`` for the same date, ``self.factor`` and
        ``self.last_timestamp`` are set.

        :return: list of row dicts, or ``None`` when the first request failed
        """
        # Unadjusted bars.
        try:
            df = get_bars(
                to_jq_entity_id(entity),
                count=size,
                unit=self.jq_trading_level,
                fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                include_now=False)
        except Exception as e:
            # just ignore the error,for some new stocks not in the index
            self.logger.exception(e)
            return None

        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['timestamp'] = pd.to_datetime(df['date'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value

        # Forward-adjusted bars, with the current time as the reference date.
        qfq_df = get_bars(
            to_jq_entity_id(entity),
            count=size,
            unit=self.jq_trading_level,
            fields=['date', 'open', 'close', 'low', 'high'],
            fq_ref_date=to_time_str(now_pd_timestamp()),
            include_now=False)

        # The rows saved in the past are not updated here.
        qfq_columns = {
            'qfq_close': 'close',
            'qfq_open': 'open',
            'qfq_high': 'high',
            'qfq_low': 'low',
        }
        for target, source in qfq_columns.items():
            df[target] = qfq_df[source]

        first_bar = qfq_df.head(1)
        first_date = first_bar['date'][0]

        saved_df = get_kdata(entity_id=entity.id,
                             provider=self.provider,
                             start_timestamp=first_date,
                             end_timestamp=first_date,
                             limit=1,
                             level=self.level)

        if df_is_not_null(saved_df):
            saved_close = saved_df.iloc[0, :]['qfq_close']
            fetched_close = first_bar['close'][0]
            # A different close at the same time means the forward
            # adjustment has to be recomputed.
            if saved_close != fetched_close:
                self.factor = fetched_close / saved_close
                self.last_timestamp = pd.Timestamp(first_date)

        return df.to_dict(orient='records')
Exemplo n.º 21
0
    def __init__(self,
                 entity_ids=None,
                 entity_schema=Stock,
                 exchanges=None,
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 long_threshold=0.8,
                 short_threshold=0.2,
                 level=IntervalLevel.LEVEL_1DAY,
                 provider='eastmoney',
                 portfolio_selector=None) -> None:
        """Store the selection scope and thresholds, then call ``init_factors``.

        When ``the_timestamp`` is given, the start and end of the window are
        both set to it.  Otherwise ``start_timestamp`` is only set when one
        is passed, and ``end_timestamp`` falls back to the current time.

        :param portfolio_selector: optional selector; when given, its
            ``entity_schema`` must be one of ``Etf``, ``Block`` or ``Index``
        """
        self.entity_ids = entity_ids
        self.entity_schema = entity_schema
        self.exchanges = exchanges
        self.codes = codes
        self.provider = provider
        self.portfolio_selector: TargetSelector = portfolio_selector

        if self.portfolio_selector:
            assert self.portfolio_selector.entity_schema in [Etf, Block, Index]

        if the_timestamp:
            # A single point in time: the window collapses onto it.
            self.the_timestamp = to_pd_timestamp(the_timestamp)
            self.start_timestamp = self.the_timestamp
            self.end_timestamp = self.the_timestamp
        else:
            if start_timestamp:
                self.start_timestamp = to_pd_timestamp(start_timestamp)
            self.end_timestamp = (to_pd_timestamp(end_timestamp)
                                  if end_timestamp else now_pd_timestamp())

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        # Factors and the results computed from them start out empty.
        self.filter_factors: List[FilterFactor] = []
        self.score_factors: List[ScoreFactor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: DataFrame = None
        self.open_short_df: DataFrame = None

        # The raw (unconverted) arguments are passed on to the hook.
        self.init_factors(entity_ids=entity_ids,
                          entity_schema=entity_schema,
                          exchanges=exchanges,
                          codes=codes,
                          the_timestamp=the_timestamp,
                          start_timestamp=start_timestamp,
                          end_timestamp=end_timestamp,
                          level=self.level)
Exemplo n.º 22
0
    def evaluate_start_end_size_timestamps(self, entity):
        """Decide whether ``entity`` needs to be fetched again.

        :return: ``(None, None, size, None)`` where ``size`` is ``0`` when the
            latest saved record was updated less than 10 days ago, and
            ``self.default_size`` otherwise
        """
        latest_record = self.get_latest_saved_record(entity=entity)
        latest_timestamp = latest_record.updated_timestamp if latest_record else None

        recently_updated = (latest_timestamp is not None
                            and (now_pd_timestamp() - latest_timestamp).days < 10)
        if recently_updated:
            self.logger.info('entity_item:{},updated_timestamp:{},ignored'.format(entity.id, latest_timestamp))
            return None, None, 0, None

        return None, None, self.default_size, None
Exemplo n.º 23
0
 def get_stocks(cls,
                code=None,
                codes=None,
                ids=None,
                timestamp=None,
                provider=None):
     """Return the stock holdings of an ETF by delegating to ``get_etf_stocks``.

     :param code: passed through to ``get_etf_stocks``
     :param codes: passed through to ``get_etf_stocks``
     :param ids: passed through to ``get_etf_stocks``
     :param timestamp: upper time bound.  ``None`` (the default) means "now"
         and is resolved on every call; a default evaluated in the signature
         would be frozen at import time.
     :param provider: passed through to ``get_etf_stocks``
     :return: whatever ``get_etf_stocks`` returns
     """
     # Imported here rather than at module level, as in the original.
     from zvt.api.common import get_etf_stocks

     if timestamp is None:
         timestamp = now_pd_timestamp()

     return get_etf_stocks(code=code,
                           codes=codes,
                           ids=ids,
                           timestamp=timestamp,
                           provider=provider)
Exemplo n.º 24
0
def report_core_company():
    """Refresh the data, run the fundamental selector and mail the result.

    The whole job is retried every 3 minutes until it succeeds. After the
    10th failed attempt an error mail is sent once; retrying continues
    afterwards.
    """
    # Must live outside the retry loop: when it is reset on every iteration
    # it never gets past 1 and the failure notification below cannot fire.
    error_count = 0

    while True:
        email_action = EmailInformer()

        try:
            StockTradeDay.record_data(provider='joinquant')
            Stock.record_data(provider='joinquant')
            FinanceFactor.record_data(provider='eastmoney')
            BalanceSheet.record_data(provider='eastmoney')

            # Use the most recent recorded trade day as the target date,
            # falling back to the current time when none is available.
            latest_day: StockTradeDay = StockTradeDay.query_data(
                order=StockTradeDay.timestamp.desc(),
                limit=1,
                return_type='domain')
            if latest_day:
                target_date = latest_day[0].timestamp
            else:
                target_date = now_pd_timestamp()

            my_selector: TargetSelector = FundamentalSelector(
                start_timestamp='2015-01-01', end_timestamp=target_date)
            my_selector.run()

            long_targets = my_selector.get_open_long_targets(
                timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')
                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action.send_message([
                '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**',
                '*****@*****.**'
            ], f'{to_time_str(target_date)} 核心资产选股结果', msg)

            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            # Notify exactly once, on the 10th consecutive failure.
            if error_count == 10:
                email_action.send_message(
                    "*****@*****.**", 'report_core_company error',
                    'report_core_company error:{}'.format(e))
Exemplo n.º 25
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch unadjusted k-data for ``entity`` via ``get_price`` and save it.

        :param entity: entity to fetch; its ``id``, ``name`` and ``code`` are
            written into the stored rows
        :param start: requested start; raised to ``self.start_timestamp``
            when that is set and later
        :param end: ignored, the current time is always used as the end
        :param size: unused
        :param timestamps: unused
        :return: always ``None``
        """
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        end = now_pd_timestamp()

        start_timestamp = to_time_str(start)
        # JoinQuant's get_price must be given an explicit end time,
        # otherwise it returns future data.
        end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)

        # fq=None: prices are not adjusted.
        kdata = get_price(
            to_jq_entity_id(entity),
            start_date=to_time_str(start_timestamp),
            end_date=end_timestamp,
            frequency=self.jq_trading_level,
            fields=['open', 'close', 'low', 'high', 'volume', 'money'],
            skip_paused=True,
            fq=None)

        if not df_is_not_null(kdata):
            return None

        # Turn the index into a regular 'timestamp' column.
        kdata.index.name = 'timestamp'
        kdata.reset_index(inplace=True)
        kdata['name'] = entity.name
        kdata.rename(columns={'money': 'turnover'}, inplace=True)

        kdata['entity_id'] = entity.id
        kdata['timestamp'] = pd.to_datetime(kdata['timestamp'])
        kdata['provider'] = 'joinquant'
        kdata['level'] = self.level.value
        kdata['code'] = entity.code

        # Daily-and-above levels are keyed by day, finer levels by the full
        # ISO8601 timestamp.
        if self.level >= IntervalLevel.LEVEL_1DAY:
            id_time_fmt = TIME_FORMAT_DAY
        else:
            id_time_fmt = TIME_FORMAT_ISO8601

        def build_row_id(row):
            return "{}_{}".format(row['entity_id'],
                                  to_time_str(row['timestamp'], fmt=id_time_fmt))

        kdata['id'] = kdata[['entity_id', 'timestamp']].apply(build_row_id,
                                                              axis=1)

        df_to_db(df=kdata,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force=self.force_update)

        return None
Exemplo n.º 26
0
def every_day_report():
    """Record k-data, run the moving-average selector and mail the targets.

    Does nothing on Saturdays and Sundays. On any exception the whole job
    is retried after a 3 minute pause, until one run succeeds.
    """
    while True:
        try:
            t = now_pd_timestamp()
            if t.dayofweek in (5, 6):
                logger.info(f'today:{t} is {t.day_name()},just ignore')
                # Actually skip the run; without this return the message
                # above was logged but the full job still executed.
                return

            today = to_time_str(t)

            # Fetch k-line data.
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1DAY).run()
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1WEEK).run()
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1MON).run()

            # Compute the moving averages.
            my_selector = TargetSelector(start_timestamp='2016-01-01',
                                         end_timestamp=today)
            # add the factors
            # dry_run is True because only the most recent data is needed;
            # the full history does not have to be loaded for backtesting.
            ma_factor = CrossMaFactor(start_timestamp='2016-01-01',
                                      end_timestamp=today,
                                      dry_run=True)

            my_selector.add_filter_factor(ma_factor)

            my_selector.run()

            long_targets = my_selector.get_open_long_targets(timestamp=today)
            if long_targets:
                df = get_entities(provider='eastmoney',
                                  entity_schema=Stock,
                                  entity_ids=long_targets,
                                  columns=['code', 'name'])
                info = [
                    df.loc[i, 'code'] + ' ' + df.loc[i, 'name']
                    for i in df.index
                ]
                msg = ' '.join(info)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action = EmailInformer()
            email_action.send_message("*****@*****.**", f'{today} 均线选股结果', msg)

            break
        except Exception as e:
            logger.exception('report1 sched error:{}'.format(e))
            time.sleep(60 * 3)
Exemplo n.º 27
0
def every_day_report():
    """Select by finance, filter by a moving-average cross, mail the result.

    Entities returned by ``select_by_finance`` are narrowed with a
    ``CrossMaFactor`` filter; the surviving codes and names are mailed, or
    ``'no targets'`` when nothing survives either stage. On any exception
    the job is retried after a 3 minute pause, until one run succeeds.
    """
    while True:
        try:
            today = now_pd_timestamp()
            long_targets = select_by_finance(today)

            logger.info(f'selected:{len(long_targets)}')

            # Default message. It must be set before the branches below:
            # when the finance stage selects entities but the MA filter
            # rejects all of them, no branch assigns ``msg``, and the
            # resulting NameError would be swallowed by the broad except
            # and retried forever.
            msg = 'no targets'

            if long_targets:
                ma_factor = CrossMaFactor(start_timestamp='2015-01-01',
                                          end_timestamp=today,
                                          dry_run=True,
                                          persist_factor=False,
                                          entity_ids=long_targets,
                                          windows=[5, 30, 120])
                my_selector = TargetSelector(start_timestamp='2015-01-01',
                                             end_timestamp=today,
                                             entity_ids=long_targets)
                my_selector.add_filter_factor(ma_factor)
                my_selector.run()
                final_targets = my_selector.get_open_long_targets(today)

                # Drop duplicate entity ids.
                final_targets = list(set(final_targets))

                logger.info(f'final selected:{len(final_targets)}')

                if final_targets:
                    df = get_entities(provider='eastmoney',
                                      entity_schema=Stock,
                                      entity_ids=final_targets,
                                      columns=['code', 'name'])
                    info = [
                        df.loc[i, 'code'] + ' ' + df.loc[i, 'name']
                        for i in df.index
                    ]
                    msg = ' '.join(info)

            logger.info(msg)

            email_action = EmailInformer()
            email_action.send_message("*****@*****.**",
                                      f'{today} 基本面 + 技术面选股结果', msg)

            break
        except Exception as e:
            logger.exception('report3 sched error:{}'.format(e))
            time.sleep(60 * 3)
Exemplo n.º 28
0
    def fetch_csi_index_component(self, df: pd.DataFrame):
        """
        Fetch the constituent stocks of SSE / CSI indices.

        For every row of ``df`` the constituents spreadsheet is downloaded
        from csindex.com.cn, tagged with the index metadata and written with
        ``df_to_db``. Rows whose download fails with an HTTP error are
        logged and skipped.

        :param df: index list; the ``code`` and ``name`` of each row are read
        """
        url_template = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'
        # Spreadsheet header -> stored column name.
        column_mapping = {
            '成分券代码Constituent Code': 'stock_code',
            '成分券名称Constituent Name': 'stock_name'
        }

        for _, index_row in df.iterrows():
            index_code = index_row['code']

            try:
                response = requests.get(url_template.format(index_code))
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index_row["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            sheet = pd.read_excel(io.BytesIO(response.content))
            constituents = sheet[list(column_mapping)].rename(
                columns=column_mapping)

            index_id = f'index_cn_{index_code}'

            # Columns that hold the same value on every constituent row.
            index_columns = {
                'entity_id': index_id,
                'entity_type': 'index',
                'exchange': 'cn',
                'code': index_code,
                'name': index_row['name'],
                'timestamp': now_pd_timestamp(),
            }
            for column, value in index_columns.items():
                constituents[column] = value

            constituents['stock_id'] = constituents['stock_code'].apply(
                lambda raw_code: china_stock_code_to_id(str(raw_code)))
            constituents['id'] = constituents['stock_id'].apply(
                lambda stock_id: f'{index_id}_{stock_id}')

            df_to_db(data_schema=self.data_schema,
                     df=constituents,
                     provider=self.provider,
                     force_update=True)
            self.logger.info(f'{index_row["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
Exemplo n.º 29
0
    def fetch_cni_index_component(self, df: pd.DataFrame):
        """
        Fetch the constituent stocks of CNI indices.

        For every row of ``df`` the constituents spreadsheet is downloaded
        from cnindex.com.cn, reduced to its stock-code column, tagged with
        the index metadata and written with ``df_to_db``. Rows whose
        download fails with an HTTP error are logged and skipped.

        :param df: index list; the ``code`` and ``name`` of each row are read
        """
        query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            try:
                response = requests.get(url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            # Read everything as strings so stock codes keep leading zeros.
            response_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'

            # The code column appears under one of two headers. Take a copy
            # so the column assignments below do not write into a slice of
            # the original sheet.
            try:
                response_df = response_df[['样本股代码']].copy()
            except KeyError:
                response_df = response_df[['证券代码']].copy()

            # Rename while the frame still has exactly one column. Assigning
            # a one-element list to ``columns`` after the metadata columns
            # have been added raises a length-mismatch ValueError.
            response_df.columns = ['stock_code']

            response_df['entity_id'] = index_id
            response_df['entity_type'] = 'index'
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp()

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
Exemplo n.º 30
0
    def download_sh_etf_component(self, df: pd.DataFrame):
        """
        Download the constituent stocks of Shanghai-listed ETFs.

        ETF_CLASS => 1. single-market ETF 2. cross-market ETF
                     3. cross-border ETF 5. bond ETF 6. gold ETF
        Only classes 1 and 2 are processed.

        :param df: ETF list data
        :return: None
        """
        url_template = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                       'isPagination=false&type={}&etfClass={}'

        is_single_or_cross_market = (df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')
        etf_df = self.populate_sh_etf_type(df[is_single_or_cross_market])

        for _, etf_row in etf_df.iterrows():
            response = requests.get(
                url_template.format(etf_row['ETF_TYPE'], etf_row['ETF_CLASS']),
                headers=DEFAULT_SH_ETF_LIST_HEADER)
            payload = demjson.decode(response.text)
            raw_df = pd.DataFrame(payload.get('result', []))

            etf_code = etf_row['FUND_ID']
            etf_id = f'etf_sh_{etf_code}'

            # Keep only code and name, under the stored column names.
            holdings = raw_df[['instrumentId', 'instrumentName']].rename(
                columns={
                    'instrumentId': 'stock_code',
                    'instrumentName': 'stock_name'
                })

            # Columns that hold the same value on every constituent row.
            holdings['entity_id'] = etf_id
            holdings['entity_type'] = 'etf'
            holdings['exchange'] = 'sh'
            holdings['code'] = etf_code
            holdings['name'] = etf_row['FUND_NAME']
            holdings['timestamp'] = now_pd_timestamp()

            holdings['stock_id'] = holdings['stock_code'].apply(
                lambda stock_code: china_stock_code_to_id(stock_code))
            holdings['id'] = holdings['stock_id'].apply(
                lambda stock_id: f'{etf_id}_{stock_id}')

            df_to_db(data_schema=self.data_schema,
                     df=holdings,
                     provider=self.provider)
            self.logger.info(f'{etf_row["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()