def compute(self, begin_date, end_date):
        """
        Find days on which a stock fell on expanded volume with a small candle
        body while staying above the previous close for most of the session,
        and upsert one ``{code, date}`` document per hit into
        ``self.collection``.

        A day qualifies when all of the following hold:

        1. ``close < pre_close``;
        2. ``volume`` is more than 1.5 times the volume of the previous row
           of the daily data (assumed to be the previous trading day);
        3. ``abs(open - close)`` is less than 3% of ``close``;
        4. more than 150 one-minute bars of that day close above
           ``pre_close``.

        :param begin_date: start date passed to ``DataModule.get_k_data``
        :param end_date: end date passed to ``DataModule.get_k_data``
        """
        codes = get_all_codes()
        dm = DataModule()

        for code in codes:
            df_daily = dm.get_k_data(code, begin_date=begin_date, end_date=end_date)

            if df_daily.index.size == 0:
                continue

            # The previous row's volume has to be captured BEFORE any row is
            # filtered out; shifting after the down-day filter would compare
            # against the previous *down day* instead of the previous day.
            # assign() also returns a new frame, so no column is ever written
            # onto a filtered slice.
            df_daily = df_daily.assign(
                change=df_daily['close'] - df_daily['pre_close'],
                last_volume=df_daily['volume'].shift(1))

            # Down day on expanded volume
            df_daily = df_daily[df_daily['change'] < 0].dropna()
            df_daily = df_daily.assign(
                volume_change=(df_daily['volume'] / df_daily['last_volume']).round(2))
            df_daily = df_daily[df_daily['volume_change'] > 1.5]

            # The candle body is small
            df_daily = df_daily.assign(
                entity=((df_daily['open'] - df_daily['close']).abs() * 100
                        / df_daily['close']).round(2))
            df_daily = df_daily[df_daily['entity'] < 3]

            # `drop` is keyword-only since pandas 2.0; the default (True)
            # matches the truthy positional value used previously.
            df_daily = df_daily.set_index('date')

            update_requests = []
            for date, pre_close in df_daily['pre_close'].items():
                # Most of the session must have traded above yesterday's close
                df_minute = dm.get_k_data(code, period='M1', begin_date=date, end_date=date)
                minutes_above = (df_minute['close'] > pre_close).sum()

                if minutes_above > 150:
                    update_requests.append(UpdateOne(
                        {'code': code, 'date': date},
                        {'$set': {'code': code, 'date': date}},
                        upsert=True))

            if update_requests:
                save_result = self.collection.bulk_write(update_requests, ordered=False)
                print('股票代码: %s, 因子: %s, 插入:%4d, 更新: %4d' %
                      (code, self.name, save_result.upserted_count, save_result.modified_count), flush=True)
Exemplo n.º 2
0
    def fill_high_limit_low_limit(begin_date, end_date):
        """
        Fill the limit-up (``high_limit``) and limit-down (``low_limit``)
        prices into the non-index documents of the ``daily`` collection.

        Rules applied to each (code, date) document, in priority order:

        - the date is the stock's IPO date according to tushare
          ``new_stocks``: issue price * 1.44 / issue price * 0.64;
        - the stock name recorded for that date in the ``basic`` collection
          starts with ``ST`` or ``*S``: pre_close * 1.05 / pre_close * 0.95;
        - otherwise: pre_close * 1.10 / pre_close * 0.90.

        Documents that match no rule (no positive ``pre_close`` and not an
        IPO day) are left untouched.

        :param begin_date: start date (inclusive)
        :param end_date: end date (inclusive)
        """
        # New-stock data from tushare, keyed by '<code>_<ipo_date>'
        df_new_stocks = ts.new_stocks()
        code_ipo_price_dict = {}
        for ipo_code, ipo_date, ipo_price in zip(df_new_stocks['code'],
                                                 df_new_stocks['ipo_date'],
                                                 df_new_stocks['price']):
            # A row without a textual listing date cannot form a lookup key;
            # skip it instead of failing on the string concatenation.
            if not isinstance(ipo_date, str):
                continue
            code_ipo_price_dict[ipo_code + '_' + ipo_date] = ipo_price

        all_codes = get_all_codes()

        basic_cursor = DB_CONN['basic'].find(
            {'date': {
                '$gte': begin_date,
                '$lte': end_date
            }},
            projection={
                'code': True,
                'date': True,
                'name': True,
                '_id': False
            },
            batch_size=1000)

        # '<code>_<date>' -> stock name on that date
        code_date_basic_dict = {
            x['code'] + '_' + x['date']: x['name']
            for x in basic_cursor
        }

        for code in all_codes:
            daily_cursor = DB_CONN['daily'].find(
                {
                    'code': code,
                    'date': {
                        '$lte': end_date,
                        '$gte': begin_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'pre_close': True
                })

            update_requests = []
            for daily in daily_cursor:
                date = daily['date']
                code_date_key = code + '_' + date
                try:
                    high_limit = -1
                    low_limit = -1
                    # NOTE(review): an IPO-day document presumably has no
                    # pre_close yet; read it leniently so the IPO rule below
                    # is still reachable for such documents.
                    pre_close = daily.get('pre_close')
                    has_pre_close = pre_close is not None and pre_close > 0

                    if code_date_key in code_ipo_price_dict:
                        ipo_price = code_ipo_price_dict[code_date_key]
                        high_limit = round(ipo_price * 1.44, 2)
                        low_limit = round(ipo_price * 0.64, 2)
                    elif has_pre_close:
                        name = code_date_basic_dict.get(code_date_key)
                        if name is not None and name[0:2] in ('ST', '*S'):
                            # ST stocks move within a symmetric 5% band
                            high_limit = round(pre_close * 1.05, 2)
                            low_limit = round(pre_close * 0.95, 2)
                        else:
                            high_limit = round(pre_close * 1.10, 2)
                            low_limit = round(pre_close * 0.9, 2)

                    print(
                        'pre_close: %6.2f, high_limit: %6.2f, low_limit: %6.2f'
                        % (float('nan') if pre_close is None else pre_close,
                           high_limit, low_limit),
                        flush=True)

                    if high_limit > 0 and low_limit > 0:
                        update_requests.append(
                            UpdateOne(
                                {
                                    'code': code,
                                    'date': date,
                                    'index': False
                                }, {
                                    '$set': {
                                        'high_limit': high_limit,
                                        'low_limit': low_limit
                                    }
                                }))
                except Exception:
                    # Best effort per document: report and move on, but let
                    # KeyboardInterrupt / SystemExit propagate.
                    print('填充涨跌停时发生错误,股票代码:%s,日期:%s' % (code, date),
                          flush=True)

            if update_requests:
                update_result = DB_CONN['daily'].bulk_write(update_requests,
                                                            ordered=False)
                print('填充涨跌停,股票:%s,更新:%4d条' %
                      (code, update_result.modified_count),
                      flush=True)
Exemplo n.º 3
0
    def fill_au_factor_pre_close(begin_date, end_date):
        """
        Fill two fields into the non-index documents of the ``daily``
        collection:

        1. ``au_factor``, the adjustment factor:
           ``au_factor = hfq_close / close`` (rounded to 2 decimals), where
           ``hfq_close`` is the close of the same date in ``daily_hfq``;
        2. ``pre_close = close(-1) * au_factor(-1) / au_factor`` (rounded to
           2 decimals), using the previous successfully processed document.

        The first document of each stock in the range, and the first one
        after a failed document, only receives ``au_factor``.

        :param begin_date: start date (inclusive)
        :param end_date: end date (inclusive)
        """
        all_codes = get_all_codes()

        for code in all_codes:
            # Both collections are read with the same filter and projection.
            query = {
                'code': code,
                'date': {
                    '$lte': end_date,
                    '$gte': begin_date
                },
                'index': False
            }
            fields = {'date': True, 'close': True}

            hfq_daily_cursor = DB_CONN['daily_hfq'].find(
                query, sort=[('date', ASCENDING)], projection=fields)
            date_hfq_close_dict = {
                x['date']: x['close']
                for x in hfq_daily_cursor
            }

            daily_cursor = DB_CONN['daily'].find(
                query, sort=[('date', ASCENDING)], projection=fields)

            # -1 marks "no usable previous day"
            last_close = -1
            last_au_factor = -1

            update_requests = []
            for daily in daily_cursor:
                date = daily['date']
                try:
                    close = daily['close']

                    au_factor = round(date_hfq_close_dict[date] / close, 2)
                    doc = {'au_factor': au_factor}
                    if last_close != -1 and last_au_factor != -1:
                        pre_close = last_close * last_au_factor / au_factor
                        doc['pre_close'] = round(pre_close, 2)

                    last_au_factor = au_factor
                    last_close = close

                    update_requests.append(
                        UpdateOne({
                            'code': code,
                            'date': date,
                            'index': False
                        }, {'$set': doc}))
                except Exception:
                    # Best effort per document (missing hfq close, zero
                    # close, ...); KeyboardInterrupt / SystemExit propagate.
                    print('计算复权因子时发生错误,股票代码:%s,日期:%s' % (code, date),
                          flush=True)
                    # Reset to the initial state so the next day does not
                    # derive pre_close from a stale or broken previous day
                    last_close = -1
                    last_au_factor = -1

            if update_requests:
                update_result = DB_CONN['daily'].bulk_write(update_requests,
                                                            ordered=False)
                print('填充复权因子和前收,股票:%s,更新:%4d条' %
                      (code, update_result.modified_count),
                      flush=True)
Exemplo n.º 4
0
    def compute(self, begin_date, end_date):
        """
        Compute the ``roe`` factor for every stock on every trading date in
        the range and upsert it into ``self.collection``.

        For each (code, date) the most recently announced annual report
        (``report_date`` ending in ``-12-31``, ``announced_date`` on or
        before the date) is looked up in both ``CWBB_LRB`` and
        ``CWBB_ZCFZB``; the stored value is
        ``round(parentnetprofit / sumasset, 2)``. Dates for which either
        report is missing, or whose figures cannot be divided, are skipped.

        :param begin_date: start date passed to ``get_trading_dates``
        :param end_date: end date passed to ``get_trading_dates``
        """
        codes = get_all_codes()

        all_dates = get_trading_dates(begin_date=begin_date, end_date=end_date)

        latest_first = [('announced_date', DESCENDING)]

        for code in codes:
            update_requests = []
            for date in all_dates:
                # Latest annual report announced on or before this date.
                # Raw string: '\d' is not a valid escape in a normal literal.
                report_filter = {
                    'code': code,
                    'announced_date': {
                        '$lte': date
                    },
                    'report_date': {
                        '$regex': r'\d{4}-12-31$'
                    }
                }

                lrb = DB_CONN['CWBB_LRB'].find_one(
                    report_filter,
                    sort=latest_first,
                    projection={'parentnetprofit': True})

                # No income statement available: skip this date
                if lrb is None:
                    continue

                zcfzb = DB_CONN['CWBB_ZCFZB'].find_one(
                    report_filter,
                    sort=latest_first,
                    projection={'sumasset': True})

                # No balance sheet available: skip this date
                if zcfzb is None:
                    continue

                try:
                    improved_roe = round(
                        lrb['parentnetprofit'] / zcfzb['sumasset'], 2)
                except (KeyError, TypeError, ZeroDivisionError):
                    # A missing, non-numeric or zero figure must not abort
                    # the run for all remaining dates and stocks.
                    print('计算ROE时发生错误,股票代码:%s,日期:%s' % (code, date),
                          flush=True)
                    continue

                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'roe': improved_roe
                        }
                    },
                              upsert=True))

            if update_requests:
                save_result = self.collection.bulk_write(update_requests,
                                                         ordered=False)
                print('股票代码: %s, 因子: %s, 插入:%4d, 更新: %4d' %
                      (code, self.name, save_result.upserted_count,
                       save_result.modified_count),
                      flush=True)
    def crawl_finance_report(self):
        """
        Crawl the three financial statements of every stock from the
        Eastmoney web API and upsert them into ``self.db``.

        Each report type (``CWBB_ZCFZB``, ``CWBB_XJLLB``, ``CWBB_LRB``) is
        stored in the collection of the same name. Every report document is
        extended with ``announced_date``, ``report_date`` (both cut to the
        first 10 characters), ``code`` and ``name`` before being upserted on
        ``(code, report_date, announced_date)``.

        :return: None
        """
        # Get the full stock list first
        codes = get_all_codes()

        # Create the connection pool
        conn_pool = urllib3.PoolManager()

        # Target URL with two placeholders: {1} - report type, {2} - stock code
        url = 'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?' \
              'type={1}&token=70f12f2f4f091e459a279469fe49eca5&' \
              'st=reportdate&sr=-1&p=1&ps=500&filter=(scode=%27{2}%27)' \
              '&js={%22pages%22:(tp),%22data%22:%20(x)}&rt=51044775#'

        user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36'

        # Report types: balance sheet, cash-flow statement, income statement
        report_types = ['CWBB_ZCFZB', 'CWBB_XJLLB', 'CWBB_LRB']

        for code in codes:
            for report_type in report_types:
                print('开始抓取财报数据,股票:%s,财报类型:%s' % (code, report_type),
                      flush=True)
                response = conn_pool.request(
                    'GET',
                    url.replace('{1}', report_type).replace('{2}', code),
                    headers={'User-Agent': user_agent})

                # Parse the response. json.loads() takes no positional
                # encoding argument (it raises TypeError on Python >= 3.9);
                # the payload is already decoded to str here.
                result = json.loads(response.data.decode('UTF-8'))

                # A missing or null 'data' field means "no reports"
                reports = result.get('data') or []

                update_requests = []
                for report in reports:
                    # Normalise the fields of this report
                    try:
                        report.update({
                            # Keep only yyyy-mm-dd of the announcement date
                            # and of the reporting period
                            'announced_date':
                            report['noticedate'][0:10],
                            'report_date':
                            report['reportdate'][0:10],
                            # Use the same field names for stock name and
                            # code as the rest of the system
                            'code':
                            code,
                            'name':
                            report['sname']
                        })

                        update_requests.append(
                            UpdateOne(
                                {
                                    'code': code,
                                    'report_date': report['report_date'],
                                    'announced_date': report['announced_date']
                                }, {'$set': report},
                                upsert=True))
                    except Exception:
                        # Best effort per report; KeyboardInterrupt and
                        # SystemExit are no longer swallowed.
                        print('解析出错,股票:%s 财报类型:%s' % (code, report_type))

                if update_requests:
                    update_result = self.db[report_type].bulk_write(
                        update_requests, ordered=False)
                    print('股票 %s, 财报类型:%s,更新: %4d, 新增: %4d' %
                          (code, report_type, update_result.modified_count,
                           update_result.upserted_count))
Exemplo n.º 6
0
    def compute(self, begin_date, end_date):
        """
        Compute the ``pe`` factor of every stock for the given period and
        upsert it into ``self.collection``.

        For each daily bar the most recently announced annual report
        (``report_date`` matching ``\\d{4}-12-31``, ``announced_date`` on or
        before the bar's date) is read from ``finance_report``; the stored
        value is ``round(close / eps, 3)``. Dates without such a report, or
        whose ``eps`` is missing, non-numeric (e.g. ``'-'``) or zero, are
        skipped.

        :param begin_date: start date
        :param end_date: end date
        """
        dm = DataModule()

        # Get all stocks
        codes = get_all_codes()

        for code in codes:
            print('计算市盈率, %s' % code)
            df_daily = dm.get_k_data(code,
                                     autype=None,
                                     begin_date=begin_date,
                                     end_date=end_date)

            if df_daily.index.size == 0:
                continue

            # `drop` is keyword-only in pandas >= 2.0, so rely on its default
            # (True) instead of passing a positional 1.
            closes = df_daily.set_index('date')['close']

            update_requests = []
            for date, close in closes.items():
                finance_report = DB_CONN['finance_report'].find_one(
                    {
                        'code': code,
                        'report_date': {
                            # Raw string: '\d' is an invalid escape otherwise
                            '$regex': r'\d{4}-12-31'
                        },
                        'announced_date': {
                            '$lte': date
                        }
                    },
                    sort=[('announced_date', DESCENDING)])

                if finance_report is None:
                    continue

                # Only a numeric, non-zero eps yields a PE; the '-'
                # placeholder, a missing field or any other non-number is
                # skipped instead of crashing the division below.
                eps = finance_report.get('eps')
                if not isinstance(eps, (int, float)) or eps == 0:
                    continue

                # Compute PE
                pe = round(close / eps, 3)

                print('%s, %s, %s, eps: %5.2f, pe: %6.2f' %
                      (code, date, finance_report['announced_date'],
                       eps, pe),
                      flush=True)

                update_requests.append(
                    UpdateOne({
                        'code': code,
                        'date': date
                    }, {
                        '$set': {
                            'code': code,
                            'date': date,
                            'pe': pe
                        }
                    },
                              upsert=True))

            if update_requests:
                save_result = self.collection.bulk_write(update_requests,
                                                         ordered=False)
                print('股票代码: %s, 因子: %s, 插入:%4d, 更新: %4d' %
                      (code, self.name, save_result.upserted_count,
                       save_result.modified_count),
                      flush=True)
Exemplo n.º 7
0
    def compute(self, begin_date=None, end_date=None):
        """
        Compute MA10 break signals for every stock in the given period and
        upsert them into ``self.collection``.

        ``delta`` is the (hfq) close minus its 10-row moving average. A row
        is an upward break when the previous delta is <= 0 and the current
        one is > 0, and a downward break when the previous delta is >= 0 and
        the current one is < 0. The first row never produces a signal. When
        a row is flagged as not trading, that row and the 10 rows after it
        (11 in total) produce no signal.

        Each signal is stored as ``{code, date, direction}`` with
        ``direction`` being ``'up'`` or ``'down'``.

        :param begin_date: start date
        :param end_date: end date
        """
        codes = get_all_codes()

        dm = DataModule()

        for code in codes:
            try:
                df_dailies = dm.get_k_data(code,
                                           autype='hfq',
                                           begin_date=begin_date,
                                           end_date=end_date)

                if df_dailies.index.size == 0:
                    continue

                # Close minus MA10; NaN for the first 9 rows, and every
                # comparison against NaN below is False, i.e. "no break".
                close = df_dailies['close']
                delta = (close - close.rolling(10).mean()).tolist()
                # Equality instead of `is False`, so numpy booleans are
                # recognised as well.
                suspended = df_dailies['is_trading'].eq(False).tolist()
                dates = df_dailies['date'].tolist()

                # Classify each row. A while loop is required here:
                # reassigning the variable of a `for ... in range()` loop
                # does not skip iterations, which used to leave `breaks`
                # longer than the frame after a suspended day.
                size = len(delta)
                breaks = [0] * size
                row = 1
                while row < size:
                    if suspended[row]:
                        # Skip the suspended row and the 10 rows after it
                        row += 11
                        continue

                    last = delta[row - 1]
                    current = delta[row]

                    # 1 for an upward break, -1 for a downward break,
                    # 0 otherwise
                    if last <= 0 < current:
                        breaks[row] = 1
                    elif last >= 0 > current:
                        breaks[row] = -1
                    row += 1

                # Save the signals to the database (break rows only)
                update_requests = []
                for date, break_direction in zip(dates, breaks):
                    if break_direction == 0:
                        continue
                    doc = {
                        'code': code,
                        'date': date,
                        # Direction: 'up' for an upward break, 'down' for a
                        # downward break
                        'direction': 'up' if break_direction == 1 else 'down'
                    }
                    update_requests.append(
                        UpdateOne(doc, {'$set': doc}, upsert=True))

                if update_requests:
                    update_result = self.collection.bulk_write(update_requests,
                                                               ordered=False)
                    print('%s, upserted: %4d, modified: %4d' %
                          (code, update_result.upserted_count,
                           update_result.modified_count),
                          flush=True)
            except Exception:
                # One failing stock must not stop the others, but
                # KeyboardInterrupt / SystemExit still propagate.
                traceback.print_exc()
Exemplo n.º 8
0
    def compute(self, begin_date, end_date):
        """
        Compute Bollinger mid-band rebound signals for every stock in the
        given period and upsert them into ``self.collection``.

        With ``MID`` the 20-row mean of the (hfq) close and ``UP`` equal to
        ``MID + 2 * std``, the reference line is
        ``(close - MID[-1]) / (UP[-1] - MID[-1])``. A date is a signal when,
        for a threshold of 0.15:

        - the minimum of the reference line over the last 10 rows has an
          absolute value <= 0.15;
        - the maximum of the reference line from the start of that window up
          to and including its minimum is > 0.15;
        - the current reference value is > 0.15 and the previous one is
          <= 0.15.

        Each signal is stored as
        ``{code, date, signal: 'mid_rebound'}``.

        :param begin_date: start date
        :param end_date: end date
        """
        all_codes = get_all_codes()

        dm = DataModule()

        N = 20
        k = 2

        for code in all_codes:
            try:
                df_daily = dm.get_k_data(code,
                                         autype='hfq',
                                         begin_date=begin_date,
                                         end_date=end_date)

                # Nothing to compute for a stock without data in the range
                if df_daily.index.size == 0:
                    continue

                # Middle band; computed after the close, so the current
                # day's close is included
                df_daily['MID'] = df_daily['close'].rolling(N).mean()
                # Rolling standard deviation over the same N rows
                df_daily['std'] = df_daily['close'].rolling(N).std()
                # Upper band
                df_daily['UP'] = df_daily['MID'] + k * df_daily['std']
                # Lower band
                df_daily['DOWN'] = df_daily['MID'] - k * df_daily['std']

                # Use the date as the index
                df_daily.set_index(['date'], inplace=True)

                # Upper and middle band shifted by one row (previous values)
                shifted_up = df_daily['UP'].shift(1)
                shifted_middle = df_daily['MID'].shift(1)

                # How far the close is above/below the middle band, as a
                # fraction of the distance between upper and middle band
                ref_line = (df_daily['close'] -
                            shifted_middle) / (shifted_up - shifted_middle)

                ref_prev = ref_line.shift(1)

                # Minimum inside the time window
                min_val = ref_line.rolling(10).min()

                # Maximum inside the window up to (and including) its minimum
                max_leading_value = ref_line.rolling(10).apply(
                    lambda vec: vec[:np.argmin(vec) + 1].max().astype(float),
                    raw=True)

                # Range within which the middle band counts as support
                delta = 0.15

                # A rebound off the middle band requires:
                # - the maximum before the window's low is above delta and
                #   the low itself is within delta of the middle band, i.e.
                #   the line crossed the threshold on the way down;
                # - the current value is above the threshold again;
                # - the previous value was not, i.e. the crossing back up
                #   happened on the current trading day.
                m_rebound_mask = (abs(min_val) <= delta) & (ref_line > delta) & (ref_prev <= delta) & \
                                 (max_leading_value > delta)

                # Save the signals to the database
                update_requests = []
                for date in m_rebound_mask[m_rebound_mask].index:
                    doc = {'code': code, 'date': date, 'signal': 'mid_rebound'}
                    update_requests.append(
                        UpdateOne(doc, {'$set': doc}, upsert=True))

                if update_requests:
                    update_result = self.collection.bulk_write(update_requests,
                                                               ordered=False)
                    print('%s, upserted: %4d, modified: %4d' %
                          (code, update_result.upserted_count,
                           update_result.modified_count),
                          flush=True)
            except Exception:
                # One failing stock must not stop the others, but
                # KeyboardInterrupt / SystemExit still propagate.
                traceback.print_exc()