def crawl_finance_report():
    """
    Download the earnings-report records of one stock from Eastmoney and
    print them.

    This is still exploratory code: the stock code is fixed to ``600691``,
    the ``{page}`` and ``{pageSize}`` placeholders of the URL are sent as-is
    (only ``{0}`` is substituted), and nothing is persisted - the raw records
    and the last converted document are only printed.
    """
    # Load the complete stock list first (not consumed yet by the code below).
    codes = get_all_codes()

    # Connection pool used to issue the HTTP request.
    http = urllib3.PoolManager()

    # Report endpoint; ``{0}`` is the stock code (scode).
    # Human-readable page: http://data.eastmoney.com/bbsj/yjbb/600691.html
    url_template = (
        'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?'
        'type=YJBB21_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1'
        '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'
    )
    stock_code = '600691'

    payload = http.request('GET', url_template.replace('{0}', stock_code)).data
    reports = json.loads(payload.decode('UTF-8'))['data']
    # TODO: handle the font-based anti-crawling obfuscation, see
    # http://fontstore.baidu.com/static/editor/index.html#
    # https://www.cnblogs.com/TM0831/p/10078372.html
    # https://www.jianshu.com/p/ebd73b026ccf
    # https://blog.csdn.net/qq_41733098/article/details/88959897
    # https://www.cnblogs.com/songzhixue/articles/11242696.html
    # https://cloud.tencent.com/developer/article/1386548
    print(reports)

    # Every record is converted, but only the last converted one is kept.
    doc = {}
    for item in reports:
        doc = {
            # Reporting period (date part only)
            'report_date': item['reportdate'][0:10],
            # Announcement date (date part only)
            'announced_date': item['latestnoticedate'][0:10],
            # Earnings per share
            'eps': item['basiceps'],
            'code': stock_code
        }
    print(doc)
Пример #2
0
def compute_pe():
    """
    Compute the price/earnings ratio of every stock for each trading day and
    store it in the ``pe`` field of the matching daily document.

    For each daily bar the most recent annual report (``report_date`` ending
    in ``-12-31``) announced on or before that day is used, so no information
    from the future leaks into the value. Days without such a report, or whose
    report has no usable EPS (``'-'`` or ``0``), are skipped.
    """
    codes = get_all_codes()

    for code in codes:
        daily_cursor = daily_collection.find(
            {'code': code},
            projection={'close': True, 'date': True})

        update_requests = []
        for daily in daily_cursor:
            _date = daily['date']
            # Latest annual report already published on this trading day;
            # filtering on the announcement date avoids look-ahead bias.
            finance_report = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {
                        '$regex': r'\d{4}-12-31'
                    },
                    'announced_date': {
                        '$lte': _date
                    }
                },
                sort=[('announced_date', DESCENDING)])

            if finance_report is None:
                continue

            # '-' marks a missing EPS in the stored report; treat it like 0.
            eps = 0
            if finance_report['eps'] != '-':
                eps = finance_report['eps']
            if eps == 0:
                continue

            # UpdateOne takes the filter and the update as two separate
            # documents.
            update_requests.append(
                UpdateOne(
                    {'code': code, 'date': _date},
                    {'$set': {'pe': round(daily['close'] / eps, 4)}}))

        if update_requests:
            # The filter addresses daily bars (code + date), so the write
            # goes to the daily collection the bars were read from.
            update_result = daily_collection.bulk_write(
                update_requests, ordered=False)
            print('更新PE, %s, 更新:%d' % (code, update_result.modified_count))
Пример #3
0
def compute_boll(start_date, end_date):
    """
    Compute Bollinger-band break-out signals (above the upper band / below the
    lower band) for every stock in the given period and upsert them into the
    ``boll`` collection.

    :param start_date: first date of the period (inclusive)
    :param end_date: last date of the period (inclusive)
    """

    all_codes = get_all_codes()
    # Window length of the moving average / standard deviation.
    N = 20

    for index, code in enumerate(all_codes):
        try:
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': start_date,
                        '$lte': end_date
                    }
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': False
                })

            df_daily = DataFrame(list(daily_cursor))

            # Not enough bars to fill a single window.
            if df_daily.index.size < N:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size),
                      flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)
            close = df_daily['close']

            # Middle band and standard deviation over the last N closes
            # (computed after the close, so the current bar is included).
            middle = close.rolling(N).mean()
            std = close.rolling(N).std()
            upper = middle + 2 * std
            lower = middle - 2 * std

            prev_close = close.shift(1)

            # Upward break-out: the previous close was at or below the previous
            # upper band and the current close is above the current upper band.
            up_mask = (prev_close <= upper.shift(1)) & (close > upper)
            # Downward break-out: the previous close was at or above the
            # previous lower band and the current close is below the current
            # lower band.
            down_mask = (prev_close >= lower.shift(1)) & (close < lower)

            # Keep only the days carrying a signal; the value tells whether it
            # is an upward (True) or a downward (False) break-out.
            signals = up_mask[up_mask | down_mask]

            # Each stored document holds the stock code, the date and the
            # direction of the break-out.
            update_requests = []
            for date, is_up in signals.items():
                doc = {
                    'code': code,
                    'date': date,
                    # 'up' = break above the upper band, 'down' = below the lower band
                    'direction': 'up' if is_up else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if update_requests:
                update_result = DB_CONN['boll'].bulk_write(update_requests,
                                                           ordered=False)
                print('SAVE BOLL, 第%d个, 股票代码: %s, 插入: %4d, 更新: %4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)

        except Exception:
            # Best effort: report the failing stock and go on with the next
            # one, without trapping KeyboardInterrupt/SystemExit.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
Пример #4
0
def compute_rsi(start_date, end_date):
    """
    Compute RSI overbought/oversold signals for every stock in the given
    period and upsert them into the ``rsi`` collection.

    The collection is addressed by ``code`` and ``date``; an index such as
    ``db.rsi.createIndex({'code': 1, 'date': 1})`` is expected.

    :param start_date: first date of the period (inclusive)
    :param end_date: last date of the period (inclusive)
    """

    all_codes = get_all_codes()

    # RSI window length.
    N = 12

    for index, code in enumerate(all_codes):
        try:
            daily_cursor = DB_CONN['daily'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': start_date,
                        '$lte': end_date
                    },
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': True
                })

            df_daily = DataFrame(list(daily_cursor))

            # Not enough bars to fill a single window.
            if df_daily.index.size < N:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size),
                      flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)
            close = df_daily['close']
            # Previous close, aligned to the current day.
            pre_close = close.shift(1)
            # Daily change in percent: (close - pre_close) * 100 / pre_close
            change_pct = (close - pre_close) * 100 / pre_close
            # Gains only: negative (and missing) changes count as 0.
            up_pct = DataFrame({
                'up_pct': change_pct,
                'zero': 0
            }).max(axis=1)

            # RSI = mean(gains, N) * 100 / mean(abs(change), N)
            rsi = up_pct.rolling(N).mean() * 100 / change_pct.abs().rolling(
                N).mean()
            prev_rsi = rsi.shift(1)

            # Overbought: RSI falls back below 80 -> sell signal.
            over_bought_dates = rsi[(rsi < 80) & (prev_rsi >= 80)].index
            # Oversold: RSI rises back above 20 -> buy signal.
            over_sold_dates = rsi[(rsi > 20) & (prev_rsi <= 20)].index

            # One upsert per signal, keyed by code and date.
            update_requests = []
            for signal, dates in (('over_bought', over_bought_dates),
                                  ('over_sold', over_sold_dates)):
                for date in dates:
                    update_requests.append(
                        UpdateOne({
                            'code': code,
                            'date': date
                        }, {
                            '$set': {
                                'code': code,
                                'date': date,
                                'signal': signal
                            }
                        },
                                  upsert=True))

            if update_requests:
                update_result = DB_CONN['rsi'].bulk_write(update_requests,
                                                          ordered=False)
                print('Save RSI, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort: report the failing stock and go on with the next
            # one, without trapping KeyboardInterrupt/SystemExit.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
Пример #5
0
def fill_au_factor_pre_close(start_date, end_date):
    """
    Fill two derived fields into the documents of the ``daily`` collection:

    1. ``au_factor`` - adjustment factor, ``hfq_close / close``
    2. ``pre_close`` - previous close expressed in today's prices,
       ``close(-1) * au_factor(-1) / au_factor``

    :param start_date: first date of the period (inclusive)
    :param end_date: last date of the period (inclusive)
    """
    all_codes = get_all_codes()
    print(all_codes)

    for code in all_codes:
        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': start_date}},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True})

        # date -> backward-adjusted close
        date_hfq_close_dict = {x['date']: x['close'] for x in hfq_daily_cursor}

        daily_cursor = DB_CONN['daily'].find(
            {'code': code, 'date': {'$lte': end_date, '$gte': start_date}, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True}
        )

        # -1 means "no usable previous bar".
        last_close = -1
        last_au_factor = -1

        update_requests = []
        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']

                doc = dict()

                # Adjustment factor = backward-adjusted close / actual close
                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc['au_factor'] = au_factor
                # Previous close in today's prices =
                # previous actual close * previous factor / today's factor
                if last_close != -1 and last_au_factor != -1:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close

                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except Exception:
                # E.g. no adjusted close for this date, or a zero price.
                print('计算复权因子时发生错误,股票代码:%s,日期:%s' % (code, date), flush=True)
                traceback.print_exc()
                # Reset so the next bar does not use stale previous values.
                last_close = -1
                last_au_factor = -1

        if update_requests:
            update_result = DB_CONN['daily'].bulk_write(update_requests, ordered=False)
            print('填充复权因子和前收,股票:%s,更新:%4d条' %
                  (code, update_result.modified_count), flush=True)
Пример #6
0
def compute_macd(start_date, end_date):
    """
    Compute MACD golden-cross and dead-cross signals for every stock in the
    given period and upsert them into the ``macd`` collection.

    :param start_date: first date of the period (inclusive)
    :param end_date: last date of the period (inclusive)
    """

    short_period = 12
    long_period = 26
    m_for_diff_period = 9

    def _ema(values, period):
        """Return the EMA list of ``values``; the first EMA is the first value.

        alpha = 2 / (period + 1)
        EMA(i) = alpha * (value(i) - EMA(i-1)) + EMA(i-1)
        """
        alpha = 2 / (period + 1)
        result = []
        for value in values:
            if not result:
                result.append(value)
            else:
                previous = result[-1]
                result.append(alpha * (value - previous) + previous)
        return result

    codes = get_all_codes()

    for indexx, code in enumerate(codes):
        try:
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': start_date,
                        '$lte': end_date
                    }
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'close': True,
                    '_id': True
                })

            df_daily = DataFrame(list(daily_cursor))

            # Too few bars (including none at all): skip the stock. This runs
            # before set_index because an empty frame has no 'date' column.
            if df_daily.index.size < short_period:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size),
                      flush=True)
                continue

            # Use the date as index.
            df_daily.set_index(['date'], inplace=True)
            # Debug output of the loaded bars.
            print(df_daily)

            # Short and long EMA of the close.
            closes = df_daily['close'].tolist()
            df_daily['EMA1'] = _ema(closes, short_period)
            df_daily['EMA2'] = _ema(closes, long_period)

            # DIFF = short EMA - long EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']

            # DEA = EMA(DIFF, M); the first DEA is the first DIFF.
            df_daily['DEA'] = _ema(df_daily['DIFF'].tolist(),
                                   m_for_diff_period)

            # delta = DIFF - DEA (the MACD histogram value)
            df_daily['delta'] = df_daily['DIFF'] - df_daily['DEA']
            # Previous day's delta, aligned to the current day.
            df_daily['pre_delta'] = df_daily['delta'].shift(1)
            # Golden cross: DIFF crosses above DEA.
            df_daily_gold = df_daily[(df_daily['pre_delta'] <= 0)
                                     & (df_daily['delta'] > 0)]
            # Dead cross: DIFF crosses below DEA.
            df_daily_dead = df_daily[(df_daily['pre_delta'] >= 0)
                                     & (df_daily['delta'] < 0)]

            # One upsert per signal, keyed by code and date (both should be
            # indexed); ``signal`` is 'gold' or 'dead'.
            update_requests = []
            for signal, dates in (('gold', df_daily_gold.index),
                                  ('dead', df_daily_dead.index)):
                for date in dates:
                    update_requests.append(
                        UpdateOne({
                            'code': code,
                            'date': date
                        }, {
                            '$set': {
                                'code': code,
                                'date': date,
                                'signal': signal
                            }
                        },
                                  upsert=True))

            if update_requests:
                update_result = DB_CONN['macd'].bulk_write(update_requests,
                                                           ordered=False)
                print('Save MACD, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (indexx + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort: report the failing stock and go on with the next
            # one, without trapping KeyboardInterrupt/SystemExit. The empty
            # case is handled above, so this is a genuine error.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
Пример #7
0
def compute_fractal(begin_date, end_date):
    """
    Compute top/bottom fractal signals for every stock in the given period
    and upsert them into the ``fractal`` collection.

    A fractal is evaluated on a window of five consecutive bars and stored on
    the date of the last bar of the window, i.e. once the two bars following
    the middle bar are known.

    :param begin_date: first date of the period (inclusive)
    :param end_date: last date of the period (inclusive)
    """
    codes = get_all_codes()

    # Number of bars in a fractal window.
    window = 5

    for index, code in enumerate(codes):
        try:
            # Backward-adjusted prices are used to compute the signals.
            daily_cursor = DB_CONN['daily_hfq'].find(
                {
                    'code': code,
                    'date': {
                        '$gte': begin_date,
                        '$lte': end_date
                    }
                },
                sort=[('date', ASCENDING)],
                projection={
                    'date': True,
                    'high': True,
                    'low': True,
                    '_id': False
                })

            df_daily = DataFrame(list(daily_cursor))

            # Fewer bars than one window (including none at all): nothing to
            # compute. This also avoids set_index on a frame without columns.
            if df_daily.index.size < window:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size),
                      flush=True)
                continue

            # Use the date as index.
            df_daily.set_index(['date'], inplace=True)
            high = df_daily['high']
            low = df_daily['low']

            # On the row of the last bar of the window, shift(2) is the middle
            # bar, shift(4)/shift(3) are the two bars before it and
            # shift(1)/shift(0) the two bars after it.
            mid_high = high.shift(2)
            mid_low = low.shift(2)

            # Top fractal: the middle high is above the highs of the two bars
            # before and the two bars after.
            up = (mid_high > high.shift(4)) & (mid_high > high.shift(3)) & \
                 (mid_high > high.shift(1)) & (mid_high > high)

            # Bottom fractal: the middle low is below the lows of the two bars
            # before and the two bars after.
            down = (mid_low < low.shift(4)) & (mid_low < low.shift(3)) & \
                   (mid_low < low.shift(1)) & (mid_low < low)

            # Keep only the days carrying a signal; the value tells whether it
            # is a top (True) or a bottom (False) fractal.
            signals = up[up | down]

            # Each stored document holds the stock code, the date and the
            # direction. The upsert filters on all three fields, so index
            # them: db.fractal_signal.createIndex({'code': 1, 'date': 1, 'direction': 1})
            update_requests = []
            for date, is_up in signals.items():
                doc = {
                    'code': code,
                    'date': date,
                    # up: top fractal, down: bottom fractal
                    'direction': 'up' if is_up else 'down'
                }
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if update_requests:
                update_result = DB_CONN['fractal'].bulk_write(update_requests,
                                                              ordered=False)
                print('Save Fractal, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort: report the failing stock and go on with the next
            # one, without trapping KeyboardInterrupt/SystemExit.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()