def crawl_finance_report(code='600691'):
    """
    Fetch the earnings-report list of one stock from eastmoney and print it.

    The raw ``data`` list of the JSON response is printed first, followed by
    one summary dict per report (report period, announcement date, EPS, code).
    Nothing is persisted yet.

    :param code: stock code to query; defaults to the sample code ``'600691'``
                 (human-readable page:
                 http://data.eastmoney.com/bbsj/yjbb/600691.html)
    """
    # Connection pool used for the HTTP request.
    conn_pool = urllib3.PoolManager()
    # Report endpoint; ``{0}`` is replaced by the stock code.
    # NOTE(review): ``{page}`` and ``{pageSize}`` are sent to the server
    # literally because nothing substitutes them -- confirm what the endpoint
    # expects before filling them in.
    url = (
        'http://dcfm.eastmoney.com//em_mutisvcexpandinterface/api/js/get?'
        'type=YJBB21_YJBB&token=70f12f2f4f091e459a279469fe49eca5&st=reportdate&sr=-1'
        '&filter=(scode={0})&p={page}&ps={pageSize}&js={"pages":(tp),"data":%20(x)}'
    )

    response = conn_pool.request('GET', url.replace('{0}', code))
    result = json.loads(response.data.decode('UTF-8'))
    reports = result['data']

    # TODO: handle the font-based anti-crawling obfuscation of the numbers.
    #   http://fontstore.baidu.com/static/editor/index.html#
    #   https://www.cnblogs.com/TM0831/p/10078372.html
    #   https://www.jianshu.com/p/ebd73b026ccf
    #   https://blog.csdn.net/qq_41733098/article/details/88959897
    #   https://www.cnblogs.com/songzhixue/articles/11242696.html
    #   https://cloud.tencent.com/developer/article/1386548
    print(reports)

    for report in reports:
        doc = {
            # Report period: first 10 characters of the timestamp string.
            'report_date': report['reportdate'][0:10],
            # Announcement date: first 10 characters of the timestamp string.
            'announced_date': report['latestnoticedate'][0:10],
            # Basic earnings per share.
            'eps': report['basiceps'],
            'code': code,
        }
        print(doc)
def compute_pe():
    """
    Compute the price/earnings ratio of every stock for every daily bar.

    For each daily bar the most recent annual report (report period ending on
    12-31) whose announcement date is not later than the bar's date is used,
    so that no information from the future leaks into the value. The result
    ``pe = close / eps`` (rounded to 4 decimals) is written to the ``pe``
    field of the matching daily document.
    """
    # Raw string: ``\d`` must reach the regex engine unchanged.
    annual_report_pattern = r'\d{4}-12-31'

    for code in get_all_codes():
        daily_cursor = daily_collection.find(
            {'code': code},
            projection={'close': True, 'date': True})

        update_requests = []
        for daily in daily_cursor:
            trade_date = daily['date']

            # Latest annual report already announced on this trading day.
            finance_report = finance_report_collection.find_one(
                {
                    'code': code,
                    'report_date': {'$regex': annual_report_pattern},
                    'announced_date': {'$lte': trade_date},
                },
                sort=[('announced_date', DESCENDING)])
            if finance_report is None:
                continue

            # '-' marks a missing EPS; a zero EPS has no defined PE either.
            # NOTE(review): assumes any other eps value is numeric -- confirm
            # against what the crawler stores.
            eps = finance_report['eps']
            if eps == '-' or eps == 0:
                continue

            # UpdateOne takes the filter and the update document as two
            # separate arguments.
            update_requests.append(
                UpdateOne(
                    {'code': code, 'date': trade_date},
                    {'$set': {'pe': round(daily['close'] / eps, 4)}}))

        if update_requests:
            # The PE belongs to the daily bars (filter keys are code + date),
            # so the write goes to the daily collection.
            update_result = daily_collection.bulk_write(
                update_requests, ordered=False)
            print('更新PE, %s, 更新:%d' % (code, update_result.modified_count))
def compute_boll(start_date, end_date):
    """
    Compute Bollinger-band breakout signals and upsert them into ``boll``.

    Bands are built from the ``daily_hfq`` close prices inside the date range:
    middle band = 20-bar mean, upper/lower band = middle +/- 2 * 20-bar
    standard deviation. Each signal is stored as a document with ``code``,
    ``date`` and ``direction`` ('up' or 'down').

    * up:   previous close <= previous upper band and close > current upper band
    * down: previous close >= previous lower band and close < current lower band

    :param start_date: first date of the range (inclusive)
    :param end_date: last date of the range (inclusive)
    """
    all_codes = get_all_codes()
    # Look-back window of the bands.
    N = 20

    for index, code in enumerate(all_codes):
        try:
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': start_date, '$lte': end_date}},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'close': True, '_id': False})
            df_daily = DataFrame(list(daily_cursor))

            # Not enough bars to fill a single window.
            if df_daily.index.size < N:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size), flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)
            close = df_daily['close']

            # Middle band and standard deviation use the day's own close
            # (computed after the market closes).
            middle = close.rolling(N).mean()
            std = close.rolling(N).std()
            upper = middle + 2 * std
            lower = middle - 2 * std

            # shift(1) aligns the previous day's values with the current row.
            prev_close = close.shift(1)
            # Today's close is compared with today's band; only the
            # "was inside yesterday" half of the test uses the shifted band.
            up_mask = (prev_close <= upper.shift(1)) & (close > upper)
            down_mask = (prev_close >= lower.shift(1)) & (close < lower)

            # Keep only the rows carrying a signal; the remaining flag tells
            # the direction (True -> up, False -> down).
            is_up = up_mask[up_mask | down_mask]

            update_requests = []
            for date, flag in is_up.items():
                doc = {
                    'code': code,
                    'date': date,
                    'direction': 'up' if flag else 'down'
                }
                # The whole document is the key, so re-runs do not duplicate.
                update_requests.append(
                    UpdateOne(doc, {'$set': doc}, upsert=True))

            if update_requests:
                update_result = DB_CONN['boll'].bulk_write(update_requests, ordered=False)
                print('SAVE BOLL, 第%d个, 股票代码: %s, 插入: %4d, 更新: %4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort per stock: report and carry on with the next code,
            # but let KeyboardInterrupt/SystemExit stop the run.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def compute_rsi(start_date, end_date):
    """
    Compute RSI crossing signals and upsert them into the ``rsi`` collection.

    RSI = mean(up_pct, N) * 100 / mean(abs(change_pct), N) with N = 12, where
    ``change_pct`` is the day-over-day percentage change of the close and
    ``up_pct`` is that change floored at zero.

    * over_bought: RSI falls below 80 after being >= 80 the day before
    * over_sold:   RSI rises above 20 after being <= 20 the day before

    Documents are keyed by ``code`` and ``date``; an index on those fields is
    recommended: ``db.rsi.createIndex({'code': 1, 'date': 1})``.

    :param start_date: first date of the range (inclusive)
    :param end_date: last date of the range (inclusive)
    """
    all_codes = get_all_codes()
    # RSI look-back window.
    N = 12

    for index, code in enumerate(all_codes):
        try:
            daily_cursor = DB_CONN['daily'].find(
                {
                    'code': code,
                    'date': {'$gte': start_date, '$lte': end_date},
                    'index': False
                },
                sort=[('date', ASCENDING)],
                projection={'date': True, 'close': True, '_id': False})
            df_daily = DataFrame(list(daily_cursor))

            # Not enough bars to fill a single window.
            if df_daily.index.size < N:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size), flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)
            close = df_daily['close']

            # Previous close aligned with the current row.
            pre_close = close.shift(1)
            change_pct = (close - pre_close) * 100 / pre_close
            # Keep gains only: row-wise maximum of the change and zero.
            up_pct = DataFrame({'up_pct': change_pct, 'zero': 0}).max(axis=1)

            rsi = up_pct.rolling(N).mean() * 100 / change_pct.abs().rolling(N).mean()
            prev_rsi = rsi.shift(1)

            # Over-bought is a sell signal, over-sold a buy signal.
            masks = (
                ('over_bought', (rsi < 80) & (prev_rsi >= 80)),
                ('over_sold', (rsi > 20) & (prev_rsi <= 20)),
            )

            update_requests = []
            for signal, mask in masks:
                for date in mask[mask].index:
                    update_requests.append(
                        UpdateOne(
                            {'code': code, 'date': date},
                            {'$set': {'code': code, 'date': date, 'signal': signal}},
                            upsert=True))

            if update_requests:
                update_result = DB_CONN['rsi'].bulk_write(update_requests, ordered=False)
                print('Save RSI, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort per stock: report and carry on with the next code,
            # but let KeyboardInterrupt/SystemExit stop the run.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def fill_au_factor_pre_close(start_date, end_date):
    """
    Fill two derived fields of the ``daily`` collection:

    1. ``au_factor`` -- adjustment factor, ``hfq_close / close`` (2 decimals)
    2. ``pre_close`` -- previous close expressed in today's prices,
       ``close(-1) * au_factor(-1) / au_factor`` (2 decimals)

    ``pre_close`` is only written when the previous bar of the same stock was
    processed successfully.

    :param start_date: first date of the range (inclusive)
    :param end_date: last date of the range (inclusive)
    """
    all_codes = get_all_codes()

    for code in all_codes:
        date_range = {'$lte': end_date, '$gte': start_date}

        hfq_daily_cursor = DB_CONN['daily_hfq'].find(
            {'code': code, 'date': date_range},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True})
        # date -> backward-adjusted close
        date_hfq_close_dict = {x['date']: x['close'] for x in hfq_daily_cursor}

        daily_cursor = DB_CONN['daily'].find(
            {'code': code, 'date': date_range, 'index': False},
            sort=[('date', ASCENDING)],
            projection={'date': True, 'close': True})

        # Values of the previous successfully processed bar; None = unknown.
        last_close = None
        last_au_factor = None
        update_requests = []

        for daily in daily_cursor:
            date = daily['date']
            try:
                close = daily['close']
                # Adjustment factor = adjusted close / actual close.
                au_factor = round(date_hfq_close_dict[date] / close, 2)
                doc = {'au_factor': au_factor}

                # Previous close restated with today's factor.
                if last_close is not None and last_au_factor is not None:
                    pre_close = last_close * last_au_factor / au_factor
                    doc['pre_close'] = round(pre_close, 2)

                last_au_factor = au_factor
                last_close = close

                update_requests.append(
                    UpdateOne(
                        {'code': code, 'date': date, 'index': False},
                        {'$set': doc}))
            except Exception:
                # E.g. no adjusted bar for this date or a zero price. Skip the
                # bar, but let KeyboardInterrupt/SystemExit stop the run.
                print('计算复权因子时发生错误,股票代码:%s,日期:%s' % (code, date), flush=True)
                traceback.print_exc()
                # Forget the previous bar so the next one cannot use stale data.
                last_close = None
                last_au_factor = None

        if update_requests:
            update_result = DB_CONN['daily'].bulk_write(update_requests, ordered=False)
            print('填充复权因子和前收,股票:%s,更新:%4d条' %
                  (code, update_result.modified_count), flush=True)
def _ema(values, period):
    """
    Return the exponential moving average of ``values`` as a list.

    alpha = 2 / (period + 1)
    EMA(0) = values[0]
    EMA(i) = alpha * (values[i] - EMA(i-1)) + EMA(i-1)
    """
    alpha = 2 / (period + 1)
    averages = []
    for value in values:
        if averages:
            previous = averages[-1]
            averages.append(alpha * (value - previous) + previous)
        else:
            # The first EMA is the first observation itself.
            averages.append(value)
    return averages


def compute_macd(start_date, end_date):
    """
    Compute MACD golden-cross / dead-cross signals and upsert them into the
    ``macd`` collection.

    DIFF = EMA(close, 12) - EMA(close, 26), DEA = EMA(DIFF, 9) and
    delta = DIFF - DEA, all on ``daily_hfq`` closes inside the date range.

    * gold: delta was <= 0 the day before and is > 0 now (DIFF crosses above DEA)
    * dead: delta was >= 0 the day before and is < 0 now (DIFF crosses below DEA)

    Documents are keyed by ``code`` and ``date``, so an index on those two
    fields is recommended.

    :param start_date: first date of the range (inclusive)
    :param end_date: last date of the range (inclusive)
    """
    short_period = 12
    long_period = 26
    m_for_diff_period = 9

    codes = get_all_codes()

    for index, code in enumerate(codes):
        try:
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': start_date, '$lte': end_date}},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'close': True, '_id': False})
            df_daily = DataFrame(list(daily_cursor))

            # Checked before set_index so that an empty result (which has no
            # 'date' column) is reported as "too little data", not as an error.
            if df_daily.index.size < short_period:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size), flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)

            closes = df_daily['close'].tolist()
            df_daily['EMA1'] = _ema(closes, short_period)   # short EMA
            df_daily['EMA2'] = _ema(closes, long_period)    # long EMA
            df_daily['DIFF'] = df_daily['EMA1'] - df_daily['EMA2']
            df_daily['DEA'] = _ema(df_daily['DIFF'].tolist(), m_for_diff_period)

            # delta is the MACD histogram; shift(1) aligns yesterday's value
            # with the current row.
            delta = df_daily['DIFF'] - df_daily['DEA']
            pre_delta = delta.shift(1)

            masks = (
                ('gold', (pre_delta <= 0) & (delta > 0)),
                ('dead', (pre_delta >= 0) & (delta < 0)),
            )

            update_requests = []
            for signal, mask in masks:
                for date in mask[mask].index:
                    update_requests.append(
                        UpdateOne(
                            {'code': code, 'date': date},
                            {'$set': {'code': code, 'date': date, 'signal': signal}},
                            upsert=True))

            if update_requests:
                update_result = DB_CONN['macd'].bulk_write(update_requests, ordered=False)
                print('Save MACD, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort per stock: report and carry on with the next code,
            # but let KeyboardInterrupt/SystemExit stop the run.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()
def compute_fractal(begin_date, end_date):
    """
    Compute top/bottom fractal signals and upsert them into ``fractal``.

    A five-bar window of backward-adjusted (``daily_hfq``) prices is examined:

    * up (top fractal): the middle bar's high is greater than the highs of
      the two bars before it and the two bars after it
    * down (bottom fractal): the middle bar's low is lower than the lows of
      the two bars before it and the two bars after it

    The signal is stored under the date of the LAST bar of the window, i.e.
    the first day on which the pattern can be observed. Documents are keyed
    by ``code``, ``date`` and ``direction``; recommended index:
    ``db.fractal_signal.createIndex({'code': 1, 'date': 1, 'direction': 1})``.

    :param begin_date: first date of the range (inclusive)
    :param end_date: last date of the range (inclusive)
    """
    codes = get_all_codes()
    # Bars needed for one complete window.
    window = 5

    for index, code in enumerate(codes):
        try:
            daily_cursor = DB_CONN['daily_hfq'].find(
                {'code': code, 'date': {'$gte': begin_date, '$lte': end_date}},
                sort=[('date', ASCENDING)],
                projection={'date': True, 'high': True, 'low': True, '_id': False})
            df_daily = DataFrame(list(daily_cursor))

            # Checked before set_index: an empty result has no 'date' column.
            if df_daily.index.size < window:
                print('数据量不够: %s, 只有: %d' % (code, df_daily.index.size), flush=True)
                continue

            df_daily.set_index(['date'], inplace=True)
            high = df_daily['high']
            low = df_daily['low']

            # On row t the window is t-4 .. t, so the middle bar is t-2.
            mid_high = high.shift(2)
            mid_low = low.shift(2)

            up = ((mid_high > high.shift(4)) & (mid_high > high.shift(3)) &
                  (mid_high > high.shift(1)) & (mid_high > high))
            down = ((mid_low < low.shift(4)) & (mid_low < low.shift(3)) &
                    (mid_low < low.shift(1)) & (mid_low < low))

            # A bar can be a top and a bottom fractal at once; both are kept.
            update_requests = []
            for direction, mask in (('up', up), ('down', down)):
                for date in mask[mask].index:
                    doc = {'code': code, 'date': date, 'direction': direction}
                    # The whole document is the key, so re-runs do not duplicate.
                    update_requests.append(
                        UpdateOne(doc, {'$set': doc}, upsert=True))

            if update_requests:
                update_result = DB_CONN['fractal'].bulk_write(update_requests, ordered=False)
                print('Save Fractal, 第%d个, 股票代码:%s, 插入:%4d, 更新:%4d' %
                      (index + 1, code, update_result.upserted_count,
                       update_result.modified_count),
                      flush=True)
        except Exception:
            # Best effort per stock: report and carry on with the next code,
            # but let KeyboardInterrupt/SystemExit stop the run.
            print('错误发生: %s' % code, flush=True)
            traceback.print_exc()