def simplePlot(code):
    """Show a line chart of the closing price history of one stock.

    The stock is first looked up in ``stock_basic``; when no row matches, a
    message is logged and nothing is plotted. Otherwise every
    ``tsquant.daily`` row for the code is loaded ordered by ``trade_date``
    and ``close`` is plotted against the date, titled with the stock's
    ``fullname``.

    :param code: value matched against the ``ts_code`` column.
    :return: None
    """
    # NOTE(review): ``code`` is interpolated directly into the SQL text --
    # confirm it never originates from untrusted input.
    info_sql = "select * from stock_basic where ts_code='%s'" % code
    data_info = pd.read_sql(info_sql, get_conn())
    if len(data_info) < 1:
        logger.info("没有该stock信息")
        return

    full_name = data_info['fullname'][0]
    logger.info(full_name)
    print(type(full_name))

    daily_sql = "SELECT * FROM tsquant.daily where ts_code='%s' order by trade_date;" % code
    prices = pd.read_sql(daily_sql, get_conn())
    prices['close'] = pd.to_numeric(prices['close'])
    prices['trade_date'] = pd.to_datetime(prices['trade_date'])
    # Move the date column into the index so it becomes the x axis.
    prices.set_index('trade_date', inplace=True)

    plt.plot(prices['close'], label='收盘价')
    plt.legend()
    plt.title(full_name)
    plt.xlabel('日期')
    plt.ylabel('收盘价')
    # SimHei is selected so the Chinese labels can be rendered.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.show()
def init_base_schedule(start=start1, end=end1):
    """Seed the ``job_basic`` collection with one pending job per stock.

    If any ``job_basic`` document already exists for the given
    ``start``/``end`` pair, a message is logged and nothing is written.
    Otherwise one upsert keyed on ``ts_code`` is queued for every code found
    in ``stock_basic``, with all task flags set to ``False``.

    :param start: range start stored on each job document.
    :param end: range end stored on each job document.
    :return: None
    """
    already_scheduled = find_data(TSDB_CONN['job_basic'],
                                  whereParam={'start': start, 'end': end})
    if len(already_scheduled) > 0:
        logger.info("已设置初始任务")
        return

    stocks = find_data(TSDB_CONN['stock_basic'],
                       selParam={'_id': False, 'ts_code': True})

    # Every task flag starts out as "not done yet".
    pending_flags = dict.fromkeys(
        ('daily', 'adj_factor', 'suspend', 'daily_basic', 'cal_hfq', 'index'),
        False)
    update_requests = [
        UpdateOne(
            {'ts_code': stock['ts_code']},
            {'$set': dict({'ts_code': stock['ts_code'],
                           'start': start,
                           'end': end},
                          **pending_flags)},
            upsert=True)
        for stock in stocks
    ]
    save_data_update_one(None, TSDB_CONN['job_basic'],
                         update_requests=update_requests, memo='设置基础任务')
def __get_fina_indicator(query_name, code, start, end):
    """Fetch financial-indicator data for one stock in 3600-day windows.

    The ``[start, end]`` range is cut into consecutive windows of at most
    ``360 * 10`` days; each window is queried through ``pro.query`` and the
    result is stored with ``insert_many('fina_indicator', ...)``. When at
    least one row was stored overall, the ``fina_indicator`` flag of the
    matching ``job_finance_basic`` row is set to 1.

    When ``start`` is not earlier than ``end`` no window is produced and
    nothing is fetched.

    :param query_name: object whose ``.value`` is the API name passed to
        ``pro.query``.
    :param code: stock code passed as ``ts_code``.
    :param start: range start as a ``YYYYMMDD`` string.
    :param end: range end as a ``YYYYMMDD`` string.
    :return: None
    """
    date_format = '%Y%m%d'
    range_end = datetime.strptime(end, date_format)
    window_span = timedelta(days=360 * 10)

    # NOTE(review): consecutive windows share their boundary day (one
    # window's end is the next window's start) -- confirm the storage layer
    # tolerates the resulting duplicate rows.
    date_list = []
    window_start = datetime.strptime(start, date_format)
    while window_start < range_end:
        window_end = min(window_start + window_span, range_end)
        date_list.append({'start': window_start.strftime(date_format),
                          'end': window_end.strftime(date_format)})
        window_start = window_end

    stored_rows = 0
    window_count = len(date_list)
    for position, date in enumerate(date_list, start=1):
        memo = "%s子线程执行%s[%s-%s]任务[%d/%d]%s" % (
            threading.current_thread().name, code, date['start'],
            date['end'], position, window_count, query_name.value)
        logger.info(memo)
        data = pro.query(query_name.value, ts_code=code,
                         start_date=date['start'], end_date=date['end'])
        res = insert_many('fina_indicator', data, memo=memo)
        # insert_many may return None; count that as zero rows.
        stored_rows += res if res is not None else 0

    if stored_rows > 0:
        # stored_rows > 0 implies the loop ran, so ``date`` is the last window.
        memo = "更新执行任务" + query_name.value + "[%s][%s-%s]状态" % (
            code, date['start'], date['end'])
        u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
        mysql_execute(u_sql % ('fina_indicator', code, start, end), memo=memo)
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Fetch one kind of daily data for each job row and mark finished jobs.

    For every entry of ``code_ls`` the matching ``pro.query`` call is made,
    the result is stored with ``insert_many`` and, when at least one row was
    stored, the ``query_name`` column of the matching ``job_basic`` row is
    set to 1.

    :param query_name: one of ``'daily'``, ``'adj_factor'``, ``'suspend'``
        or ``'daily_basic'``; any other value is logged and ends the job.
    :param code_ls: sequence of rows indexed as ``(ts_code, start, end)``.
    :param fields: accepted for interface compatibility; not used here.
    :return: None
    """
    job_count = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name, code,
            position, job_count))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name in ('daily', 'daily_basic'):
            data = pro.query(query_name, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == 'adj_factor':
            data = pro.query(query_name, ts_code=ts_code)
        elif query_name == 'suspend':
            data = pro.query(
                query_name, ts_code=ts_code,
                fields="ts_code,suspend_date,resume_date,ann_date,suspend_reason,reason_type")
        else:
            logger.info("不在列表之内")
            return

        memo = "执行任务%s[%s]" % (query_name, code)
        result = insert_many(query_name, data, memo=memo)
        if result is not None and result > 0:
            # ``code`` is wrapped in a 1-tuple: formatting a 3-element tuple
            # row against a single %s would raise TypeError.
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            u_sql = "update job_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            # The job column to flag has the same name as the query.
            mysql_execute(u_sql % (query_name, ts_code, start, end), memo=memo)
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Fetch index data for each job row and mark finished jobs.

    For every entry of ``code_ls`` the matching ``pro`` index API is called,
    the result is stored with ``insert_many`` and, when at least one row was
    stored, the ``query_name`` column of the matching ``job_basic_index``
    row is set to 1.

    :param query_name: ``'index_daily'`` or ``'index_weigth'``; any other
        value is logged and ends the job.
    :param code_ls: sequence of rows indexed as ``(index code, start, end)``.
    :param fields: accepted for interface compatibility; not used here.
    :return: None
    """
    job_count = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name, code,
            position, job_count))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name == 'index_daily':
            data = pro.index_daily(ts_code=ts_code,
                                   start_date=start, end_date=end)
        elif query_name == 'index_weigth':
            # The spelling 'index_weigth' is also used as the table/column
            # name below, so it must not be "corrected" here alone.
            data = pro.index_weight(index_code=ts_code,
                                    start_date=start, end_date=end)
        else:
            logger.info("不在列表之内")
            return

        memo = "执行任务" + query_name
        result = insert_many(query_name, data, memo=memo)
        if result is not None and result > 0:
            # ``code`` is wrapped in a 1-tuple: formatting a 3-element tuple
            # row against a single %s would raise TypeError.
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            u_sql = "update job_basic_index set `%s`=1 where `index_code`='%s' and `start`='%s' and `end`='%s'"
            # The job column to flag has the same name as the query.
            mysql_execute(u_sql % (query_name, ts_code, start, end), memo=memo)
def save_data_update_one(data, collection, update_requests=None, indexs=None, extra_fields=None, memo=''):
    """Bulk-write update requests into a collection and log the outcome.

    When ``data`` is given it is converted into update requests with
    ``__daily_obj_2_doc`` (replacing any ``update_requests`` argument);
    otherwise the supplied ``update_requests`` are written as they are.
    Nothing is written when the request list is empty.

    :param data: fetched data to convert, or ``None`` to use
        ``update_requests`` directly.
    :param collection: target collection; must provide ``bulk_write``.
    :param update_requests: ready-made update requests, used only when
        ``data`` is ``None``.
    :param indexs: key fields forwarded to ``__daily_obj_2_doc``.
    :param extra_fields: extra fields forwarded to ``__daily_obj_2_doc`` and
        included in the log line.
    :param memo: description used in the log line.
    :return: tuple ``(bulk write result or None, number of requests)``.
    """
    if data is not None:
        # Forward the caller's extra_fields instead of silently dropping them.
        update_requests = __daily_obj_2_doc(data, indexs=indexs,
                                            extra_fields=extra_fields)
    elif update_requests is None:
        update_requests = []

    request_count = len(update_requests)
    if request_count == 0:
        logger.info('无数据更新%d' % request_count)
        return None, request_count

    update_result = collection.bulk_write(update_requests, ordered=False)
    logger.info('保存%s数据共%d条 匹配%d条 插入%d条, 更新%d条 额外字段:%s ' % (
        memo, request_count, update_result.matched_count,
        update_result.upserted_count, update_result.modified_count,
        extra_fields))
    return update_result, request_count
def __daily_crawler_job(query_name, code_ls, fields=None):
    """Fetch one kind of daily data for each job document and mark it done.

    For every entry of ``code_ls`` the matching ``pro.query`` call is made
    and the result is saved into ``TSDB_CONN[query_name]`` through
    ``save_data_update_one``. The ``query_name`` flag of the matching
    ``job_basic`` document is set to ``True`` when the bulk write touched at
    least one document, or when there was nothing to write at all.

    :param query_name: one of ``'daily'``, ``'adj_factor'``, ``'suspend'``
        or ``'daily_basic'``; any other value is logged and ends the job.
    :param code_ls: sequence of mappings with ``ts_code``, ``start`` and
        ``end`` keys.
    :param fields: accepted for interface compatibility; not used here.
    :return: None
    """
    job_count = len(code_ls)
    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name, code,
            position, job_count))
        ts_code = code['ts_code']
        start = code['start']
        end = code['end']

        if query_name in ('daily', 'daily_basic'):
            index = ['ts_code', 'trade_date']
            data = pro.query(query_name, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == 'adj_factor':
            index = ['ts_code', 'trade_date']
            data = pro.query(query_name, ts_code=ts_code)
        elif query_name == 'suspend':
            index = ['ts_code']
            data = pro.query(
                query_name, ts_code=ts_code,
                fields="ts_code,suspend_date,resume_date,ann_date,suspend_reason,reason_type")
        else:
            # Without this guard an unknown name would fail further down
            # on the unbound ``index`` variable.
            logger.info("不在列表之内")
            return

        memo = "执行任务" + query_name
        result, rlen = save_data_update_one(data, TSDB_CONN[query_name],
                                            indexs=index, memo=memo)

        wrote_something = result is not None and (
            result.matched_count > 0
            or result.upserted_count > 0
            or result.modified_count > 0)
        # An empty result set also counts as a finished job.
        if wrote_something or rlen == 0:
            memo = "更新执行任务" + query_name + "[%s]状态" % (code,)
            update_date(TSDB_CONN['job_basic'],
                        where_param={'ts_code': ts_code,
                                     'start': start,
                                     'end': end},
                        # The job flag to set has the same name as the query.
                        update_doc={query_name: True},
                        memo=memo)
def init_base_schedule(start=start1, end=end1):
    """Create one pending ``job_basic`` row per stock for a date range.

    Does nothing except log a message when ``mysql_execute`` reports a
    positive value for the existing-jobs query (presumably the number of
    matching rows -- confirm against ``mysql_execute``). Otherwise one row
    per ``ts_code`` in ``stock_basic`` is inserted with every task flag set
    to ``'0'``.

    :param start: range start stored on each job row.
    :param end: range end stored on each job row.
    :return: None
    """
    existing_sql = "select * from job_basic where `start`='%s' and `end`='%s'" % (start, end)
    existing = mysql_execute(existing_sql)
    if existing > 0:
        logger.info("已设置初始任务")
        return

    stock_rows = mysql_search("select ts_code from stock_basic order by ts_code")

    # All task flags start as the string '0' (not done).
    flag_columns = ('daily', 'adj_factor', 'suspend', 'daily_basic',
                    'cal_hfq', 'index')
    job_rows = []
    for stock_row in stock_rows:
        job = {'ts_code': stock_row[0], 'start': start, 'end': end}
        job.update(dict.fromkeys(flag_columns, '0'))
        job_rows.append(job)

    insert_many('job_basic', pd.DataFrame(job_rows), memo='设置基础任务')
def init_finance_job_base(start=start1, end=end1):
    """Create one pending ``job_finance_basic`` row per stock for a range.

    Does nothing except log a message when ``mysql_execute`` reports a
    positive value for the existing-jobs query (presumably the number of
    matching rows -- confirm against ``mysql_execute``). Otherwise one row
    per ``ts_code`` in ``stock_basic`` is inserted with every finance task
    flag set to ``'0'``.

    :param start: range start stored on each job row.
    :param end: range end stored on each job row.
    :return: None
    """
    existing = mysql_execute(
        "select * from job_finance_basic where `start`='%s' and `end`='%s'" % (start, end))
    if existing > 0:
        logger.info("已经设置财务任务[%s-%s]%d" % (start, end, existing))
        return

    # One flag column per finance data set, all initially '0' (not done).
    finance_flags = dict.fromkeys(
        ('income', 'balancesheet', 'cashflow', 'forecast', 'express',
         'fina_indicator', 'fina_audit', 'fina_mainbz'),
        '0')
    stock_rows = mysql_search("select ts_code from stock_basic order by ts_code")
    job_rows = [
        dict({'ts_code': stock_row[0], 'start': start, 'end': end},
             **finance_flags)
        for stock_row in stock_rows
    ]
    job_frame = pd.DataFrame(job_rows)
    insert_many('job_finance_basic', job_frame, memo='设置财务数据基础任务')
def init_base_schedule(start=start1, end=end1):
    """Create one pending ``job_basic_index`` row per index for a range.

    Does nothing except log a message when ``mysql_execute`` reports a
    positive value for the existing-jobs query (presumably the number of
    matching rows -- confirm against ``mysql_execute``). Otherwise one row
    per ``ts_code`` in ``index_basic`` is inserted with both task flags set
    to ``'0'``.

    :param start: range start stored on each job row.
    :param end: range end stored on each job row.
    :return: None
    """
    lookup_sql = "select * from job_basic_index where `start`='%s' and `end`='%s'" % (start, end)
    if mysql_execute(lookup_sql) > 0:
        logger.info("已设置初始任务")
        return

    index_rows = mysql_search("select ts_code from index_basic order by ts_code")
    job_rows = [
        {
            'index_code': index_row[0],
            'start': start,
            'end': end,
            # Both task flags start as '0' (not done).
            'index_daily': '0',
            'index_weigth': '0',
        }
        for index_row in index_rows
    ]
    insert_many('job_basic_index', pd.DataFrame(job_rows), memo='设置指数基础任务')
def __finance_crawler_job(query_name, code_ls):
    """Fetch one kind of financial data for each job row and mark it done.

    For every entry of ``code_ls`` the API matching ``query_name`` is
    called, the result is stored with ``insert_many`` and, when at least one
    row was stored, the ``query_name.value`` column of the matching
    ``job_finance_basic`` row is set to 1. ``QueryName.fina_indicator`` is
    delegated entirely to ``__get_fina_indicator``.

    :param query_name: a ``QueryName`` member; an unsupported member is
        logged and ends the job.
    :param code_ls: sequence of rows indexed as ``(ts_code, start, end)``.
    :return: None
    """
    # These data sets are all fetched the same way through the generic
    # ``pro.query`` entry point.
    generic_queries = (QueryName.income, QueryName.balance_sheet,
                       QueryName.cash_flow, QueryName.forecast,
                       QueryName.express)
    update_field = query_name.value
    job_count = len(code_ls)

    for position, code in enumerate(code_ls, start=1):
        logger.info('%s抓取%s%s [%d/%d]' % (
            threading.current_thread().name, query_name.value, code,
            position, job_count))
        ts_code = code[0]
        start = code[1]
        end = code[2]

        if query_name in generic_queries:
            data = pro.query(query_name.value, ts_code=ts_code,
                             start_date=start, end_date=end)
        elif query_name == QueryName.fina_indicator:
            # Handles its own windowed fetching, storage and status update.
            __get_fina_indicator(query_name, ts_code, start, end)
            continue
        elif query_name == QueryName.fina_audit:
            data = pro.fina_audit(ts_code=ts_code,
                                  start_date=start, end_date=end)
        elif query_name == QueryName.fina_main_bz:
            # Keep the result: without the assignment nothing was stored.
            data = pro.fina_mainbz(ts_code=ts_code,
                                   start_date=start, end_date=end)
        else:
            logger.info("不在业务范围内")
            return

        memo = "执行任务" + query_name.value
        res = insert_many(query_name.value, data, memo=memo)
        if res is not None and res > 0:
            # ``code`` is wrapped in a 1-tuple: formatting a 3-element tuple
            # row against a single %s would raise TypeError.
            memo = "更新执行任务" + query_name.value + "[%s]状态" % (code,)
            u_sql = "update job_finance_basic set `%s`=1 where `ts_code`='%s' and `start`='%s' and `end`='%s'"
            mysql_execute(u_sql % (update_field, ts_code, start, end), memo=memo)
now_str = now_date.strftime('%Y%m%d') pre_str = pre_date.strftime('%Y%m%d') init_base_schedule(start=pre_str, end=now_str) daily_crawler_job( 'daily', is_thread=True, ) daily_crawler_job('adj_factor', is_thread=True) daily_crawler_job('suspend', is_thread=True) daily_crawler_job('daily_basic', is_thread=True) cal_hfq_close(is_thread=True, thread_n=2) def week_baic_crawle(): stock_basic_crawler() trade_cal_crawler() if __name__ == '__main__': schedule.every().day.at("15:30").do(stock_daily_crawler) schedule.every().day.at("20:30").do(stock_daily_crawler) schedule.every(30).minutes.do(cal_hfq_close) schedule.every().friday.at("11:30").do(week_baic_crawle) logger.info("启动stock_schedule_daily.") while True: schedule.run_pending() time.sleep(10)
def __cal_hfq_close(code_ls, fq_type='hfq'):
    """Compute adjusted prices for each job and write them back to ``daily``.

    For every entry of ``code_ls`` the daily prices and adjustment factors
    in ``[start, end]`` are loaded, ``close``/``high``/``low``/``open`` are
    multiplied by ``adj_factor`` and the products are written to the
    ``hfq_*`` fields of the matching ``daily`` documents (no upsert). When
    the bulk write touched at least one document the ``cal_hfq`` flag of the
    ``job_basic`` document is set.

    A job is skipped (with a log message) when it has no daily rows, or when
    the number of daily rows and adjustment-factor rows differ.

    :param code_ls: sequence of mappings with ``ts_code``, ``start`` and
        ``end`` keys.
    :param fq_type: only ``'hfq'`` is implemented; any other value,
        including ``'qfq'``, returns immediately without doing anything.
    :return: None
    """
    if fq_type != 'hfq':
        return

    update_field = 'hfq'
    memo = '后复权'
    price_columns = ('close', 'high', 'low', 'open')
    job_len = len(code_ls)

    for position, code in enumerate(code_ls, start=1):
        ts_code = code['ts_code']
        start = code['start']
        end = code['end']
        thread_name = threading.current_thread().name
        logger.info('%s计算%s %s[%s-%s]后复权数据 [%d/%d]\r' % (
            thread_name, memo, ts_code, start, end, position, job_len))

        close_data = find_data(
            TSDB_CONN['daily'],
            whereParam={'ts_code': ts_code,
                        'trade_date': {'$gte': start, '$lte': end}},
            selParam={'ts_code': True, 'trade_date': True, 'close': True,
                      'high': True, 'open': True, 'low': True, '_id': False})
        factor_data = find_data(
            TSDB_CONN['adj_factor'],
            whereParam={'ts_code': ts_code,
                        'trade_date': {'$gte': start, '$lte': end}},
            selParam={'ts_code': True, 'trade_date': True,
                      'adj_factor': True, '_id': False})

        if len(close_data) < 1:
            logger.info('不计算复权数据,没有日线数据')
            continue
        if len(factor_data) < 1:
            # An empty frame has no 'trade_date' column, so set_index below
            # would raise KeyError; treat it as "not in sync" instead.
            logger.info('不计算复权数据,日线和复权因子没有同步')
            continue

        pd_close = pd.DataFrame(close_data)
        pd_close.set_index('trade_date', inplace=True)
        pd_factor = pd.DataFrame(factor_data)
        pd_factor.set_index('trade_date', inplace=True)
        if pd_factor.shape[0] != pd_close.shape[0]:
            logger.info('不计算复权数据,日线和复权因子没有同步')
            continue

        # Align prices and factors side by side on trade_date.
        fq_data = pd.concat([pd_close, pd_factor], axis=1)
        fq_data['adj_factor'] = pd.to_numeric(fq_data['adj_factor'])
        for column in price_columns:
            fq_data[column] = pd.to_numeric(fq_data[column])
        for column in price_columns:
            fq_data[update_field + column] = fq_data[column] * fq_data['adj_factor']

        update_requests = []
        for trade_date, row in fq_data.iterrows():
            my_index = {'ts_code': ts_code, 'trade_date': str(trade_date)}
            doc = {update_field + '_' + column: row[update_field + column]
                   for column in price_columns}
            doc['adj_factor'] = row['adj_factor']
            update_requests.append(
                UpdateOne(my_index, {'$set': doc}, upsert=False))

        result, _ = save_data_update_one(None, TSDB_CONN['daily'],
                                         update_requests,
                                         memo='daily更新%s数据' % memo)
        flag = result is not None and (
            result.matched_count > 0
            or result.upserted_count > 0
            or result.modified_count > 0)
        if flag:
            update_date(TSDB_CONN['job_basic'],
                        where_param={'ts_code': ts_code,
                                     'start': start,
                                     'end': end},
                        update_doc={'cal_hfq': True},
                        memo='更新复权任务状态')
def update_date(collection, where_param=None, update_doc=None, memo=''):
    """Apply a ``$set`` update to one matching document and log ``memo``.

    :param collection: collection to update; must provide ``update_one``.
    :param where_param: filter selecting the document; defaults to an empty
        filter.
    :param update_doc: fields to set; defaults to an empty mapping.
    :param memo: message written to the log after the update.
    :return: None
    """
    where_param = {} if where_param is None else where_param
    update_doc = {} if update_doc is None else update_doc
    # ``update_one`` replaces the legacy ``Collection.update`` call, which
    # also changed a single document by default and was removed in PyMongo 4.
    collection.update_one(where_param, {"$set": update_doc})
    logger.info(memo)