def get_data(symbol=None):
    """Prepare indicator data and charts for one symbol, or charts for a basket.

    The output directory ``./data/prepared/datacsv/`` is created first if it
    does not exist.

    * With ``symbol``: load its data via ``load_data``, add the MACD columns
      (``MACD``, ``MACDsignal``, ``MACDhist``), run ``prepare_kdj``, drop the
      first 34 rows, run ``calculator_close``, then ``save_csv`` and
      ``draw_pic(symbol, 'macd_j')``.
    * Without ``symbol``: call ``draw_pic(sym, 'macd_j')`` for every symbol of
      a hard-coded basket. The indicator pipeline is NOT re-run in this mode.

    Parameters
    ----------
    symbol : str, optional
        Security code such as ``'600887.SH'``. Any falsy value selects the
        basket mode.

    Notes
    -----
    The data-service credentials are read from the ``QUANTOS_USERNAME`` and
    ``QUANTOS_TOKEN`` environment variables. They used to be committed in this
    function; the previously committed token must be treated as leaked and
    revoked. The remote index-constituent query only prints its status message
    (its result is not used), so it is skipped when no credentials are set.
    """
    out_dir = './data/prepared/datacsv/'
    if not os.path.isdir(out_dir):
        try:
            os.makedirs(out_dir)
        except OSError:
            # Another process may have created the directory between the check
            # and the call; only fail when it really is not there.
            if not os.path.isdir(out_dir):
                raise

    if not symbol:
        username = os.environ.get('QUANTOS_USERNAME')
        token = os.environ.get('QUANTOS_TOKEN')
        if username and token:
            api = DataApi(addr="tcp://data.quantos.org:8910")
            _, msg = api.login(username, token)
            # The constituents returned here are intentionally not used: the
            # basket below is fixed. Only the status message is reported.
            _, msg = api.query(
                view="lb.indexCons",
                fields="symbol",
                filter="index_code=000016.SH&start_date=20180801&end_date=20180831",
                data_format='pandas')
            print(msg)
        else:
            print("QUANTOS_USERNAME / QUANTOS_TOKEN not set; "
                  "skipping remote index constituent query")

        symbols = [
            '600887.SH', '601988.SH', '600048.SH', '601006.SH', '601398.SH',
            '601628.SH', '601166.SH', '601318.SH', '601328.SH', '601169.SH',
            '601088.SH', '601857.SH', '601390.SH', '601601.SH', '601186.SH',
            '601668.SH', '601766.SH', '600999.SH', '601989.SH', '601688.SH',
            '601288.SH', '601818.SH', '601800.SH', '601360.SH', '601336.SH',
            '603993.SH', '601211.SH', '600958.SH', '601878.SH', '601229.SH',
            '601881.SH',
        ]
        print(symbols)
        for sym in symbols:
            # Only the chart is drawn here; call get_data(sym) to rebuild the
            # indicator CSV of a single symbol.
            draw_pic(sym, 'macd_j')
    else:
        df = load_data(symbol)
        close = [float(x) for x in df['close']]
        # Prepare MACD data.
        df['MACD'], df['MACDsignal'], df['MACDhist'] = talib.MACD(
            np.array(close), fastperiod=12, slowperiod=26, signalperiod=9)
        df = prepare_kdj(df, 9, 'close')
        # Drop the leading rows so the computed KDJ values are more accurate.
        # NOTE(review): the original note says "start from row 13" but the
        # code slices from row 34 -- confirm which is intended.
        df = df[34:]
        df = calculator_close(df)
        save_csv(symbol, df)
        draw_pic(symbol, 'macd_j')
symbols = ",".join(df['symbol']) start_data = 20170101 curr_data = 20180110 index_map = get_index_map(api, symbols, start_data) # print(index_map) indicators = list() for (symbol, index) in index_map.items(): performance_metrics, risk_metrics, df_returns = cal_active_return( api, symbol, index, start_data, curr_data) if performance_metrics is None: continue df_temp, msg_temp = api.query(view="lb.mfInfo", fields='name', filter="symbol=" + symbol) name = df_temp.loc[0]['name'] indicators.append( (symbol, index, name, performance_metrics['Annual Return (%)'], performance_metrics['Annual Volatility (%)'], performance_metrics['Sharpe Ratio'], df_returns['strat_cum'].iat[-1], df_returns['bench_cum'].iat[-1], risk_metrics['Beta'], risk_metrics['Maximum Drawdown (%)'], risk_metrics['Maximum Drawdown start'], risk_metrics['Maximum Drawdown end'])) labels = [ 'symbol', 'index', 'name', 'AnnualReturn', 'AnnualVolatility', 'SharpeRatio', 'StratCumReturn', 'BenchCumReturn', 'Beta', 'MaximumDrawdown', 'MaximumDrawdownStart', 'MaximumDrawdownEnd'
class RemoteDataService(DataService):
    """
    RemoteDataService is a concrete class using data from remote server's database.

    All remote calls go through ``self.data_api`` (a ``DataApi`` instance),
    which is created and logged in by ``init_from_config``.
    """
    __metaclass__ = Singleton

    # TODO no validity check for input parameters

    def __init__(self):
        DataService.__init__(self)

        # Set by init_from_config(); None until then.
        self.data_api = None

        self.REPORT_DATE_FIELD_NAME = 'report_date'

    def __del__(self):
        # data_api stays None until init_from_config() succeeds (and may not
        # exist at all if __init__ failed), so guard before closing.
        api = getattr(self, 'data_api', None)
        if api is not None:
            api.close()

    def init_from_config(self, props):
        """
        Create the DataApi connection and log in.

        Does nothing when an API object already exists and is logged in, or
        when one exists and `props` is empty. If one exists and `props` is
        non-empty, the old connection is closed and a new one is created.

        Parameters
        ----------
        props : dict or None
            Recognised keys: "remote.data.address", "remote.data.username",
            "remote.data.password" (each defaults to "") and "timeout"
            (defaults to 60).

        """
        # do not initialize and login again
        if self.data_api is not None and self.data_api._loggined:
            return

        if props is None:
            props = dict()

        if self.data_api is not None:
            if len(props) == 0:
                return
            else:
                self.data_api.close()

        def get_from_list_of_dict(l, key, default=None):
            """Return the first non-None value of `key` among dicts in `l`."""
            res = None
            for dic in l:
                res = dic.get(key, None)
                if res is not None:
                    break
            if res is None:
                res = default
            return res

        props_default = dict()  # jutil.read_json(jutil.join_relative_path('etc/data_config.json'))
        dic_list = [props, props_default]

        address = get_from_list_of_dict(dic_list, "remote.data.address", "")
        username = get_from_list_of_dict(dic_list, "remote.data.username", "")
        password = get_from_list_of_dict(dic_list, "remote.data.password", "")
        # NOTE(review): the defaults above are "", so these can never be None
        # and this check never fires; missing values surface as a failed login.
        if address is None or username is None or password is None:
            raise ValueError("no address, username or password available!")
        time_out = get_from_list_of_dict(dic_list, "timeout", 60)

        self.data_api = DataApi(address, use_jrpc=False)
        self.data_api.set_timeout(timeout=time_out)
        print("\nDataApi login: {}@{}".format(username, address))
        r, msg = self.data_api.login(username=username, password=password)
        if not r:
            print(" login failed: msg = '{}'\n".format(msg))
        else:
            print(" login success \n")

    # -----------------------------------------------------------------------------------
    # Basic APIs

    def daily(self, symbol, start_date, end_date,
              fields="", adjust_mode=None):
        """
        Query daily data through ``data_api.daily`` and drop duplicate rows.

        Returns
        -------
        df : pd.DataFrame
        err_msg : str

        """
        df, err_msg = self.data_api.daily(symbol=symbol, start_date=start_date, end_date=end_date,
                                          fields=fields, adjust_mode=adjust_mode, data_format="")
        # trade_status performance warning
        # TODO there will be duplicate entries when on stocks' IPO day
        df = df.drop_duplicates()
        return df, err_msg

    def bar(self, symbol,
            start_time=200000, end_time=160000, trade_date=None,
            freq='1M', fields=""):
        """
        Query intraday bars through ``data_api.bar``.

        `freq` is forwarded to the API (it used to be ignored and '1M' was
        always sent; the default is still '1M').

        Returns
        -------
        df : pd.DataFrame
        msg : str

        """
        df, msg = self.data_api.bar(symbol=symbol, fields=fields,
                                    start_time=start_time, end_time=end_time, trade_date=trade_date,
                                    freq=freq, data_format="")
        return df, msg

    def query(self, view, filter="", fields="", **kwargs):
        """
        Get various reference data.

        Parameters
        ----------
        view : str
            data source.
        fields : str
            Separated by ','
        filter : str
            filter expressions.
        kwargs
            Forwarded to ``data_api.query``. Do not pass `data_format`: it is
            already supplied here.

        Returns
        -------
        df : pd.DataFrame
        msg : str
            error code and error message, joined by ','

        Examples
        --------
        res3, msg3 = ds.query("lb.secDailyIndicator",
                              fields="price_level,high_52w_adj,low_52w_adj",
                              filter="start_date=20170907&end_date=20170907",
                              orderby="trade_date")
        view does not change. fields can be any field predefined in reference data api.

        """
        df, msg = self.data_api.query(view, fields=fields, filter=filter, data_format="", **kwargs)
        return df, msg

    # -----------------------------------------------------------------------------------
    # Convenient Functions

    def get_trade_date_range_OLD(self, start_date, end_date):
        # NOTE(review): relies on self.calendar, which this class never sets.
        return self.calendar.get_trade_date_range(start_date, end_date)

    @staticmethod
    def _dic2url(d):
        """
        Convert a dict to str like 'k1=v1&k2=v2'

        Parameters
        ----------
        d : dict

        Returns
        -------
        str

        """
        l = ['='.join([key, str(value)]) for key, value in d.viewitems()]
        return '&'.join(l)

    def query_lb_fin_stat(self, type_, symbol, start_date, end_date, fields="", drop_dup_cols=None):
        """
        Helper function to call data_api.query with 'lb.income' more conveniently.

        Parameters
        ----------
        type_ : {'income', 'balance_sheet', 'cash_flow', 'fin_indicator'}
        symbol : str
            separated by ','
        start_date : int
            Announcement date in results will be no earlier than start_date
        end_date : int
            Announcement date in results will be no later than end_date
        fields : str, optional
            separated by ',', default ""
        drop_dup_cols : list or tuple
            If given, sort by these columns and drop duplicate entries on them,
            keeping the first.

        Returns
        -------
        df : pd.DataFrame
            index date, columns fields
        msg : str

        Raises
        ------
        NotImplementedError
            If `type_` is not one of the supported values.

        """
        view_map = {'income': 'lb.income', 'cash_flow': 'lb.cashFlow', 'balance_sheet': 'lb.balanceSheet',
                    'fin_indicator': 'lb.finIndicator'}
        view_name = view_map.get(type_, None)
        if view_name is None:
            raise NotImplementedError("type_ = {:s}".format(type_))

        dic_argument = {'symbol': symbol,
                        'start_date': start_date,
                        'end_date': end_date,
                        # 'update_flag': '0'
                        }
        if view_name != 'lb.finIndicator':
            # report_type codes:
            #   408001000: joint
            #   408002000: joint (single quarter)
            # we do not use single quarter since there are zeros
            dic_argument.update({'report_type': '408001000'})
        filter_argument = self._dic2url(dic_argument)  # 0 means first time, not update

        # NOTE(review): every other helper in this class passes `orderby`;
        # confirm the server also honours `order_by`.
        res, msg = self.query(view_name, fields=fields, filter=filter_argument,
                              order_by=self.REPORT_DATE_FIELD_NAME)

        # change data type (best effort: keep the raw frame if it cannot be cast)
        try:
            cols = list(set.intersection({'ann_date', 'report_date'}, set(res.columns)))
            dic_dtype = {col: int for col in cols}
            res = res.astype(dtype=dic_dtype)
        except Exception:
            # Deliberately ignored, but no longer swallows KeyboardInterrupt /
            # SystemExit as the former bare `except:` did.
            pass

        if drop_dup_cols is not None:
            res = res.sort_values(by=drop_dup_cols, axis=0)
            res = res.drop_duplicates(subset=drop_dup_cols, keep='first')

        return res, msg

    def query_lb_dailyindicator(self, symbol, start_date, end_date, fields=""):
        """
        Helper function to call data_api.query with 'lb.secDailyIndicator' more conveniently.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        fields : str, optional
            separated by ',', default ""

        Returns
        -------
        df : pd.DataFrame
            index date, columns fields
        msg : str

        """
        filter_argument = self._dic2url({'symbol': symbol,
                                         'start_date': start_date,
                                         'end_date': end_date})

        return self.query("lb.secDailyIndicator",
                          fields=fields,
                          filter=filter_argument,
                          orderby="trade_date")

    def get_index_weights(self, index, trade_date):
        """
        Query 'lb.indexWeight' for an index on one date.

        '000300.SH' is queried under the code '399300.SZ'. The returned
        `weight` column is the raw value divided by 100.

        Parameters
        ----------
        index : str
        trade_date : int

        Returns
        -------
        pd.DataFrame
            Indexed by symbol.

        """
        if index == '000300.SH':
            index = '399300.SZ'

        filter_argument = self._dic2url({'index_code': index,
                                         'trade_date': trade_date})

        df_io, msg = self.query("lb.indexWeight", fields="", filter=filter_argument)
        if msg != '0,':
            print(msg)
        df_io = df_io.set_index('symbol')
        df_io = df_io.astype({'weight': float, 'trade_date': int})
        df_io.loc[:, 'weight'] = df_io['weight'] / 100.
        return df_io

    def get_index_weights_daily(self, index, start_date, end_date):
        """
        Build a daily table of index weights between start_date and end_date.

        Weights are sampled with `get_index_weights` at the first trade date
        and then at monthly steps, and forward-filled onto every trade date.

        Parameters
        ----------
        index : str
        start_date : int
        end_date : int

        Returns
        -------
        res : pd.DataFrame
            Index is trade_date, columns are symbols.

        """
        # TODO: temporary api
        trade_dates = self.get_trade_date_range(start_date, end_date)
        start_date, end_date = trade_dates[0], trade_dates[-1]
        td = start_date

        dic = dict()
        symbols_set = set()
        while td <= end_date:
            df = self.get_index_weights(index, td)
            symbols_set.update(set(df.index))
            dic[td] = df['weight']

            td = jutil.get_next_period_day(td, 'month', 1)
        merge = pd.concat(dic, axis=1).T
        merge = merge.fillna(0.0)  # for those which are not components
        res = pd.DataFrame(index=trade_dates, columns=sorted(list(symbols_set)), data=np.nan)
        res.update(merge)
        res = res.fillna(method='ffill')
        res = res.loc[start_date: end_date]
        return res

    def _get_index_comp(self, index, start_date, end_date):
        """
        Query 'lb.indexCons' for an index between start_date and end_date.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        df_io : pd.DataFrame
        msg : str

        """
        filter_argument = self._dic2url({'index_code': index,
                                         'start_date': start_date,
                                         'end_date': end_date})

        df_io, msg = self.query("lb.indexCons", fields="",
                                filter=filter_argument, orderby="symbol")
        return df_io, msg

    def get_index_comp(self, index, start_date, end_date):
        """
        Return list of symbols that have been in index during start_date and end_date.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        list

        """
        df_io, msg = self._get_index_comp(index, start_date, end_date)
        if msg != '0,':
            print(msg)
        return list(np.unique(df_io.loc[:, 'symbol']))

    def get_index_comp_df(self, index, start_date, end_date):
        """
        Get index components on each day during start_date and end_date.

        Parameters
        ----------
        index : str
            separated by ','
        start_date : int
        end_date : int

        Returns
        -------
        res : pd.DataFrame
            index dates, columns all securities that have ever been components,
            values are 0 (not in) or 1 (in)

        """
        df_io, msg = self._get_index_comp(index, start_date, end_date)
        if msg != '0,':
            print(msg)

        def str2int(s):
            # An empty date string is mapped to 99999999.
            if isinstance(s, (str, unicode)):
                return int(s) if s else 99999999
            elif isinstance(s, (int, np.integer, float, np.float)):
                return s
            else:
                raise NotImplementedError("type s = {}".format(type(s)))
        df_io.loc[:, 'in_date'] = df_io.loc[:, 'in_date'].apply(str2int)
        df_io.loc[:, 'out_date'] = df_io.loc[:, 'out_date'].apply(str2int)

        dates = self.get_trade_date_range(start_date=start_date, end_date=end_date)

        dic = dict()
        gp = df_io.groupby(by='symbol')
        for sec, df in gp:
            mask = np.zeros_like(dates, dtype=int)
            for idx, row in df.iterrows():
                # NOTE(review): both bounds are strict, so in_date and out_date
                # themselves are marked 0 -- confirm this is intended.
                bool_index = np.logical_and(dates > row['in_date'], dates < row['out_date'])
                mask[bool_index] = 1
            dic[sec] = mask

        res = pd.DataFrame(index=dates, data=dic)

        return res

    def get_industry_daily(self, symbol, start_date, end_date, type_='SW', level=1):
        """
        Get industry code of securities on each trade date during start_date and end_date.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        type_ : {'SW', 'ZZ'}
        level : int
            Industry classification level, passed to `get_industry_raw`.

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are industry code (as str)

        """
        df_raw = self.get_industry_raw(symbol, type_=type_, level=level)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {sec: df.sort_values(by='in_date', axis=0).reset_index()
                   for sec, df in dic_sec.viewitems()}

        df_ann_tmp = pd.concat({sec: df.loc[:, 'in_date'] for sec, df in dic_sec.viewitems()}, axis=1)
        df_value_tmp = pd.concat({sec: df.loc[:, 'industry{:d}_code'.format(level)]
                                  for sec, df in dic_sec.viewitems()},
                                 axis=1)

        idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        df_ann = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_ann.loc[df_ann_tmp.index, df_ann_tmp.columns] = df_ann_tmp
        df_value = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        df_value.loc[df_value_tmp.index, df_value_tmp.columns] = df_value_tmp

        dates_arr = self.get_trade_date_range(start_date, end_date)
        df_industry = align.align(df_value, df_ann, dates_arr)

        # TODO before industry classification is available, we assume they belong to their first group.
        df_industry = df_industry.fillna(method='bfill')
        df_industry = df_industry.astype(str)

        return df_industry

    def get_industry_raw(self, symbol, type_='ZZ', level=1):
        """
        Get raw industry classification records of securities from
        ShenWanZhiShu or ZhongZhengZhiShu.

        Parameters
        ----------
        symbol : str
            separated by ','
        type_ : {'SW', 'ZZ'}
        level : {1, 2, 3, 4}
            Use which level of industry index classification.

        Returns
        -------
        df : pd.DataFrame

        Raises
        ------
        ValueError
            If `type_` or `level` is not supported.

        """
        if type_ == 'SW':
            src = u'申万研究所'.encode('utf-8')
            if level not in [1, 2, 3, 4]:
                raise ValueError("For [SW], level must be one of {1, 2, 3, 4}")
        elif type_ == 'ZZ':
            src = u'中证指数有限公司'.encode('utf-8')
            # NOTE(review): the check accepts levels 1-4 but the message says
            # {1, 2}; confirm which levels ZZ really supports.
            if level not in [1, 2, 3, 4]:
                raise ValueError("For [ZZ], level must be one of {1, 2}")
        else:
            raise ValueError("type_ must be one of SW or ZZ")

        filter_argument = self._dic2url({'symbol': symbol,
                                         'industry_src': src})
        fields_list = ['symbol', 'industry{:d}_code'.format(level), 'industry{:d}_name'.format(level)]

        df_raw, msg = self.query("lb.secIndustry", fields=','.join(fields_list),
                                 filter=filter_argument, orderby="symbol")
        if msg != '0,':
            print(msg)

        df_raw = df_raw.astype(dtype={'in_date': int,
                                      # 'out_date': int
                                      })
        return df_raw.drop_duplicates()

    def get_adj_factor_daily(self, symbol, start_date, end_date, div=False):
        """
        Get adjust factor of securities on each trade date.

        The table covers the trade dates between the earliest and the latest
        `trade_date` found in the raw data.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
        end_date : int
        div : bool
            False for normal adjust factor, True for diff
            (each row divided by the previous row, missing values set to 1.0).

        Returns
        -------
        res : pd.DataFrame
            index dates, columns symbols
            values are adjust factors

        """
        df_raw = self.get_adj_factor_raw(symbol, start_date=start_date, end_date=end_date)

        dic_sec = jutil.group_df_to_dict(df_raw, by='symbol')
        dic_sec = {sec: df.set_index('trade_date').loc[:, 'adjust_factor']
                   for sec, df in dic_sec.viewitems()}

        # TODO: duplicate codes with dataview.py: line 512
        res = pd.concat(dic_sec, axis=1)  # TODO: fillna ?

        idx = np.unique(np.concatenate([df.index.values for df in dic_sec.values()]))
        symbol_arr = np.sort(symbol.split(','))
        res_final = pd.DataFrame(index=idx, columns=symbol_arr, data=np.nan)
        res_final.loc[res.index, res.columns] = res

        # align to every trade date
        s, e = df_raw.loc[:, 'trade_date'].min(), df_raw.loc[:, 'trade_date'].max()
        dates_arr = self.get_trade_date_range(s, e)
        if not len(dates_arr) == len(res_final.index):
            res_final = res_final.reindex(dates_arr)

            res_final = res_final.fillna(method='ffill').fillna(method='bfill')

        if div:
            res_final = res_final.div(res_final.shift(1, axis=0)).fillna(1.0)

        return res_final

    def get_adj_factor_raw(self, symbol, start_date=None, end_date=None):
        """
        Query adjust factor for symbols.

        Parameters
        ----------
        symbol : str
            separated by ','
        start_date : int
            None is sent as an empty string.
        end_date : int
            None is sent as an empty string.

        Returns
        -------
        df : pd.DataFrame

        """
        if start_date is None:
            start_date = ""
        if end_date is None:
            end_date = ""

        filter_argument = self._dic2url({'symbol': symbol,
                                         'start_date': start_date, 'end_date': end_date})
        fields_list = ['symbol', 'trade_date', 'adjust_factor']

        df_raw, msg = self.query("lb.secAdjFactor", fields=','.join(fields_list),
                                 filter=filter_argument, orderby="symbol")
        if msg != '0,':
            print(msg)
        df_raw = df_raw.astype(dtype={'symbol': str,
                                      'trade_date': int,
                                      'adjust_factor': float
                                      })
        return df_raw.drop_duplicates()

    def query_inst_info(self, symbol, inst_type="", fields=""):
        """
        Query 'jz.instrumentInfo' for symbols.

        Parameters
        ----------
        symbol : str
        inst_type : str
            Separated by ','. Empty string means "1,2,3,4,5,101,102,103,104".
        fields : str

        Returns
        -------
        pd.DataFrame
            Indexed by symbol.

        """
        if inst_type == "":
            inst_type = "1,2,3,4,5,101,102,103,104"

        filter_argument = self._dic2url({'symbol': symbol,
                                         'inst_type': inst_type})

        df_raw, msg = self.query("jz.instrumentInfo", fields=fields,
                                 filter=filter_argument, orderby="symbol")
        if msg != '0,':
            print(msg)

        # Only cast the columns that were actually returned.
        dtype_map = {'symbol': str, 'list_date': int, 'delist_date': int, 'inst_type': int}
        cols = set(df_raw.columns)
        dtype_map = {k: v for k, v in dtype_map.viewitems() if k in cols}

        df_raw = df_raw.astype(dtype=dtype_map)

        res = df_raw.set_index('symbol')
        return res

    # -----------------------------------------------------------------------------------
    # subscribe for real time trading

    def subscribe(self, symbols):
        """

        Parameters
        ----------
        symbols : str
            Separated by ,

        """
        self.data_api.subscribe(symbols, func=self.mkt_data_callback)

    def mkt_data_callback(self, key, quote):
        """Wrap `quote` in a MARKET_DATA event and put it on ``self.ctx.instance``."""
        e = Event(EVENT_TYPE.MARKET_DATA)
        e.dic = {'quote': quote}
        self.ctx.instance.put(e)

    # ---------------------------------------------------------------------
    # Calendar

    def get_trade_date_range(self, start_date, end_date):
        """
        Get array of trade dates within given range.
        Return zero size array if no trade dates within range.

        Parameters
        ----------
        start_date : int
            YYYYmmdd
        end_date : int

        Returns
        -------
        trade_dates_arr : np.ndarray
            dtype = int

        """
        filter_argument = self._dic2url({'start_date': start_date,
                                         'end_date': end_date})

        df_raw, msg = self.data_api.query("jz.secTradeCal", fields="trade_date",
                                          filter=filter_argument, orderby="")
        if df_raw.empty:
            return np.array([], dtype=int)

        trade_dates_arr = df_raw['trade_date'].values.astype(int)
        return trade_dates_arr

    def get_last_trade_date(self, date):
        """
        Return the latest trade date strictly before `date`.

        Only the two weeks before `date` are searched.

        Parameters
        ----------
        date : int

        Returns
        -------
        res : int

        Raises
        ------
        IndexError
            If no earlier trade date is found in that window.

        """
        dt = jutil.convert_int_to_datetime(date)
        delta = pd.Timedelta(weeks=2)
        dt_old = dt - delta
        date_old = jutil.convert_datetime_to_int(dt_old)

        dates = self.get_trade_date_range(date_old, date)
        mask = dates < date
        res = dates[mask][-1]

        return res

    def is_trade_date(self, date):
        """
        Check whether date is a trade date.

        Parameters
        ----------
        date : int

        Returns
        -------
        bool

        """
        dates = self.get_trade_date_range(date, date)
        return len(dates) > 0

    def get_next_trade_date(self, date):
        """
        Return the earliest trade date strictly after `date`.

        Only the two weeks after `date` are searched.

        Parameters
        ----------
        date : int

        Returns
        -------
        res : int

        Raises
        ------
        IndexError
            If no later trade date is found in that window.

        """
        dt = jutil.convert_int_to_datetime(date)
        delta = pd.Timedelta(weeks=2)
        dt_new = dt + delta
        date_new = jutil.convert_datetime_to_int(dt_new)

        dates = self.get_trade_date_range(date, date_new)
        mask = dates > date
        res = dates[mask][0]

        return res
from jaqs.data import DataApi
import matplotlib.pyplot as plt
# matplotlib inline
import pandas as pd
import numpy as np

# Connect to the data service and log in; `phone` and `token` are
# placeholders to be replaced with real account values.
api = DataApi(addr='tcp://data.tushare.org:8910')
phone = 'phone'
token = 'token'
df, msg = api.login(phone, token)
print(df, msg)

# Fetch instruments with inst_type=1 and status=1 on the SH and SZ markets.
df, msg = api.query(
    view="jz.instrumentInfo",
    fields="market,symbol,list_date,status",
    filter="inst_type=1&status=1&market=SH,SZ",
    data_format="pandas",
)

# Index the table by symbol (the column itself is kept) and order it.
df = df.set_index(df['symbol']).sort_index()

# Report the total number of rows, then the SZ and SH counts.
print(len(df))
for market_code in ('SZ', 'SH'):
    print(len(df[df['market'] == market_code]))
class JaqsDataDownloader(object):
    """Download minute bars through the jaqs DataApi and store them in MongoDB.

    Settings are read from ``config.json`` and the contract/market mapping
    from ``contract.json`` when the object is created. Call ``loginJaqsApp``
    and ``connectDb`` before downloading.
    """

    MAIN_CONTRACT_DB_NAME = 'VnTrader_MainContract'

    def __init__(self):
        self.setting = None
        self.contractDict = None
        self.api = None
        self.dbClient = None
        self.db = None
        self.symbols = []
        self.taskList = []
        # Only used by the coroutine-based workflow; plain and multi-threaded
        # downloads do not need it.
        self.finishedSymbols = []

        # Retry settings for connection errors.
        self._retry = 0
        self._waitTime = 10

        # Load configuration.
        self._loadSetting()
        self._loadContracts()

    def _loadSetting(self):
        """Load settings (including the default symbol list) from config.json."""
        with codecs.open('config.json', 'r', 'utf-8') as f:
            self.setting = json.load(f)
        self.symbols = self.setting['SYMBOLS']

    def _loadContracts(self):
        """Load the market -> contract-prefix mapping from contract.json.

        A jaqs contract code is the base code plus a jaqs market suffix, e.g.
        'cu1805.SHF'. The mapping lives in the JSON file so that newly listed
        contracts can be added there.
        """
        with codecs.open('contract.json', 'r', 'utf-8') as f:
            self.contractDict = json.load(f)

    def _symbolConvert(self, symbol):
        """Convert a contract code to the jaqs format ``<symbol>.<market>``.

        :return: the converted code, or None (after printing a message) when
            the symbol has no leading letters or no market lists its prefix.
        """
        matched = re.match(r'^([A-Z]|[a-z])+', symbol)
        # A symbol without leading letters used to crash with AttributeError
        # on `.group()`; treat it like any other unknown contract.
        if matched is not None:
            symbolAlphabet = matched.group()
            for market, symbols in self.contractDict.items():
                if symbolAlphabet in symbols:
                    return '%s.%s' % (symbol, market)
        print(u'%s-目前期货交易所无此合约.' % symbol)

    def _generateVtBar(self, symbol, data):
        """Build a VtBarData (vnpy bar object) from one row of bar data."""
        bar = VtBarData()
        bar.symbol = symbol
        bar.vtSymbol = symbol
        bar.open = float(data['open'])
        bar.high = float(data['high'])
        bar.low = float(data['low'])
        bar.close = float(data['close'])
        bar.volume = int(data['volume'])
        bar.openInterest = int(data['oi'])
        dt_str = '%08d %06d' % (data['date'], data['time'])
        bar.datetime = datetime.strptime(dt_str, '%Y%m%d %H%M%S')
        bar.date = bar.datetime.strftime('%Y%m%d')
        bar.time = bar.datetime.strftime('%H:%M:%S')
        return bar

    def loginJaqsApp(self):
        """Log in to the jaqs API using ADDR, PHONE and TOKEN from the settings."""
        print(u'正在登陆JaqsAPI.')
        self.api = DataApi(addr=self.setting['ADDR'])
        self.api.login(self.setting['PHONE'], self.setting['TOKEN'])

    def connectDb(self, dbName=MINUTE_DB_NAME):
        """Connect to MongoDB and select the database `dbName`."""
        self.dbClient = MongoClient(self.setting['MONGO_HOST'], self.setting['MONGO_PORT'])
        self.db = self.dbClient[dbName]

    def setSymbols(self, symbolsList):
        """Set the contracts to download, overriding the list from the config file."""
        self.symbols = symbolsList

    def getData(self, symbol, *args, **kwargs):
        """Fetch 1-minute bars for one contract through the jaqs API.

        Wrong arguments may make the call block or raise; check them first
        when something goes wrong.

        :param symbol: contract code, e.g. rb1810
        :param args: forwarded to ``api.bar``
        :param kwargs: forwarded to ``api.bar``; ``trade_date`` is the most
            commonly used one
        :return: the data returned by ``api.bar`` (its message is discarded)
        """
        symbol = self._symbolConvert(symbol)
        df, msg = self.api.bar(symbol=symbol, freq="1M", *args, **kwargs)
        return df

    def getTradingday(self, startDate, endDate=None):
        """Return the trade dates between two dates.

        :param startDate: start date, format YYYY-MM-DD
        :param endDate: end date, same format; defaults to today
        :return: list of trade dates, format YYYY-MM-DD
        """
        # The jaqs calendar API expects YYYYMMDD, so convert first.
        if not endDate:
            endDate = datetime.now().strftime('%Y%m%d')
        else:
            endDate = '%s%s%s' % (endDate[0:4], endDate[5:7], endDate[8:10])
        startDate = '%s%s%s' % (startDate[0:4], startDate[5:7], startDate[8:10])

        flt = 'start_date=%s&end_date=%s' % (startDate, endDate)
        df, msg = self.api.query(view='jz.secTradeCal', filter=flt)
        # Build a real list: on Python 3 `map` would return a one-shot
        # iterator, which breaks callers that index or iterate twice.
        return ['%s-%s-%s' % (dateStr[0:4], dateStr[4:6], dateStr[6:8])
                for dateStr in df['trade_date'].values]

    def getMainContract(self, symbol, startDate='2012-01-01', endDate=None):
        """Return the historical main contracts of a product.

        jaqs does not provide this; the data is read from the local
        MAIN_CONTRACT_DB_NAME database, which has to be filled beforehand.

        :param symbol: product letters without date, e.g. rb
        :param startDate: start date (inclusive), format YYYY-MM-DD
        :param endDate: end date (exclusive), same format; defaults to now
        :return: list of (lower-cased contract code, 'YYYY-MM-DD') tuples in
            ascending date order, or None (after printing a message) when no
            collection contains the symbol
        """
        if not endDate:
            endDate = datetime.now()
        else:
            endDate = datetime.strptime(endDate, '%Y-%m-%d')
        startDate = datetime.strptime(startDate, '%Y-%m-%d')

        db = self.dbClient[self.MAIN_CONTRACT_DB_NAME]

        # Find the collection (one per exchange) whose documents hold the symbol.
        exchange = None
        for colName in db.collection_names():
            doc = db[colName].find_one()
            # find_one() returns None for an empty collection.
            if doc is not None and symbol in doc:
                exchange = colName

        if exchange:
            flt = {'date': {'$gte': startDate, '$lt': endDate}}
            projection = {'date': True, '_id': False, symbol: True}
            cursor = db[exchange].find(flt, projection).sort('date', ASCENDING)
            return [(doc[symbol].lower(), doc['date'].strftime('%Y-%m-%d'))
                    for doc in cursor]
        else:
            print(u'数据库找不该合约的数据')

    def getExistedDay(self, symbol):
        """Return the set of days ('YYYY-MM-DD') already stored for a contract.

        The most recent stored day is left out on purpose: if a previous
        download failed, that day is probably incomplete and should be
        downloaded again. An empty collection yields an empty set.
        """
        col = self.db[symbol]
        docs = col.find({}, {'datetime': True, '_id': False}).sort('datetime', ASCENDING)
        dateList = [doc['datetime'].strftime('%Y-%m-%d') for doc in docs]
        if not dateList:
            # Nothing stored yet (this used to raise IndexError).
            return set()

        dates = set(dateList)
        dates.remove(dateList[-1])
        return dates

    def saveToDb(self, symbol, overwrite=True, *args, **kwargs):
        """Store the minute bars of one contract in the database.

        Downloads the current trade day by default; pass e.g.
        trade_date='2018-02-03' to choose another day.

        :param symbol: contract code, e.g. rb1810
        :param overwrite: True (default) overwrites stored data. With False,
            a `trade_date` that is already stored is skipped.
        :param args: forwarded to the jaqs API
        :param kwargs: forwarded to the jaqs API
        :return: None
        """
        # Skip the task when the day is already stored and overwriting is off.
        if not overwrite and 'trade_date' in kwargs:
            qryDate = kwargs['trade_date']
            existedDate = self.getExistedDay(symbol)
            if qryDate in existedDate:
                print(u'数据库已存在该日期数据,忽略')
                return

        # Call the jaqs API.
        data = self.getData(symbol, *args, **kwargs)
        if data.empty:
            print(u'无数据!可能是节假日或超过jaqs数据库的保存范围。')
            return

        col = self.db[symbol]
        col.create_index([('datetime', ASCENDING)], unique=True)

        dataStart = None
        dataEnd = None
        rowCount = len(data)
        for i in range(rowCount):
            bar = self._generateVtBar(symbol, data.iloc[i])
            document = bar.__dict__
            flt = {'datetime': bar.datetime}
            col.replace_one(flt, document, upsert=True)
            if i == 0:
                dataStart = bar.datetime
            # Independent check (not `elif`): with a single row the first bar
            # is also the last one, and dataEnd must not stay None.
            if i == rowCount - 1:
                dataEnd = bar.datetime

        # Record the finished task; only the coroutine-based workflow reads it.
        dateStr = '%s' % dataEnd
        missionID = '.'.join([dateStr[0:10], symbol])
        self.finishedSymbols.append(missionID)
        print(u'合约%s下载完成。日期区间:%s - %s' % (symbol, dataStart, dataEnd))

    def downloadAllSymbol(self, *args, **kwargs):
        """Download one day of minute bars for every configured contract.

        Intended for the daily update after market close. Downloads the
        current trade day by default; pass trade_date='2018-02-03' to choose
        another day. Failed contracts are retried; each failure increments
        the retry counter and lengthens the wait before the next pass, and
        passes stop once the counter exceeds 3.
        """
        self.taskList.extend(self.symbols)
        print(u'开始下载所有合约分钟线数据,任务列表:')
        print(self.taskList)

        # If the failure is not a connection problem, check the arguments
        # passed on to the jaqs API.
        while self.taskList and self._retry <= 3:
            # Wait before a retry pass.
            if self._retry > 0:
                print(u'%s秒后开始重试' % self._waitTime)
                time.sleep(self._waitTime)

            # Iterate over a snapshot: removing finished symbols from the list
            # being iterated would make the loop skip the following symbol.
            for symbol in list(self.taskList):
                try:
                    self.saveToDb(symbol, *args, **kwargs)
                except Exception:
                    # `Exception` rather than a bare except, so Ctrl-C still
                    # stops the download.
                    self._retry += 1
                    self._waitTime += 10
                    traceback.print_exc()
                else:
                    self.taskList.remove(symbol)

        # Report completion once the loop ends.
        if not self.taskList:
            print(u'全部合约分钟线数据下载完成')
        else:
            print(u'下载失败,达到最大重试次数。未完成合约:%s' % self.taskList)