def raw_history_bars(self, instrument, frequency, start_dt=None, end_dt=None, length=None):
    """Return raw history bars for ``instrument`` at ``frequency``.

    Daily (``"1d"``) bars are requested from ``self._api.daily``; every other
    frequency is delegated to ``MiniteBarDataSourceMixin.raw_history_bars``.

    The daily window is chosen from any two of ``start_dt``, ``end_dt`` and
    ``length`` (checked by truthiness, so ``length=0`` counts as "not given").
    When both ``start_dt`` and ``end_dt`` are given, ``length`` is ignored.

    Returns:
        ``QuantOsConverter.df2np(...)`` of the rows with positive volume, or
        ``QuantOsConverter.empty()`` when there is nothing to return.

    Raises:
        RuntimeError: fewer than two of ``start_dt``/``end_dt``/``length``
            were supplied for a daily request.
    """
    symbol = instrument_to_tushare(instrument)

    if frequency not in ["1d"]:
        return MiniteBarDataSourceMixin.raw_history_bars(
            self, instrument, frequency, start_dt=start_dt, end_dt=end_dt, length=length
        )

    if start_dt and end_dt:
        s_date_int = convert_date_to_int(start_dt.date())
        e_date_int = convert_date_to_int(end_dt.date())
    elif start_dt and length:
        dates = self._dates_index(instrument)
        s_date_int = convert_date_to_int(start_dt.date())
        s_pos = safe_searchsorted(dates, s_date_int)
        s_date_int = int(dates[s_pos])
        # Clamp the window to the last known date.
        e_date_int = int(dates[min(s_pos + length, len(dates)) - 1])
    elif end_dt and length:
        dates = self._dates_index(instrument)
        e_date_int = convert_date_to_int(end_dt.date())
        e_pos = safe_searchsorted(dates, e_date_int, side="right")
        if e_pos == 0:
            # end_dt is earlier than every known date. Without this guard
            # dates[e_pos - 1] would be dates[-1] and the request would span
            # the entire history instead of nothing.
            return QuantOsConverter.empty()
        s_date_int = int(dates[max(e_pos - length, 0)])
        e_date_int = int(dates[e_pos - 1])
    else:
        raise RuntimeError("At least two of [start_dt,end_dt,length] should be given.")

    # The integer dates carry six trailing digits that the API call does not
    # take; integer division strips them.
    data, msg = self._api.daily(
        symbol,
        freq=frequency,
        adjust_mode=None,
        start_date=s_date_int // 1000000,
        end_date=e_date_int // 1000000,
    )

    if isinstance(data, pd.DataFrame) and data.size:
        data = data[data["volume"] > 0]  # TODO: skip_suspended?
        return QuantOsConverter.df2np(data)

    if msg:
        system_log.warning(msg)
    return QuantOsConverter.empty()
def __init__(self, f):
    """Load the share-transformation table from the JSON file at ``f``.

    If the file does not exist, a warning is logged and
    ``DEFAULT_SHARE_TRANSFORMATION`` is used instead.
    """
    try:
        with open(f, 'r', encoding="utf-8") as handle:
            table = json.load(handle)
    except FileNotFoundError:
        # only for compatibility with the old bundle
        system_log.warning("{} not found, use default data which may be out-of-date".format(f))
        table = DEFAULT_SHARE_TRANSFORMATION
    self._share_transformation = table
def fix_time(self, stock_list: list or str, factors: list or f, trade_date: datetime.date or str):
    """Build a per-stock table of factor values for a single day.

    Returns a one-dimensional structured array with one row per entry of the
    converted ``stock_list``: a ``"stock"`` field (``U11``) followed by one
    ``<f8`` field per factor, named after ``ft.name``. Cells with no matching
    record stay NaN. Records are read in ascending ``PubDate`` order, so a
    later record overwrites an earlier one for the same stock.

    NOTE(review): ``trade_date`` is currently ignored; the query window is
    hard-coded to 2010-08-14 00:00:00 .. 23:59:59. Confirm whether this is a
    leftover test stub before relying on the result.
    """
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(
        factors, self.factor2collection_map)

    start_date = datetime.datetime(2010, 8, 14)
    end_date = start_date + datetime.timedelta(
        hours=23, minutes=59, seconds=59)

    dtypes = [("stock", "U11")]
    dtypes.extend((ft.name, "<f8") for ft in factors)

    # numpy.nan is the canonical spelling; the numpy.NAN alias no longer
    # exists in NumPy 2.0.
    result = numpy.full((len(stock_list), ), numpy.nan, dtype=dtypes)
    result["stock"] = stock_list

    # Cache of stock code -> row index, shared across collections.
    indexes = {}
    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(
            collection2factor_map[collection])

        # Mongo projection: only the fields needed, without _id.
        doc_snap = {k: 1 for k in ['SecuCode', 'PubDate', *factor_name_list]}
        doc_snap["_id"] = 0

        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {
                'SecuCode': {
                    '$in': stock_list
                },
                "PubDate": {
                    "$gte": start_date,
                    "$lte": end_date
                }
            }, doc_snap).sort("PubDate", pymongo.ASCENDING)

        for d in data:
            code = d['SecuCode']
            idx = indexes.get(code)
            if idx is None:
                idx = find_date_in_array(code, result["stock"])
                if idx == -1:
                    system_log.warning(
                        f"[Finance.fix_time] code index not found. record={d}"
                    )
                    continue
                indexes[code] = idx
            for ft in factor_name_list:
                result[ft][idx] = d[ft]
    return result
def fix_factor(self, stock_list: list, factor: f or list, time: str or datetime.date, frequency: (1, 2, 3, 4)):
    """Build a date-by-stock table for a single factor.

    Returns a one-dimensional structured array with one row per entry of
    ``TradeCalendar().calendar(start_date, end_date)``: a ``"date"`` field
    (``uint32``) followed by one ``<f8`` field per converted stock code.
    Cells with no matching record stay NaN; records whose ``PubDate`` is not
    found in the calendar are logged and skipped.

    Only the first collection of the factor map, and the first factor within
    it, is used.

    NOTE(review): ``time`` and ``frequency`` are currently ignored; the query
    window is hard-coded to 2015-01-01 .. 2015-03-31 (marked as a test time
    point in the original comments).
    """
    # Normalize the arguments.
    stock_list = convert_11code(stock_list)
    collection2factor_map = dis_collection2factor_map(
        factor, self.factor2collection_map)
    collection, field = list(collection2factor_map.items())[0]
    field = field[0].name

    # Mongo projection: only the fields needed, without _id.
    doc_snap = {k: 1 for k in ['SecuCode', 'PubDate', field]}
    doc_snap["_id"] = 0

    # Test time point.
    start_date = datetime.datetime(2015, 1, 1)
    end_date = datetime.datetime(2015, 3, 31)

    # Query.
    db_coll = connect_coll(collection, self._db)
    ret = db_coll.find(
        {
            'SecuCode': {
                '$in': stock_list
            },
            "PubDate": {
                "$gte": start_date,
                "$lte": end_date
            }
        }, doc_snap).sort('PubDate', pymongo.ASCENDING)

    # One float column per security code.
    dtypes = [("date", "uint32")]
    dtypes.extend((code, "<f8") for code in stock_list)

    trade_days = TradeCalendar().calendar(start_date, end_date)
    # numpy.nan is the canonical spelling; the numpy.NaN alias no longer
    # exists in NumPy 2.0.
    result = numpy.full((trade_days.shape[0], ), numpy.nan, dtype=dtypes)
    result["date"] = trade_days

    # Cache of date -> row index.
    indexes = {}
    for r in ret:
        t = yyyymmdd_date(r['PubDate'])
        idx = indexes.get(t)
        # Compare against None: row 0 is a valid cached index and must not
        # be treated as a cache miss.
        if idx is None:
            idx = find_date_in_array(t, result["date"])
            if idx == -1:
                # Not a trading day.
                system_log.warning(
                    f"[Finance.fix_factor] date index not found. record={r}"
                )
                continue
            indexes[t] = idx
        result[r.get('SecuCode')][idx] = r[field]
    return result
def safe_searchsorted(a, v, side='left', sorter=None):
    """``np.searchsorted`` that never returns an out-of-range "left" index.

    When the insertion point falls past the end of ``a``, a warning is logged
    and the result is clamped: ``len(a) - 1`` for ``side="left"`` and
    ``len(a)`` for ``side="right"``. Otherwise the position reported by
    ``np.searchsorted`` is returned unchanged.

    Raises:
        AssertionError: ``side`` is neither ``"left"`` nor ``"right"``.
        RuntimeError: ``a`` is empty.
    """
    assert side in ["left", "right"]

    size = len(a)
    if size == 0:
        raise RuntimeError("Can't search in a empty array!")

    insert_at = np.searchsorted(a, v, side=side, sorter=sorter)
    if insert_at < size:
        return insert_at

    # Insertion point is past the last element: warn and clamp.
    message = "Value to search [%s] beyond array range [ %s - %s ], there may be some data missing." % (
        v, a[0], a[-1])
    system_log.warning(RuntimeWarning(message))
    if side == "left":
        return size - 1
    return size
def fix_symbol(self, stock: str, factors: list or f, time: str or datetime.date, frequency: (1, 2, 3, 4)):
    """Build a date-by-factor table for a single stock.

    Returns a one-dimensional structured array with one row per entry of
    ``TradeCalendar().calendar(start_date, end_date)``: a ``"date"`` field
    (``uint32``) followed by one ``<f8`` field per factor, named after
    ``ft.name``. Cells with no matching record stay NaN; records whose
    ``PubDate`` is not found in the calendar are logged and skipped.

    The result array is created once, before the collections are queried, so
    values from every collection are kept and an empty factor map yields an
    all-NaN table.

    NOTE(review): ``time`` and ``frequency`` are currently ignored; the query
    window is hard-coded to 2010-01-01 .. 2015-03-31 (marked as a test time
    point in the original comments).
    """
    stock = convert_11code(stock)[0]
    collection2factor_map = dis_collection2factor_map(
        factors, self.factor2collection_map)

    # Test time point.
    start_date = datetime.datetime(2010, 1, 1)
    end_date = datetime.datetime(2015, 3, 31)

    calendar = TradeCalendar()
    trade_days = calendar.calendar(start_date, end_date)

    dtypes = [("date", "uint32")]
    dtypes.extend((ft.name, "<f8") for ft in factors)

    # Initialize the result once, outside the collection loop.
    # numpy.nan is the canonical spelling; the numpy.NAN alias no longer
    # exists in NumPy 2.0.
    result = numpy.full((trade_days.shape[0], ), numpy.nan, dtype=dtypes)
    result["date"] = trade_days

    # Cache of date -> row index; the mapping is the same for every
    # collection, so it is shared.
    indexes = {}

    for collection in collection2factor_map:
        factor_name_list = gen_factor_name_list(
            collection2factor_map[collection])

        # Mongo projection: only the fields needed, without _id.
        doc_snap = {k: 1 for k in ['PubDate', *factor_name_list]}
        doc_snap["_id"] = 0

        db_coll = connect_coll(collection, self._db)
        data = db_coll.find(
            {
                'SecuCode': stock,
                "PubDate": {
                    "$gte": start_date,
                    "$lte": end_date
                }
            }, doc_snap).sort("PubDate", pymongo.ASCENDING)

        for d in data:
            t = yyyymmdd_date(d["PubDate"])
            idx = indexes.get(t)
            # Compare against None: row 0 is a valid cached index and must
            # not be treated as a cache miss.
            if idx is None:
                idx = find_date_in_array(t, result["date"])
                if idx == -1:
                    system_log.warning(
                        f"[Finance.fix_symbol] date index not found. record={d}"
                    )
                    continue
                indexes[t] = idx
            for ft in factor_name_list:
                result[ft][idx] = d[ft]
    return result