def get_treasury_data(start, end):
    """Return treasury rates for the given period.

    Documents are read from the ``国债利率`` collection of the default
    database, restricted to ``start <= date <= end`` and sorted by ``date``
    in ascending order.

    Arguments:
        start {date like} -- first date of the period
        end {date like} -- last date of the period

    Returns:
        DataFrame -- rates for the period, indexed by a UTC-localized
        ``DatetimeIndex`` built from the ``date`` field, with columns
        renamed through ``TREASURY_COL_MAPS``

    Example:
        >>> start, end = '2020-03-10', '2020-03-15'
        >>> get_treasury_data(start, end).iloc[:3,:5]
                                       cash    1month    2month    3month    6month
        date
        2020-03-10 00:00:00+00:00  0.016000  0.016231  0.016610  0.016661  0.016991
        2020-03-11 00:00:00+00:00  0.016000  0.016727  0.016996  0.017001  0.017211
        2020-03-12 00:00:00+00:00  0.015742  0.016195  0.016993  0.016994  0.017625
    """
    start, end = sanitize_dates(start, end)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    # Sort on the same field the predicate filters on and the index is built
    # from; the documents are keyed by 'date'.
    sort = [('date', 1)]
    df = pd.DataFrame.from_records(
        collection.find(predicate, projection, sort=sort))
    df.index = pd.DatetimeIndex(df.pop('date'))
    # The 2-year tenor is missing from the data; approximate it with the
    # simple average of the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    return df.tz_localize('UTC')
def get_treasury_data(start_date, end_date):
    """Return treasury rates between two dates from the ``szsh`` session.

    Parameters
    ----------
    start_date : datetime-like
        First date of the period.
    end_date : datetime-like
        Last date of the period.

    Returns
    -------
    DataFrame
        Rates indexed by a UTC-localized ``DatetimeIndex``; the columns are
        labelled with ``TREASURY_COL_NAMES``.
    """
    start_date, end_date = sanitize_dates(start_date, end_date)
    # The filter below is applied with plain ``date`` objects.
    first_day = pd.Timestamp(start_date).date()
    last_day = pd.Timestamp(end_date).date()
    with session_scope('szsh') as sess:
        rows = sess.query(
            Treasury.date,
            Treasury.m1,
            Treasury.m3,
            Treasury.m6,
            Treasury.y1,
            Treasury.y3,
            Treasury.y5,
            Treasury.y7,
            Treasury.y10,
            Treasury.y20,
            Treasury.y30,
        ).filter(
            Treasury.date.between(first_day, last_day)
        ).all()
        df = pd.DataFrame.from_records(rows)
    # The 2-year tenor is missing: positions 4 and 5 hold the queried y1 and
    # y3 columns, and their simple average is inserted between them.
    two_year = (df.iloc[:, 4] + df.iloc[:, 5]) / 2
    df.insert(5, '2year', two_year)
    df.columns = TREASURY_COL_NAMES
    df = df.set_index(keys='date')
    df.index = pd.DatetimeIndex(df.index)
    return df.tz_localize('UTC')
def _fetch_single_index(code, start, end):
    """Read daily rows for one index from the ``wy_index_daily`` database.

    The collection is looked up by ``decode_index_code(code)`` and filtered
    to ``start <= 日期 <= end``. Columns that the index data does not carry
    are added with a value of 0.0 so the frame has the keys of
    ``WY_DAILY_COL_MAPS``; the columns are then renamed through that mapping.
    An empty frame is returned unchanged when nothing matches.
    """
    index_code = decode_index_code(code)
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_index_daily')[index_code]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = code
    # Zero-fill the fields that are absent from the index records.
    for missing in ('换手率', '流通市值', '总市值'):
        df[missing] = 0.0
    # Keep only the required columns, then switch to the mapped names.
    df = df[WY_DAILY_COL_MAPS.keys()].rename(columns=WY_DAILY_COL_MAPS)
    df.sort_values('date', inplace=True)
    # Zero-fill these columns as well.
    zeroed = [
        'b_close', 'b_high', 'b_low', 'b_open',
        'shares_outstanding', 'total_shares',
    ]
    df.loc[:, zeroed] = 0.0
    return df
def get_symbol_rets(symbol, start=None, end=None):
    """
    Fetch returns through the currently registered 'returns_func'.

    Parameters
    ----------
    symbol : object
        An identifier for the asset whose return series is desired.
        e.g. ticker symbol or database ID
    start : date, optional
        Earliest date to fetch data for.
        Defaults to earliest date available.
    end : date, optional
        Latest date to fetch data for.
        Defaults to latest date available.

    Returns
    -------
    pandas.Series
        Returned by the current 'returns_func'
    """
    first, last = sanitize_dates(start, end)
    returns_func = SETTINGS['returns_func']
    return returns_func(symbol, start_date=first, end_date=last)
def get_non_trading_days(start, end, tz='utc'):
    """Return the calendar days in the period that are not trading days.

    Every date of the natural calendar between ``start`` and ``end`` that is
    not returned by ``get_trading_dates`` counts as a non-trading day. The
    period must span at least one day.
    """
    start, end = sanitize_dates(start, end)
    assert (end - start).days >= 1, '期间最少相隔1天'
    calendar_days = pd.date_range(start, end, tz=tz)
    sessions = get_trading_dates(start, end, tz)
    return calendar_days.difference(sessions)
def get_trading_dates(start=None, end=None, tz='utc'):
    """Return all trading dates in the period.

    Keyword Arguments:
        start {date like} -- first date of the period (default: {None})
        end {date like} -- last date of the period (default: {None})
        tz {str} -- time zone of the output (default: {'utc'})

    Returns:
        DatetimeIndex -- trading dates in the period, sorted

    If the period reaches into the future, future business days are treated
    as trading days. Five situations are handled, depending on where today
    falls relative to the period:

        1. start > today          (period entirely in the future)
        2. today == start
        3. start < today < end
        4. today == end
        5. today > end            (period entirely in the past)
    """
    start, end = sanitize_dates(start, end)
    assert (end - start).days >= 1, '期间最少相隔1天'
    today = dt.datetime.today().date()
    one_day = pd.Timedelta(days=1)

    def _with_today(index):
        # Today is only included when it is itself a trading day.
        if is_trading_day(today):
            return index.append(pd.DatetimeIndex([today]))
        return index

    # Case 1: only future business days.
    if start > today:
        dates = pd.bdate_range(start, end, freq='B').sort_values()
    # Case 2: today (if trading) plus future business days.
    if start == today:
        dates = _with_today(pd.bdate_range(today + one_day, end, freq='B'))
    # Case 3: history up to yesterday, today (if trading), then the future.
    if start < today < end:
        dates = _with_today(
            _historical_trading_dates(start, today - one_day))
        upcoming = pd.bdate_range(today + one_day, end, freq='B')
        dates = dates.append(upcoming)
    # Case 4: history up to yesterday plus today (if trading).
    if end == today:
        dates = _with_today(
            _historical_trading_dates(start, today - one_day))
    # Case 5: history only.
    if end < today:
        dates = _historical_trading_dates(start, end)
    return dates.tz_localize(tz).sort_values()
def select_output_by(output, start=None, end=None, assets=None,
                     reduce_format=True):
    """
    Select rows of a `pipeline` output frame by time and asset code.

    Intended for analysing `run_pipeline` results in the research
    environment.

    Parameters
    ----------
    output : MultiIndex DataFrame
        pipeline output
    start : str
        start time
    end : str
        end time
    assets : iterable or str
        stock codes
    reduce_format : bool
        when true, drop an index level that holds a single value (a single
        session and/or a single asset)

    Raises
    ------
    ValueError
        if the index of `output` does not have exactly two levels

    Example
    -------
    >>> # result is the output of running a `pipeline`
    >>> select_output_by(result,'2018-04-23','2018-04-24',assets=['000585','600871'])
                                                mean_10
    2018-04-23 00:00:00+00:00   *ST东电(000585)   2.7900
                                *ST油服(600871)   2.0316
    2018-04-24 00:00:00+00:00   *ST东电(000585)   2.7620
                                *ST油服(600871)   2.0316
    """
    if output.index.nlevels != 2:
        raise ValueError('输入数据框只能是run_pipeline输出结果,MultiIndex DataFrame')
    start, end = sanitize_dates(start, end)
    # Snap the requested period to its first and last trading session.
    sessions = trading_sessions(start, end)
    start, end = sessions[0], sessions[-1]
    assets = symbols(assets) if assets is not None else []
    ret = _select_output_by(output, start, end, assets)
    if not reduce_format:
        return ret
    single_session = start == end
    single_asset = len(assets) == 1
    if single_session and single_asset:
        return ret.xs((start, assets[0]))
    if single_session:
        return ret.xs(start, level=0)
    if single_asset:
        return ret.xs(assets[0], level=1)
    return ret
def to_tdates(start, end):
    """Snap a date range onto trading sessions.

    ``start`` is moved forward to the next session and ``end`` backward to
    the previous session of the trading calendar. If that leaves the start
    after the end, the start is set to the end.

    Returns a tuple ``(all_sessions, start_session, end_session)``.
    """
    sessions = _trading_calendar().all_sessions
    # Normalise the inputs first.
    start, end = sanitize_dates(start, end)
    # Locate the matching sessions.
    # NOTE(review): ``get_loc(..., method=...)`` is not accepted by recent
    # pandas releases - confirm the pinned pandas version.
    first = sessions[sessions.get_loc(start, method='bfill')]
    last = sessions[sessions.get_loc(end, method='ffill')]
    first = last if first > last else first
    return sessions, first, last
def read_stock_daily(stock_code):
    """Load the daily test fixture of one stock as a DataFrame.

    Reads ``tests/resources/cndata/stock_daily/{stock_code}.csv``, keeps the
    columns listed in ``WY_DAILY_COL_MAPS`` under their mapped names, sorts
    by ``date`` and keeps the rows between ``START_DATE`` and ``END_DATE``.
    Missing ``change_pct`` values are replaced with 0.0.
    """
    start, end = sanitize_dates(START_DATE, END_DATE)
    pf = f'tests/resources/cndata/stock_daily/{stock_code}.csv'
    df = pd.read_csv(pf, encoding='gb2312', na_values=['-', None])
    df = df[list(WY_DAILY_COL_MAPS.keys())].rename(columns=WY_DAILY_COL_MAPS)
    df = df.sort_values('date')
    df['date'] = pd.to_datetime(df['date'])
    cond = df['date'].between(start, end)
    # Work on an explicit copy so the assignment below targets this frame
    # and not a slice of the unfiltered one.
    df = df.loc[cond, :].copy()
    # Assign the result back: an in-place fillna on ``df['change_pct']`` is
    # a chained operation that does not update ``df`` under Copy-on-Write.
    df['change_pct'] = df['change_pct'].fillna(0.0)
    return df
def get_adhoc_holidays(start, end, tz='utc'):
    """Return the business days in the period that are not trading days.

    **Note** this differs from the non-trading days:

        adhoc_holidays = non-trading days - weekend dates

    The period must span at least one day.
    """
    start, end = sanitize_dates(start, end)
    assert (end - start).days >= 1, '期间最少相隔1天'
    business_days = pd.bdate_range(start, end, tz=tz)
    sessions = get_trading_dates(start, end, tz)
    return business_days.difference(sessions)
def get_treasury_data(start_date, end_date):
    """Read cost-of-funds (treasury rate) data for a period.

    The result is reindexed to the ``XSHG`` calendar sessions in the period;
    sessions without a record take the last preceding value.

    Parameters
    ----------
    start_date : datetime-like
        First date of the period.
    end_date : datetime-like
        Last date of the period.

    return
    ----------
    DataFrame: rates indexed by UTC sessions, with columns renamed through
        ``TREASURY_COL_MAPS``.

    Examples
    --------
    >>> start_date = '2020-05-15'
    >>> end_date = '2020-05-25'
    >>> df = get_treasury_data(start_date, end_date)
    >>> df.iloc[:5, :5]
                                   cash    1month    2month    3month    6month
    date
    2020-05-15 00:00:00+00:00  0.006838  0.009496  0.009506  0.010076  0.011570
    2020-05-18 00:00:00+00:00  0.006838  0.009369  0.009611  0.010414  0.011701
    2020-05-19 00:00:00+00:00  0.009838  0.009425  0.010490  0.010307  0.012016
    2020-05-20 00:00:00+00:00  0.008188  0.009084  0.010712  0.011012  0.012378
    2020-05-21 00:00:00+00:00  0.007028  0.008569  0.010695  0.011032  0.012465
    """
    start, end = sanitize_dates(start_date, end_date)
    db = get_db()
    collection = db['国债利率']
    predicate = {'date': {'$gte': start, '$lte': end}}
    projection = {'_id': 0}
    sort = [('date', 1)]
    cursor = collection.find(predicate, projection, sort=sort)
    df = pd.DataFrame.from_records(cursor)
    # The 2-year tenor is missing from the data; approximate it with the
    # simple average of the 1-year and 3-year rates.
    value = (df['y1'] + df['y3']) / 2
    df.insert(7, '2year', value)
    df.rename(columns=TREASURY_COL_MAPS, inplace=True)
    df.set_index('date', inplace=True)
    df = df.tz_localize('UTC')
    calendar = get_calendar('XSHG')
    start = start.tz_localize('UTC')
    end = end.tz_localize('UTC')
    sessions = calendar.sessions_in_range(start, end)
    # The index must match the trading calendar exactly. ``ffill()`` is the
    # non-deprecated spelling of ``fillna(method='ffill')``.
    return df.reindex(sessions).ffill()
def _get_single_stock_equity(symbol, start_date, end_date, is_index,
                             index_name):
    """Return the daily ``change_pct`` series of one stock or index.

    The data comes from the collection named ``symbol`` in
    ``wy_index_daily`` (when ``is_index`` is true) or ``wy_stock_daily``.
    ``change_pct`` is divided by 100, indexed by UTC-localized date, given
    the name ``index_name`` and has missing values replaced with 0.0.
    """
    start_date, end_date = sanitize_dates(start_date, end_date)
    if is_index:
        db_name = 'wy_index_daily'
    else:
        db_name = 'wy_stock_daily'
    collection = get_db(db_name)[symbol]
    df = query(collection, start_date, end_date)
    df.columns = DAILY_COLS
    df['change_pct'] = df['change_pct'] / 100.0
    df['date'] = pd.to_datetime(df['date'])
    df = df.set_index('date').sort_index()
    res = df.tz_localize('utc')['change_pct']
    res.name = index_name
    # The raw data contains NaN values.
    res.fillna(0.0, inplace=True)
    return res
def fetch_treasury_data_from(start=EARLIEST_POSSIBLE_DATE.date(), end=None):
    """
    Fetch cost-of-funds (treasury rate) data for a period.

    Parameters
    ----------
    start : datelike
        Start date.
    end : datelike, optional
        End date. Defaults to today, evaluated on every call.

    Returns
    -------
    res : DataFrame
        Index: date
        columns: monthly and yearly tenors

    Example
    -------
    >>> df = fetch_treasury_data_from('2017-11-1','2017-11-20')
    >>> df.columns
    Index(['m0', 'm1', 'm2', 'm3', 'm6', 'm9', 'y1', 'y3', 'y5', 'y7', 'y10','y15', 'y20', 'y30', 'y40', 'y50'],dtype='object')
    >>> df.iloc[:,:6]
                      m0        m1        m2        m3        m6        m9
    date
    2017-11-01  0.030340  0.030800  0.030909  0.035030  0.035121  0.035592
    2017-11-02  0.029894  0.029886  0.032182  0.035074  0.035109  0.035493
    2017-11-03  0.027311  0.030052  0.032532  0.034992  0.035017  0.035461
    2017-11-06  0.026155  0.030086  0.032532  0.034917  0.034992  0.035514
    2017-11-07  0.026155  0.030127  0.032813  0.034788  0.035039  0.035465
    2017-11-08  0.026759  0.029984  0.033226  0.035399  0.035034  0.035469
    2017-11-09  0.027285  0.029925  0.033655  0.035553  0.034849  0.035629
    2017-11-10  0.027618  0.029958  0.033720  0.035691  0.035939  0.035735
    2017-11-13  0.028462  0.030854  0.034653  0.035708  0.035939  0.035935
    2017-11-14  0.028462  0.031018  0.034988  0.035754  0.035939  0.035940
    2017-11-15  0.028384  0.030871  0.035439  0.036412  0.036566  0.036252
    2017-11-16  0.028338  0.030875  0.035427  0.036317  0.036502  0.036222
    2017-11-17  0.027718  0.029956  0.035390  0.036981  0.036752  0.036183
    2017-11-20  0.028198  0.030235  0.035431  0.036797  0.036686  0.036153
    """
    # A default of ``pd.Timestamp('today')`` in the signature is evaluated
    # once, when the function is defined, and goes stale in a long-running
    # process; resolve "today" at call time instead.
    if end is None:
        end = pd.Timestamp('today')
    start, end = sanitize_dates(start, end)
    start, end = pd.Timestamp(start), pd.Timestamp(end)
    df = read_local_data()
    return _preprocess(df, start, end)
def _fetch_single_equity(stock_code, start, end):
    """Read the locally stored raw daily data of one stock.

    Rows come from the ``stock_code`` collection of ``wy_stock_daily``,
    filtered to ``start <= 日期 <= end``. The frame is cut down to the keys
    of ``WY_DAILY_COL_MAPS``, renamed through that mapping and sorted by
    ``date``. An empty frame is returned unchanged when nothing matches.
    """
    start, end = sanitize_dates(start, end)
    collection = get_db('wy_stock_daily')[stock_code]
    cursor = collection.find(
        {'日期': {'$gte': start, '$lte': end}},
        {'_id': 0},
        sort=[('日期', 1)],
    )
    df = pd.DataFrame.from_records(cursor)
    if df.empty:
        return df
    df['股票代码'] = stock_code
    # Keep only the required columns, under their mapped names.
    df = df[WY_DAILY_COL_MAPS.keys()].rename(columns=WY_DAILY_COL_MAPS)
    df.sort_values('date', inplace=True)
    return df
def fetch_history(code, start, end=None, is_index=False):
    """Download unadjusted historical trading data of a stock or an index.

    Remark:
        The provided data lags by one day.
    Record:
        Downloading the history of 002622 at `2018-12-12 16:00` returned
        data up to 2018-12-10, i.e. a lag of two days.
    """
    start, end = sanitize_dates(start, end)
    url_fmt = 'http://quotes.money.163.com/service/chddata.html?code={}&start={}&end={}'
    code = _query_code(code, is_index)
    # Dates go into the query string as YYYYMMDD.
    first = start.strftime('%Y%m%d')
    last = end.strftime('%Y%m%d')
    url = url_fmt.format(code, first, last) + '#01b07'
    page_response = get_page_response(url, 'get')
    return pd.read_csv(
        BytesIO(page_response.content),
        index_col=0,
        encoding='cp936',
        parse_dates=True,
        na_values=['None', '--', 'none'],
    )
def fetch_single_equity(stock_code, start, end):
    """
    Read daily trading data of a stock for a period from the local database.

    Notes
    -----
    1. Besides OHLCV, the data includes change percentage, turnover amount,
       turnover rate, circulating and total market value, circulating and
       total shares.
    2. Back-adjusted prices are added; using adjusted prices removes the
       gaps in charts.
    3. When writing in bcolz format the change-percentage column must be
       dropped, because it contains negative numbers.

    Parameters
    ----------
    stock_code : str
        Code of the stock to fetch. A code of length 7 is handled as an
        index and delegated to ``_fetch_single_index``.
    start : datetime-like
        Start date (inclusive).
    end : datetime-like
        End date.

    return
    ----------
    DataFrame: DataFrame with OHLCV columns. datetimeindex.tz is None

    Examples
    --------
    >>> # stock code 600710 has been reused
    >>> stock_code = '600710'
    >>> start = '2016-03-29'
    >>> end = pd.Timestamp('2017-07-31')
    >>> df = fetch_single_equity(stock_code, start, end)
    >>> df.iloc[-6:,:8]
              date  symbol  open  high   low  close  prev_close  change_pct
    322 2017-07-24  600710  9.36  9.36  9.36   9.36        9.36         NaN
    323 2017-07-25  600710  9.36  9.36  9.36   9.36        9.36         NaN
    324 2017-07-26  600710  9.36  9.36  9.36   9.36        9.36         NaN
    325 2017-07-27  600710  9.36  9.36  9.36   9.36        9.36         NaN
    326 2017-07-28  600710  9.36  9.36  9.36   9.36        9.36         NaN
    327 2017-07-31  600710  9.25  9.64  7.48   7.55        9.31    -18.9044
    """
    # Index daily data.
    if len(stock_code) == 7:
        return _fetch_single_index(stock_code, start, end)
    start, end = sanitize_dates(start, end)
    # Load the full history first so that adjusted prices are consistent
    # since the IPO.
    df = _fetch_single_equity(stock_code, None, None)
    if df.empty:
        return df
    # Restore zero prices, then add the adjusted prices.
    df = _add_back_prices(_fill_zero(df))
    in_period = df['date'].between(start, end)
    df = df.loc[in_period, :]
    if df.empty:
        return df
    date_values = df['date'].values
    t_start, t_end = date_values[0], date_values[-1]
    # Trading dates the data is expected to cover.
    dts = pd.to_datetime(
        [t for t in _tdates() if t >= t_start and t <= t_end])
    # Fill in the rows for suspended trading days.
    df = _reindex(df, dts)
    assert len(df) == len(dts), f"股票:{stock_code},期间{t_start} ~ {t_end} 数据不足"
    df.loc[:, 'shares_outstanding'] = df.market_cap / df.close
    df.loc[:, 'total_shares'] = df.total_cap / df.close
    if df.empty:
        return df
    # Only rows with a positive close are kept.
    return df[df['close'] > 0.0]