def get_roll_yield(date=None, var="CU", symbol1=None, symbol2=None, df=None):
    """
    Annualised roll yield between two futures contracts on one trading day.

    When both ``symbol1`` and ``symbol2`` are supplied, exactly those two
    contracts are compared.  Otherwise the two contracts of the variety with
    the largest open interest (dominant and sub-dominant) are used.

    Parameters
    ------
    date: string or None
        trading day, format: YYYYMMDD; today when None
    var: string
        contract variety such as RB, AL; replaced by the variety of
        ``symbol1`` when ``symbol1`` is given
    symbol1: string
        contract 1, e.g. rb1810
    symbol2: string
        contract 2, e.g. rb1812
    df: DataFrame or None
        daily bars holding the contract prices; when it is not a DataFrame the
        bars are fetched inside the function, so passing it in speeds things up
    Return
    -------
    tuple
        (roll_yield, near_by, deferred)
    None
        when ``date`` is not a trading day, or when the two contracts (or
        their close prices) cannot be found in the data
    False
        when either close price is zero
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    date_str = date.strftime("%Y%m%d")
    if date_str not in calendar:
        warnings.warn("%s非交易日" % date_str)
        return None
    if symbol1:
        var = symbol_varieties(symbol1)
    if not isinstance(df, pd.DataFrame):
        market = symbol_market(var)
        df = get_futures_daily(start_day=date, end_day=date, market=market)

    # 20200304: exchange data may contain rows such as "CUefp"; drop them.
    df = df[~df["symbol"].str.contains("efp")]

    if not (symbol1 and symbol2):
        # No explicit pair: fall back to the two contracts of the variety
        # with the largest open interest.
        df = df[df["variety"] == var].sort_values("open_interest", ascending=False)
        ranked = df["symbol"].tolist()
        if len(ranked) < 2:
            return None
        symbol1, symbol2 = ranked[0], ranked[1]

    close1_rows = df.loc[df["symbol"] == symbol1.upper(), "close"].astype("float").tolist()
    close2_rows = df.loc[df["symbol"] == symbol2.upper(), "close"].astype("float").tolist()
    if not close1_rows or not close2_rows:
        return None
    close1 = close1_rows[0]
    close2 = close2_rows[0]

    # The digits of a symbol are <year part><2-digit month>.
    digits1 = re.sub(r"\D", "", symbol1)
    year1 = int(digits1[:-2])
    month1 = int(digits1[-2:])
    digits2 = re.sub(r"\D", "", symbol2)
    year2 = int(digits2[:-2])
    month2 = int(digits2[-2:])
    # Number of months by which contract 1 is dated after contract 2.
    month_gap = (year1 - year2) * 12 + (month1 - month2)
    if close1 == 0 or close2 == 0:
        return False
    roll_yield = np.log(close2 / close1) / month_gap * 12
    if month_gap > 0:
        # Contract 1 is the later one, so contract 2 is the near-by leg.
        return roll_yield, symbol2, symbol1
    return roll_yield, symbol1, symbol2
def _table_cut_cal(table_cut, symbol):
    """
    Append a totals row to a ranking table and tag it with symbol and variety.

    The totals row is the column-wise sum of ``table_cut``; its ``rank`` is set
    to 999 and its three party-name columns are set to None.

    :param table_cut: the table slice to process; it is not modified
    :type table_cut: pandas.DataFrame
    :param symbol: contract symbol written into the ``symbol`` column; its
        variety (via ``symbol_varieties``) is written into ``variety``
    :type symbol: str
    :return: a new table with the totals row appended
    :rtype: pandas.DataFrame
    """
    var = symbol_varieties(symbol)
    numeric_cols = intColumns + ['rank']
    # Work on a copy: the caller usually passes a slice of a larger table and
    # the column assignments below must not write back into it.
    table_cut = table_cut.copy()
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    table_cut_sum = table_cut.sum()
    table_cut_sum['rank'] = 999
    for col in ['vol_party_name', 'long_party_name', 'short_party_name']:
        table_cut_sum[col] = None
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    table_cut = pd.concat([table_cut, pd.DataFrame(table_cut_sum).T], sort=True)
    table_cut['symbol'] = symbol
    table_cut['variety'] = var
    # The concatenation can leave the numeric columns as object dtype.
    table_cut[numeric_cols] = table_cut[numeric_cols].astype(int)
    return table_cut
def get_rank_sum(date=None, vars_list=cons.contract_symbols):
    """
    Sum the top-5/10/15/20 member ranking data of the four futures exchanges.

    Note 1: SHFE and CFFEX only publish rankings per contract, not per
    variety, so the per-variety rows for those exchanges are obtained by
    adding up the rows of the variety's contracts; they are not a real
    variety-level ranking.
    Note 2: DCE only publishes variety rankings, not contract rankings.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; today when None
    :param vars_list: list of varieties such as RB, AL; defaults to all
    :return: pd.DataFrame with one row per contract (plus one summed row per
        SHFE/CFFEX variety); None when ``date`` is not a trading day; False
        when one of the exchange fetchers returns False
        symbol                           contract                  string
        variety                          variety                   string
        vol_top5                         volume of top 5 members          int
        vol_chg_top5                     volume change of top 5 members   int
        long_open_interest_top5          long positions of top 5 members  int
        long_open_interest_chg_top5      change of the above              int
        short_open_interest_top5         short positions of top 5 members int
        short_open_interest_chg_top5     change of the above              int
        vol_top10                        volume of top 10 members         int
        ...
        date                             date                      string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return None

    fetchers = (
        ('dce', get_dce_rank_table),
        ('shfe', get_shfe_rank_table),
        ('czce', get_czce_rank_table),
        ('cffex', get_cffex_rank_table),
    )
    rank_tables = {}
    for exchange, fetch in fetchers:
        exchange_vars = [i for i in vars_list if i in cons.market_exchange_symbols[exchange]]
        if not exchange_vars:
            continue
        data = fetch(date, exchange_vars)
        if data is False:
            return False
        rank_tables.update(data)

    fields = ('vol', 'vol_chg',
              'long_open_interest', 'long_open_interest_chg',
              'short_open_interest', 'short_open_interest_chg')
    rows = []
    for table in rank_tables.values():
        table = table.applymap(lambda x: 0 if x == '' else x)
        for symbol_inner in table['symbol'].unique():
            var = symbol_varieties(symbol_inner)
            if var not in vars_list:
                continue
            table_cut = table[table['symbol'] == symbol_inner]
            # Compare on a separate float series instead of writing into the slice.
            rank = table_cut['rank'].astype('float')
            row = {'symbol': symbol_inner, 'variety': var}
            for top_n in (5, 10, 15, 20):
                top = table_cut[rank <= top_n]
                for field in fields:
                    row['%s_top%d' % (field, top_n)] = top[field].sum()
            row['date'] = date_str
            rows.append(row)
    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    records = pd.DataFrame(rows)

    if not records.empty:
        present = set(records['variety'])
        add_vars = [i for i in cons.market_exchange_symbols['shfe'] + cons.market_exchange_symbols['cffex']
                    if i in present]
        for var in add_vars:
            # Variety-level row = sum over the variety's contract rows.
            records_cut = records[records['variety'] == var]
            var_record = pd.DataFrame(records_cut.sum()).T
            var_record['date'] = date_str
            var_record.loc[:, ['variety', 'symbol']] = var
            records = pd.concat([records, var_record])

    return records.reset_index(drop=True)
def get_czce_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details of the Zhengzhou Commodity Exchange (CZCE).

    Note: this exchange publishes both variety rankings and contract rankings.
    Data starts on 20050509 and is updated around 16:30 on each trading day.

    NOTE(review): for dates up to 2010-08-25 the old page layout is parsed and
    ``vars_list`` is not applied; every table found on the page is returned.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; today when None
    :param vars_list: list of varieties such as RB, AL; defaults to all
    :return: dict mapping symbol to a pd.DataFrame; an empty dict when the
        date is before 20050509, is not a trading day, or the page has fewer
        than 6 columns
        rank                        rank                          int
        vol_party_name              member at this volume rank    string
        vol                         member's volume               int
        vol_chg                     member's volume change        int
        long_party_name             member at this long rank      string
        long_open_interest          member's long positions       int
        long_open_interest_chg      change of long positions      int
        short_party_name            member at this short rank     string
        short_open_interest         member's short positions      int
        short_open_interest_chg     change of short positions     int
        symbol                      contract                      string
        variety                     variety                       string
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return {}
    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % date_str
        data = _czce_df_read(url, skip_rows=0)
        r = requests_link(url, 'utf-8')
        r.encoding = 'utf-8'
        # r.text is already decoded; bs4 ignores from_encoding for str markup
        # (and warns about it), so it is not passed.
        soup = BeautifulSoup(r.text, 'lxml')
        symbols = []
        for link in soup.find_all('b'):
            strings = str(link).split(' ')
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except Exception:
                    # Best effort: keep the raw token when it cannot be mapped.
                    symbol = strings[4]
                symbols.append(symbol)
        big_dict = {}
        for i, symbol in enumerate(symbols):
            # The i-th symbol is paired with the (i + 2)-th table read from the page.
            table_cut = data[i + 2]
            table_cut.columns = rank_columns
            # Copy so the writes below do not target a slice of the parsed table.
            table_cut = table_cut.iloc[:-1, :].copy()
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)
            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict
    elif date <= datetime.date(2015, 11, 11):
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date_str)
        data = _czce_df_read(url, skip_rows=1)[1]
    elif date < datetime.date(2017, 12, 28):
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date_str)
        data = _czce_df_read(url, skip_rows=1)[0]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date_str)
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}
    table = pd.DataFrame(data.iloc[:, :9])
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)
    # Header rows (index labels containing these markers) delimit one table each.
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)
    header_pattern = re.compile(r':(.*) ')
    valid_ranks = {str(i) for i in range(21)}
    big_dict = {}
    for i, start in enumerate(indexes):
        if start == 0:
            # First table: its header is stored as the index name.
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name
        elif i < len(indexes) - 1:
            table_cut = table.loc[start:indexes[i + 1], :]
            string = table_cut.index[0]
        else:
            table_cut = table.loc[start:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            name = header_pattern.findall(string)[0]
            try:
                symbol = chinese_to_english(find_chinese(name))
            except Exception:
                # Best effort: keep the raw header text when it cannot be mapped.
                symbol = name
        var = symbol_varieties(symbol)
        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            # Keep only rows whose index label is a rank "0".."20".
            table_cut = table_cut.loc[[x for x in table_cut.index if x in valid_ranks], :]
            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member ranking details of the Shanghai Futures Exchange (SHFE).

    Note: this exchange only publishes rankings per contract inside each
    variety, not an overall ranking per variety.
    Data starts on 20020107 and is updated around 16:30 on each trading day.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; today when None
    :param vars_list: list of varieties such as RB, AL; defaults to all
    :return: dict mapping symbol to a pd.DataFrame; an empty dict when the
        date is before 20020107, is not a trading day, the response is not
        valid JSON, or the payload has fewer than 3 columns
        rank                        rank                          int
        vol_party_name              member at this volume rank    string
        vol                         member's volume               int
        vol_chg                     member's volume change        int
        long_party_name             member at this long rank      string
        long_open_interest          member's long positions       int
        long_open_interest_chg      change of long positions      int
        short_party_name            member at this short rank     string
        short_open_interest         member's short positions      int
        short_open_interest_chg     change of short positions     int
        symbol                      contract                      string
        variety                     variety                       string
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    date_str = date.strftime('%Y%m%d')
    if date_str not in calendar:
        warnings.warn('%s非交易日' % date_str)
        return {}
    url = cons.SHFE_VOL_RANK_URL % date_str
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except (AttributeError, TypeError, ValueError):
        # No usable response or a non-JSON body: treat as "no data".
        return {}
    df = pd.DataFrame(context['o_cursor'])

    df = df.rename(columns={
        'CJ1': 'vol',
        'CJ1_CHG': 'vol_chg',
        'CJ2': 'long_open_interest',
        'CJ2_CHG': 'long_open_interest_chg',
        'CJ3': 'short_open_interest',
        'CJ3_CHG': 'short_open_interest_chg',
        'PARTICIPANTABBR1': 'vol_party_name',
        'PARTICIPANTABBR2': 'long_party_name',
        'PARTICIPANTABBR3': 'short_party_name',
        'PRODUCTNAME': 'product1',
        'RANK': 'rank',
        'INSTRUMENTID': 'symbol',
        'PRODUCTSORTNO': 'product2',
    })

    if len(df.columns) < 3:
        return {}
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(symbol_varieties)

    df = df[df['rank'] > 0]
    # Drop helper columns when present; missing ones are simply skipped.
    df = df.drop(
        columns=['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2'],
        errors='ignore',
    )
    present = set(df['variety'])
    get_vars = [var for var in vars_list if var in present]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in df_var['symbol'].unique():
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict