def get_czce_receipt_1(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Zhengzhou Commodity Exchange.

    Applies to 20080222 through 20100824 (inclusive).

    :param date: query date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :type date: str
    :param vars_list: variety codes such as CF, TA; defaults to ``cons.contract_symbols``
    :type vars_list: list
    :return: one row per requested variety, or None on a non-trading day
        var          variety code         string
        receipt      receipt count        int
        receipt_chg  change in receipts   int
        date         date                 string YYYYMMDD
    :rtype: pandas.DataFrame
    """
    # Normalise to a YYYYMMDD string on BOTH paths: the calendar lookup, the URL
    # template and the special-date check below all work on the string form.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None
    # This single date is special-cased to an empty frame.
    if date == '20090820':
        return pd.DataFrame()

    url = cons.CZCE_RECEIPT_URL_1 % date
    r = requests_link(url, encoding='utf-8')
    data = pd.read_html(r.text)[1]

    # Each variety section starts at a "品种:" row and ends at a "总计" row.
    first_column = [str(cell) for cell in data[0].tolist()]
    indexes = [x for x in data.index if '品种:' in first_column[x]]
    ends = [x for x in data.index if '总计' in first_column[x]]

    rows = []
    last_position = len(indexes) - 1
    for i, start in enumerate(indexes):
        if i != last_position:
            data_cut = data.loc[start:ends[i], :]
        else:
            # The last section runs to the end of the table.
            data_cut = data.loc[start:, :]
        data_cut = data_cut.ffill()

        title = data_cut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            var = chinese_to_english(re.sub(r'[A-Z]+', '', title[3:]))

        # CF sections carry one extra column, shifting the totals right by one.
        if var == 'CF':
            receipt = data_cut[6].tolist()[-1]
            receipt_chg = data_cut[7].tolist()[-1]
        else:
            receipt = data_cut[5].tolist()[-1]
            receipt_chg = data_cut[6].tolist()[-1]

        rows.append({
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date,
        })

    records = pd.DataFrame(rows) if rows else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_cffex_rank_table(date="20200427", vars_list=cons.contract_symbols):
    """
    Top-20 member position ranking details from the China Financial Futures Exchange.

    The exchange publishes rankings per variety as well as per contract.
    Data starts at 20100416.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: variety codes; only those listed in
        ``cons.market_exchange_symbols['cffex']`` are queried
    :return: dict mapping contract symbol -> pandas.DataFrame (as produced by
        ``_table_cut_cal``); ``{}`` before the start date or on a non-trading
        day; ``False`` when a request yields a falsy response.
        Expected columns per the original documentation:
        rank                      rank                              int
        vol_party_name            member ranked by volume           string
        vol                       member volume                     int
        vol_chg                   change in member volume           int
        long_party_name           member ranked by long position    string
        long_open_interest        member long position              int
        long_open_interest_chg    change in long position           int
        short_party_name          member ranked by short position   string
        short_open_interest       member short position             int
        short_open_interest_chg   change in short position          int
        symbol                    contract                          string
        var                       variety                           string
        date                      date                              string YYYYMMDD
    """
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['cffex']]
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2010, 4, 16):
        print("cffex数据源开始日期为20100416,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}

    big_dict = {}
    for var in vars_list:
        url = cons.CFFEX_VOL_RANK_URL % (date.strftime('%Y%m'), date.strftime('%d'), var)
        r = requests_link(url, encoding='gbk')
        if not r:
            return False
        if '网页错误' in r.text:
            continue

        # The payload layout changed on 20200316: older files carry a preamble
        # before the "交易日," header row, newer ones only a 4-character prefix.
        parts = r.text.split('\n交易日,')
        payload = parts[1] if len(parts) > 1 else parts[0][4:]
        table = pd.read_csv(StringIO(payload))
        table = table.dropna(how='any')
        table = table.applymap(lambda x: x.strip() if isinstance(x, str) else x)

        for symbol in set(table['合约']):
            table_cut = table[table['合约'] == symbol]
            table_cut.columns = ['symbol', 'rank'] + rank_columns
            table_cut = _table_cut_cal(pd.DataFrame(table_cut), symbol)
            big_dict[symbol] = table_cut.reset_index(drop=True)
    return big_dict
def _czce_df_read(url, skip_rows, encoding='utf-8', header=0):
    """
    Download a Zhengzhou Commodity Exchange page and parse its HTML tables.

    NOTE(review): another definition of ``_czce_df_read`` appears later in this
    file; if both live in the same module the later one shadows this one.

    :param url: page address, string
    :param skip_rows: number of leading rows to skip, int
    :param encoding: utf-8 or gbk or gb2312, forwarded to ``requests_link``
    :param header: row(s) to use as the column header, forwarded to ``pd.read_html``
    :return: the result of ``pd.read_html`` for the downloaded page
    """
    response = requests_link(url, encoding)
    # NOTE(review): the thousands separator is the two-character string ', ';
    # callers strip ',' from the cells themselves, so it is left untouched.
    read_options = {
        'match': '.+',
        'flavor': None,
        'header': header,
        'index_col': 0,
        'skiprows': skip_rows,
        'attrs': None,
        'parse_dates': False,
        'thousands': ', ',
        'encoding': "gbk",
        'decimal': '.',
        'converters': None,
        'na_values': None,
        'keep_default_na': True,
    }
    return pd.read_html(response.text, **read_options)
def get_shfe_receipt_2(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Shanghai Futures Exchange.

    Applies from 20140519 (inclusive) onwards.

    Parameters
    ------
        date: query date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
        vars_list: variety codes such as RB, AL; defaults to ``cons.contract_symbols``
    Return
    -------
        DataFrame (or None on a non-trading day; an empty DataFrame when the
        response is not valid JSON or has no columns):
            var          variety code         string
            receipt      receipt count        int
            receipt_chg  change in receipts   int
            date         date                 string YYYYMMDD
    """
    # Normalise to a YYYYMMDD string on BOTH paths: the calendar lookup and the
    # URL template both work on the string form.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None

    url = cons.SHFE_RECEIPT_URL_2 % date
    r = requests_link(url, encoding='utf-8')
    try:
        context = r.json()
    except Exception:
        # Best effort: any unusable response is reported as "no data".
        return pd.DataFrame()

    data = pd.DataFrame(context['o_cursor'])
    if len(data.columns) < 1:
        return pd.DataFrame()

    rows = []
    for var in set(data['VARNAME'].tolist()):
        data_cut = data[data['VARNAME'] == var]
        rows.append({
            # Drop non-word characters and Latin letters before mapping the name to a code.
            'var': chinese_to_english(re.sub(r"\W|[a-zA-Z]", "", var)),
            # The last row of each variety is used for both figures.
            'receipt': int(data_cut['WRTWGHTS'].tolist()[-1]),
            'receipt_chg': int(data_cut['WRTCHANGE'].tolist()[-1]),
            'date': date,
        })

    records = pd.DataFrame(rows) if rows else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_shfe_v_wap(date="20200416"):
    """
    Fetch the daily volume-weighted average prices from the Shanghai Futures Exchange.

    Parameters
    ------
        date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    Return
    -------
        DataFrame with the columns named by ``cons.SHFE_V_WAP_COLUMNS``; per the
        original documentation:
            symbol      contract code
            date        date
            time_range  v_wap window, either 09:00-10:15 or 09:00-15:00
            v_wap       volume-weighted average price
        or None (non-trading day, HTTP error, no data, or unexpected payload)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None

    url = cons.SHFE_V_WAP_URL % (day.strftime("%Y%m%d"))
    try:
        json_data = json.loads(
            requests_link(url, headers=cons.headers, encoding="utf-8").text
        )
    except requests.HTTPError as reason:
        # Compare the numeric status code (the response object itself never
        # equals an int); 404/403 are reported silently as "no data".
        status_code = getattr(reason.response, "status_code", None)
        if status_code not in (404, 403):
            print(url, reason)
        return None

    if len(json_data["o_currefprice"]) == 0:
        return None
    try:
        df = pd.DataFrame(json_data["o_currefprice"])
        df["INSTRUMENTID"] = df["INSTRUMENTID"].str.strip()
        # The result is discarded on purpose: this only validates that the
        # ":B1" column exists and is convertible, otherwise None is returned.
        df[":B1"].astype("int16")
        return df.rename(columns=cons.SHFE_V_WAP_COLUMNS)[
            list(cons.SHFE_V_WAP_COLUMNS.values())
        ]
    except Exception:
        # Best effort: an unexpected payload layout is reported as "no data".
        return None
def _czce_df_read(url, skip_rows, encoding='utf-8', header=0):
    """
    Download a Zhengzhou Commodity Exchange page and parse its HTML tables.

    The request is sent with a fixed set of browser-like headers (including a
    hard-coded cookie) addressed to www.czce.com.cn.

    :param url: page address, string
    :param skip_rows: number of leading rows to skip, int
    :param encoding: utf-8 or gbk or gb2312, forwarded to ``requests_link``
    :param header: row(s) to use as the column header, forwarded to ``pd.read_html``
    :return: the result of ``pd.read_html`` for the downloaded page
    """
    request_headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36",
        "Host": "www.czce.com.cn",
        "Cookie": "XquW6dFMPxV380S=CAaD3sMkdXv3fUoaJlICIEv0MVegGq5EoMyBcxkOjCgSjmpuovYFuTLtYFcxTZGw; XquW6dFMPxV380T=5QTTjUlA6f6WiDO7fMGmqNxHBWz.hKIc8lb_tc1o4nHrJM4nsXCAI9VHaKyV_jkHh4cIVvD25kGQAh.MvLL1SHRA20HCG9mVVHPhAzktNdPK3evjm0NYbTg2Gu_XGGtPhecxLvdFQ0.JlAxy_z0C15_KdO8kOI18i4K0rFERNPxjXq5qG1Gs.QiOm976wODY.pe8XCQtAsuLYJ.N4DpTgNfHJp04jhMl0SntHhr.jhh3dFjMXBx.JEHngXBzY6gQAhER7uSKAeSktruxFeuKlebse.vrPghHqWvJm4WPTEvDQ8q",
    }
    response = requests_link(url, encoding, headers=request_headers)
    # NOTE(review): the thousands separator is the two-character string ', ';
    # callers strip ',' from the cells themselves, so it is left untouched.
    read_options = {
        'match': '.+',
        'flavor': None,
        'header': header,
        'index_col': 0,
        'skiprows': skip_rows,
        'attrs': None,
        'parse_dates': False,
        'thousands': ', ',
        'encoding': "gbk",
        'decimal': '.',
        'converters': None,
        'na_values': None,
        'keep_default_na': True,
    }
    return pd.read_html(response.text, **read_options)
def get_dce_rank_table(date="20180404", vars_list=cons.contract_symbols):
    """
    Top-20 member position ranking details from the Dalian Commodity Exchange.

    The exchange publishes rankings per variety as well as per contract.
    Data starts at 20060104.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: variety codes; only those listed in
        ``cons.market_exchange_symbols['dce']`` are queried
    :return: dict mapping variety code -> pandas.DataFrame (as produced by
        ``_table_cut_cal``); ``{}`` before the start date or on a non-trading
        day; ``False`` when a response body is empty.
        Expected columns per the original documentation:
        rank                      rank                              int
        vol_party_name            member ranked by volume           string
        vol                       member volume                     int
        vol_chg                   change in member volume           int
        long_party_name           member ranked by long position    string
        long_open_interest        member long position              int
        long_open_interest_chg    change in long position           int
        short_party_name          member ranked by short position   string
        short_open_interest       member short position             int
        short_open_interest_chg   change in short position          int
        symbol                    contract                          string
        var                       variety                           string
        date                      date                              string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2006, 1, 4):
        print("大连商品交易所数据源开始日期为20060104,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}
    vars_list = [i for i in vars_list if i in cons.market_exchange_symbols['dce']]

    big_dict = {}
    for var in vars_list:
        # The URL template takes a zero-based month (month - 1).
        url = cons.DCE_VOL_RANK_URL % (var.lower(), var.lower(), date.year, date.month - 1, date.day)
        list_60_name = []
        list_60 = []
        list_60_chg = []
        rank = []
        texts = requests_link(url).content.splitlines()
        if not texts:
            return False
        if len(texts) > 30:
            for text in texts:
                string_list = text.decode("utf-8").split()
                # Read every field BEFORE appending anything so a short or
                # non-numeric line can never leave the four lists misaligned.
                try:
                    position = int(string_list[0])
                    name, amount, change = string_list[1], string_list[2], string_list[3]
                except (ValueError, IndexError):
                    continue
                if position <= 20:
                    list_60_name.append(name)
                    list_60.append(amount)
                    list_60_chg.append(change)
                    rank.append(string_list[0])

            # The collected rows are consumed as three consecutive groups of 20:
            # volume, long positions, short positions.
            table_cut = pd.DataFrame({
                'rank': rank[0:20],
                'vol_party_name': list_60_name[0:20],
                'vol': list_60[0:20],
                'vol_chg': list_60_chg[0:20],
                'long_party_name': list_60_name[20:40],
                'long_open_interest': list_60[20:40],
                'long_open_interest_chg': list_60_chg[20:40],
                'short_party_name': list_60_name[40:60],
                'short_open_interest': list_60[40:60],
                'short_open_interest_chg': list_60_chg[40:60],
            })
            table_cut = table_cut.applymap(lambda x: x.replace(',', ''))
            table_cut = _table_cut_cal(table_cut, var)
            big_dict[var] = table_cut.reset_index(drop=True)
    return big_dict
def get_czce_rank_table(date="20081015", vars_list=cons.contract_symbols):
    """
    Top-20 member position ranking details from the Zhengzhou Commodity Exchange.

    The exchange publishes rankings per variety as well as per contract.
    Data starts at 20050509. Four page layouts are handled, selected by date:
    up to 20100825, up to 20151111, before 20171228, and later.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: variety codes to keep (applied to the layouts after 20100825)
    :return: dict mapping contract/variety symbol -> pandas.DataFrame; ``{}``
        before the start date, on a non-trading day, or when the parsed table
        has fewer than 6 columns.
        Expected columns per the original documentation:
        rank                      rank                              int
        vol_party_name            member ranked by volume           string
        vol                       member volume                     int
        vol_chg                   change in member volume           int
        long_party_name           member ranked by long position    string
        long_open_interest        member long position              int
        long_open_interest_chg    change in long position           int
        short_party_name          member ranked by short position   string
        short_open_interest       member short position             int
        short_open_interest_chg   change in short position          int
        symbol                    contract                          string
        var                       variety                           string
        date                      date                              string YYYYMMDD
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2005, 5, 9):
        print("czce数据源开始日期为20050509,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}

    if date <= datetime.date(2010, 8, 25):
        url = cons.CZCE_VOL_RANK_URL_1 % (date.strftime('%Y%m%d'))
        r = requests.get(url)
        r.encoding = "utf-8"
        soup = BeautifulSoup(r.text, "lxml")
        data = _czce_df_read(url, skip_rows=0)

        # Section titles live in <b> tags; the fifth space-separated token is the symbol.
        symbols = []
        for link in soup.find_all('b'):
            strings = str(link).split(' ')
            if len(strings) > 5:
                try:
                    symbol = chinese_to_english(strings[4])
                except Exception:
                    # Not a known Chinese name: keep the raw token.
                    symbol = strings[4]
                if symbol == "SR905日期:":
                    symbol = "SR905"
                symbols.append(symbol)

        big_dict = {}
        for i, symbol in enumerate(symbols):
            # Table 0 is skipped; the i-th symbol pairs with table i + 1.
            table_cut = data[i + 1]
            table_cut.columns = rank_columns
            table_cut = table_cut.iloc[:-1, :]
            table_cut.loc[:, 'rank'] = table_cut.index
            table_cut.loc['合计', 'rank'] = 999
            table_cut.loc['合计', ['vol_party_name', 'long_party_name', 'short_party_name']] = None
            table_cut.loc[:, 'symbol'] = symbol
            table_cut.loc[:, 'variety'] = symbol_varieties(symbol)
            table_cut[intColumns] = table_cut[intColumns].fillna(0)
            table_cut[intColumns] = table_cut[intColumns].astype(str)
            table_cut[intColumns] = table_cut[intColumns].applymap(lambda x: x.replace(',', ''))
            table_cut = table_cut.applymap(lambda x: 0 if x == '-' else x)
            # Go through float first so strings such as "12.0" convert cleanly.
            table_cut[intColumns] = table_cut[intColumns].astype(float)
            table_cut[intColumns] = table_cut[intColumns].astype(int)
            big_dict[symbol] = table_cut.reset_index(drop=True)
        return big_dict
    elif date <= datetime.date(2015, 11, 11):
        # Format correction made on 20200311.
        url = cons.CZCE_VOL_RANK_URL_2 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0, header=None)[3:]
        big_df = pd.DataFrame()
        for item in data:
            big_df = pd.concat([big_df, item], axis=0, ignore_index=False)
        big_df.columns = big_df.iloc[0, :].tolist()
        data = big_df.iloc[1:, :]
    elif date < datetime.date(2017, 12, 28):
        # Format correction made on 20200311.
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0, header=0)[1]
    else:
        url = cons.CZCE_VOL_RANK_URL_3 % (date.year, date.strftime('%Y%m%d'))
        data = _czce_df_read(url, skip_rows=0)[0]

    if len(data.columns) < 6:
        return {}

    table = pd.DataFrame(data.iloc[:, :9])
    table.index.name = table.columns[0]
    table.columns = rank_columns
    table.loc[:, 'rank'] = table.index
    table[intColumns] = table[intColumns].astype(str)
    table[intColumns] = table[intColumns].applymap(lambda x: x.replace(',', ''))
    table = table.applymap(lambda x: 0 if x == '-' else x)

    # Section header rows mention "合约" or "品种"; a leading 0 stands for the
    # first section, whose title is held in the index name.
    indexes = [i for i in table.index if '合约' in i or '品种' in i]
    indexes.insert(0, 0)

    title_pattern = re.compile(r':(.*) ')
    rank_labels = {str(i) for i in range(21)}
    big_dict = {}
    for i in range(len(indexes)):
        if indexes[i] == 0:
            table_cut = table.loc[:indexes[i + 1], :]
            string = table_cut.index.name
        elif i < len(indexes) - 1:
            table_cut = table.loc[indexes[i]:indexes[i + 1], :]
            string = table_cut.index[0]
        else:
            table_cut = table.loc[indexes[i]:, :]
            string = table_cut.index[0]

        if 'PTA' in string:
            symbol = 'TA'
        else:
            try:
                symbol = chinese_to_english(find_chinese(title_pattern.findall(string)[0]))
            except Exception:
                # No Chinese name to translate: use the captured text as-is.
                symbol = title_pattern.findall(string)[0]

        var = symbol_varieties(symbol)
        if var in vars_list:
            table_cut = table_cut.dropna(how='any').iloc[1:, :]
            # Keep only rows whose index label is a rank "0".."20".
            table_cut = table_cut.loc[[x for x in table_cut.index if x in rank_labels], :]
            table_cut = _table_cut_cal(table_cut, symbol)
            big_dict[symbol.strip()] = table_cut.reset_index(drop=True)
    return big_dict
def get_shfe_rank_table(date=None, vars_list=cons.contract_symbols):
    """
    Top-20 member position ranking details from the Shanghai Futures Exchange.

    The exchange only publishes per-contract rankings within each variety, not
    an overall ranking per variety. Data starts at 20020107.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :param vars_list: variety codes such as RB, AL; defaults to ``cons.contract_symbols``
    :return: dict mapping contract symbol -> pandas.DataFrame; ``{}`` before
        the start date, on a non-trading day, when the response is not valid
        JSON, or when the payload has fewer than 3 columns.
        Columns after renaming:
        rank                      rank                              int
        vol_party_name            member ranked by volume           string
        vol                       member volume                     int
        vol_chg                   change in member volume           int
        long_party_name           member ranked by long position    string
        long_open_interest        member long position              int
        long_open_interest_chg    change in long position           int
        short_party_name          member ranked by short position   string
        short_open_interest       member short position             int
        short_open_interest_chg   change in short position          int
        symbol                    contract                          string
        variety                   variety                           string
    """
    date = cons.convert_date(date) if date is not None else datetime.date.today()
    if date < datetime.date(2002, 1, 7):
        print("shfe数据源开始日期为20020107,跳过")
        return {}
    if date.strftime('%Y%m%d') not in calendar:
        warnings.warn('%s非交易日' % date.strftime('%Y%m%d'))
        return {}

    url = cons.SHFE_VOL_RANK_URL % (date.strftime('%Y%m%d'))
    r = requests_link(url, 'utf-8')
    try:
        context = json.loads(r.text)
    except Exception:
        # Best effort: any unusable response is reported as "no data".
        return {}

    df = pd.DataFrame(context['o_cursor'])
    df = df.rename(columns={
        'CJ1': 'vol',
        'CJ1_CHG': 'vol_chg',
        'CJ2': 'long_open_interest',
        'CJ2_CHG': 'long_open_interest_chg',
        'CJ3': 'short_open_interest',
        'CJ3_CHG': 'short_open_interest_chg',
        'PARTICIPANTABBR1': 'vol_party_name',
        'PARTICIPANTABBR2': 'long_party_name',
        'PARTICIPANTABBR3': 'short_party_name',
        'PRODUCTNAME': 'product1',
        'RANK': 'rank',
        'INSTRUMENTID': 'symbol',
        'PRODUCTSORTNO': 'product2',
    })
    if len(df.columns) < 3:
        return {}

    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    df = df.applymap(lambda x: None if x == '' else x)
    df['variety'] = df['symbol'].apply(symbol_varieties)
    df = df[df['rank'] > 0]
    # Drop helper columns when present; missing ones are ignored.
    df = df.drop(
        columns=['PARTICIPANTID1', 'PARTICIPANTID2', 'PARTICIPANTID3', 'product1', 'product2'],
        errors='ignore',
    )

    available = set(df['variety'].tolist())
    get_vars = [var for var in vars_list if var in available]
    big_dict = {}
    for var in get_vars:
        df_var = df[df['variety'] == var]
        for symbol in set(df_var['symbol']):
            df_symbol = df_var[df_var['symbol'] == symbol]
            big_dict[symbol] = df_symbol.reset_index(drop=True)
    return big_dict
def get_dce_daily(date="20200416", symbol_type="futures", retries=0):
    """
    Daily trading data from the Dalian Commodity Exchange.

    Parameters
    ------
        date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
        symbol_type: data type, either 'futures' or 'option'
        retries: int, current retry count; the fetch is abandoned once it exceeds 3
    Return
    -------
        DataFrame
            futures data, columns given by ``cons.OUTPUT_COLUMNS``; per the
            original documentation:
                symbol         contract code
                date           date
                open           open price
                high           high price
                low            low price
                close          close price
                volume         volume
                open_interest  open interest
                turnover       turnover
                settle         settlement price
                pre_settle     previous settlement price
                variety        variety code
        or
        DataFrame
            option data, columns given by ``cons.OPTION_OUTPUT_COLUMNS``; per
            the original documentation:
                symbol, date, open, high, low, close, pre_settle, settle,
                delta, volume, open_interest, oi_change, turnover,
                implied_volatility, exercise_volume, variety
        or None (non-trading day, invalid symbol_type, retries exhausted,
        HTTP error, or no trading data for the date)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None
    if retries > 3:
        print("maximum retires for DCE market data: ", day.strftime("%Y%m%d"))
        return

    # The query carries a zero-based month alongside the full date.
    query = {
        "currDate": day.strftime("%Y%m%d"),
        "year": day.strftime("%Y"),
        "month": str(int(day.strftime("%m")) - 1),
        "day": day.strftime("%d"),
    }
    if symbol_type == "futures":
        listed_columns = cons.DCE_COLUMNS
        output_columns = cons.OUTPUT_COLUMNS
    elif symbol_type == "option":
        query["dayQuotes.trade_type"] = "1"
        listed_columns = cons.DCE_OPTION_COLUMNS
        output_columns = cons.OPTION_OUTPUT_COLUMNS
    else:
        print(
            "invalid symbol_type :"
            + symbol_type
            + ', should be one of "futures" or "option"'
        )
        return
    url = cons.DCE_DAILY_URL + "?" + urllib.parse.urlencode(query)

    def _retry():
        # Retry the SAME request: symbol_type must be carried along, otherwise
        # an option retry would silently fall back to futures data.
        return get_dce_daily(day, symbol_type=symbol_type, retries=retries + 1)

    def _fill_fields(row_dict, cells):
        # Data cells start at the third <td>; the first two identify the row.
        for i, field in enumerate(listed_columns):
            field_content = cells[i + 2].text.strip()
            # NOTE(review): any cell containing "-" becomes 0, which would also
            # zero negative numbers — confirm against the page format.
            if "-" in field_content:
                row_dict[field] = 0
            elif field in ["volume", "open_interest"]:
                row_dict[field] = int(field_content.replace(",", ""))
            else:
                row_dict[field] = float(field_content.replace(",", ""))
        return row_dict

    try:
        response = requests_link(url, method="post", headers=cons.headers).text
    except requests.exceptions.ContentDecodingError:
        return _retry()
    except requests.exceptions.HTTPError as reason:
        # Compare the numeric status code; the response object never equals an int.
        status_code = getattr(reason.response, "status_code", None)
        if status_code == 504:
            return _retry()
        elif status_code != 404:
            print(cons.DCE_DAILY_URL, reason)
        return

    if "错误:您所请求的网址(URL)无法获取" in response:
        return _retry()
    elif "暂无数据" in response:
        return

    data = BeautifulSoup(response, "html.parser").find_all("tr")
    if len(data) == 0:
        return

    dict_data = list()
    implied_data = list()
    for i_data in data[1:]:
        # Skip subtotal / total rows.
        if "小计" in i_data.text or "总计" in i_data.text:
            continue
        x = i_data.find_all("td")
        if symbol_type == "futures":
            row_dict = {"variety": cons.DCE_MAP[x[0].text.strip()]}
            row_dict["symbol"] = row_dict["variety"] + x[1].text.strip()
            dict_data.append(_fill_fields(row_dict, x))
        elif len(x) == 16:
            # Option quote row.
            m = cons.FUTURES_SYMBOL_PATTERN.match(x[1].text.strip())
            if not m:
                continue
            row_dict = {
                "symbol": x[1].text.strip(),
                "variety": m.group(1).upper(),
                "contract_id": m.group(0),
            }
            dict_data.append(_fill_fields(row_dict, x))
        elif len(x) == 2:
            # Implied-volatility row: (contract id, value).
            implied_data.append(
                {
                    "contract_id": x[0].text.strip(),
                    "implied_volatility": float(x[1].text.strip()),
                }
            )

    df = pd.DataFrame(dict_data)
    df["date"] = day.strftime("%Y%m%d")
    if symbol_type == "futures":
        return df[output_columns]
    # Naming the columns keeps the merge key present even when the page
    # carried no implied-volatility rows.
    implied_df = pd.DataFrame(
        implied_data, columns=["contract_id", "implied_volatility"]
    )
    return pd.merge(
        df,
        implied_df,
        on="contract_id",
        how="left",
        indicator=False,
    )[output_columns]
def get_shfe_daily(date="20200416"):
    """
    Daily price/volume data from the Shanghai Futures Exchange.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :type date: str or datetime.date
    :return: DataFrame with the columns given by ``cons.OUTPUT_COLUMNS``; per
        the original documentation:
            symbol         contract code
            date           date
            open           open price
            high           high price
            low            low price
            close          close price
            volume         volume
            open_interest  open interest
            turnover       turnover
            settle         settlement price
            pre_settle     previous settlement price
            variety        variety code
        or None (non-trading day, HTTP error, or no trading data)
    :rtype: pandas.DataFrame or None
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None

    url = cons.SHFE_DAILY_URL % (day.strftime("%Y%m%d"))
    try:
        json_data = json.loads(
            requests_link(url, headers=cons.shfe_headers).text
        )
    except requests.HTTPError as reason:
        # Compare the numeric status code; the response object never equals an
        # int, so a 404 used to be logged like any other failure.
        if getattr(reason.response, "status_code", None) != 404:
            print(url, reason)
        return

    if len(json_data["o_curinstrument"]) == 0:
        return

    # Drop subtotal / total rows and rows without a delivery month.
    df = pd.DataFrame(
        [
            row
            for row in json_data["o_curinstrument"]
            if row["DELIVERYMONTH"] not in ("小计", "合计", "")
        ]
    )
    # PRODUCTID minus its last 6 characters, upper-cased, is used as the variety code.
    df["variety"] = df.PRODUCTID.str.slice(0, -6).str.upper()
    df["symbol"] = df["variety"] + df["DELIVERYMONTH"]
    df["date"] = day.strftime("%Y%m%d")

    v_wap_df = get_shfe_v_wap(day)
    if v_wap_df is not None:
        # Turnover from the full-session volume-weighted average price.
        df = pd.merge(
            df,
            v_wap_df[v_wap_df.time_range == "9:00-15:00"],
            on=["date", "symbol"],
            how="left",
        )
        df["turnover"] = df.v_wap * df.VOLUME
    else:
        # Fallback: approximate turnover with the settlement price.
        df["VOLUME"] = df["VOLUME"].apply(lambda x: 0 if x == "" else x)
        df["turnover"] = df["VOLUME"] * df["SETTLEMENTPRICE"]

    df.rename(columns=cons.SHFE_COLUMNS, inplace=True)
    return df[cons.OUTPUT_COLUMNS]
def get_cffex_daily(date="20200416"):
    """
    Daily trading data from the China Financial Futures Exchange.

    :param date: date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
    :return: pandas.DataFrame with the columns given by ``cons.OUTPUT_COLUMNS``;
        per the original documentation:
            symbol         contract code
            date           date
            open           open price
            high           high price
            low            low price
            close          close price
            volume         volume
            open_interest  open interest
            turnover       turnover
            settle         settlement price
            pre_settle     previous settlement price
            variety        variety code
        or None (non-trading day, HTTP error, error page, or unexpected payload)
    """
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    if day.strftime("%Y%m%d") not in calendar:
        warnings.warn("%s非交易日" % day.strftime("%Y%m%d"))
        return None

    # Render the URL once so the request and the error message always agree.
    url = cons.CFFEX_DAILY_URL.format(
        day.strftime("%Y%m"), day.strftime("%d"), day.strftime("%Y%m%d")
    )
    try:
        html = requests_link(url, encoding="gbk", headers=cons.headers).text
    except requests.exceptions.HTTPError as reason:
        # Compare the numeric status code; the response object never equals an int.
        if getattr(reason.response, "status_code", None) != 404:
            print(url, reason)
        return

    if html.find("网页错误") >= 0:
        return

    # The last two split segments are dropped, as are lines starting with "小"
    # (subtotal rows); spaces are removed before splitting on commas.
    rows = [
        line.replace(" ", "").split(",")
        for line in html.split("\n")[:-2]
        if not line.startswith("小")
    ]
    if not rows or rows[0][0] != "合约代码":
        return

    dict_data = list()
    day_const = day.strftime("%Y%m%d")
    for row in rows[1:]:
        m = cons.FUTURES_SYMBOL_PATTERN.match(row[0])
        if not m:
            continue
        row_dict = {"date": day_const, "symbol": row[0], "variety": m.group(1)}
        for i, field in enumerate(cons.CFFEX_COLUMNS):
            cell = row[i + 1]
            if cell == "":
                row_dict[field] = 0.0
            elif field in ["volume", "open_interest", "oi_chg"]:
                row_dict[field] = int(cell)
            else:
                try:
                    row_dict[field] = float(cell)
                except ValueError:
                    # Non-numeric cell: leave the field unset.
                    pass
        # Previous settlement is derived as close minus change1.
        row_dict["pre_settle"] = row_dict["close"] - row_dict["change1"]
        dict_data.append(row_dict)
    return pd.DataFrame(dict_data)[cons.OUTPUT_COLUMNS]
def get_czce_receipt_3(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Fetch registered warehouse-receipt data from the Zhengzhou Commodity Exchange.

    Applies from 20151112 onwards.

    Parameters
    ------
        date: query date, format YYYY-MM-DD or YYYYMMDD or a datetime.date object; today when None
        vars_list: variety codes such as CF, TA; defaults to ``cons.contract_symbols``
    Return
    -------
        DataFrame (or None on a non-trading day; an empty DataFrame when the
        page holds fewer than 4 tables):
            var          variety code         string
            receipt      receipt count        int
            receipt_chg  change in receipts   int
            date         date                 string YYYYMMDD
    """
    # Normalise to a YYYYMMDD string on BOTH paths: the calendar lookup, the
    # URL template and the date comparison below all work on the string form.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')
    if date not in calendar:
        warnings.warn('%s非交易日' % date)
        return None

    url = cons.CZCE_RECEIPT_URL_3 % (date[:4], date)
    r = requests_link(url, encoding='utf-8')
    r.encoding = 'utf-8'
    data = pd.read_html(r.text, encoding='gb2312')
    if len(data) < 4:
        return pd.DataFrame()
    # Pages up to 20171227 carry one extra leading table.
    if int(date) <= 20171227:
        data = data[1:]

    rows = []
    for data_cut in data:
        # Only tables with more than 3 columns and more than 7 rows are receipt sections.
        if not (len(data_cut.columns) > 3 and len(data_cut.index) > 7):
            continue

        # Cut the table just before the first footnote ("注:") row, if any.
        first_column = data_cut[0].tolist()
        last_indexes = [x for x in data_cut.index if '注:' in str(first_column[x])]
        if len(last_indexes) > 0:
            last_index = last_indexes[0] - 1
            data_cut = data_cut.loc[:last_index, :]

        title = data_cut[0].tolist()[0]
        if 'PTA' in title:
            var = 'TA'
        else:
            string = title.split(' ')[0][3:]
            if len(string) > 7:
                continue
            var = chinese_to_english(re.sub('[A-Z]+', '', string))

        # Row 1 holds the column captions.
        data_cut.columns = data_cut.loc[1, :]
        data_cut = data_cut.ffill()
        try:
            receipt = data_cut.loc[:, '仓单数量'].tolist()[-1]
        except KeyError:
            # Some sections label the column as bonded receipts instead.
            receipt = data_cut.loc[:, '仓单数量(保税)'].tolist()[-1]
        receipt_chg = data_cut.loc[:, '当日增减'].tolist()[-1]

        rows.append({
            'var': var,
            'receipt': int(receipt),
            'receipt_chg': int(receipt_chg),
            'date': date,
        })

    records = pd.DataFrame(rows) if rows else pd.DataFrame()
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)