def futures_spot_price_previous(date: str = "20110110") -> pd.DataFrame:
    """
    Fetch the spot-price / main-contract basis table for one day from 100ppi.com.

    TODO: rework this interface and check whether historical data can still be fetched.

    :param date: day to query, passed through ``cons.convert_date``; ``None`` means today
    :type date: str
    :return: frame with the commodity name, spot price, main contract code and
        price, main contract basis (absolute and %), and the 180-day
        max / min / mean of the main basis; ``None`` (after a warning) when the
        day is not listed in ``calendar``
    :rtype: pandas.DataFrame
    :raises Exception: when the requested day is before 2011-01-04
    """
    if date is None:
        date = datetime.date.today()
    else:
        date = cons.convert_date(date)

    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后")

    day_key = date.strftime("%Y%m%d")
    if day_key not in calendar:
        warnings.warn(f"{day_key}非交易日")
        return None

    page_url = date.strftime('http://www.100ppi.com/sf2/day-%Y-%m-%d.html')
    tables = pandas_read_html_link(page_url)
    raw_table = tables[1]

    # Column names are derived from the raw table by the project helper.
    column_names = _join_head(raw_table)

    # Keep only the rows whose column 4 ends with a percent sign.
    percent_rows = raw_table[4].str.endswith('%')
    spot = raw_table[percent_rows]
    spot.columns = column_names

    # Tables 2 .. second-to-last are stacked to form the basis columns.
    basis_part = pd.concat(tables[2:-1])
    basis_part.columns = ['主力合约基差', '主力合约基差(%)']
    basis_part['商品'] = spot['商品'].tolist()

    price_cols = ["商品", "现货价格", "主力合约代码", "主力合约价格"]
    history_cols = ["商品", "180日内主力基差最高", "180日内主力基差最低", "180日内主力基差平均"]
    merged = pd.merge(spot[price_cols], basis_part)
    merged = pd.merge(merged, spot[history_cols])
    return merged
def futures_spot_price(date: str = "20200110", vars_list=cons.contract_symbols):
    """
    Spot prices and the corresponding basis data of commodities for one trading day.

    The dated page is tried first, then the "latest" page. A page is accepted
    only when the date digits found in its header cell match the requested day.

    :param date: day to query, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; ``None`` means today
    :param vars_list: list of contract symbols such as RB, AL; defaults to all
        symbols in ``cons.contract_symbols``
    :return: pandas.DataFrame restricted to the symbols of ``vars_list`` that
        are present in the fetched data. The rows come from
        ``_check_information`` (not visible here); the original author
        documents these columns:
            var              commodity symbol                      string
            sp               spot price                            float
            near_symbol      nearest-delivery contract             string
            near_price       nearest-delivery settlement price     float
            dom_symbol       dominant contract                     string
            dom_price        dominant contract settlement price    float
            near_basis       nearest contract basis vs. spot       float
            dom_basis        dominant contract basis vs. spot      float
            near_basis_rate  nearest contract basis rate vs. spot  float
            dom_basis_rate   dominant contract basis rate vs. spot float
            date             date                                  string YYYYMMDD
        Returns ``None`` (after a warning) when the day is not in ``calendar``,
        and ``False`` after five failed fetch attempts.
    :raises Exception: when the requested day is before 2011-01-04
    """
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    if date < datetime.date(2011, 1, 4):
        raise Exception("数据源开始日期为 20110104, 请将获取数据时间点设置在 20110104 后")

    # Both string forms of the day are loop-invariant, so compute them once.
    day_key = date.strftime("%Y%m%d")
    day_text = date.strftime("%Y-%m-%d")
    if day_key not in calendar:
        warnings.warn(f"{day_key}非交易日")
        return None

    u1 = cons.SYS_SPOT_PRICE_LATEST_URL
    u2 = cons.SYS_SPOT_PRICE_URL.format(day_text)
    attempt = 1
    while True:
        for url in [u2, u1]:
            try:
                r = pandas_read_html_link(url)
                string = r[0].loc[1, 1]
                # Strip everything but digits; characters 3..10 are compared
                # against the requested day.
                news = "".join(re.findall(r"[0-9]", string))
                if news[3:11] == day_key:
                    records = _check_information(r[1], date)
                    records.index = records["symbol"]
                    var_list_in_market = [
                        var for var in vars_list if var in records.index
                    ]
                    temp_df = records.loc[var_list_in_market, :]
                    temp_df.reset_index(drop=True, inplace=True)
                    return temp_df
                # NOTE(review): a date mismatch only sleeps and is not counted
                # as an attempt, so this loop does not terminate while both
                # pages keep showing another day. Left unchanged on purpose.
                time.sleep(3)
            except Exception:
                # `Exception` rather than a bare `except` so that Ctrl-C and
                # SystemExit still abort the retry loop.
                print(
                    f"{day_text}日生意社数据连接失败,第{attempt}次尝试,最多5次")
                attempt += 1
                if attempt > 5:
                    print(
                        f"{day_text}日生意社数据连接失败, 如果当前交易日是 2018-09-12, 由于生意社源数据缺失, 无法访问, 否则为重复访问已超过5次,您的地址被网站墙了,请保存好返回数据,稍后从该日期起重试"
                    )
                    return False
def get_shfe_receipt_1(date: str = None, vars_list: List = cons.contract_symbols):
    """
    Scrape registered warehouse-receipt data from the Shanghai Futures Exchange.

    Per the original author: applicable from 20081006 through 20140518
    (inclusive). On 20100126 and 20101029 the exchange page format is garbled,
    so the DataFrames embedded in this module are returned directly. Exchange
    data for 20100416 and 20130821 is missing.

    :param date: day to query, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; ``None`` means today
    :param vars_list: list of contract symbols such as RB, AL; defaults to all
        symbols in ``cons.contract_symbols``
    :return: pd.DataFrame with columns
            var          commodity symbol       string
            receipt      receipt count          int
            receipt_chg  change in receipts     int
            date         date                   string YYYYMMDD
        ``None`` (after a warning) when the day is not in ``calendar`` or is
        one of the two days with missing exchange data.
    """
    # Resolve to a date object first and format afterwards, so `date` is a
    # YYYYMMDD string on both the explicit and the default (today) path.
    day = cons.convert_date(date) if date is not None else datetime.date.today()
    date = day.strftime('%Y%m%d')

    if date not in calendar:
        warnings.warn(f"{date}非交易日")
        return None
    if date == '20100126':
        shfe_20100126['date'] = date
        return shfe_20100126
    if date == '20101029':
        shfe_20101029['date'] = date
        return shfe_20101029
    if date in ['20100416', '20130821']:
        return warnings.warn('20100416、20130821日期交易所数据丢失')

    var_list = [
        '天然橡胶', '沥青仓库', '沥青厂库', '热轧卷板', '燃料油', '白银', '线材', '螺纹钢', '铅', '铜', '铝',
        '锌', '黄金', '锡', '镍'
    ]
    url = cons.SHFE_RECEIPT_URL_1 % date
    data = pandas_read_html_link(url)[0]

    # Build the first-column list once instead of once per row.
    # Index labels are used as list positions, as in the original code.
    first_col = data[0].tolist()
    indexes = [x for x in data.index if first_col[x] in var_list]
    # The row just before the first cell containing '注' closes the last section.
    last_index = [x for x in data.index if '注' in str(first_col[x])][0] - 1

    rows = []
    for pos, start in enumerate(indexes):
        if pos != len(indexes) - 1:
            stop = indexes[pos + 1] - 1
        else:
            stop = last_index
        # Forward-fill so blank cells in a section's final row inherit the
        # values from the row above before the totals are read.
        data_cut = data.loc[start:stop, :].ffill()
        rows.append({
            'var': chinese_to_english(data_cut[0].tolist()[0]),
            'receipt': int(data_cut[2].tolist()[-1]),
            'receipt_chg': int(data_cut[3].tolist()[-1]),
            'date': date,
        })

    # Build the frame in one go; DataFrame.append no longer exists in pandas 2.x.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in vars_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)
def get_dce_receipt(date: str = None, symbol_list: List = cons.contract_symbols):
    """
    Registered warehouse-receipt data from the Dalian Commodity Exchange.

    Per the original author: data starts at 20060106 and was updated every
    Friday; from 20090407 on it is updated every trading day.

    :param date: day to query, format YYYY-MM-DD or YYYYMMDD or a datetime.date
        object; ``None`` means today
    :param symbol_list: list of contract symbols such as RB, AL; defaults to all
        symbols in ``cons.contract_symbols``. Must be a ``list``.
    :return: pd.DataFrame with columns
            var          commodity symbol       string
            receipt      receipt count          int
            receipt_chg  change in receipts     int
            date         date                   string YYYYMMDD
        ``None`` (after a warning) when ``symbol_list`` is not a list or the
        day is not in ``calendar``.
    """
    if not isinstance(symbol_list, list):
        return warnings.warn("symbol_list: 必须是列表")
    date = cons.convert_date(
        date) if date is not None else datetime.date.today()
    day_key = date.strftime('%Y%m%d')
    if day_key not in calendar:
        warnings.warn(f"{day_key}非交易日")
        return None

    payload = {
        "weekQuotes.variety": "all",
        "year": date.year,
        "month": date.month - 1,  # the site expects months one lower than the calendar month (site quirk)
        "day": date.day
    }
    data = pandas_read_html_link(cons.DCE_RECEIPT_URL,
                                 method="post",
                                 data=payload,
                                 headers=cons.dce_headers)[0]

    # Keep only the per-variety subtotal rows: those whose name ends with '小计'.
    rows = []
    for x in data.to_dict(orient='records'):
        name = x['品种']
        if isinstance(name, str) and name[-2:] == '小计':
            rows.append({
                'var': chinese_to_english(name[:-2]),
                'receipt': int(x['今日仓单量']),
                'receipt_chg': int(x['增减']),
                'date': day_key,
            })

    # Build the frame in one go; DataFrame.append no longer exists in pandas 2.x.
    records = pd.DataFrame(rows)
    if len(records.index) != 0:
        records.index = records['var']
        vars_in_market = [i for i in symbol_list if i in records.index]
        records = records.loc[vars_in_market, :]
    return records.reset_index(drop=True)