def get_receipt_from_shfe(date_str):
    """
    Download the SHFE ``pm<date_str>.dat`` file and extract its RANK > 20 rows.

    The response is parsed as JSON, the ``o_cursor`` records are loaded into a
    DataFrame, and only the rows whose ``RANK`` exceeds 20 are kept, reduced
    to the PRODUCTNAME, INSTRUMENTID, CJ2 and CJ3 fields.

    :param date_str: date string inserted into the URL and used as the index
        of the result, e.g. '20130315'
    :return: DataFrame with the columns 品种, 合约代码, 持买单量, 持卖单量
        whose index repeats ``pd.to_datetime(date_str)`` for every row;
        ``[]`` when the response cannot be parsed as JSON
    """
    url_template = "http://www.shfe.com.cn/data/dailydata/kx/pm{}.dat"
    url = url_template.format(date_str)
    text = get_html_text(url)
    try:
        json_obj = json.loads(text)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers a response
        # that is not text at all (e.g. None).
        log.warning('Get {} receipt data fail. Url: {}'.format(date_str, url))
        return []

    df = pd.DataFrame(json_obj['o_cursor'])
    source_columns = ['PRODUCTNAME', 'INSTRUMENTID', 'CJ2', 'CJ3']
    # NOTE(review): rows with RANK > 20 are presumably the per-contract
    # summary rows rather than individual ranked members - confirm.
    x = df.loc[df['RANK'] > 20, source_columns]
    return pd.DataFrame(x.values,
                        index=[pd.to_datetime(date_str)] * len(x),
                        columns=['品种', '合约代码', '持买单量', '持卖单量'])
def get_inventory_from_shfe(date_str):
    """
    Fetch the SHFE weekly inventory report of designated delivery warehouses.

    Only the records whose ``WHABBRNAME`` matches ``<name>$$Total`` are kept.

    :param date_str: date string inserted into the URL
    :return: ``(data, text)`` on success, where ``data`` is a list of rows
        ``[tradingday, variety, PRESPOTWGHTS, PREWRTWGHTS, SPOTWGHTS,
        WRTWGHTS, SPOTCHANGE, WRTCHANGE, PREWHSTOCKS, WHSTOCKS,
        WHSTOCKCHANGE]`` and ``text`` is the raw response; a bare ``[]`` when
        the response cannot be parsed as JSON.
        NOTE(review): the two return shapes differ - callers that unpack the
        result must handle the ``[]`` case.
    """
    url_template = "http://www.shfe.com.cn/data/dailydata/{}weeklystock.dat"
    url = url_template.format(date_str)
    text = get_html_text(url, encoding='unicode')
    try:
        json_obj = json.loads(text)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers a response
        # that is not text at all (e.g. None).
        log.warning('Get {} inventory data fail. Url: {}'.format(
            date_str, url))
        return []

    tradingday = json_obj['o_tradingday']
    total_row = re.compile(r'\S+?\$\$Total$')
    value_keys = ('PRESPOTWGHTS', 'PREWRTWGHTS', 'SPOTWGHTS', 'WRTWGHTS',
                  'SPOTCHANGE', 'WRTCHANGE', 'PREWHSTOCKS', 'WHSTOCKS',
                  'WHSTOCKCHANGE')
    data = []
    for row in json_obj['o_cursor']:
        if not total_row.match(row['WHABBRNAME']):
            continue
        # VARNAME carries a '$$'-separated suffix; keep the leading part only.
        variety = row['VARNAME'].split('$$')[0]
        data.append([tradingday, variety] + [row[key] for key in value_keys])
    return data, text
def download_czce_receipt_by_date(date: datetime):
    """
    Fetch the registered warehouse receipt data of the Zhengzhou Commodity
    Exchange (CZCE) for one day.

    URL layouts noted by the original author:

    20080215, 20080222, 20080229 are weekly reports.
    20080303 (inclusive) to 20100824 (inclusive); 20090820 has no data
        'http://www.czce.com.cn/cn/exchange/jyxx/sheet/sheet20100824.html'
    20100825 (inclusive) to 20150930 (inclusive)
        'http://www.czce.com.cn/cn/exchange/%s/datawhsheet/20150930.htm'
        http://www.czce.com.cn/cn/exchange/2015/datawhsheet/20150930.txt
        http://www.czce.com.cn/cn/exchange/2015/datawhsheet/20150930.xls
    20151008 (inclusive) until now
        'http://www.czce.com.cn/cn/DFSStaticFiles/Future/2015/20151008/FutureDataWhsheet.htm'
        'http://www.czce.com.cn/cn/DFSStaticFiles/Future/2015/20151112/FutureDataWhsheet.txt'
        'http://www.czce.com.cn/cn/DFSStaticFiles/Future/2015/20151008/FutureDataWhsheet.xls'

    :param date: day to download; must not be in the future
    :return: DataFrame concatenating the parsed HTML tables from position
        ``index`` onward; an empty DataFrame when the date is outside the
        supported ranges or the page cannot be fetched/parsed
    """
    assert date <= datetime.today()
    day = date.strftime('%Y%m%d')
    if date > datetime(2015, 10, 7):
        url_template = 'http://www.czce.com.cn/cn/DFSStaticFiles/Future/{}/{}/FutureDataWhsheet.htm'
        url = url_template.format(date.year, day)
        index = 1
    elif date > datetime(2010, 8, 24):
        # Receipt data starts at the 4th table; no contract multiplier here.
        url_template = 'http://www.czce.com.cn/cn/exchange/{}/datawhsheet/{}.htm'
        url = url_template.format(date.year, day)
        index = 3
    elif date > datetime(2008, 3, 2) or (date > datetime(2008, 2, 14)
                                         and date.weekday() == 4):
        # Original note: the page holds only one table.
        # NOTE(review): index 1 skips the first parsed table - confirm that
        # read_html really yields more than one table for these pages.
        url_template = 'http://www.czce.com.cn/cn/exchange/jyxx/sheet/sheet{}.html'
        url = url_template.format(day)
        index = 1
    else:
        log.info("Czce has no {} receipt data!".format(day))
        return pd.DataFrame()

    try:
        text_data = get_html_text(url)
        df = pd.read_html(text_data, encoding='gb2312')
        data = pd.concat(df[index:])
    except Exception:
        # Best effort: any fetch/parse failure is reported as "no data", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        log.warning("{} czce receipt data is not exist!".format(day))
        data = pd.DataFrame()

    return data
def download_cffex_hq_by_date(date: datetime, category=0):
    """
    Download the daily trading data file of the China Financial Futures
    Exchange (CFFEX) for one day.

    Original author's notes: datetime(2010, 4, 30);
    http://www.cffex.com.cn/sj/hqsj/rtj/201903/13/20190313_1.csv
    NOTE(review): the URL built below uses the ``/fzjy/mrhq/`` path, not the
    one in the note above - confirm which is current.

    There are no options here; ``category`` is a reserved parameter that is
    validated but otherwise unused.

    :param date: day to download; must not be in the future
    :param category: quote type, 0 futures or 1 options
    :return: whatever ``get_html_text`` returns for the CSV URL
    """
    assert date <= datetime.today()
    assert category in (0, 1)

    month_part = date.strftime('%Y%m')
    day_part = date.strftime('%d')
    full_stamp = date.strftime('%Y%m%d')
    url = 'http://www.cffex.com.cn/fzjy/mrhq/{}/{}/{}_1.csv'.format(
        month_part, day_part, full_stamp)
    return get_html_text(url)
def download_shfe_receipt_by_date(date: datetime):
    """
    Fetch the registered warehouse receipt data of the Shanghai Futures
    Exchange (SHFE) for one day.

    URL layouts noted by the original author:

    20140519 (inclusive) until now
        http://www.shfe.com.cn/data/dailydata/20190520dailystock.dat
    20081006 to 20140516 (inclusive)
        http://www.shfe.com.cn/data/dailydata/20140516dailystock.html
        http://www.shfe.com.cn/txt.jsp
    20100126 and 20101029 are English-language pages.
    20100416 has a different format.
    20130821 data is missing on the exchange side.

    NOTE(review): the HTML branch starts after 2008-01-05 in code, while the
    note above says 20081006 - confirm which boundary is intended.

    :param date: day to download; must not be in the future
    :return: DataFrame built from the JSON ``o_cursor`` records (``.dat``
        era) or from the first HTML table (``.html`` era); an empty DataFrame
        when the date is too early or the data cannot be fetched/parsed
    """
    assert date <= datetime.today()
    day = date.strftime('%Y%m%d')
    if date > datetime(2014, 5, 18):
        url_template = "http://www.shfe.com.cn/data/dailydata/{}dailystock.dat"
        url = url_template.format(day)
        try:
            text_data = get_html_text(url)
            json_data = json.loads(text_data)
            data = pd.DataFrame(json_data['o_cursor'])
        except Exception:
            # Best effort: any fetch/parse failure is reported as "no data",
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            log.warning("{} shfe receipt data is not exist!".format(day))
            data = pd.DataFrame()
    elif date > datetime(2008, 1, 5):
        url_template = "http://www.shfe.com.cn/data/dailydata/{}dailystock.html"
        url = url_template.format(day)
        try:
            data = pd.read_html(url, encoding='unicode')[0]
        except (ValueError, OSError):
            # ValueError: no table found in the page; OSError: the page could
            # not be fetched (HTTP/URL errors are OSError subclasses).
            log.warning("{} shfe receipt data is not exist!".format(day))
            data = pd.DataFrame()
    else:
        data = pd.DataFrame()
        log.info("Shfe has no {} receipt data!".format(day))
    return data
def get_convertible_list():
    """
    Return the convertible list as a DataFrame, served from a CSV cache when
    possible.

    When ``convertible_list.csv`` exists under ``RAW_DATA_DIR`` and
    ``is_not_trading()`` is true, the cached CSV is read and returned.
    Otherwise the JSON at ``CBS_URL`` is fetched, the ``cell`` entry of each
    element of ``rows`` becomes one record, the ``premium_rt``,
    ``ytm_rt_tax`` and ``ytm_rt`` columns are passed through
    ``convert_percent``, and the result is written back to the CSV.

    :return: pandas DataFrame
    """
    cache_path = RAW_DATA_DIR / "convertible_list.csv"
    cache_name = str(cache_path)

    # Use the cache only when the file exists and it is not trading time.
    # TODO: the file should also be checked for being recent enough.
    if cache_path.exists() and is_not_trading():
        return pd.read_csv(cache_name, encoding='gb2312')

    payload = json.loads(get_html_text(CBS_URL))
    convertible_df = pd.DataFrame([row['cell'] for row in payload['rows']])
    for column in ('premium_rt', 'ytm_rt_tax', 'ytm_rt'):
        convertible_df[column] = convertible_df[column].apply(convert_percent)
    convertible_df.to_csv(cache_name, encoding='gb2312')
    # TODO: convert the date columns (pd.to_datetime / strings).
    return convertible_df
def download_shfe_hq_by_date(date: datetime, category=0):
    """
    Download the daily trading data file of the Shanghai Futures Exchange
    (SHFE) for one day.

    Original author's notes:
    20020108/20090105 futures data start date (earlier dates may also work)
    2018921 options data start date
    http://www.shfe.com.cn/data/dailydata/kx/kx20190318.dat
    http://www.shfe.com.cn/data/dailydata/option/kx/kx20190315.dat

    :param date: day to download; must not be in the future
    :param category: quote type, 0 futures or 1 options
    :return: whatever ``get_html_text`` returns for the selected URL
    """
    assert date <= datetime.today()
    assert category in (0, 1)

    # Position in the tuple matches the category value.
    templates_by_category = (
        'http://www.shfe.com.cn/data/dailydata/kx/kx{}.dat',
        'http://www.shfe.com.cn/data/dailydata/option/kx/kx{}.dat',
    )
    day = date.strftime('%Y%m%d')
    return get_html_text(templates_by_category[category].format(day))
def download_czce_hq_by_date(date: datetime, category=0):
    """
    Download the daily trading data of the Zhengzhou Commodity Exchange
    (CZCE) for one day and return the quote table.

    Original author's notes on URL layouts:
    http://www.czce.com.cn/cn/DFSStaticFiles/Future/2019/20190314/FutureDataDaily.txt
    http://www.czce.com.cn/cn/DFSStaticFiles/Future/2019/20190314/FutureDataDaily.htm
    options, datetime(2017, 4, 19)
    http://www.czce.com.cn/cn/DFSStaticFiles/Option/2018/20180816/OptionDataDaily.htm
    http://www.czce.com.cn/cn/DFSStaticFiles/Option/2017/20171109/OptionDataDaily.htm
    datetime(2015, 10, 8)
    http://www.czce.com.cn/cn/exchange/2015/datadaily/20150821.htm
    http://www.czce.com.cn/cn/exchange/2015/datadaily/20150930.txt
    datetime(2010, 8, 24)
    http://www.czce.com.cn/cn/exchange/jyxx/hq/hq20100806.html
    datetime(2005, 4, 29)

    A parsed table is accepted when it is non-empty and has between 10 and 20
    columns. The table at the era-specific position is tried first; if it is
    rejected, the first acceptable table on the page is returned instead.

    :param date: day to download; must not be in the future
    :param category: quote type, 0 futures or 1 options (only used to pick
        the URL for dates after 2015-10-07)
    :return: pd.DataFrame; empty when the date is on or before 2005-04-28,
        when ``is_data_empty`` reports an empty page, or when no table is
        acceptable
    """
    assert date <= datetime.today()
    assert category in (0, 1)

    # Nothing is fetched for dates on or before 2005-04-28.
    if not date > datetime(2005, 4, 28):
        return pd.DataFrame()

    day = date.strftime('%Y%m%d')
    if date > datetime(2015, 10, 7):
        templates_by_category = (
            'http://www.czce.com.cn/cn/DFSStaticFiles/Future/{}/{}/FutureDataDaily.htm',
            'http://www.czce.com.cn/cn/DFSStaticFiles/Option/{}/{}/OptionDataDaily.htm',
        )
        url = templates_by_category[category].format(date.year, day)
        table_pos = 0
    elif date > datetime(2010, 8, 23):
        url = 'http://www.czce.com.cn/cn/exchange/{}/datadaily/{}.htm'.format(
            date.year, day)
        table_pos = 3
    else:
        url = 'http://www.czce.com.cn/cn/exchange/jyxx/hq/hq{}.html'.format(
            day)
        table_pos = 1

    page = get_html_text(url)
    if is_data_empty(page):
        return pd.DataFrame()

    def is_quote_table(frame):
        # Accept only non-empty tables with 10 to 20 columns.
        return not frame.empty and 10 <= len(frame.columns) <= 20

    candidates = pd.read_html(page, header=0)
    preferred = candidates[table_pos]
    if is_quote_table(preferred):
        return preferred

    # Special case noted by the original author: on 2017-12-27 the data sat
    # at index 3, so fall back to scanning every table.
    for frame in candidates:
        if is_quote_table(frame):
            return frame

    return pd.DataFrame()