def stock_em_gpzy_profile() -> pd.DataFrame:
    """
    East Money - Data Center - Featured Data - Equity Pledge - market overview.
    http://data.eastmoney.com/gpzy/marketProfile.aspx

    The endpoint answers with a JavaScript assignment. Its payload carries a
    ``font`` section whose ``FontMapping`` pairs a ``code`` with a ``value``;
    every code is replaced by its value in the raw text, and the text is then
    decoded a second time to obtain the final rows.

    :return: equity pledge market overview
    :rtype: pandas.DataFrame
    """
    url = "http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get"
    params = {
        "type": "ZD_SUM",
        "token": "70f12f2f4f091e459a279469fe49eca5",
        "cmd": "",
        "st": "tdate",
        "sr": "-1",
        "p": "1",
        "ps": "5000",
        "js": "var zvxnZOnT={pages:(tp),data:(x),font:(font)}",
        "rt": "52583914",
    }
    res = requests.get(url, params=params)
    data_text = res.text
    # First decode: only used to read the font mapping table.
    data_json = demjson.decode(data_text[data_text.find("={") + 1 :])
    font_mapping = pd.DataFrame(data_json["font"]["FontMapping"])
    map_dict = dict(zip(font_mapping["code"], font_mapping["value"]))
    for key, value in map_dict.items():
        data_text = data_text.replace(key, str(value))
    # Second decode: the text now has every font code substituted.
    data_json = demjson.decode(data_text[data_text.find("={") + 1 :])
    # Build the frame directly; DataFrame.append was removed in pandas 2.0.
    temp_df = pd.DataFrame(data_json["data"])
    temp_df.columns = [
        "交易日期",
        "sc_zsz",
        "平均质押比例(%)",
        "涨跌幅",
        "A股质押总比例(%)",
        "质押公司数量",
        "质押笔数",
        "质押总股数(股)",
        "质押总市值(元)",
        "沪深300指数",
    ]
    # Drop the "sc_zsz" column from the published result.
    temp_df = temp_df[
        [
            "交易日期",
            "平均质押比例(%)",
            "涨跌幅",
            "A股质押总比例(%)",
            "质押公司数量",
            "质押笔数",
            "质押总股数(股)",
            "质押总市值(元)",
            "沪深300指数",
        ]
    ]
    temp_df["交易日期"] = pd.to_datetime(temp_df["交易日期"])
    return temp_df
def _get_tx_start_year(symbol: str = "sh000919") -> str:
    """
    Tencent Securities - first date for which history is available.
    http://gu.qq.com/sh000919/zs

    The weekly-trend endpoint is queried first. When its ``data`` field is
    empty, the daily k-line endpoint is queried instead and the date of its
    first ``day`` record is returned.

    :param symbol: security code prefixed with its market identifier
    :type symbol: str
    :return: first element of the first record returned by the endpoint
        (the start date)
    :rtype: str
    """
    url = "http://web.ifzq.gtimg.cn/other/klineweb/klineWeb/weekTrends"
    params = {
        "code": symbol,
        "type": "qfq",
        "_var": "trend_qfq",
        "r": "0.3506048543943414",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    # Decode once and reuse the result for both the emptiness check and the value.
    week_trend = demjson.decode(data_text[data_text.find("={") + 1 :])["data"]
    if week_trend:
        return week_trend[0][0]

    # Fallback: the weekly-trend endpoint returned nothing for this symbol.
    url = "https://proxy.finance.qq.com/ifzqgtimg/appstock/app/newfqkline/get"
    params = {
        "_var": "kline_dayqfq",
        "param": f"{symbol},day,,,320,qfq",
        "r": "0.751892490072597",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    day_rows = demjson.decode(data_text[data_text.find("={") + 1 :])["data"][symbol]["day"]
    return day_rows[0][0]
def stock_em_gpzy_industry_data() -> pd.DataFrame:
    """
    East Money - Data Center - Featured Data - Equity Pledge - pledge ratio of
    listed companies - industry data.
    http://data.eastmoney.com/gpzy/industryData.aspx

    Downloads every page reported by ``_get_page_num_gpzy_industry_data`` and
    prints a progress line per page. Each page has its font codes substituted
    (``font.FontMapping`` code -> value) before the rows are decoded.

    :return: industry-level pledge data
    :rtype: pandas.DataFrame
    """
    url = "http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get"
    page_num = _get_page_num_gpzy_industry_data()
    page_frames = []
    for page in range(1, page_num + 1):
        print(f"一共{page_num}页, 正在下载第{page}页")
        params = {
            "type": "ZD_HY_SUM",
            "token": "70f12f2f4f091e459a279469fe49eca5",
            "cmd": "",
            "st": "amtshareratio_pj",
            "sr": "-1",
            "p": str(page),
            "ps": "5000",
            "js": "var SIqThurI={pages:(tp),data:(x),font:(font)}",
            "rt": "52584617",
        }
        res = requests.get(url, params=params)
        data_text = res.text
        # First decode only to obtain the font mapping of this page.
        data_json = demjson.decode(data_text[data_text.find("={") + 1:])
        font_mapping = pd.DataFrame(data_json["font"]["FontMapping"])
        map_dict = dict(zip(font_mapping["code"], font_mapping["value"]))
        for key, value in map_dict.items():
            data_text = data_text.replace(key, str(value))
        data_json = demjson.decode(data_text[data_text.find("={") + 1:])
        page_frames.append(pd.DataFrame(data_json["data"]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if page_frames:
        temp_df = pd.concat(page_frames, ignore_index=True)
    else:
        temp_df = pd.DataFrame()
    temp_df.columns = [
        "统计时间",
        "-",
        "行业",
        "平均质押比例(%)",
        "公司家数",
        "质押总笔数",
        "质押总股本",
        "最新质押市值",
    ]
    # The "-" column is dropped from the published result.
    temp_df = temp_df[[
        "统计时间", "行业", "平均质押比例(%)", "公司家数", "质押总笔数", "质押总股本", "最新质押市值"
    ]]
    temp_df["统计时间"] = pd.to_datetime(temp_df["统计时间"])
    return temp_df
def macro_australia_retail_rate_monthly() -> pd.DataFrame:
    """
    East Money - Economic Data - Australia - retail sales (month over month).
    http://data.eastmoney.com/cjsj/foreign_5_0.html

    :return: retail sales monthly rate with date and numeric columns converted
    :rtype: pandas.DataFrame
    """
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "GJZB",
        "sty": "HKZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "5",
        "stat": "0",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1625474966006",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    rows = [record.split(",") for record in payload["data"]]
    result = pd.DataFrame(rows)
    result.columns = ["时间", "前值", "现值", "发布日期"]

    def _as_date(series):
        return pd.to_datetime(series).dt.date

    # Converters are applied in the listed order.
    converters = {
        "时间": _as_date,
        "前值": pd.to_numeric,
        "现值": pd.to_numeric,
        "发布日期": _as_date,
    }
    for column, convert in converters.items():
        result[column] = convert(result[column])
    return result
def macro_china_hk_rate_of_unemployment() -> pd.DataFrame:
    """
    East Money - Economic Data Overview - Hong Kong, China - unemployment rate.
    https://data.eastmoney.com/cjsj/foreign_8_2.html

    :return: unemployment rate with numeric and date columns converted
    :rtype: pandas.DataFrame
    """
    endpoint = "https://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "GJZB",
        "sty": "HKZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "8",
        "stat": "2",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1621332091873",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    result = pd.DataFrame([record.split(",") for record in payload["data"]])
    result.columns = ["时间", "前值", "现值", "发布日期"]
    # Numeric columns first, then the two date columns.
    for column in ("前值", "现值"):
        result[column] = pd.to_numeric(result[column])
    for column in ("时间", "发布日期"):
        result[column] = pd.to_datetime(result[column]).dt.date
    return result
def match_main_contract(symbol: str = "shfe") -> pd.DataFrame:
    """
    All contracts of the given exchange for which data can be provided.
    https://finance.sina.com.cn/futuremarket/index.shtml

    For every node returned by ``zh_subscribe_exchange_symbol`` the contract
    list is fetched, and the first row whose ``name`` contains "连续" and whose
    ``symbol`` matches the "0" test below is collected (first three fields).
    Nodes without such a row are skipped.

    :param symbol: choice of {"dce", "czce", "shfe", "cffex"}
    :type symbol: str
    :return: one row per node for which a matching contract was found
    :rtype: pandas.DataFrame
    """
    subscribe_list = []
    exchange_symbol_list = zh_subscribe_exchange_symbol(symbol).iloc[:, 1].tolist()
    for item in exchange_symbol_list:
        # NOTE: this updates the shared module-level payload in place.
        zh_match_main_contract_payload.update({"node": item})
        res = requests.get(
            zh_match_main_contract_url, params=zh_match_main_contract_payload
        )
        data_json = demjson.decode(res.text)
        data_df = pd.DataFrame(data_json)
        try:
            is_continuous = data_df['name'].str.contains("连续")
            # Second capture group = the digit following the first word character.
            has_zero_digit = (
                data_df['symbol'].str.extract(r'([\w])(\d)').iloc[:, 1].str.contains("0")
            )
            main_contract = data_df[is_continuous & has_zero_digit].iloc[0, :3]
            subscribe_list.append(main_contract)
        except Exception:
            # Best effort: this node has no matching contract (or an unexpected
            # payload shape). A bare ``except`` would also swallow
            # KeyboardInterrupt/SystemExit, so only Exception is caught.
            continue
    temp_df = pd.DataFrame(subscribe_list)
    return temp_df
def _get_page_num_dxsyl(market: str = "上海主板") -> int:
    """
    East Money - Data Center - New Shares - IPO subscription yield - page count.
    http://data.eastmoney.com/xg/xg/dxsyl.html

    :param market: choice of {"上海主板", "创业板", "深圳主板"}
    :type market: str
    :return: value of the ``pages`` field in the response
    :rtype: int
    """
    stat_by_market = {"上海主板": "2", "创业板": "3", "深圳主板": "4"}
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "NS",
        "sty": "NSDXSYL",
        "st": "16",
        "sr": "-1",
        "p": "1",
        "ps": "50",
        "js": "var oyfyNYmO={pages:(pc),data:[(x)]}",
        "stat": stat_by_market[market],
        "rt": "52898446",
    }
    body = requests.get(endpoint, params=query).text
    # Skip the "var xxx=" prefix: decoding starts right after the "=".
    object_start = body.find("={") + 1
    decoded = demjson.decode(body[object_start:])
    return decoded["pages"]
def sw_index_representation_spot() -> pd.DataFrame:
    """
    Shenwan - real-time quotes of the market-representation indices.
    http://www.swsindex.com/idx0120.aspx?columnid=8831

    :return: real-time quotes; every column except code and name is numeric
    :rtype: pandas.DataFrame
    """
    endpoint = "http://www.swsindex.com/handler.aspx"
    query = {
        "tablename": "swzs",
        "key": "L1",
        "p": "1",
        "where": "L1 in('801001','801002','801003','801005','801300','801901','801903','801905','801250','801260','801270','801280','802613')",
        "orderby": "",
        "fieldlist": "L1,L2,L3,L4,L5,L6,L7,L8,L11",
        "pagecount": "9",
        "timed": "1632300641756",
    }
    response = requests.get(endpoint, params=query)
    quotes = pd.DataFrame(demjson.decode(response.text)["root"])
    quotes.columns = ["指数代码", "指数名称", "昨收盘", "今开盘", "成交额", "最高价", "最低价", "最新价", "成交量"]
    numeric_columns = ("昨收盘", "今开盘", "成交额", "最高价", "最低价", "最新价", "成交量")
    for column in numeric_columns:
        quotes[column] = pd.to_numeric(quotes[column])
    return quotes
def macro_germany_cpi_yearly() -> pd.DataFrame:
    """
    Consumer price index, final annual rate.
    http://data.eastmoney.com/cjsj/foreign_1_2.html

    Only the previous/current value columns are converted to numbers; the two
    date columns are returned as received.

    :return: consumer price index final annual rate
    :rtype: pandas.DataFrame
    """
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "GJZB",
        "sty": "HKZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "1",
        "stat": "2",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1625474966006",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    result = pd.DataFrame([record.split(",") for record in payload["data"]])
    result.columns = ["时间", "前值", "现值", "发布日期"]
    for column in ("前值", "现值"):
        result[column] = pd.to_numeric(result[column])
    return result
def macro_swiss_gbd_bank_rate():
    """
    East Money - Economic Data - Switzerland - central bank interest rate decision.
    http://data.eastmoney.com/cjsj/foreign_2_5.html

    Only the previous/current value columns are converted to numbers; the two
    date columns are returned as received.

    :return: central bank interest rate decisions
    :rtype: pandas.DataFrame
    """
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "GJZB",
        "sty": "HKZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "2",
        "stat": "5",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1625474966006",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    result = pd.DataFrame([record.split(",") for record in payload["data"]])
    result.columns = ["时间", "前值", "现值", "发布日期"]
    for column in ("前值", "现值"):
        result[column] = pd.to_numeric(result[column])
    return result
def zh_subscribe_exchange_symbol(symbol: str = "dce") -> pd.DataFrame:
    """
    Tradable varieties of a specific exchange.
    http://vip.stock.finance.sina.com.cn/quotes_service/view/qihuohangqing.html#titlePos_1

    The page text is decoded as GBK, the object literal between the first "{"
    and the first "};" is parsed, and the exchange's own title entry is removed
    from its list before the list is turned into a DataFrame.

    :param symbol: choice of {'czce', 'dce', 'shfe', 'cffex'}
    :type symbol: str
    :return: tradable varieties of the exchange; ``None`` when ``symbol`` is
        not one of the supported choices (kept for backward compatibility)
    :rtype: pandas.DataFrame
    :raises ValueError: if the exchange title is not present in its list
        (raised by ``list.remove``)
    """
    r = requests.get(zh_subscribe_exchange_symbol_url)
    r.encoding = "gbk"
    data_text = r.text
    data_json = demjson.decode(
        data_text[data_text.find("{"):data_text.find("};") + 1])
    # Title entry that heads each exchange's list and must be dropped.
    exchange_titles = {
        "czce": "郑州商品交易所",
        "dce": "大连商品交易所",
        "shfe": "上海期货交易所",
        "cffex": "中国金融期货交易所",
    }
    title = exchange_titles.get(symbol)
    if title is None:
        # Unknown exchange: the original fell through and returned None.
        return None
    data_json[symbol].remove(title)
    return pd.DataFrame(data_json[symbol])
def fund_em_new_found() -> pd.DataFrame:
    """
    Fund data - newly issued funds - newly established funds.
    http://fund.eastmoney.com/data/xinfound.html

    :return: newly established funds
    :rtype: pandas.DataFrame
    """
    url = "http://fund.eastmoney.com/data/FundNewIssue.aspx"
    params = {
        "t": "xcln",
        "sort": "jzrgq,desc",
        "y": "",
        "page": "1,50000",
        "isbuy": "1",
        "v": "0.4069919776543214",
    }
    r = requests.get(url, params=params)
    data_text = r.text.strip()
    # str.strip("var newfunddata=") strips a *set of characters* from both
    # ends, not the prefix; remove the JavaScript assignment prefix explicitly.
    js_prefix = "var newfunddata="
    if data_text.startswith(js_prefix):
        data_text = data_text[len(js_prefix):]
    data_json = demjson.decode(data_text)
    temp_df = pd.DataFrame(data_json["datas"])
    # "_" marks positions that are not part of the published result.
    temp_df.columns = [
        "基金代码",
        "基金简称",
        "发行公司",
        "_",
        "基金类型",
        "募集份额",
        "成立日期",
        "成立来涨幅",
        "基金经理",
        "申购状态",
        "集中认购期",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "_",
        "优惠费率",
    ]
    temp_df = temp_df[[
        "基金代码",
        "基金简称",
        "发行公司",
        "基金类型",
        "集中认购期",
        "募集份额",
        "成立日期",
        "成立来涨幅",
        "基金经理",
        "申购状态",
        "优惠费率",
    ]]
    temp_df['募集份额'] = pd.to_numeric(temp_df['募集份额'])
    temp_df['成立日期'] = pd.to_datetime(temp_df['成立日期']).dt.date
    # Commas are removed before the numeric conversion.
    temp_df['成立来涨幅'] = pd.to_numeric(temp_df['成立来涨幅'].str.replace(',', ''))
    temp_df['优惠费率'] = temp_df['优惠费率'].str.strip("%")
    temp_df['优惠费率'] = pd.to_numeric(temp_df['优惠费率'])
    return temp_df
def _get_page_num_sy_yq_list(symbol: str = "沪深两市", trade_date: str = "2019-12-31") -> int:
    """
    East Money - Data Center - Featured Data - Goodwill - expected goodwill
    impairment details: total page count.
    http://data.eastmoney.com/sy/yqlist.html

    :param symbol: choice of {"沪市主板", "深市主板", "中小板", "创业板", "沪深两市"}
    :type symbol: str
    :param trade_date: date inserted into the ENDDATE filter
    :type trade_date: str
    :return: value of the ``pages`` field in the response
    :rtype: int
    """
    date_clause = f"ENDDATE=^{trade_date}^"
    board_codes = {"沪市主板": "shzb", "深市主板": "szzb", "中小板": "zxb", "创业板": "cyb"}
    filters = {
        board: f"(MKT='{code}' and {date_clause})" for board, code in board_codes.items()
    }
    filters["沪深两市"] = f"({date_clause})"

    endpoint = "http://dcfm.eastmoney.com/EM_MutiSvcExpandInterface/api/js/get"
    # Variable name for the JavaScript assignment, produced by the JS context.
    js_var_name = ctx.call("getCode", 8)
    query = {
        "type": "SY_YG",
        "token": "894050c76af8597a853f5b408b759f5d",
        "st": "NOTICEDATE",
        "sr": "-1",
        "p": "1",
        "ps": "50",
        "js": f"var {js_var_name}=" + "{pages:(tp),data:(x),font:(font)}",
        "filter": filters[symbol],
        "rt": "52589731",
    }
    body = requests.get(endpoint, params=query).text
    decoded = demjson.decode(body[body.find("={") + 1:])
    return decoded["pages"]
def stock_hk_spot() -> pd.DataFrame:
    """
    Sina Finance - real-time quotes of all Hong Kong stocks.
    http://vip.stock.finance.sina.com.cn/mkt/#qbgg_hk

    The response body is treated as a bracketed list of ``{...}`` records. It
    is split on "}," and every record is decoded on its own.

    :return: real-time quotes
    :rtype: pandas.DataFrame
    """
    res = requests.get(hk_sina_stock_list_url, params=hk_sina_stock_dict_payload)
    # Drop the enclosing brackets, then split between records.
    fragments = res.text[1:-1].split("},")
    # The split consumes the closing brace of every record except the last
    # one; restore it there and keep the last record as is. (The previous
    # filter discarded fragments ending in "}", which dropped the final
    # record of every response.)
    records = [fragment + "}" for fragment in fragments[:-1]]
    records.append(fragments[-1])
    data_json = [demjson.decode(record) for record in records]
    data_df = pd.DataFrame(data_json)
    data_df = data_df[[
        "symbol",
        "name",
        "engname",
        "tradetype",
        "lasttrade",
        "prevclose",
        "open",
        "high",
        "low",
        "volume",
        "amount",
        "ticktime",
        "buy",
        "sell",
        "pricechange",
        "changepercent",
    ]]
    return data_df
def _get_page_num_sy_list(symbol: str = "沪市主板", trade_date: str = "2019-12-31") -> int:
    """
    East Money - Data Center - Featured Data - Goodwill - goodwill details per
    stock: total page count.
    http://data.eastmoney.com/sy/list.html

    :param symbol: choice of {"沪市主板", "深市主板", "中小板", "创业板", "沪深两市"}
    :type symbol: str
    :param trade_date: report date, as offered by the website
    :type trade_date: str
    :return: value of ``result.pages`` in the response
    :rtype: int
    """
    date_clause = f"(REPORT_DATE='{trade_date}')"
    board_codes = {"沪市主板": "shzb", "深市主板": "szzb", "中小板": "zxb", "创业板": "cyb"}
    filters = {
        board: f'(TRADE_BOARD="{code}")' + date_clause
        for board, code in board_codes.items()
    }
    filters["沪深两市"] = date_clause

    endpoint = "http://datacenter.eastmoney.com/api/data/get"
    query = {
        "type": "RPT_GOODWILL_STOCKDETAILS",
        "sty": "ALL",
        "p": "1",
        "ps": "50",
        "sr": "-1,-1",
        "st": "NOTICE_DATE,SECURITY_CODE",
        "var": "QvxsKBaH",
        "filter": filters[symbol],
        "rt": "53324381",
    }
    body = requests.get(endpoint, params=query).text
    # Decode from the first "{" and drop the final character of the body.
    decoded = demjson.decode(body[body.find("{"):-1])
    return decoded["result"]["pages"]
def stock_em_qbzf() -> pd.DataFrame:
    """
    East Money - Data Center - New Shares - additional issuance - all.
    http://data.eastmoney.com/other/gkzf.html

    :return: all additional issuances, with a constant "锁定期" column added
    :rtype: pandas.DataFrame
    """
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "st": "5",
        "sr": "-1",
        "ps": "5000",
        "p": "1",
        "type": "SR",
        "sty": "ZF",
        "js": '({"pages":(pc),"data":[(x)]})',
        "stat": "0",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    issues = pd.DataFrame([record.split(",") for record in payload["data"]])
    # "_" marks positions that are not part of the published result.
    issues.columns = [
        "股票代码",
        "股票简称",
        "发行方式",
        "发行总数",
        "发行价格",
        "最新价",
        "发行日期",
        "增发上市日期",
        "_",
        "增发代码",
        "网上发行",
    ] + ["_"] * 6
    published_columns = [
        "股票代码",
        "股票简称",
        "增发代码",
        "发行方式",
        "发行总数",
        "网上发行",
        "发行价格",
        "最新价",
        "发行日期",
        "增发上市日期",
    ]
    issues = issues[published_columns]
    issues["锁定期"] = "1-3年"
    for column in ("发行总数", "发行价格", "最新价"):
        issues[column] = pd.to_numeric(issues[column])
    for column in ("发行日期", "增发上市日期"):
        issues[column] = pd.to_datetime(issues[column]).dt.date
    return issues
def match_main_contract(symbol: str = "cffex") -> str:
    """
    Sina Finance - futures - main contracts.
    http://vip.stock.finance.sina.com.cn/quotes_service/view/qihuohangqing.html#titlePos_1

    For each node of the exchange, the contract whose fields from the fourth
    column onward duplicate an earlier row is taken as the main contract. When
    that lookup fails and the node lists exactly one contract, that contract
    is used instead. Progress is printed for every node.

    :param symbol: choice of {'czce', 'dce', 'shfe', 'cffex'}
    :type symbol: str
    :return: comma-separated symbols of the main contracts
    :rtype: str
    """
    subscribe_exchange_list = []
    exchange_symbol_list = zh_subscribe_exchange_symbol(symbol).iloc[:, 1].tolist()
    for item in exchange_symbol_list:
        # NOTE: this updates the shared module-level payload in place.
        zh_match_main_contract_payload.update({"node": item})
        res = requests.get(
            zh_match_main_contract_url, params=zh_match_main_contract_payload
        )
        data_json = demjson.decode(res.text)
        data_df = pd.DataFrame(data_json)
        try:
            main_contract = data_df[data_df.iloc[:, 3:].duplicated()]
            print(main_contract["symbol"].values[0])
            subscribe_exchange_list.append(main_contract["symbol"].values[0])
        except Exception:
            # Best-effort fallback, e.g. no duplicated row was found. A bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit.
            if len(data_df) == 1:
                subscribe_exchange_list.append(data_df["symbol"].values[0])
                print(data_df["symbol"].values[0])
            else:
                print(item, "无主力合约")
            continue
    print(f"{symbol}主力合约获取成功")
    return ",".join(subscribe_exchange_list)
def macro_canada_trade() -> pd.DataFrame:
    """
    East Money - Economic Data - Canada - trade balance.
    http://data.eastmoney.com/cjsj/foreign_7_2.html

    :return: trade balance; the previous/current values are divided by 100
    :rtype: pandas.DataFrame
    """
    endpoint = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "type": "GJZB",
        "sty": "HKZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "7",
        "stat": "2",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1625474966006",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    result = pd.DataFrame([record.split(",") for record in payload["data"]])
    result.columns = ["时间", "前值", "现值", "发布日期"]
    for column in ("时间", "发布日期"):
        result[column] = pd.to_datetime(result[column]).dt.date
    for column in ("前值", "现值"):
        result[column] = pd.to_numeric(result[column]) / 100
    return result
def stock_em_yysj(date: str = "20200331") -> pd.DataFrame:
    """
    East Money - Data Center - annual/quarterly reports - scheduled disclosure dates.
    http://data.eastmoney.com/bbsj/202003/yysj.html

    :param date: "20190331", "20190630", "20190930", "20191231"; available from 20081231
    :type date: str
    :return: scheduled disclosure dates of listed companies for the given report date
    :rtype: pandas.DataFrame
    """
    endpoint = "http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get"
    # YYYYMMDD -> YYYY-MM-DD for the report-date filter.
    report_date = "-".join([date[:4], date[4:6], date[6:]])
    query = {
        "type": "YJBB21_YYPL",
        "token": "70f12f2f4f091e459a279469fe49eca5",
        "st": "frdate",
        "sr": "1",
        "p": "1",
        "ps": "5000",
        "js": "var HXutCoUP={pages:(tp),data: (x),font:(font)}",
        "filter": f"(securitytypecode='058001001')(reportdate=^{report_date}^)",
        "rt": "52907209",
    }
    body = requests.get(endpoint, params=query).text
    # Decode from the first "{" to skip the JavaScript assignment prefix.
    decoded = demjson.decode(body[body.find("{") :])
    return pd.DataFrame(decoded["data"])
def stock_zh_index_spot() -> pd.DataFrame:
    """
    Sina Finance - market center home page - A shares - categories - all indices.
    Heavy scraping gets the IP banned by the target server; if that happens,
    retry after 10 minutes.
    http://vip.stock.finance.sina.com.cn/mkt/#hs_s

    :return: real-time quotes of all indices
    :rtype: pandas.DataFrame
    """
    page_count = get_zh_index_page_count()
    # Work on a copy so the shared module-level payload is left untouched.
    zh_sina_stock_payload_copy = zh_sina_index_stock_payload.copy()
    page_frames = []
    for page in tqdm(range(1, page_count + 1), leave=False):
        zh_sina_stock_payload_copy.update({"page": page})
        res = requests.get(zh_sina_index_stock_url, params=zh_sina_stock_payload_copy)
        data_json = demjson.decode(res.text)
        page_frames.append(pd.DataFrame(data_json))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if page_frames:
        big_df = pd.concat(page_frames, ignore_index=True)
    else:
        big_df = pd.DataFrame()
    big_df = big_df.applymap(_replace_comma)
    float_columns = (
        "trade",
        "pricechange",
        "changepercent",
        "buy",
        "sell",
        "settlement",
        "open",
        "high",
        "low",
    )
    for column in float_columns:
        big_df[column] = big_df[column].astype(float)
    # "_" marks positions that are not part of the published result.
    big_df.columns = [
        "代码",
        "名称",
        "最新价",
        "涨跌额",
        "涨跌幅",
        "_",
        "_",
        "昨收",
        "今开",
        "最高",
        "最低",
        "成交量",
        "成交额",
        "_",
        "_",
    ]
    big_df = big_df[
        [
            "代码",
            "名称",
            "最新价",
            "涨跌额",
            "涨跌幅",
            "昨收",
            "今开",
            "最高",
            "最低",
            "成交量",
            "成交额",
        ]
    ]
    return big_df
def futures_inventory_em(exchange: str = "上海期货交易所", symbol: str = "沪铝") -> pd.DataFrame:
    """
    East Money - Data Center - futures inventory data.
    http://data.eastmoney.com/ifdata/kcsj.html

    Three requests are made: the HTML page (exchange name -> exchange value
    from the ``select_jys`` options), the variety list (variety name -> code),
    and finally the inventory detail for the chosen exchange and variety.

    :param exchange: choice of {"上海期货交易所", "郑州商品交易所", "大连商品交易所"}
    :type exchange: str
    :param symbol: Chinese variety name as shown on http://data.eastmoney.com/ifdata/kcsj.html, e.g. 沪铝
    :type symbol: str
    :return: inventory data for the given exchange and variety
    :rtype: pandas.DataFrame
    """
    # 1) Exchange name -> value, read from the <option> tags of the page.
    page = requests.get("http://data.eastmoney.com/ifdata/kcsj.html")
    options = (
        BeautifulSoup(page.text, "lxml")
        .find(attrs={"id": "select_jys"})
        .find_all("option")
    )
    exchange_dict = {option.text: option.get("value") for option in options}

    # 2) Variety name (4th field) -> variety code (3rd field).
    api_url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx'
    listing_query = {
        'type': 'QHKC',
        'sty': 'QHKCSX',
        '_': '1618311930407',
    }
    listing_text = requests.get(api_url, params=listing_query).text
    listing = pd.DataFrame(demjson.decode(listing_text[1:-1]))
    listing = listing.iloc[:, 0].str.split(',', expand=True)
    symbol_dict = dict(zip(listing.iloc[:, 3], listing.iloc[:, 2]))

    # 3) Inventory detail for the requested exchange and variety.
    detail_query = {
        "type": "QHKC",
        "sty": "QHKCMX",
        "mkt": exchange_dict[exchange],
        "code": symbol_dict[symbol],
        "stat": "1",
        "_": "1587887394138",
    }
    detail_text = requests.get(api_url, params=detail_query).text
    raw_rows = pd.DataFrame(demjson.decode(detail_text[1:-1]))
    inventory = raw_rows.iloc[:, 0].str.split(",", expand=True)
    inventory.columns = ["日期", "库存", "增减"]
    return inventory
def stock_zh_index_daily_tx(symbol: str = "sz980017") -> pd.DataFrame:
    """
    Tencent Securities - daily history of a stock or index.
    Complements stock_zh_index_daily, because some index data is missing on Sina.
    All prices are forward-adjusted (the request uses "qfq"); adjustment
    methods differ between websites, so do not mix data from different sources.
    http://gu.qq.com/sh000919/zs

    One request is issued per year from the first available year up to the
    current year. Each request covers two calendar years, so the overlapping
    rows are removed with ``drop_duplicates`` at the end.

    :param symbol: stock or index code prefixed with its market identifier
    :type symbol: str
    :return: forward-adjusted daily data with columns
        date, open, close, high, low, amount
    :rtype: pandas.DataFrame
    """
    start_date = _get_tx_start_year(symbol=symbol)
    url = "https://proxy.finance.qq.com/ifzqgtimg/appstock/app/newfqkline/get"
    range_start = int(start_date.split("-")[0])
    range_end = datetime.date.today().year + 1
    yearly_frames = []
    for year in tqdm(range(range_start, range_end)):
        params = {
            "_var": "kline_dayqfq",
            "param": f"{symbol},day,{year}-01-01,{year + 1}-12-31,640,qfq",
            "r": "0.8205512681390605",
        }
        res = requests.get(url, params=params)
        text = res.text
        # Decode once; the rows live under "day" or, failing that, "qfqday".
        symbol_data = demjson.decode(text[text.find("={") + 1 :])["data"][symbol]
        try:
            rows = symbol_data["day"]
        except KeyError:
            rows = symbol_data["qfqday"]
        yearly_frames.append(pd.DataFrame(rows))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if yearly_frames:
        temp_df = pd.concat(yearly_frames, ignore_index=True)
    else:
        temp_df = pd.DataFrame()
    if temp_df.shape[1] != 6:
        # Keep only the first six fields when the endpoint sends extra ones.
        temp_df = temp_df.iloc[:, :6]
    temp_df.columns = ["date", "open", "close", "high", "low", "amount"]
    temp_df["date"] = pd.to_datetime(temp_df["date"]).dt.date
    for column in ("open", "close", "high", "low", "amount"):
        temp_df[column] = pd.to_numeric(temp_df[column])
    temp_df.drop_duplicates(inplace=True)
    return temp_df
def fund_hold_structure_em() -> pd.DataFrame:
    """
    Tiantian Fund - fund data - scale and shares - holder structure.
    http://fund.eastmoney.com/data/cyrjglist.html

    A first request reads the total page count; every page (including the
    first) is then downloaded and the rows are stacked.

    :return: holder structure with a 1-based sequence column
    :rtype: pandas.DataFrame
    """
    url = "http://fund.eastmoney.com/data/FundDataPortfolio_Interface.aspx"
    params = {
        "dt": "11",
        "pi": "1",
        "pn": "50",
        "mc": "hypzDetail",
        "st": "desc",
        "sc": "reportdate",
    }
    r = requests.get(url, params=params)
    data_text = r.text
    # Decode from the first "{" and drop the final character of the body.
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    total_page = data_json["pages"]
    page_frames = []
    for page in range(1, int(total_page) + 1):
        params.update({"pi": page})
        r = requests.get(url, params=params)
        data_text = r.text
        data_json = demjson.decode(data_text[data_text.find("{"):-1])
        page_frames.append(pd.DataFrame(data_json["data"]))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if page_frames:
        big_df = pd.concat(page_frames, ignore_index=True)
    else:
        big_df = pd.DataFrame()
    # Turn the 0-based row index into a 1-based sequence column.
    big_df.reset_index(inplace=True)
    big_df["index"] = big_df["index"] + 1
    big_df.columns = [
        "序号",
        "截止日期",
        "基金家数",
        '机构持有比列',
        '个人持有比列',
        '内部持有比列',
        '总份额',
    ]
    big_df["截止日期"] = pd.to_datetime(big_df["截止日期"]).dt.date
    for column in ("基金家数", "机构持有比列", "个人持有比列", "内部持有比列"):
        big_df[column] = pd.to_numeric(big_df[column])
    # Commas are removed before the numeric conversion.
    big_df["总份额"] = pd.to_numeric(big_df["总份额"].str.replace(",", ""))
    return big_df
def index_stock_cons_sina(symbol: str = "000300") -> pd.DataFrame:
    """
    Sina new-style index constituents page; only a small number of indices is
    currently available through this interface.
    http://vip.stock.finance.sina.com.cn/mkt/#zhishu_000040

    "000300" is served through the paged ``hs300`` node (80 rows per page);
    every other code is fetched in a single request from the
    ``zhishu_<symbol>`` node.

    :param symbol: index code
    :type symbol: str
    :return: constituents of the index
    :rtype: pandas.DataFrame
    """
    if symbol == "000300":
        symbol = "hs300"
        url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeStockCountSimple"
        params = {"node": f"{symbol}"}
        r = requests.get(url, params=params)
        # Exclusive upper bound for range(): number of 80-row pages plus one.
        page_num = math.ceil(int(r.json()) / 80) + 1
        page_frames = []
        for page in range(1, page_num):
            url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData"
            params = {
                "page": str(page),
                "num": "80",
                "sort": "symbol",
                "asc": "1",
                "node": "hs300",
                "symbol": "",
                "_s_r_a": "init",
            }
            r = requests.get(url, params=params)
            page_frames.append(pd.DataFrame(demjson.decode(r.text)))
        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        # pd.concat rejects an empty list, so fall back to an empty frame.
        if not page_frames:
            return pd.DataFrame()
        return pd.concat(page_frames, ignore_index=True)

    url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeDataSimple"
    params = {
        "page": 1,
        "num": "3000",
        "sort": "symbol",
        "asc": "1",
        "node": f"zhishu_{symbol}",
        "_s_r_a": "setlen",
    }
    r = requests.get(url, params=params)
    return pd.DataFrame(demjson.decode(r.text))
def fund_etf_fund_info_em(fund: str = "511280", start_date: str = "20000101", end_date: str = "20500101") -> pd.DataFrame:
    """
    East Money - Tiantian Fund - fund data - exchange-traded funds - historical
    net asset value details.
    http://fundf10.eastmoney.com/jjjz_511280.html

    :param fund: exchange-traded fund code; can be obtained from fund_etf_fund_daily_em
    :type fund: str
    :param start_date: first date of the range, formatted YYYYMMDD
    :type start_date: str
    :param end_date: last date of the range, formatted YYYYMMDD
    :type end_date: str
    :return: historical net asset value details
    :rtype: pandas.DataFrame
    """

    def _dashed(day: str) -> str:
        # YYYYMMDD -> YYYY-MM-DD
        return "-".join([day[:4], day[4:6], day[6:]])

    endpoint = "http://api.fund.eastmoney.com/f10/lsjz"
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36",
        "Referer": f"http://fundf10.eastmoney.com/jjjz_{fund}.html",
    }
    query = {
        "callback": "jQuery183023608994033331676_1588250653363",
        "fundCode": fund,
        "pageIndex": "1",
        "pageSize": "10000",
        "startDate": _dashed(start_date),
        "endDate": _dashed(end_date),
        # Current time in milliseconds.
        "_": round(time.time() * 1000),
    }
    body = requests.get(endpoint, params=query, headers=request_headers).text
    # Decode from the first "{" and drop the final character of the body.
    decoded = demjson.decode(body[body.find("{"):-1])
    history = pd.DataFrame(decoded["Data"]["LSJZList"])
    # "_" marks positions that are not part of the published result.
    history.columns = [
        "净值日期",
        "单位净值",
        "累计净值",
        "_",
        "_",
        "_",
        "日增长率",
        "申购状态",
        "赎回状态",
        "_",
        "_",
        "_",
        "_",
    ]
    history = history[["净值日期", "单位净值", "累计净值", "日增长率", "申购状态", "赎回状态"]]
    history["净值日期"] = pd.to_datetime(history["净值日期"]).dt.date
    for column in ("单位净值", "累计净值", "日增长率"):
        history[column] = pd.to_numeric(history[column])
    return history
def stock_ipo_declare() -> pd.DataFrame:
    """
    East Money - Data Center - IPO subscription - IPO filings - companies that
    filed for an initial offering.
    https://data.eastmoney.com/xg/xg/sbqy.html

    :return: filing companies with a 1-based sequence column
    :rtype: pandas.DataFrame
    """
    endpoint = "https://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
    query = {
        "st": "1",
        "sr": "-1",
        "ps": "500",
        "p": "1",
        "type": "NS",
        "sty": "NSFR",
        "js": "({data:[(x)],pages:(pc)})",
        "mkt": "1",
        "fd": "2021-04-02",
    }
    response = requests.get(endpoint, params=query)
    # The first and last characters of the body are dropped before decoding.
    payload = demjson.decode(response.text[1:-1])
    filings = pd.DataFrame([record.split(",") for record in payload["data"]])
    # Materialize the row index as the first column, then make it 1-based.
    filings = filings.reset_index()
    filings["index"] = filings.index + 1
    # "_" marks positions that are not part of the published result.
    filings.columns = [
        "序号",
        "会计师事务所",
        "_",
        "保荐机构",
        "_",
        "律师事务所",
        "_",
        "_",
        "拟上市地",
        "_",
        "_",
        "备注",
        "申报企业",
        "_",
        "_",
        "_",
        "_",
    ]
    published_columns = [
        "序号",
        "申报企业",
        "拟上市地",
        "保荐机构",
        "会计师事务所",
        "律师事务所",
        "备注",
    ]
    return filings[published_columns]
def air_quality_watch_point(city: str = "杭州", start_date: str = "20220408", end_date: str = "20220409") -> pd.DataFrame:
    """
    zq12369.com - air quality per monitoring point of a city for a date range.
    https://www.zq12369.com/

    Request parameters are encoded, and the response decoded, by functions
    from the bundled ``crypto.js`` evaluated in a MiniRacer JavaScript context.

    :param city: city name; obtain valid values from ak.air_city_table()
    :type city: str
    :param start_date: e.g., "20190327"
    :type start_date: str
    :param end_date: e.g., "20200327"
    :type end_date: str
    :return: air quality of the monitoring points of the city in the date range
    :rtype: pandas.DataFrame
    """

    def _dashed(day: str) -> str:
        # YYYYMMDD -> YYYY-MM-DD
        return "-".join([day[:4], day[4:6], day[6:]])

    start_date = _dashed(start_date)
    end_date = _dashed(end_date)
    api_url = "https://www.zq12369.com/api/zhenqiapi.php"
    js_source = _get_file_content(file_name="crypto.js")
    ctx = py_mini_racer.MiniRacer()
    ctx.eval(js_source)
    method = "GETCITYPOINTAVG"

    # NOTE(review): the results of the next calls are discarded (except the
    # city one). They are kept, in their original order, because it cannot be
    # told from here whether the JS functions are free of side effects.
    ctx.call("encode_param", method)
    ctx.call("encode_param", start_date)
    ctx.call("encode_param", end_date)
    city_param = ctx.call("encode_param", city)
    ctx.call("encode_secret", method, city_param, start_date, end_date)

    encoded_method = ctx.call("encode_param", method)
    encoded_start = ctx.call("encode_param", start_date)
    encoded_end = ctx.call("encode_param", end_date)
    secret = ctx.call("encode_secret", method, city_param, start_date, end_date)
    form = {
        "appId": "a01901d3caba1f362d69474674ce477f",
        "method": encoded_method,
        "city": city_param,
        "startTime": encoded_start,
        "endTime": encoded_end,
        "secret": secret,
    }
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
    }
    response = requests.post(api_url, data=form, headers=request_headers)
    decoded_text = ctx.call("decode_result", response.text)
    return pd.DataFrame(demjson.decode(decoded_text)["rows"])
def _get_zh_stock_ah_page_count() -> int:
    """
    Tencent Finance - Hong Kong stocks - AH shares - total page count.
    https://stockapp.finance.qq.com/mstats/#mod=list&id=hk_ah&module=HK&type=AH&sort=3&page=3&max=20

    :return: value of ``data.page_count`` in the response for page 1
    :rtype: int
    """
    # Copy the shared payload so the module-level dict is not modified.
    first_page_payload = hk_payload.copy()
    first_page_payload["reqPage"] = 1
    body = requests.get(hk_url, params=first_page_payload, headers=hk_headers).text
    # Keep only the span from the first "{" to the last "}".
    object_text = body[body.find("{"):body.rfind("}") + 1]
    return demjson.decode(object_text)["data"]["page_count"]
def stock_sector_detail(sector: str = "gn_gfgn") -> pd.DataFrame:
    """
    Sina sectors - sector quotes - constituent details.
    http://finance.sina.com.cn/stock/sl/#area_1

    The constituent count of the node is read first, then the constituents are
    downloaded 80 per page.

    :param sector: the label value returned by stock_sector_spot, choice of {"新浪行业", "概念", "地域", "行业"}; "启明星行业" has no details
    :type sector: str
    :return: constituent details of the given sector
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeStockCount"
    params = {
        "node": sector
    }
    r = requests.get(url, params=params)
    total_num = int(r.json())
    total_page_num = math.ceil(total_num / 80)
    url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/Market_Center.getHQNodeData"
    page_frames = []
    for page in tqdm(range(1, total_page_num + 1), leave=True):
        params = {
            "page": str(page),
            "num": "80",
            "sort": "symbol",
            "asc": "1",
            "node": sector,
            "symbol": "",
            "_s_r_a": "page",
        }
        r = requests.get(url, params=params)
        data_text = r.text
        data_json = demjson.decode(data_text)
        page_frames.append(pd.DataFrame(data_json))
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if page_frames:
        big_df = pd.concat(page_frames, ignore_index=True)
    else:
        big_df = pd.DataFrame()
    numeric_columns = (
        'trade',
        'pricechange',
        'changepercent',
        'buy',
        'sell',
        'settlement',
        'open',
        'high',
        'low',
        'volume',
        'amount',
        'per',
        'pb',
        'mktcap',
        'nmc',
        'turnoverratio',
    )
    for column in numeric_columns:
        big_df[column] = pd.to_numeric(big_df[column])
    return big_df
def stock_institute_hold_detail(stock: str = "600433", quarter: str = "20201") -> pd.DataFrame:
    """
    Sina Finance - stocks - institutional holding details.
    http://vip.stock.finance.sina.com.cn/q/go.php/vComStockHold/kind/jgcg/index.phtml

    :param stock: stock code
    :type stock: str
    :param quarter: available from 2005; the last digit selects the report,
        {"一季报": 1, "中报": 2, "三季报": 3, "年报": 4}, e.g. "20191" is the
        first-quarter report of 2019 and "20193" the third-quarter report
    :type quarter: str
    :return: institutional holdings for the stock and report period, or
        ``None`` when the response contains no holdings
    :rtype: pandas.DataFrame
    """
    url = "http://vip.stock.finance.sina.com.cn/q/api/jsonp.php/var%20details=/ComStockHoldService.getJGCGDetail"
    params = {
        "symbol": stock,
        "quarter": quarter,
    }
    r = requests.get(url, params=params)
    text_data = r.text
    # Decode from the first "{" and drop the last two characters of the body.
    json_data = demjson.decode(text_data[text_data.find("{"):-2])
    group_frames = []
    for item in json_data["data"]:
        # Transpose so each holder becomes a row; the last row is discarded.
        inner_temp_df = pd.DataFrame(json_data["data"][item]).T.iloc[:-1, :]
        inner_temp_df.reset_index(inplace=True)
        group_frames.append(inner_temp_df)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    if group_frames:
        big_df = pd.concat(group_frames, ignore_index=True)
    else:
        big_df = pd.DataFrame()
    if big_df.empty:
        return None
    # The former row label is "<type>_<...>"; keep only the type part.
    big_df["index"] = big_df["index"].str.split("_", expand=True)[0]
    big_df = big_df.iloc[:, :12]
    big_df.columns = [
        "持股机构类型",
        "持股机构代码",
        "持股机构简称",
        "持股机构全称",
        "持股数",
        "最新持股数",
        "持股比例",
        "最新持股比例",
        "占流通股比例",
        "最新占流通股比例",
        "持股比例增幅",
        "占流通股比例增幅",
    ]
    big_df["持股机构类型"] = big_df["持股机构类型"].str.replace("fund", "基金")
    big_df["持股机构类型"] = big_df["持股机构类型"].str.replace(
        "socialSecurity", "全国社保")
    big_df["持股机构类型"] = big_df["持股机构类型"].str.replace("qfii", "QFII")
    return big_df