def QA_ts_update_stock_basic(): """ 本地化所有股票基本信息 """ coll = DATABASE.stock_basic coll.create_index( [("code", ASCENDING), ("status", ASCENDING), ("list_date_stamp", ASCENDING)], unique=True, ) # 初始化数据接口 pro = get_pro() # 获取所有数据 df_1 = pro.stock_basic(exchange="", list_status="L") df_2 = pro.stock_basic(exchange="", list_status="P") df_3 = pro.stock_basic(exchange="", list_status="D") df_1["status"] = "L" df_2["status"] = "P" df_3["status"] = "D" df = df_1.append(df_2).append(df_3) df["code"] = QA_fmt_code_list(df.ts_code) df["list_date_stamp"] = df.list_date.apply(QA_util_date_stamp) df = df.where(pd.notnull(df), None) js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code", "symbol"])) for item in js: qry = { "code": item["code"], "status": item["status"], "list_date_stamp": item["list_date_stamp"], } if coll.count_documents(qry) == 0: # 增量更新 coll.insert_one(item)
def QA_ts_update_namechange(): """ 保存所有股票的历史曾用名 """ # 建表 coll = DATABASE.namechange coll.create_index( [ ("code", ASCENDING), ("start_date_stamp", ASCENDING), ("end_date_stamp", ASCENDING), ("ann_date_stamp", ASCENDING), ], unique=True, ) # 初始化数据接口 pro = get_pro() # 获取历史所有股票 symbol_list = sorted( list(set(QA_fmt_code_list(QA_fetch_stock_basic().index.tolist(), "ts")))) df = pd.DataFrame() for i, symbol in enumerate(symbol_list): if i % 100 == 0: print(f"Saving {i}th stock name, stock is {symbol}") try: df = df.append(pro.namechange(ts_code=symbol)) except: time.sleep(1) try: df = df.append(pro.namechange(ts_code=symbol)) except: raise ValueError("[ERROR]\t数据获取失败") # df.to_csv("test.csv") df["code"] = QA_fmt_code_list(df["ts_code"]) df["start_date_stamp"] = df["start_date"].apply(QA_util_date_stamp) df["end_date_stamp"] = df["end_date"].apply(QA_util_date_stamp) df["ann_date_stamp"] = df["ann_date"].apply(QA_util_date_stamp) df = df.where(pd.notnull(df), None) js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code"])) for item in js: if not item["end_date"]: item["end_date_stamp"] = 9999999999 qry = { "code": item["code"], "start_date_stamp": item["start_date_stamp"], "end_date_stamp": item["end_date_stamp"], "ann_date_stamp": item["ann_date_stamp"], } if coll.count_documents(qry) == 0: coll.insert_one(item)
def QA_ts_update_industry( level: Union[str, List, Tuple] = ["L1", "L2", "L3"], src: Union[str, List, Tuple] = "SW2021", ): """ 保存个股行业信息 """ pro = get_pro() if isinstance(level, str): level = [level] if isinstance(src, str): src = [src] df_industry = pd.DataFrame() for s in src: for lv in level: try: df_tmp = pro.index_classify(level=lv, src=s) df_tmp["src"] = "sw" df_industry = df_industry.append(df_tmp) except Exception as e1: print(e1) time.sleep(1) try: df_tmp = pro.index_classify(level=lv, src=s) df_tmp["src"] = "sw" df_industry = df_industry.append(df_tmp) except Exception as e2: raise ValueError(e2) df_results = pd.DataFrame() for idx, item in df_industry.iterrows(): time.sleep(0.3) if idx % 100 == 0: print(f"currently saving {idx}th record") try: df_tmp = pro.index_member(index_code=item["index_code"]) df_tmp["industry_name"] = item["industry_name"] df_tmp["level"] = item["level"].lower() df_tmp["src"] = item["src"].lower() df_results = df_results.append(df_tmp) except Exception as e1: print(e1) time.sleep(1) try: df_tmp = pro.index_member(index_code=item["index_code"]) df_tmp["industry_name"] = item["industry_name"] df_tmp["level"] = item["level"].lower() df_tmp["src"] = item["src"].lower() df_results = df_results.append(df_tmp) except Exception as e2: raise ValueError(e2) df_results.con_code = QA_fmt_code_list(df_results.con_code) df_results = df_results.rename(columns={"con_code": "code"}) df_results = df_results.sort_values(by="code") coll = DATABASE.industry coll.create_index( [ ("code", ASCENDING), ("level", ASCENDING), ("src", ASCENDING), ("in_date_stamp", DESCENDING), ("out_date_stamp", DESCENDING), ], unique=False, ) for item in QA_util_to_json_from_pandas(df_results): item["in_date_stamp"] = QA_util_date_stamp(item["in_date"]) if not item["out_date"]: item["out_date_stamp"] = 9999999999 else: item["out_date_stamp"] = QA_util_date_stamp(item["out_date_stamp"]) coll.update_one( { "code": item["code"], "level": item["level"], "src": item["src"], "in_date_stamp": item["in_date_stamp"], }, {"$set": item}, upsert=True, ) print('finished saving industry')
def QA_ts_update_all( start: Union[str, datetime.datetime] = None, end: Union[str, datetime.datetime] = None, wait_seconds: int = 61, report_type: Union[int, str] = "1", max_trial=3, ) -> pd.DataFrame: """ 全量更新 --- :param start: 开始时间 (以 report_date 作为比较标准) :param end: 结束时间 (以 report_date 作为比较标准) :param report_type: 报告类型 :param wait_seconds: 等待时间 :param max_trial: 超时重试次数 """ def _ts_update_all(report_date, report_type, sheet_type, wait_seconds, max_trial): # 获取指定报告期,指定报告类型,指定报表类型的截面数据 df = QA_fetch_get_crosssection_financial( report_date=report_date, report_type=report_type, sheet_type=sheet_type, wait_seconds=wait_seconds, max_trial=max_trial, ) coll = eval(f"DATABASE.{sheet_type}") # 考虑到查找的方式,可能根据股票代码查找,可能根据报告期查找,可能根据公告期查找,可能根据最后公告期查找,可能根据报告类型查找 coll.create_index( [ ("code", ASCENDING), ("report_label", ASCENDING), ("report_date", DESCENDING), ("report_type", ASCENDING), ("report_date_stamp", ASCENDING), ("ann_date_stamp", ASCENDING), ("f_ann_date_stamp", ASCENDING), ], unique=False, ) # FIXME: insert_many may be better for item in QA_util_to_json_from_pandas(df): report_label = None if item["report_date"].endswith("0331"): report_label = "1" elif item["report_date"].endswith("0630"): report_label = "2" elif item["report_date"].endswith("0930"): report_label = "3" elif item["report_date"].endswith("1231"): report_label = "4" item["report_label"] = report_label coll.update_one( { "code": item["code"], "report_label": report_label, "report_date": item["report_date"], "report_type": item["report_type"], "report_date_stamp": QA_util_date_stamp( item["report_date"]), "ann_date_stamp": QA_util_date_stamp(item["ann_date"]), "f_ann_date_stamp": QA_util_date_stamp(item["f_ann_date"]), }, {"$set": item}, upsert=True, ) # 如果不指定起始年份,默认从 1990-01-01 开始 if not start: start = pd.Timestamp("1990-01-01") else: start = pd.Timestamp(start) if not end: end = pd.Timestamp(datetime.date.today()) else: end = pd.Timestamp(end) # 生成报告期列表 start_year = start.year end_year = end.year origin_report_dates = pd.Series([ pd.Timestamp(year + date_tail) for year in pd.date_range( str(start_year), str(end_year + 1), freq="1Y").map(str).str.slice(0, 4) for date_tail in REPORT_DATE_TAILS ]) report_dates = origin_report_dates.loc[(origin_report_dates >= start) & (origin_report_dates <= end)] # Tushare 接口配置 pro = get_pro() # 对 SHEET_TYPE 中所列财务报表进行循环 for sheet_type in SHEET_TYPE: # 对指定报告期列表进行循环 for report_date in report_dates: _ts_update_all( report_date=report_date.strftime("%Y%m%d"), report_type=report_type, sheet_type=sheet_type, wait_seconds=wait_seconds, max_trial=max_trial, )
def QA_fetch_get_individual_financial(code: str, start: Union[str, datetime.datetime, pd.Timestamp] = None, end: Union[str, datetime.datetime, pd.Timestamp] = None, report_date: Union[ str, datetime.datetime] = None, sheet_type: str = "income", report_type: Union[int, str] = 1, fields: Union[str, Tuple, List] = None, wait_seconds: int = 61, max_trial: int = 3) -> pd.DataFrame: """个股财务报表网络查询接口,注意,这里的 start 与 end 是针对 report_date 进行范围查询 Args: code (str): 股票代码 start (Union[str, datetime.datetime, pd.Timestamp], optional): 查询起始时间,默认为 None end (Union[str, datetime.datetime, pd.Timestamp], optional): 查询结束时间,默认为 None report_date (Union[str, datetime.datetime], optional): 报告期. 默认为 None,如果使用了 report_date, 则 start 与 end 参数不再起作用 sheet_type (str, optional): 报表类型,默认为 "income" 类型 (利润表 "income"| 资产负债表 "balancesheet"| 现金流量表 "cashflow"| 业绩预告 "forecast"| 业绩快报 "express") report_type (Union[int, str], optional): 报告类型. 默认为 1。 (1 合并报表 上市公司最新报表(默认)| 2 单季合并 单一季度的合并报表 | 3 调整单季合并表 调整后的单季合并报表(如果有) | 4 调整合并报表 本年度公布上年同期的财务报表数据,报告期为上年度 | 5 调整前合并报表 数据发生变更,将原数据进行保留,即调整前的原数据 | 6 母公司报表 该公司母公司的财务报表数据 | 7 母公司单季表 母公司的单季度表 | 8 母公司调整单季表 母公司调整后的单季表 | 9 母公司调整表 该公司母公司的本年度公布上年同期的财务报表数据 | 10 母公司调整前报表 母公司调整之前的原始财务报表数据 | 11 调整前合并报表 调整之前合并报表原数据 | 12 母公司调整前报表 母公司报表发生变更前保留的原数据) fields (Union[str, Tuple, List], optional): 指定数据范围,如果设置为 None,则返回所有数据. 默认为 None. wait_seconds (int, optional): 等待重试时间. 默认为 61 秒. max_trial (int, optional): 最大重试次数. 默认为 3. Returns: pd.DataFrame: 返回指定个股时间范围内指定类型的报表数据 """ def _get_individual_financial(code, report_date, report_type, sheet_type, fields, wait_seconds, trial_count): nonlocal pro, max_trial if trial_count >= max_trial: raise ValueError("[ERROR]\tEXCEED MAX TRIAL!") try: if not fields: df = eval( f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type})" ) else: df = eval( f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type}, fields={fields})" ) return df.rename(columns={ "ts_code": "code", "end_date": "report_date" }) except Exception as e: print(e) time.sleep(wait_seconds) _get_individual_financial(code, report_date, report_type, sheet_type, fields, wait_seconds, trial_count + 1) pro = get_pro() report_type = int(report_type) if (not start) and (not end) and (not report_date): raise ValueError( "[QRY_DATES ERROR]\tparam 'start', 'end' and 'report_date' should not be none at the same time!" ) if isinstance(fields, str): fields = sorted( list( set([ fields, "ts_code", "end_date", "ann_date", "f_ann_date", "report_type", "update_flag" ]))) if report_date: report_date = pd.Timestamp(report_date) year = report_date.year report_date_lists = [ pd.Timestamp(str(year) + report_date_tail) for report_date_tail in REPORT_DATE_TAILS ] if report_date not in report_date_lists: raise ValueError("[REPORT_DATE ERROR]") if sheet_type not in [ "income", "balancesheet", "cashflow", "forecast", "express" ]: raise ValueError("[SHEET_TYPE ERROR]") if report_type not in range(1, 13): raise ValueError("[REPORT_TYPE ERROR]") report_dates = [report_date] else: start = pd.Timestamp(start) start_year = start.year end = pd.Timestamp(end) end_year = end.year origin_year_ranges = pd.date_range(str(start_year), str(end_year + 1), freq='Y').map(str).str.slice( 0, 4).tolist() origin_report_ranges = pd.Series([ pd.Timestamp(year + report_date_tail) for year in origin_year_ranges for report_date_tail in REPORT_DATE_TAILS ]) report_dates = origin_report_ranges.loc[ (origin_report_ranges >= start) & (origin_report_ranges <= end)] df = pd.DataFrame() for report_date in report_dates: df = df.append( _get_individual_financial( code=QA_fmt_code(code, "ts"), report_date=report_date.strftime("%Y%m%d"), report_type=report_type, sheet_type=sheet_type, fields=fields, wait_seconds=wait_seconds, trial_count=0)) df.code = QA_fmt_code_list(df.code) return df.reset_index(drop=True)
def QA_fetch_get_daily_basic(code: Union[str, List, Tuple] = None, trade_date: Union[str, pd.Timestamp, datetime.datetime] = None, fields: Union[str, List, Tuple] = None, wait_seconds: int = 61, max_trial: int = 3) -> pd.DataFrame: """ 从网络获取市场指定交易日重要基本面指标,用于选股分析和报表展示 Args: code(Union[str, List, Tuple], optional): 指定股票代码,默认为 None,即对应交易日的全市场股票 trade_date(Union[str, pd.Timestamp, datetime.datetime], optional): 指定交易日期, 默认为 None, 即距离当前 日期最近的交易日 fields(Union[str, List, Tuple], optional): 默认为 None,如果指定为某一单个 str,默认返回 DataFrame 包括 交易日等附加信息 wait_seconds (int, optional): 查询超时时间, 默认为 61. max_trial (int, optional): 查询最大尝试次数, 默认为 3. Returns: pd.DataFrame: 指定交易日指定范围指定标的的每日基本面指标信息 """ def _fetch_get_daily_basic(trade_date, fields, trial_count): nonlocal pro, max_trial try: if trial_count >= max_trial: raise ValueError("[ERROR]\tEXCEED MAX TRIAL!") if not trade_date: trade_date = QA_util_get_pre_trade_date( datetime.date.today(), 1).replace("-", "") else: trade_date = pd.Timestamp(trade_date).strftime("%Y%m%d") if not fields: qry = f"pro.daily_basic(trade_date={trade_date})" else: if isinstance(fields, str): fields = list(set([fields] + ["ts_code", "trade_date"])) fields = ",".join(fields) qry = "pro.daily_basic(trade_date={trade_date}, fields={fields})" df = eval(qry) if df is None: raise ValueError("[ERROR]") return df except: time.sleep(61) _fetch_get_daily_basic(trade_date, fields, trial_count + 1) pro = get_pro() df = _fetch_get_daily_basic(trade_date=trade_date, fields=fields, trial_count=0) if df.empty: return df else: df = df.rename(columns={"ts_code": "code"}) df.code = QA_fmt_code_list(df.code) df = df.set_index("code") if not code: return df if isinstance(code, str): code = (code, ) # exclude code which not in rtn dataframe filter_idx = df.index.intersection(code) return df.loc[filter_idx]
def QA_fetch_get_crosssection_financial(report_date: Union[str, datetime.datetime, pd.Timestamp], report_type: Union[int, str] = 1, sheet_type: str = "income", fields: Union[str, Tuple, List] = None, wait_seconds: int = 61, max_trial: int = 3) -> pd.DataFrame: """截面财务报表网络查询接口 Args: report_date (Union[str, datetime.datetime, pd.Timestamp]): 报告期 report_type (Union[int, str], optional): 报告类型,默认值为 1. (1 合并报表 上市公司最新报表(默认)| 2 单季合并 单一季度的合并报表 | 3 调整单季合并表 调整后的单季合并报表(如果有) | 4 调整合并报表 本年度公布上年同期的财务报表数据,报告期为上年度 | 5 调整前合并报表 数据发生变更,将原数据进行保留,即调整前的原数据 | 6 母公司报表 该公司母公司的财务报表数据 | 7 母公司单季表 母公司的单季度表 | 8 母公司调整单季表 母公司调整后的单季表 | 9 母公司调整表 该公司母公司的本年度公布上年同期的财务报表数据 | 10 母公司调整前报表 母公司调整之前的原始财务报表数据 | 11 调整前合并报表 调整之前合并报表原数据 | 12 母公司调整前报表 母公司报表发生变更前保留的原数据) sheet_type (str, optional): 报表类型,默认为 "income". (利润表 "income"| 资产负债表 "balancesheet"| 现金流量表 "cashflow"| 业绩预告 "forecast"| 业绩快报 "express") fields (Union[str, List], optional): 数据范围,默认为 None,返回所有数据. wait_seconds (int, optional): 查询超时时间, 默认为 61. max_trial (int, optional): 查询最大尝试次数, 默认为 3. Returns: pd.DataFrame: 指定报告期的指定财务报表数据 """ def _get_crosssection_financial(report_date, report_type, sheet_type, fields, wait_seconds, trial_count): nonlocal pro, max_trial if trial_count >= max_trial: raise ValueError("[ERROR]\tEXCEED MAX TRIAL!") try: if not fields: print( f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})" ) df = eval( f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})" ) else: df = eval( f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type}, fields={fields})" ) if df.empty: return df df.ts_code = QA_fmt_code_list(df.ts_code) return df.rename(columns={ "ts_code": "code", "end_date": "report_date" }).sort_values(by=['ann_date', 'f_ann_date']) except Exception as e: print(e) time.sleep(wait_seconds) _get_crosssection_financial(report_date, report_type, sheet_type, fields, wait_seconds, trial_count + 1) # Tushare 账号配置 pro = get_pro() # 设置标准报告期格式 report_date = pd.Timestamp(report_date) report_type = int(report_type) year = report_date.year std_report_dates = [ str(year) + report_date_tail for report_date_tail in REPORT_DATE_TAILS ] # Tushare 接口支持的日期格式 if report_date.strftime("%Y%m%d") not in std_report_dates: raise ValueError("[REPORT_DATE ERROR]") # fields 格式化处理 if isinstance(fields, str): fields = sorted( list( set([ fields, "ts_code", "end_date", "ann_date", "f_ann_date", "report_type", "update_flag" ]))) # 目前支持利润表,资产负债表和现金流量表 if sheet_type not in SHEET_TYPE: raise ValueError("[SHEET_TYPE ERROR]") if report_type not in range(1, 13): raise ValueError("[REPORT_TYTPE ERROR]") return _get_crosssection_financial( report_date=report_date.strftime("%Y%m%d"), report_type=report_type, sheet_type=sheet_type, fields=fields, wait_seconds=wait_seconds, trial_count=0)
def QA_fetch_get_fina_indicator(report_date: Union[str, datetime.datetime, pd.Timestamp], fields: Union[str, Tuple, List] = None, wait_seconds: int = 61, max_trial: int = 3) -> pd.DataFrame: """截面获取上市公司财务指标数据网络查询接口 Args: report_date (Union[str, datetime.datetime, pd.Timestamp]): 报告期 fields (Union[str, List], optional): 数据范围,默认为 None,返回所有数据. wait_seconds (int, optional): 查询超时时间, 默认为 61. max_trial (int, optional): 查询最大尝试次数, 默认为 3. Returns: pd.DataFrame: 指定报告期的指定财务报表数据 """ def _get_fina_indicator(report_date, fields, wait_seconds, trial_count): nonlocal pro, max_trial if trial_count >= max_trial: raise ValueError("[ERROR]\tEXCEED MAX TRIAL!") try: if not fields: print(f"pro.fina_indicator_vip(period='{report_date}')") df = eval(f"pro.fina_indicator_vip(period='{report_date}')") else: df = eval( f"pro.fina_indicator_vip(period='{report_date}', fields={fields})" ) if df.empty: return df df.ts_code = QA_fmt_code_list(df.ts_code) return df.rename(columns={ "ts_code": "code", "end_date": "report_date" }).sort_values(by=['ann_date']) except Exception as e: print(e) time.sleep(wait_seconds) _get_fina_indicator(report_date, fields, wait_seconds, trial_count + 1) # Tushare 账号配置 pro = get_pro() # 设置标准报告期格式 report_date = pd.Timestamp(report_date) year = report_date.year std_report_dates = [ str(year) + report_date_tail for report_date_tail in REPORT_DATE_TAILS ] # Tushare 接口支持的日期格式 if report_date.strftime("%Y%m%d") not in std_report_dates: raise ValueError("[REPORT_DATE ERROR]") # fields 格式化处理 if isinstance(fields, str): fields = sorted( list( set([fields, "ts_code", "end_date", "ann_date", "update_flag"]))) return _get_fina_indicator(report_date=report_date.strftime("%Y%m%d"), fields=fields, wait_seconds=wait_seconds, trial_count=0)