Exemplo n.º 1
0
def QA_ts_update_stock_basic():
    """
    本地化所有股票基本信息
    """
    coll = DATABASE.stock_basic
    coll.create_index(
        [("code", ASCENDING), ("status", ASCENDING),
         ("list_date_stamp", ASCENDING)],
        unique=True,
    )

    # 初始化数据接口
    pro = get_pro()
    # 获取所有数据
    df_1 = pro.stock_basic(exchange="", list_status="L")
    df_2 = pro.stock_basic(exchange="", list_status="P")
    df_3 = pro.stock_basic(exchange="", list_status="D")
    df_1["status"] = "L"
    df_2["status"] = "P"
    df_3["status"] = "D"
    df = df_1.append(df_2).append(df_3)
    df["code"] = QA_fmt_code_list(df.ts_code)
    df["list_date_stamp"] = df.list_date.apply(QA_util_date_stamp)
    df = df.where(pd.notnull(df), None)
    js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code", "symbol"]))
    for item in js:
        qry = {
            "code": item["code"],
            "status": item["status"],
            "list_date_stamp": item["list_date_stamp"],
        }
        if coll.count_documents(qry) == 0:  # 增量更新
            coll.insert_one(item)
Exemplo n.º 2
0
def QA_ts_update_namechange():
    """
    保存所有股票的历史曾用名
    """
    # 建表
    coll = DATABASE.namechange
    coll.create_index(
        [
            ("code", ASCENDING),
            ("start_date_stamp", ASCENDING),
            ("end_date_stamp", ASCENDING),
            ("ann_date_stamp", ASCENDING),
        ],
        unique=True,
    )
    # 初始化数据接口
    pro = get_pro()
    # 获取历史所有股票
    symbol_list = sorted(
        list(set(QA_fmt_code_list(QA_fetch_stock_basic().index.tolist(),
                                  "ts"))))
    df = pd.DataFrame()
    for i, symbol in enumerate(symbol_list):
        if i % 100 == 0:
            print(f"Saving {i}th stock name, stock is {symbol}")
        try:
            df = df.append(pro.namechange(ts_code=symbol))
        except:
            time.sleep(1)
            try:
                df = df.append(pro.namechange(ts_code=symbol))
            except:
                raise ValueError("[ERROR]\t数据获取失败")
    # df.to_csv("test.csv")
    df["code"] = QA_fmt_code_list(df["ts_code"])
    df["start_date_stamp"] = df["start_date"].apply(QA_util_date_stamp)
    df["end_date_stamp"] = df["end_date"].apply(QA_util_date_stamp)
    df["ann_date_stamp"] = df["ann_date"].apply(QA_util_date_stamp)
    df = df.where(pd.notnull(df), None)
    js = QA_util_to_json_from_pandas(df.drop(columns=["ts_code"]))
    for item in js:
        if not item["end_date"]:
            item["end_date_stamp"] = 9999999999
        qry = {
            "code": item["code"],
            "start_date_stamp": item["start_date_stamp"],
            "end_date_stamp": item["end_date_stamp"],
            "ann_date_stamp": item["ann_date_stamp"],
        }
        if coll.count_documents(qry) == 0:
            coll.insert_one(item)
Exemplo n.º 3
0
def QA_ts_update_industry(
    level: Union[str, List, Tuple] = ["L1", "L2", "L3"],
    src: Union[str, List, Tuple] = "SW2021",
):
    """
    保存个股行业信息
    """
    pro = get_pro()
    if isinstance(level, str):
        level = [level]
    if isinstance(src, str):
        src = [src]
    df_industry = pd.DataFrame()
    for s in src:
        for lv in level:
            try:
                df_tmp = pro.index_classify(level=lv, src=s)
                df_tmp["src"] = "sw"
                df_industry = df_industry.append(df_tmp)
            except Exception as e1:
                print(e1)
                time.sleep(1)
                try:
                    df_tmp = pro.index_classify(level=lv, src=s)
                    df_tmp["src"] = "sw"
                    df_industry = df_industry.append(df_tmp)
                except Exception as e2:
                    raise ValueError(e2)
    df_results = pd.DataFrame()
    for idx, item in df_industry.iterrows():
        time.sleep(0.3)
        if idx % 100 == 0:
            print(f"currently saving {idx}th record")
        try:
            df_tmp = pro.index_member(index_code=item["index_code"])
            df_tmp["industry_name"] = item["industry_name"]
            df_tmp["level"] = item["level"].lower()
            df_tmp["src"] = item["src"].lower()
            df_results = df_results.append(df_tmp)
        except Exception as e1:
            print(e1)
            time.sleep(1)
            try:
                df_tmp = pro.index_member(index_code=item["index_code"])
                df_tmp["industry_name"] = item["industry_name"]
                df_tmp["level"] = item["level"].lower()
                df_tmp["src"] = item["src"].lower()
                df_results = df_results.append(df_tmp)
            except Exception as e2:
                raise ValueError(e2)
    df_results.con_code = QA_fmt_code_list(df_results.con_code)
    df_results = df_results.rename(columns={"con_code": "code"})
    df_results = df_results.sort_values(by="code")
    coll = DATABASE.industry
    coll.create_index(
        [
            ("code", ASCENDING),
            ("level", ASCENDING),
            ("src", ASCENDING),
            ("in_date_stamp", DESCENDING),
            ("out_date_stamp", DESCENDING),
        ],
        unique=False,
    )
    for item in QA_util_to_json_from_pandas(df_results):
        item["in_date_stamp"] = QA_util_date_stamp(item["in_date"])
        if not item["out_date"]:
            item["out_date_stamp"] = 9999999999
        else:
            item["out_date_stamp"] = QA_util_date_stamp(item["out_date_stamp"])
        coll.update_one(
            {
                "code": item["code"],
                "level": item["level"],
                "src": item["src"],
                "in_date_stamp": item["in_date_stamp"],
            },
            {"$set": item},
            upsert=True,
        )
    print('finished saving industry')
Exemplo n.º 4
0
def QA_ts_update_all(
    start: Union[str, datetime.datetime] = None,
    end: Union[str, datetime.datetime] = None,
    wait_seconds: int = 61,
    report_type: Union[int, str] = "1",
    max_trial=3,
) -> pd.DataFrame:
    """
    全量更新

    ---
    :param start: 开始时间 (以 report_date 作为比较标准)
    :param end: 结束时间 (以 report_date 作为比较标准)
    :param report_type: 报告类型
    :param wait_seconds: 等待时间
    :param max_trial: 超时重试次数
    """
    def _ts_update_all(report_date, report_type, sheet_type, wait_seconds,
                       max_trial):
        # 获取指定报告期,指定报告类型,指定报表类型的截面数据
        df = QA_fetch_get_crosssection_financial(
            report_date=report_date,
            report_type=report_type,
            sheet_type=sheet_type,
            wait_seconds=wait_seconds,
            max_trial=max_trial,
        )
        coll = eval(f"DATABASE.{sheet_type}")
        # 考虑到查找的方式,可能根据股票代码查找,可能根据报告期查找,可能根据公告期查找,可能根据最后公告期查找,可能根据报告类型查找
        coll.create_index(
            [
                ("code", ASCENDING),
                ("report_label", ASCENDING),
                ("report_date", DESCENDING),
                ("report_type", ASCENDING),
                ("report_date_stamp", ASCENDING),
                ("ann_date_stamp", ASCENDING),
                ("f_ann_date_stamp", ASCENDING),
            ],
            unique=False,
        )
        # FIXME: insert_many may be better
        for item in QA_util_to_json_from_pandas(df):
            report_label = None
            if item["report_date"].endswith("0331"):
                report_label = "1"
            elif item["report_date"].endswith("0630"):
                report_label = "2"
            elif item["report_date"].endswith("0930"):
                report_label = "3"
            elif item["report_date"].endswith("1231"):
                report_label = "4"
            item["report_label"] = report_label
            coll.update_one(
                {
                    "code": item["code"],
                    "report_label": report_label,
                    "report_date": item["report_date"],
                    "report_type": item["report_type"],
                    "report_date_stamp": QA_util_date_stamp(
                        item["report_date"]),
                    "ann_date_stamp": QA_util_date_stamp(item["ann_date"]),
                    "f_ann_date_stamp": QA_util_date_stamp(item["f_ann_date"]),
                },
                {"$set": item},
                upsert=True,
            )

    # 如果不指定起始年份,默认从 1990-01-01 开始
    if not start:
        start = pd.Timestamp("1990-01-01")
    else:
        start = pd.Timestamp(start)
    if not end:
        end = pd.Timestamp(datetime.date.today())
    else:
        end = pd.Timestamp(end)

    # 生成报告期列表
    start_year = start.year
    end_year = end.year
    origin_report_dates = pd.Series([
        pd.Timestamp(year + date_tail) for year in pd.date_range(
            str(start_year), str(end_year +
                                 1), freq="1Y").map(str).str.slice(0, 4)
        for date_tail in REPORT_DATE_TAILS
    ])
    report_dates = origin_report_dates.loc[(origin_report_dates >= start)
                                           & (origin_report_dates <= end)]

    # Tushare 接口配置
    pro = get_pro()

    # 对 SHEET_TYPE 中所列财务报表进行循环
    for sheet_type in SHEET_TYPE:
        # 对指定报告期列表进行循环
        for report_date in report_dates:
            _ts_update_all(
                report_date=report_date.strftime("%Y%m%d"),
                report_type=report_type,
                sheet_type=sheet_type,
                wait_seconds=wait_seconds,
                max_trial=max_trial,
            )
Exemplo n.º 5
0
def QA_fetch_get_individual_financial(code: str,
                                      start: Union[str, datetime.datetime,
                                                   pd.Timestamp] = None,
                                      end: Union[str, datetime.datetime,
                                                 pd.Timestamp] = None,
                                      report_date: Union[
                                          str, datetime.datetime] = None,
                                      sheet_type: str = "income",
                                      report_type: Union[int, str] = 1,
                                      fields: Union[str, Tuple, List] = None,
                                      wait_seconds: int = 61,
                                      max_trial: int = 3) -> pd.DataFrame:
    """个股财务报表网络查询接口,注意,这里的 start 与 end 是针对 report_date 进行范围查询

    Args:
        code (str): 股票代码
        start (Union[str, datetime.datetime, pd.Timestamp], optional): 查询起始时间,默认为 None
        end (Union[str, datetime.datetime, pd.Timestamp], optional): 查询结束时间,默认为 None
        report_date (Union[str, datetime.datetime], optional): 报告期. 默认为 None,如果使用了 report_date, 则 start 与 end 参数不再起作用
        sheet_type (str, optional): 报表类型,默认为 "income" 类型
            (利润表 "income"|
            资产负债表 "balancesheet"|
            现金流量表 "cashflow"|
            业绩预告 "forecast"|
            业绩快报 "express")
        report_type (Union[int, str], optional): 报告类型. 默认为 1。
            (1	合并报表	上市公司最新报表(默认)|
            2	单季合并	单一季度的合并报表 |
            3	调整单季合并表	调整后的单季合并报表(如果有) |
            4	调整合并报表	本年度公布上年同期的财务报表数据,报告期为上年度 |
            5	调整前合并报表	数据发生变更,将原数据进行保留,即调整前的原数据 |
            6	母公司报表	该公司母公司的财务报表数据 |
            7	母公司单季表	母公司的单季度表 |
            8	母公司调整单季表	母公司调整后的单季表 |
            9	母公司调整表	该公司母公司的本年度公布上年同期的财务报表数据 |
            10 母公司调整前报表	母公司调整之前的原始财务报表数据 |
            11 调整前合并报表	调整之前合并报表原数据 |
            12 母公司调整前报表	母公司报表发生变更前保留的原数据)
        fields (Union[str, Tuple, List], optional): 指定数据范围,如果设置为 None,则返回所有数据. 默认为 None.
        wait_seconds (int, optional): 等待重试时间. 默认为 61 秒.
        max_trial (int, optional): 最大重试次数. 默认为 3.

    Returns:
        pd.DataFrame: 返回指定个股时间范围内指定类型的报表数据
    """
    def _get_individual_financial(code, report_date, report_type, sheet_type,
                                  fields, wait_seconds, trial_count):
        nonlocal pro, max_trial
        if trial_count >= max_trial:
            raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
        try:
            if not fields:
                df = eval(
                    f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type})"
                )
            else:
                df = eval(
                    f"pro.{sheet_type}(ts_code='{code}', period='{report_date}', report_type={report_type}, fields={fields})"
                )
            return df.rename(columns={
                "ts_code": "code",
                "end_date": "report_date"
            })
        except Exception as e:
            print(e)
            time.sleep(wait_seconds)
            _get_individual_financial(code, report_date, report_type,
                                      sheet_type, fields, wait_seconds,
                                      trial_count + 1)

    pro = get_pro()
    report_type = int(report_type)
    if (not start) and (not end) and (not report_date):
        raise ValueError(
            "[QRY_DATES ERROR]\tparam 'start', 'end' and 'report_date' should not be none at the same time!"
        )
    if isinstance(fields, str):
        fields = sorted(
            list(
                set([
                    fields, "ts_code", "end_date", "ann_date", "f_ann_date",
                    "report_type", "update_flag"
                ])))
    if report_date:
        report_date = pd.Timestamp(report_date)
        year = report_date.year
        report_date_lists = [
            pd.Timestamp(str(year) + report_date_tail)
            for report_date_tail in REPORT_DATE_TAILS
        ]
        if report_date not in report_date_lists:
            raise ValueError("[REPORT_DATE ERROR]")
        if sheet_type not in [
                "income", "balancesheet", "cashflow", "forecast", "express"
        ]:
            raise ValueError("[SHEET_TYPE ERROR]")
        if report_type not in range(1, 13):
            raise ValueError("[REPORT_TYPE ERROR]")
        report_dates = [report_date]
    else:
        start = pd.Timestamp(start)
        start_year = start.year
        end = pd.Timestamp(end)
        end_year = end.year
        origin_year_ranges = pd.date_range(str(start_year),
                                           str(end_year + 1),
                                           freq='Y').map(str).str.slice(
                                               0, 4).tolist()
        origin_report_ranges = pd.Series([
            pd.Timestamp(year + report_date_tail)
            for year in origin_year_ranges
            for report_date_tail in REPORT_DATE_TAILS
        ])
        report_dates = origin_report_ranges.loc[
            (origin_report_ranges >= start) & (origin_report_ranges <= end)]
    df = pd.DataFrame()
    for report_date in report_dates:
        df = df.append(
            _get_individual_financial(
                code=QA_fmt_code(code, "ts"),
                report_date=report_date.strftime("%Y%m%d"),
                report_type=report_type,
                sheet_type=sheet_type,
                fields=fields,
                wait_seconds=wait_seconds,
                trial_count=0))
    df.code = QA_fmt_code_list(df.code)
    return df.reset_index(drop=True)
Exemplo n.º 6
0
def QA_fetch_get_daily_basic(code: Union[str, List, Tuple] = None,
                             trade_date: Union[str, pd.Timestamp,
                                               datetime.datetime] = None,
                             fields: Union[str, List, Tuple] = None,
                             wait_seconds: int = 61,
                             max_trial: int = 3) -> pd.DataFrame:
    """
    从网络获取市场指定交易日重要基本面指标,用于选股分析和报表展示

    Args:
        code(Union[str, List, Tuple], optional): 指定股票代码,默认为 None,即对应交易日的全市场股票
        trade_date(Union[str, pd.Timestamp, datetime.datetime], optional): 指定交易日期, 默认为 None, 即距离当前
            日期最近的交易日
        fields(Union[str, List, Tuple], optional): 默认为 None,如果指定为某一单个 str,默认返回 DataFrame 包括
            交易日等附加信息
        wait_seconds (int, optional): 查询超时时间, 默认为 61.
        max_trial (int, optional): 查询最大尝试次数, 默认为 3.

    Returns:
        pd.DataFrame: 指定交易日指定范围指定标的的每日基本面指标信息
    """
    def _fetch_get_daily_basic(trade_date, fields, trial_count):
        nonlocal pro, max_trial
        try:
            if trial_count >= max_trial:
                raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
            if not trade_date:
                trade_date = QA_util_get_pre_trade_date(
                    datetime.date.today(), 1).replace("-", "")
            else:
                trade_date = pd.Timestamp(trade_date).strftime("%Y%m%d")
            if not fields:
                qry = f"pro.daily_basic(trade_date={trade_date})"
            else:
                if isinstance(fields, str):
                    fields = list(set([fields] + ["ts_code", "trade_date"]))
                fields = ",".join(fields)
                qry = "pro.daily_basic(trade_date={trade_date}, fields={fields})"
            df = eval(qry)
            if df is None:
                raise ValueError("[ERROR]")
            return df
        except:
            time.sleep(61)
            _fetch_get_daily_basic(trade_date, fields, trial_count + 1)

    pro = get_pro()
    df = _fetch_get_daily_basic(trade_date=trade_date,
                                fields=fields,
                                trial_count=0)
    if df.empty:
        return df
    else:
        df = df.rename(columns={"ts_code": "code"})
        df.code = QA_fmt_code_list(df.code)
        df = df.set_index("code")
    if not code:
        return df
    if isinstance(code, str):
        code = (code, )
    # exclude code which not in rtn dataframe
    filter_idx = df.index.intersection(code)
    return df.loc[filter_idx]
Exemplo n.º 7
0
def QA_fetch_get_crosssection_financial(report_date: Union[str,
                                                           datetime.datetime,
                                                           pd.Timestamp],
                                        report_type: Union[int, str] = 1,
                                        sheet_type: str = "income",
                                        fields: Union[str, Tuple, List] = None,
                                        wait_seconds: int = 61,
                                        max_trial: int = 3) -> pd.DataFrame:
    """截面财务报表网络查询接口

    Args:
        report_date (Union[str, datetime.datetime, pd.Timestamp]): 报告期
        report_type (Union[int, str], optional): 报告类型,默认值为 1.
            (1	合并报表	上市公司最新报表(默认)|
             2	单季合并	单一季度的合并报表 |
             3	调整单季合并表	调整后的单季合并报表(如果有) |
             4	调整合并报表	本年度公布上年同期的财务报表数据,报告期为上年度 |
             5	调整前合并报表	数据发生变更,将原数据进行保留,即调整前的原数据 |
             6	母公司报表	该公司母公司的财务报表数据 |
             7	母公司单季表	母公司的单季度表 |
             8	母公司调整单季表	母公司调整后的单季表 |
             9	母公司调整表	该公司母公司的本年度公布上年同期的财务报表数据 |
             10 母公司调整前报表	母公司调整之前的原始财务报表数据 |
             11 调整前合并报表	调整之前合并报表原数据 |
             12 母公司调整前报表	母公司报表发生变更前保留的原数据)

        sheet_type (str, optional): 报表类型,默认为 "income".
            (利润表 "income"|
             资产负债表 "balancesheet"|
             现金流量表 "cashflow"|
             业绩预告 "forecast"|
             业绩快报 "express")
        fields (Union[str, List], optional): 数据范围,默认为 None,返回所有数据.
        wait_seconds (int, optional): 查询超时时间, 默认为 61.
        max_trial (int, optional): 查询最大尝试次数, 默认为 3.

    Returns:
        pd.DataFrame: 指定报告期的指定财务报表数据
    """
    def _get_crosssection_financial(report_date, report_type, sheet_type,
                                    fields, wait_seconds, trial_count):
        nonlocal pro, max_trial
        if trial_count >= max_trial:
            raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
        try:
            if not fields:
                print(
                    f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})"
                )
                df = eval(
                    f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type})"
                )
            else:
                df = eval(
                    f"pro.{sheet_type}_vip(period='{report_date}', report_type={report_type}, fields={fields})"
                )
            if df.empty:
                return df
            df.ts_code = QA_fmt_code_list(df.ts_code)
            return df.rename(columns={
                "ts_code": "code",
                "end_date": "report_date"
            }).sort_values(by=['ann_date', 'f_ann_date'])
        except Exception as e:
            print(e)
            time.sleep(wait_seconds)
            _get_crosssection_financial(report_date, report_type, sheet_type,
                                        fields, wait_seconds, trial_count + 1)

    # Tushare 账号配置
    pro = get_pro()

    # 设置标准报告期格式
    report_date = pd.Timestamp(report_date)
    report_type = int(report_type)
    year = report_date.year
    std_report_dates = [
        str(year) + report_date_tail for report_date_tail in REPORT_DATE_TAILS
    ]

    # Tushare 接口支持的日期格式
    if report_date.strftime("%Y%m%d") not in std_report_dates:
        raise ValueError("[REPORT_DATE ERROR]")

    # fields 格式化处理
    if isinstance(fields, str):
        fields = sorted(
            list(
                set([
                    fields, "ts_code", "end_date", "ann_date", "f_ann_date",
                    "report_type", "update_flag"
                ])))

    # 目前支持利润表,资产负债表和现金流量表
    if sheet_type not in SHEET_TYPE:
        raise ValueError("[SHEET_TYPE ERROR]")
    if report_type not in range(1, 13):
        raise ValueError("[REPORT_TYTPE ERROR]")

    return _get_crosssection_financial(
        report_date=report_date.strftime("%Y%m%d"),
        report_type=report_type,
        sheet_type=sheet_type,
        fields=fields,
        wait_seconds=wait_seconds,
        trial_count=0)
Exemplo n.º 8
0
def QA_fetch_get_fina_indicator(report_date: Union[str, datetime.datetime,
                                                   pd.Timestamp],
                                fields: Union[str, Tuple, List] = None,
                                wait_seconds: int = 61,
                                max_trial: int = 3) -> pd.DataFrame:
    """截面获取上市公司财务指标数据网络查询接口

    Args:
        report_date (Union[str, datetime.datetime, pd.Timestamp]): 报告期
        fields (Union[str, List], optional): 数据范围,默认为 None,返回所有数据.
        wait_seconds (int, optional): 查询超时时间, 默认为 61.
        max_trial (int, optional): 查询最大尝试次数, 默认为 3.

    Returns:
        pd.DataFrame: 指定报告期的指定财务报表数据
    """
    def _get_fina_indicator(report_date, fields, wait_seconds, trial_count):
        nonlocal pro, max_trial
        if trial_count >= max_trial:
            raise ValueError("[ERROR]\tEXCEED MAX TRIAL!")
        try:
            if not fields:
                print(f"pro.fina_indicator_vip(period='{report_date}')")
                df = eval(f"pro.fina_indicator_vip(period='{report_date}')")
            else:
                df = eval(
                    f"pro.fina_indicator_vip(period='{report_date}', fields={fields})"
                )
            if df.empty:
                return df
            df.ts_code = QA_fmt_code_list(df.ts_code)
            return df.rename(columns={
                "ts_code": "code",
                "end_date": "report_date"
            }).sort_values(by=['ann_date'])
        except Exception as e:
            print(e)
            time.sleep(wait_seconds)
            _get_fina_indicator(report_date, fields, wait_seconds,
                                trial_count + 1)

    # Tushare 账号配置
    pro = get_pro()

    # 设置标准报告期格式
    report_date = pd.Timestamp(report_date)
    year = report_date.year
    std_report_dates = [
        str(year) + report_date_tail for report_date_tail in REPORT_DATE_TAILS
    ]

    # Tushare 接口支持的日期格式
    if report_date.strftime("%Y%m%d") not in std_report_dates:
        raise ValueError("[REPORT_DATE ERROR]")

    # fields 格式化处理
    if isinstance(fields, str):
        fields = sorted(
            list(
                set([fields, "ts_code", "end_date", "ann_date",
                     "update_flag"])))

    return _get_fina_indicator(report_date=report_date.strftime("%Y%m%d"),
                               fields=fields,
                               wait_seconds=wait_seconds,
                               trial_count=0)