Пример #1
0
def get_last_reported_values(financial_indicator, recent_report_type):
    """Collect the last reported value of an indicator for every stock.

    Stocks are grouped by the report type found in ``recent_report_type`` and
    one ``rqdatac.get_financials`` query is issued per distinct report type.
    The per-group results are concatenated once at the end (``Series.append``
    was removed in pandas 2.0 and copied the accumulator on every call).

    Args:
        financial_indicator: Indicator expression handed to ``rqdatac.query``.
        recent_report_type: pandas Series indexed by stock code whose values
            are passed to ``rqdatac.get_financials`` as the report period
            (presumably quarter strings such as ``'2017q3'`` -- confirm
            against the caller).

    Returns:
        The concatenation of the per-report-type query results, in the order
        the report types first appear in ``recent_report_type``.  An empty
        Series is returned when ``recent_report_type`` is empty.
    """
    collected = []

    # Query each report type that occurs in the input, then merge the results.
    for report_type in recent_report_type.unique().tolist():
        stock_list = recent_report_type[
            recent_report_type == report_type].index.tolist()

        reported = rqdatac.get_financials(
            rqdatac.query(financial_indicator).filter(
                rqdatac.financials.stockcode.in_(stock_list)),
            report_type)

        # A single-stock result is used as returned; any other result is
        # reduced to its first row before being merged.
        if len(stock_list) != 1:
            reported = reported.iloc[0]

        collected.append(reported)

    if not collected:
        # pd.concat refuses an empty list, so return the empty result directly.
        return pd.Series(dtype='float64')

    return pd.concat(collected)
Пример #2
0
def get_ttm_sum(financial_indicator, recent_report_type):
    """Compute the trailing-twelve-month (TTM) sum of an indicator per stock.

    For a stock whose latest report is a Q1/Q2/Q3 report the TTM value is::

        latest annual report + latest report - same report one year earlier

    For a stock whose latest report is the annual report (``q4``) the annual
    value itself is used.

    Args:
        financial_indicator: Indicator expression handed to ``rqdatac.query``.
        recent_report_type: pandas Series indexed by stock code whose values
            are quarter strings of the form ``'YYYYqN'`` (latest report of
            each stock).

    Returns:
        pandas Series keyed by the row labels of the fetched data (presumably
        stock codes -- confirm against ``rqdatac.get_financials``) holding the
        TTM value, or NaN where no usable report value exists.  An empty
        Series is returned for empty input.

    Raises:
        ValueError: If a quarter string does not end in ``q1`` .. ``q4``.
    """
    # Nothing to compute for an empty universe; without this guard ``max``
    # below would fail on the empty input.
    if len(recent_report_type) == 0:
        return pd.Series(dtype='float64')

    def _get_ttm_date(quarter):
        """Return the three report quarters needed for the TTM of ``quarter``."""
        suffix = quarter[-2:]
        # Latest report is the annual report: only that one value is needed.
        if suffix == "q4":
            return [np.nan, np.nan, quarter]
        # Latest report is a Q1/Q2/Q3 report: same quarter last year, last
        # year's annual report, and the latest report itself.
        if suffix in ("q1", "q2", "q3"):
            last_year = str(int(quarter[:4]) - 1)
            return [last_year + suffix, last_year + "q4", quarter]
        raise ValueError(
            "unexpected report quarter: {!r}".format(quarter))

    # Latest quarter across all stocks.
    max_quarter = max(recent_report_type)
    # Fetch the 8 most recent reports up to that quarter for all stocks.
    # After the transpose the columns are expected to be quarters (they are
    # used as the column labels of the mask below).
    financial_data = rqdatac.get_financials(rqdatac.query(financial_indicator),
                                            quarter=max_quarter,
                                            interval="8q",
                                            country='cn').T

    # One row per stock, three columns holding the quarters required for TTM.
    effective_quarter = pd.DataFrame(
        recent_report_type.apply(_get_ttm_date).to_dict()).T

    # Flatten to one (stock -> quarter) entry per required report.
    effective_quarter = effective_quarter.unstack()

    effective_quarter.index = effective_quarter.index.droplevel(0)
    merged_data = pd.DataFrame(effective_quarter)
    merged_data['mask'] = 1
    # Pivot into a stock x quarter boolean mask that is True exactly where a
    # report is needed for that stock's TTM computation.
    previous_quarters_mask = merged_data.dropna().reset_index().pivot(
        index='index', columns=0, values='mask').reindex(
            columns=financial_data.columns).astype(float).replace(
                np.nan, 0).astype(bool)
    latest_data = financial_data.where(previous_quarters_mask)

    def _calc_ttm(data):
        """Combine the selected report values of one stock into its TTM."""
        data = data.dropna().sort_index()
        if len(data) > 1:
            # (annual + latest) - same quarter last year.  With only two
            # values available this reduces to the later of the two.
            return data.iloc[-2:].sum() - data.iloc[0]
        elif len(data) == 1:
            return data.iloc[0]
        else:
            return np.nan

    financial_values = {
        label: _calc_ttm(row)
        for label, row in latest_data.iterrows()
    }

    return pd.Series(financial_values)
Пример #3
0
    def get_financials(self, codes, years=10, type='y'):
        """Fetch diluted adjusted ROE and announce dates for the given stocks.

        Args:
            codes: Iterable of stock codes; used both as the query filter and
                as the keys of the returned dict.
            years: Number of periods; joined with ``type`` to form the
                interval string passed to ``rq.get_financials`` (``'10y'`` by
                default).
            type: Interval unit suffix appended to ``years``.  The name
                shadows the builtin but is kept for API compatibility.

        Returns:
            dict mapping each code to ``res.minor_xs(code)``.  Codes for which
            that lookup fails are silently omitted.
        """
        q = query(fds.financial_indicator.adjusted_return_on_equity_diluted,
                  fds.announce_date).filter(fds.stockcode.in_(codes))
        # Anchor the query on Q4 of the year before the one reported by
        # public.getDate() (assumed to return a 'YYYY-...' string).
        S = str(int(public.getDate().split("-")[0]) - 1) + 'q4'

        Y = str(years) + type
        res = rq.get_financials(q, S, interval=Y)
        d = {}
        for c in codes:
            try:
                d[c] = res.minor_xs(c)
            except Exception:
                # Best effort: skip codes without data.  Unlike a bare
                # ``except`` this lets KeyboardInterrupt/SystemExit through.
                continue
        return d
Пример #4
0
def get_financials(query, quarter=None, interval='4q', expect_df=False):
    """Query financial data without exposing reports announced in the future.

    The request is forwarded to ``rqdatac.get_financials`` and the result is
    filtered by ``announce_date`` relative to the day before the environment's
    current calendar date.

    Args:
        query: Query object exposing ``column_descriptions`` and
            ``add_column`` (presumably a SQLAlchemy-style query).
        quarter: Latest quarter to fetch, formatted like ``'2012q3'``.  When
            ``None`` or later than the most recently completed quarter, the
            most recently completed quarter is used instead.
        interval: Look-back interval string forwarded to rqdatac.
        expect_df: Forwarded to rqdatac unchanged.

    Returns:
        * an empty DataFrame when rqdatac returns ``None``;
        * the Series unchanged when rqdatac returns a Series;
        * a DataFrame restricted to rows whose ``announce_date`` is null or
          not after yesterday;
        * otherwise a Panel rebuilt from the per-instrument filtered frames
          (collapsed to a DataFrame when a single item remains after removing
          ``announce_date``).
        ``announce_date`` is removed from the result unless the caller's
        query already selected it.

    Raises:
        RQInvalidArgument: If ``quarter`` is not ``None`` and not a string of
            the form ``'<year>q<1-4>'`` with 1990 <= year <= 2050.
    """
    if quarter is None:
        valid = True
    else:
        # The length check keeps quarter[-2] from raising IndexError on
        # strings shorter than two characters; those are reported as an
        # invalid argument like any other malformed value.
        valid = (isinstance(quarter, six.string_types) and len(quarter) >= 2
                 and quarter[-2] == 'q')
        if valid:
            try:
                valid = 1990 <= int(quarter[:-2]) <= 2050 and 1 <= int(
                    quarter[-1]) <= 4
            except ValueError:
                valid = False
    if not valid:
        raise RQInvalidArgument(
            _(u"function {}: invalid {} argument, quarter should be in form of '2012q3', "
              u"got {} (type: {})").format('get_financials', 'quarter',
                                           quarter, type(quarter)))
    env = Environment.get_instance()
    dt = env.calendar_dt.date() - datetime.timedelta(
        days=1)  # Take yesterday's data as default
    year = dt.year
    mon = dt.month
    day = dt.day
    # Yesterday encoded as the integer YYYYMMDD for comparison with
    # announce_date.
    int_date = year * 10000 + mon * 100 + day
    # Most recently completed quarter: Jan-Mar -> q4 of the previous year,
    # Apr-Jun -> q1, Jul-Sep -> q2, Oct-Dec -> q3.
    q = (mon - 4) // 3 + 1
    y = year
    if q <= 0:
        y -= 1
        q = 4
    default_quarter = str(y) + 'q' + str(q)
    # String comparison is sufficient because the year part is validated to
    # be four digits.
    if quarter is None or quarter > default_quarter:
        quarter = default_quarter

    # announce_date is needed for filtering; add it when the caller did not
    # select it and remember to strip it from the result afterwards.
    include_date = any(
        d['name'] == 'announce_date' for d in query.column_descriptions)
    if not include_date:
        query = query.add_column(rqdatac.fundamentals.announce_date)

    result = rqdatac.get_financials(query,
                                    quarter,
                                    interval,
                                    expect_df=expect_df)
    if result is None:
        return pd.DataFrame()
    if isinstance(result, pd.Series):
        return result
    elif isinstance(result, pd.DataFrame):
        result = result[(result['announce_date'] <= int_date)
                        | pd.isnull(result['announce_date'])]
        if not include_date:
            del result['announce_date']
    else:
        d = dict()
        for order_book_id in result.minor_axis:
            df = result.minor_xs(order_book_id)
            # NOTE(review): this branch uses a strict '<' while the DataFrame
            # branch above uses '<=' -- confirm which bound is intended.
            df = df[(df.announce_date < int_date) |
                    (pd.isnull(df.announce_date))]
            d[order_book_id] = df
        pl = pd.Panel.from_dict(d, orient='minor')
        if not include_date:
            pl.drop('announce_date', axis=0, inplace=True)
            if len(pl.items) == 1:
                pl = pl[pl.items[0]]
        return pl

    return result
Пример #5
0
def recent_five_annual_values(financial_indicator, date, recent_report_type):
    """Return the five most recent annual-report values for every stock.

    Stocks whose latest report is already last year's annual report use the
    annual reports of the five years ending last year; all other stocks use
    the five years ending the year before that.

    Args:
        financial_indicator: Indicator expression handed to ``rqdatac.query``.
        date: Reference date as a ``'YYYY-MM-DD'`` string.
        recent_report_type: pandas Series indexed by stock code whose values
            are the latest report quarter of each stock (``'YYYYqN'``).

    Returns:
        DataFrame with columns ``first`` .. ``fifth`` (most recent annual
        report first).  Rows for stocks that already published last year's
        annual report come first, followed by the remaining stocks in the
        order they appear in ``recent_report_type.index``.
    """
    previous_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    latest_annual = str(previous_year) + 'q4'

    # Stocks whose most recent report is last year's annual report.
    annual_report_published_stocks = recent_report_type[
        recent_report_type == latest_annual].index.tolist()

    # All remaining stocks.  Built as an order-preserving, de-duplicated
    # difference so the row order of the result is deterministic (a plain
    # set difference yields an arbitrary order).
    seen = set(annual_report_published_stocks)
    annual_report_not_published_stocks = []
    for stock in recent_report_type.index:
        if stock not in seen:
            seen.add(stock)
            annual_report_not_published_stocks.append(stock)

    # Five most recent annual reports when last year's report is available.
    annual_report_published_list = [
        str(previous_year - offset) + 'q4' for offset in range(5)
    ]

    # Five most recent annual reports when last year's report is not yet
    # available: shifted back by one year.
    annual_report_not_published_list = [
        str(previous_year - offset) + 'q4' for offset in range(1, 6)
    ]

    # Fetch 25 quarters ending at last year's Q4 for all stocks; after the
    # transpose the quarters are selected as columns and the stocks as rows.
    recent_five_reports = rqdatac.get_financials(
        rqdatac.query(financial_indicator), latest_annual, '25q').T

    annual_report_published_values = recent_five_reports[
        annual_report_published_list].loc[annual_report_published_stocks]

    annual_report_not_published_values = recent_five_reports[
        annual_report_not_published_list].loc[
            annual_report_not_published_stocks]

    # Give both frames the same column labels so they can be stacked.
    ordinal_labels = ['first', 'second', 'third', 'fourth', 'fifth']
    annual_report_published_values.columns = list(ordinal_labels)
    annual_report_not_published_values.columns = list(ordinal_labels)

    recent_five_reports_values = pd.concat(
        [annual_report_published_values, annual_report_not_published_values],
        axis=0)

    return recent_five_reports_values
Пример #6
0
def get_earnings_growth(date, year, market_cap_on_current_day):
    """Compute the earnings-growth (EGRO) factor.

    For every stock listed more than 1825 days (about five years) before
    ``date``, the EPS of its five most recent annual reports is regressed on
    ``year``; the slope divided by the absolute mean EPS is the raw factor,
    which is then winsorized and market-cap-weight standardized.

    Args:
        date: Reference date as a ``'YYYY-MM-DD'`` string.
        year: Array with a ``reshape`` method (e.g. a numpy array) holding
            the regressor values; it must line up with the five EPS values,
            which are ordered from the most recent annual report backwards.
        market_cap_on_current_day: Market capitalisation data forwarded to
            ``winsorization_and_market_cap_weighed_standardization``.

    Returns:
        The result of ``winsorization_and_market_cap_weighed_standardization``
        applied to the raw EGRO values.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    # Most recent annual report first; this order defines the EPS order below.
    annual_reports = (annual_report, annual_report_last_year,
                      annual_report_2_year_ago, annual_report_3_year_ago,
                      annual_report_4_year_ago)

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['EGRO'])

    def _fetch_eps(primary_query, fallback_query, quarter):
        """Fetch EPS for one quarter, falling back when values are missing."""
        eps_value = rqdatac.get_financials(primary_query, quarter, '1q')
        if eps_value.isnull().sum() == 0:
            return eps_value
        return rqdatac.get_financials(fallback_query, quarter, '1q')

    for stock in growth_qualified_stocks:
        # Some companies restate their reports several times after
        # publication, leaving EPS empty in the financial-indicator table
        # while it is still present in the income statement (e.g.
        # '601519.XSHG', 2014 annual report).  The financial-indicator table
        # is therefore the primary source and the income statement the
        # fallback.
        query_f = rqdatac.query(
            rqdatac.financials.financial_indicator.earnings_per_share).filter(
                rqdatac.financials.stockcode.in_([stock]))

        query_i = rqdatac.query(rqdatac.financials.income_statement.
                                basic_earnings_per_share).filter(
                                    rqdatac.financials.stockcode.in_([stock]))

        eps = pd.Series([
            _fetch_eps(query_f, query_i, report[stock])
            for report in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), eps)
        # .loc writes into the frame itself; chained ``factor['EGRO'][stock]``
        # assignment may write to a temporary copy.
        factor.loc[stock, 'EGRO'] = float(regression.coef_) / abs(eps.mean())

    earning_growth = winsorization_and_market_cap_weighed_standardization(
        factor['EGRO'], market_cap_on_current_day)

    return earning_growth
Пример #7
0
def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the sales-growth (SGRO) factor.

    For every stock listed more than 1825 days (about five years) before
    ``date``, sales per share (revenue / total shares) is computed for its
    five most recent annual reports and regressed on ``year``; the slope
    divided by the mean absolute sales per share is the raw factor, which is
    then winsorized and market-cap-weight standardized.

    Args:
        date: Reference date as a ``'YYYY-MM-DD'`` string.
        year: Array with a ``reshape`` method (e.g. a numpy array) holding
            the regressor values; it must line up with the five
            sales-per-share values, which are ordered from the most recent
            annual report backwards.
        market_cap_on_current_day: Market capitalisation data forwarded to
            ``winsorization_and_market_cap_weighed_standardization``.

    Returns:
        The result of ``winsorization_and_market_cap_weighed_standardization``
        applied to the raw SGRO values.
    """
    (_recent_report, annual_report, annual_report_last_year,
     annual_report_2_year_ago, annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    # Most recent annual report first; this order defines the value order.
    annual_reports = (annual_report, annual_report_last_year,
                      annual_report_2_year_ago, annual_report_3_year_ago,
                      annual_report_4_year_ago)

    growth_listed_date_threshold = (datetime.strptime(date, "%Y-%m-%d") -
                                    timedelta(days=1825)).strftime("%Y-%m-%d")
    growth_qualified_stocks = [
        i for i in annual_report.index.tolist()
        if rqdatac.instruments(i).listed_date < growth_listed_date_threshold
    ]

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])

    def _sales_per_share(stock, revenue_query, report_quarter):
        """Return revenue divided by total shares for one annual report."""
        sales = rqdatac.get_financials(revenue_query, report_quarter, '1q')

        # Trading date preceding Jan 1 of the following year, i.e. presumably
        # the last trading day of the report year -- confirm against
        # rqdatac.get_previous_trading_date.
        year_end = datetime.strptime(report_quarter[:4] + '-12-31',
                                     '%Y-%m-%d')
        latest_trading_date = str(
            rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))

        shares = rqdatac.get_shares(stock,
                                    start_date=latest_trading_date,
                                    end_date=latest_trading_date,
                                    fields='total')

        return sales.values / shares.values

    # Compute sales per share for each of the past five annual reports.
    for stock in growth_qualified_stocks:
        revenue_query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _sales_per_share(stock, revenue_query, report[stock])
            for report in annual_reports
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)
        # NOTE(review): normalises by the mean of absolute values, whereas
        # get_earnings_growth uses the absolute value of the mean -- confirm
        # which is intended.
        # .loc writes into the frame itself; chained ``factor['SGRO'][stock]``
        # assignment may write to a temporary copy.
        factor.loc[stock, 'SGRO'] = float(
            regression.coef_) / abs(sales_per_share).mean()

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)

    return sale_growth