def get_last_reported_values(financial_indicator, recent_report_type):
    """Fetch ``financial_indicator`` for each stock at its own latest report.

    Stocks are grouped by the report label they carry in
    ``recent_report_type``. One ``rqdatac.get_financials`` request is issued
    per distinct label and the per-label results are concatenated.

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        recent_report_type: pandas Series whose index holds stock codes and
            whose values are report labels (presumably strings such as
            ``'2017q3'`` -- confirm against the caller).

    Returns:
        The concatenation of the per-label results, in order of first
        appearance of each label. An empty float Series when
        ``recent_report_type`` is empty.
    """
    pieces = []
    # Loop over every report type that occurs, then merge the results.
    for report_type in recent_report_type.unique().tolist():
        stock_list = recent_report_type[
            recent_report_type == report_type].index.tolist()
        fetched = rqdatac.get_financials(
            rqdatac.query(financial_indicator).filter(
                rqdatac.financials.stockcode.in_(stock_list)),
            report_type)
        # A single-stock result is used as returned; a multi-stock result is
        # reduced to its first row before being merged.
        # NOTE(review): this relies on the return shape of
        # rqdatac.get_financials differing between the two cases -- confirm.
        pieces.append(fetched if len(stock_list) == 1 else fetched.iloc[0])

    if not pieces:
        return pd.Series(dtype='float64')
    # A single concat replaces repeated Series.append, which was removed in
    # pandas 2.0 and copied the accumulated data on every iteration.
    return pd.concat(pieces)
def get_ttm_sum(financial_indicator, recent_report_type):
    """Compute a trailing-twelve-month (TTM) sum of an indicator per stock.

    For a stock whose latest report is a q4 (annual) report, the annual value
    is used directly. For a q1/q2/q3 report the result is::

        last year's q4 value + latest report value
            - value of the same quarter one year earlier

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        recent_report_type: pandas Series mapping stock code (index) to the
            label of its latest report, formatted like ``'2017q3'``.

    Returns:
        pandas Series keyed by the row labels of the transposed
        ``rqdatac.get_financials`` result (presumably stock codes -- confirm).
        A row with no usable value yields NaN. A row with a single usable
        value yields that value.

    Raises:
        ValueError: If a report label does not end in ``q1``..``q4``. This is
            checked before any data is requested.
    """

    def _ttm_quarters(quarter):
        # Returns the three report labels a TTM figure is built from; slots
        # that are not needed are NaN so they can be dropped later.
        suffix = quarter[-2:]
        if suffix == "q4":
            # The latest report is annual: its value already is the TTM value.
            return [np.nan, np.nan, quarter]
        if suffix in ("q1", "q2", "q3"):
            last_year = str(int(quarter[:4]) - 1)
            return [last_year + suffix, last_year + "q4", quarter]
        raise ValueError(
            "unsupported report quarter {!r}: expected a label ending in "
            "'q1', 'q2', 'q3' or 'q4'".format(quarter))

    def _ttm_value(values):
        # (latest annual value + latest report value - same report last year)
        available = values.dropna().sort_index()
        count = len(available)
        if count == 0:
            return np.nan
        if count == 1:
            return available.iloc[0]
        return available.iloc[-2:].sum() - available.iloc[0]

    # Latest quarter reported by any stock.
    max_quarter = max(recent_report_type)

    # The report labels each stock needs. Built (and thereby validated)
    # before the remote request so a malformed label fails fast.
    quarters_by_stock = pd.DataFrame(
        recent_report_type.apply(_ttm_quarters).to_dict()).T
    needed = quarters_by_stock.unstack()
    needed.index = needed.index.droplevel(0)
    needed = pd.DataFrame(needed)
    needed['mask'] = 1

    # Indicator values for the 8 quarters ending at max_quarter.
    financial_data = rqdatac.get_financials(
        rqdatac.query(financial_indicator),
        quarter=max_quarter,
        interval="8q",
        country='cn').T

    # Boolean frame (rows: stocks, columns: quarters) marking needed cells.
    quarter_mask = (
        needed.dropna()
        .reset_index()
        .pivot(index='index', columns=0, values='mask')
        .reindex(columns=financial_data.columns)
        .astype(float)
        .fillna(0)
        .astype(bool))
    latest_data = financial_data.where(quarter_mask)

    return pd.Series({
        label: _ttm_value(row) for label, row in latest_data.iterrows()
    })
def get_financials(self, codes, years=10, type='y'):
    """Fetch diluted adjusted ROE history and split it per stock code.

    The request ends at the q4 report of the year before the one returned
    by ``public.getDate()`` and spans the interval ``str(years) + type``
    (``'10y'`` with the defaults).

    Args:
        codes: Iterable of stock codes used both to filter the query and to
            split the result.
        years: Count placed in front of ``type`` to form the interval string.
        type: Unit suffix of the interval string. The name shadows the
            builtin but is kept because it is part of the signature.

    Returns:
        dict mapping each code to ``res.minor_xs(code)``. Codes whose
        cross-section cannot be extracted are left out.
    """
    roe_query = query(
        fds.financial_indicator.adjusted_return_on_equity_diluted,
        fds.announce_date,
    ).filter(fds.stockcode.in_(codes))

    # presumably public.getDate() returns 'YYYY-MM-DD'; only the text before
    # the first '-' is used -- confirm.
    last_year = int(public.getDate().split("-")[0]) - 1
    end_quarter = str(last_year) + 'q4'
    interval = str(years) + type

    res = rq.get_financials(roe_query, end_quarter, interval=interval)

    by_code = {}
    for code in codes:
        try:
            by_code[code] = res.minor_xs(code)
        except Exception:
            # Best effort: skip codes that cannot be extracted. `Exception`
            # rather than a bare `except` so KeyboardInterrupt and SystemExit
            # still propagate.
            continue
    return by_code
def get_financials(query, quarter=None, interval='4q', expect_df=False):
    """Query financial data, hiding rows announced after the previous day.

    Args:
        query: Query object. It must expose ``column_descriptions`` and
            ``add_column``. An ``announce_date`` column is appended when the
            query does not already select one, and removed again from the
            result.
        quarter: Last quarter to return, formatted like ``'2012q3'`` (year
            1990..2050, quarter 1..4). ``None``, or a quarter later than the
            default quarter, is replaced by the default: the quarter before
            the one containing the previous calendar day.
        interval: Interval string forwarded to ``rqdatac.get_financials``.
        expect_df: Forwarded to ``rqdatac.get_financials``.

    Returns:
        * an empty ``DataFrame`` when rqdatac returns ``None``;
        * the ``Series`` unchanged when rqdatac returns a Series;
        * the filtered ``DataFrame`` when rqdatac returns a DataFrame;
        * otherwise a filtered ``Panel``, reduced to its single item when
          ``announce_date`` was added by this function and only one item
          remains. ``pd.Panel`` was removed in pandas 0.25, so this branch
          only works with older pandas.

    Raises:
        RQInvalidArgument: If ``quarter`` is not ``None`` and is not a
            well-formed quarter string.
    """
    if quarter is not None:
        valid = (
            isinstance(quarter, six.string_types)
            # Guard the index below: a 0- or 1-character string used to
            # raise IndexError instead of RQInvalidArgument.
            and len(quarter) >= 2
            and quarter[-2] == 'q'
        )
        if valid:
            try:
                valid = (1990 <= int(quarter[:-2]) <= 2050
                         and 1 <= int(quarter[-1]) <= 4)
            except ValueError:
                valid = False
        if not valid:
            raise RQInvalidArgument(
                _(u"function {}: invalid {} argument, quarter should be in form of '2012q3', "
                  u"got {} (type: {})").format('get_financials', 'quarter',
                                               quarter, type(quarter)))

    env = Environment.get_instance()
    # Take yesterday's data as default
    dt = env.calendar_dt.date() - datetime.timedelta(days=1)
    int_date = dt.year * 10000 + dt.month * 100 + dt.day

    # Months 1-3 map to q4 of the previous year, 4-6 to q1, 7-9 to q2 and
    # 10-12 to q3.
    q = (dt.month - 4) // 3 + 1
    y = dt.year
    if q <= 0:
        y -= 1
        q = 4
    default_quarter = str(y) + 'q' + str(q)
    if quarter is None or quarter > default_quarter:
        quarter = default_quarter

    include_date = any(
        column['name'] == 'announce_date'
        for column in query.column_descriptions)
    if not include_date:
        # Needed for filtering; dropped from the result further down.
        query = query.add_column(rqdatac.fundamentals.announce_date)

    result = rqdatac.get_financials(
        query, quarter, interval, expect_df=expect_df)
    if result is None:
        return pd.DataFrame()
    if isinstance(result, pd.Series):
        return result

    if isinstance(result, pd.DataFrame):
        result = result[(result['announce_date'] <= int_date)
                        | pd.isnull(result['announce_date'])]
        if not include_date:
            del result['announce_date']
        return result

    # Panel result: filter every order_book_id slice, then rebuild the panel.
    filtered = {}
    for order_book_id in result.minor_axis:
        df = result.minor_xs(order_book_id)
        # `<=` to match the DataFrame branch above: int_date is already the
        # previous day, so reports announced on it are known. This branch
        # previously used `<` and dropped them.
        filtered[order_book_id] = df[(df.announce_date <= int_date)
                                     | pd.isnull(df.announce_date)]
    pl = pd.Panel.from_dict(filtered, orient='minor')
    if not include_date:
        pl.drop('announce_date', axis=0, inplace=True)
        if len(pl.items) == 1:
            pl = pl[pl.items[0]]
    return pl
def recent_five_annual_values(financial_indicator, date, recent_report_type):
    """Return each stock's five most recent annual values of an indicator.

    Stocks whose latest report is last year's annual report (``<year-1>q4``)
    use the annual reports of ``year-1`` .. ``year-5``. All other stocks use
    ``year-2`` .. ``year-6``, where ``year`` is the year of ``date``.

    Args:
        financial_indicator: Field handed to ``rqdatac.query``.
        date: Date string formatted ``'%Y-%m-%d'``.
        recent_report_type: pandas Series mapping stock code (index) to the
            label of its latest report, e.g. ``'2017q4'``.

    Returns:
        DataFrame with columns ``first`` .. ``fifth`` (newest to oldest).
        Rows for stocks with a published annual report come first, followed
        by the remaining stocks in ``Index.difference`` order.
    """
    previous_year = datetime.strptime(date, '%Y-%m-%d').year - 1
    latest_annual = str(previous_year) + 'q4'

    def _annual_labels(newest_year):
        # Five consecutive annual-report labels, newest first.
        return [str(newest_year - offset) + 'q4' for offset in range(5)]

    # Stocks whose most recent report is last year's annual report.
    published_stocks = recent_report_type[
        recent_report_type == latest_annual].index.tolist()
    # Complement of the above. Index.difference gives a deterministic order,
    # whereas list(set - set) varied between runs.
    not_published_stocks = recent_report_type.index.difference(
        published_stocks).tolist()

    # Annual reports to use when last year's report is already published ...
    published_quarters = _annual_labels(previous_year)
    # ... and when it is not yet published.
    not_published_quarters = _annual_labels(previous_year - 1)

    # 25 quarters ending at last year's q4 reach back to the q4 report six
    # years before it, which covers both label lists.
    reports = rqdatac.get_financials(
        rqdatac.query(financial_indicator), latest_annual, '25q').T

    published_values = reports[published_quarters].loc[published_stocks]
    not_published_values = reports[not_published_quarters].loc[
        not_published_stocks]

    # Rename the columns so that the two frames can be stacked.
    ordinal_columns = ['first', 'second', 'third', 'fourth', 'fifth']
    published_values.columns = list(ordinal_columns)
    not_published_values.columns = list(ordinal_columns)

    return pd.concat([published_values, not_published_values], axis=0)
def get_earnings_growth(date, year, market_cap_on_current_day):
    """Compute the earnings-growth factor (column ``EGRO``).

    For every stock whose ``listed_date`` is more than 1825 days before
    ``date``, the EPS values of its five most recent annual reports are
    regressed on ``year``. The slope divided by the absolute mean EPS is the
    raw factor value. The raw values are then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Date string formatted ``'%Y-%m-%d'``.
        year: Regressor values, reshaped to a single column before fitting
            (presumably a numpy array with one entry per annual report,
            ordered newest first like the EPS values -- confirm).
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the ``EGRO`` column.
    """
    (_, annual_report, annual_report_last_year, annual_report_2_year_ago,
     annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    reports_newest_first = (
        annual_report,
        annual_report_last_year,
        annual_report_2_year_ago,
        annual_report_3_year_ago,
        annual_report_4_year_ago,
    )

    growth_listed_date_threshold = (
        datetime.strptime(date, "%Y-%m-%d")
        - timedelta(days=1825)).strftime("%Y-%m-%d")
    # NOTE(review): compares listed_date with a 'YYYY-MM-DD' string, so
    # listed_date is presumably a string in the same format -- confirm.
    growth_qualified_stocks = [
        stock for stock in annual_report.index.tolist()
        if rqdatac.instruments(stock).listed_date
        < growth_listed_date_threshold
    ]

    def _fetch_eps(primary_query, fallback_query, quarter):
        # Some companies restate a report several times after publication and
        # the restated financial-indicator EPS ends up empty (for example
        # '601519.XSHG', 2014 annual report) while the income statement still
        # carries it. The financial-indicator table is therefore preferred
        # and the income statement is the fallback.
        # The primary value is fetched once and reused. It used to be
        # requested a second time whenever it was non-null.
        primary = rqdatac.get_financials(primary_query, quarter, '1q')
        if primary.isnull().sum() == 0:
            return primary
        return rqdatac.get_financials(fallback_query, quarter, '1q')

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['EGRO'])
    for stock in growth_qualified_stocks:
        query_f = rqdatac.query(
            rqdatac.financials.financial_indicator.earnings_per_share).filter(
                rqdatac.financials.stockcode.in_([stock]))
        query_i = rqdatac.query(
            rqdatac.financials.income_statement.basic_earnings_per_share
        ).filter(rqdatac.financials.stockcode.in_([stock]))

        eps = pd.Series([
            _fetch_eps(query_f, query_i, reports[stock])
            for reports in reports_newest_first
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), eps)
        # .loc writes into `factor` itself. The former chained
        # factor['EGRO'][stock] = ... is not guaranteed to.
        factor.loc[stock, 'EGRO'] = (
            float(regression.coef_) / abs(eps.mean()))

    earning_growth = winsorization_and_market_cap_weighed_standardization(
        factor['EGRO'], market_cap_on_current_day)
    return earning_growth
def get_sales_growth(date, year, market_cap_on_current_day):
    """Compute the sales-growth factor (column ``SGRO``).

    For every stock whose ``listed_date`` is more than 1825 days before
    ``date``, sales per share of its five most recent annual reports are
    regressed on ``year``. The slope divided by the mean absolute sales per
    share is the raw factor value. The raw values are then passed through
    ``winsorization_and_market_cap_weighed_standardization``.

    Args:
        date: Date string formatted ``'%Y-%m-%d'``.
        year: Regressor values, reshaped to a single column before fitting
            (presumably a numpy array with one entry per annual report,
            ordered newest first like the per-share values -- confirm).
        market_cap_on_current_day: Forwarded unchanged to the
            standardization helper.

    Returns:
        Whatever ``winsorization_and_market_cap_weighed_standardization``
        returns for the ``SGRO`` column.
    """
    (_, annual_report, annual_report_last_year, annual_report_2_year_ago,
     annual_report_3_year_ago,
     annual_report_4_year_ago) = last_five_annual_report(date)
    reports_newest_first = (
        annual_report,
        annual_report_last_year,
        annual_report_2_year_ago,
        annual_report_3_year_ago,
        annual_report_4_year_ago,
    )

    growth_listed_date_threshold = (
        datetime.strptime(date, "%Y-%m-%d")
        - timedelta(days=1825)).strftime("%Y-%m-%d")
    # NOTE(review): compares listed_date with a 'YYYY-MM-DD' string, so
    # listed_date is presumably a string in the same format -- confirm.
    growth_qualified_stocks = [
        stock for stock in annual_report.index.tolist()
        if rqdatac.instruments(stock).listed_date
        < growth_listed_date_threshold
    ]

    def _sales_per_share(stock, revenue_query, report_quarter):
        # Revenue of one annual report divided by the total share count on
        # the trading date that precedes January 1st of the following year.
        sales = rqdatac.get_financials(revenue_query, report_quarter, '1q')
        year_end = datetime.strptime(
            report_quarter[:4] + '-12-31', '%Y-%m-%d')
        trading_date = str(
            rqdatac.get_previous_trading_date(year_end + timedelta(days=1)))
        shares = rqdatac.get_shares(
            stock,
            start_date=trading_date,
            end_date=trading_date,
            fields='total')
        return sales.values / shares.values

    factor = pd.DataFrame(index=growth_qualified_stocks, columns=['SGRO'])
    # Sales per share for each of the past five annual reports, per stock.
    for stock in growth_qualified_stocks:
        revenue_query = rqdatac.query(
            rqdatac.financials.income_statement.revenue).filter(
                rqdatac.financials.stockcode.in_([stock]))

        sales_per_share = pd.Series([
            _sales_per_share(stock, revenue_query, reports[stock])
            for reports in reports_newest_first
        ]).fillna(value=0)

        regression = linear_model.LinearRegression()
        regression.fit(year.reshape(-1, 1), sales_per_share)
        # .loc writes into `factor` itself. The former chained
        # factor['SGRO'][stock] = ... is not guaranteed to.
        factor.loc[stock, 'SGRO'] = (
            float(regression.coef_) / abs(sales_per_share).mean())

    sale_growth = winsorization_and_market_cap_weighed_standardization(
        factor['SGRO'], market_cap_on_current_day)
    return sale_growth