def save_sector(only_old_data: bool):
    """Build and save the sector data set.

    Keeps companies with a market cap above 10 billion KRW and a known KRX
    sector, one-hot encodes the sector, and hands the result to ``save_data``
    under the ``SECTOR`` data name with the sector columns as dummy columns.

    :param only_old_data: when True save the historical (training) set,
        otherwise save only the most recent month.
    """
    # (The previous revision also built a ``columns`` list here; it was never
    # used, so it has been removed.)
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC,
        GP_S, SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M,
        LIQ_RATIO, EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]

    portfolio = Portfolio()
    # Minimum market cap: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]
    # Drop rows without a KRX_SECTOR value
    portfolio.dropna(subset=[KRX_SECTOR], inplace=True)
    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)

    # Encode each sector name as an integer label.
    label_encoder = LabelEncoder()
    labeled_sector = label_encoder.fit_transform(portfolio[KRX_SECTOR])
    krx_sectors = label_encoder.classes_

    # One-hot encode the integer labels.
    # NOTE(review): ``sparse`` was renamed to ``sparse_output`` in
    # scikit-learn 1.2 — confirm the pinned scikit-learn version.
    one_hot_encoder = OneHotEncoder(sparse=False)
    one_hot_encoded_sector = one_hot_encoder.fit_transform(
        labeled_sector.reshape(len(labeled_sector), 1))

    # Attach the one-hot sector columns to the portfolio; both frames share
    # the same 0..n-1 index after reset_index above, so they align row-wise.
    df_one_hot_encoded_sector = pd.DataFrame(
        one_hot_encoded_sector, columns=krx_sectors).reset_index(drop=True)
    portfolio[krx_sectors] = df_one_hot_encoded_sector
    krx_sectors = list(krx_sectors)
    save_data(only_old_data, portfolio, SECTOR, rolling_columns, krx_sectors)
def plot_rank_ic(portfolio: Portfolio, factor: str, rolling: int = 6, title: str = '') -> pd.DataFrame:
    """Plot the rank information coefficient (rank IC) of ``factor`` over time.

    For every month the Spearman rank correlation between the factor ranks and
    the forward-return ranks is computed, a ``rolling``-month moving average is
    added, and both series are plotted.

    NOTE(review): the forward return ranked here is ``RET_3`` although the rank
    column is named ``ret_1_rank`` — confirm which horizon is intended.

    :param portfolio: portfolio holding the factor and return columns.
    :param factor: name of the factor column to evaluate.
    :param rolling: window (in months) of the moving average.
    :param title: title of the resulting plot.
    :return: DataFrame indexed by date with ``rank_ic`` and its rolling mean.
    """
    factor_rank_col = factor + '_rank'
    return_rank_col = 'ret_1_rank'

    # Rank the factor and the forward return within each period.
    portfolio = portfolio.periodic_rank(min_rank=1, max_rank=10000, factor=factor, drop_rank=False)
    portfolio = portfolio.rename(index=str, columns={"rank": factor_rank_col})
    portfolio = portfolio.periodic_rank(min_rank=1, max_rank=10000, factor=RET_3, drop_rank=False)
    portfolio = portfolio.rename(index=str, columns={"rank": return_rank_col})

    # Spearman rank correlation: 1 - 6 * sum(d^2) / (n * (n^2 - 1))
    def _spearman(group):
        squared_diff = ((group[factor_rank_col] - group[return_rank_col]) ** 2).sum()
        n = len(group)
        return 1 - (6 * squared_diff) / (n * (n ** 2 - 1))

    rank_ic = pd.DataFrame(portfolio.groupby(by=[DATE]).apply(_spearman), columns=['rank_ic'])

    rolling_col = 'rolling_{}'.format(rolling)
    rank_ic[rolling_col] = rank_ic['rank_ic'].rolling(window=rolling).mean()
    rank_ic = rank_ic.dropna(subset=[rolling_col])

    rank_ic.plot()
    plt.title(title)
    plt.axhline(y=0, color='black')
    plt.ylabel('Rank IC')
    plt.xlabel('Date')
    plt.show()
    return rank_ic
def save_bollinger(only_old_data: bool):
    """Build and save the Bollinger-band data set.

    Computes the lower Bollinger band (20-period rolling mean minus two
    rolling standard deviations) of the closing price per company, keeps the
    (date, code) pairs whose close fell below the band, and saves the data
    set under the ``BOLLINGER`` name via ``save_data``.

    :param only_old_data: when True save the historical (training) set,
        otherwise save only the most recent month.
    """
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC,
        GP_S, SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M,
        LIQ_RATIO, EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    portfolio = Portfolio()
    # Minimum market cap: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]

    # Lower Bollinger band: 20-period rolling mean - 2 * rolling std.
    # Sorting by (CODE, DATE) and resetting the index lets the grouped
    # rolling results line up row-for-row after reset_index(drop=True).
    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)
    grouped_endp = portfolio.groupby(CODE)[ENDP]
    portfolio['mean'] = grouped_endp.rolling(20).mean().reset_index(drop=True)
    portfolio['std'] = grouped_endp.rolling(20).std().reset_index(drop=True)
    portfolio[BOLLINGER] = portfolio['mean'] - 2 * portfolio['std']

    # Keep only (date, code) pairs whose close is below the lower band.
    bollingers = portfolio.loc[portfolio[ENDP] < portfolio[BOLLINGER], [DATE, CODE]]
    save_data(only_old_data, portfolio, BOLLINGER, rolling_columns, filtering_dataframe=bollingers)
def save_all(only_old_data: bool):
    """Build and save the full (unfiltered) data set under ``ALL``.

    Only companies with a market cap above 10 billion KRW are kept.

    :param only_old_data: when True save the historical (training) set,
        otherwise save only the most recent month.
    """
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC,
        GP_S, SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M,
        LIQ_RATIO, EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    # Minimum market cap: 10 billion KRW
    universe = Portfolio()
    universe = universe.loc[universe[MKTCAP] > 10000000000, :]
    save_data(only_old_data, universe, ALL, rolling_columns)
def test_show_plot(self):
    """Smoke-test ``Portfolio.show_plot`` with default and custom options."""
    portfolio = Portfolio()
    # Restrict to dates from 2011-05-31 onwards.
    portfolio = portfolio.loc[portfolio[DATE] >= datetime(year=2011, month=5, day=31), :]
    # Default rendering.
    portfolio.show_plot()
    # Non-cumulative, weighted rendering without the benchmark.
    portfolio.show_plot(cumulative=False, weighted=True, title='title', show_benchmark=False)
def build_factor_groups(named_factor_lists):
    """Build every non-empty combination of factor concepts.

    :param named_factor_lists: ([(str, [str])]) ordered (name, factors) pairs.
    :return: (dict) maps 'name1_name2_...' (names joined with '_', in the
        given order) to the concatenated factor lists of the included
        concepts. All 2**k - 1 non-empty subsets are produced, with the
        fully-included combination first.
    """
    from itertools import product

    # Each concept is either included as-is or excluded entirely.
    options = [((name, factors), ('', [])) for name, factors in named_factor_lists]
    factor_groups = {}
    for combination in product(*options):
        names = [name for name, _ in combination if name]
        if not names:
            # Skip the empty subset.
            continue
        factors = [factor for _, factor_list in combination for factor in factor_list]
        factor_groups['_'.join(names)] = factors
    return factor_groups


def save_concepts(old_data: bool):
    """Save one data set per combination of factor concepts.

    Builds every non-empty combination of the value/size/momentum/quality/
    volatility factor groups and saves each via ``save_data`` in a process
    pool. (The previous revision built the combinations with five nested
    loops and ``list.extend`` on strings; the output names and order are
    unchanged.)

    :param old_data: when True save the historical (training) sets,
        otherwise save only the most recent month.
    """
    log_mktcap = 'log_mktcap'
    portfolio = Portfolio()
    portfolio[log_mktcap] = np.log(portfolio[MKTCAP])

    factor_groups = build_factor_groups([
        ('value', [E_P, B_P, S_P, C_P, DIVP]),
        ('size', [log_mktcap]),
        ('momentum', [MOM1, MOM12]),
        ('quality', [ROA, ROE, ROIC, S_A, DEBT_RATIO, EQUITY_RATIO, LIQ_RATIO]),
        ('volatility', [VOL_1D]),
    ])

    # Save the data sets in parallel; every task shares the same portfolio.
    with Pool(os.cpu_count()) as p:
        results = [
            p.apply_async(save_data, [old_data, portfolio, name, factors])
            for name, factors in factor_groups.items()
        ]
        for result in results:
            result.wait()
        p.close()
        p.join()
def save_macro(only_old_data: bool):
    """Build and save the data set extended with macro-economic columns.

    In addition to the firm-level rolling columns, interest-rate spreads,
    exchange rates and (log) index levels are included. Saved under the
    ``MACRO`` name via ``save_data``.

    :param only_old_data: when True save the historical (training) set,
        otherwise save only the most recent month.
    """
    rolling_columns = [
        # Firm-level factors
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC,
        GP_S, SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M,
        LIQ_RATIO, EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO,
        # Macro-economic factors
        TERM_SPREAD_KOR, TERM_SPREAD_US, CREDIT_SPREAD_KOR, LOG_USD2KRW,
        LOG_CHY2KRW, LOG_EURO2KRW, TED_SPREAD, LOG_NYSE, LOG_NASDAQ, LOG_OIL
    ]
    # Minimum market cap: 10 billion KRW
    universe = Portfolio()
    universe = universe.loc[universe[MKTCAP] > 10000000000, :]
    save_data(only_old_data, universe, MACRO, rolling_columns)
def _filter_by_date_code(data_set, filtering_dataframe):
    """Keep only rows of ``data_set`` whose (DATE, CODE) pair appears in
    ``filtering_dataframe``; pass ``data_set`` through unchanged when no
    usable (non-empty DataFrame) filter is given.
    """
    if isinstance(filtering_dataframe, pd.DataFrame) and not filtering_dataframe.empty:
        data_set = pd.merge(data_set, filtering_dataframe[[DATE, CODE]], on=[DATE, CODE])
    return data_set


def save_data(old_data: bool, portfolio: Portfolio, data_name: str, rolling_columns: list,
              dummy_columns: list = None, filtering_dataframe=None):
    """Build a training or inference data set from ``portfolio`` and save it as HDF5.

    (The filtering and saving logic was previously duplicated in both
    branches; it is now shared.)

    :param old_data: when True save the historical set (only rows with a
        known RET_1) to 'data/<data_name>.h5'; otherwise save only the most
        recent month (without labels) to 'data/<data_name>_recent.h5'.
    :param portfolio: source portfolio.
    :param data_name: base name of the output file.
    :param rolling_columns: columns passed to ``get_data_set`` for rolling.
    :param dummy_columns: optional dummy (e.g. one-hot) columns to carry through.
    :param filtering_dataframe: optional DataFrame of (DATE, CODE) pairs; when
        given and non-empty, only matching rows are kept.
    """
    print("Start saving {}...".format(data_name))
    portfolio = portfolio.sort_values(by=[CODE, DATE]).reset_index(drop=True)
    if old_data:
        # Historical set: drop the last month, where RET_1 is not yet known.
        old_portfolio = portfolio.loc[~pd.isna(portfolio[RET_1]), :]
        data_set = get_data_set(old_portfolio, rolling_columns, dummy_columns)
        file_name = 'data/{}.h5'.format(data_name)
    else:
        # Inference set: no labels needed, keep only the most recent month.
        data_set = get_data_set(portfolio, rolling_columns, dummy_columns, return_y=False)
        last_month = np.sort(data_set[DATE].unique())[-1]
        data_set = data_set.loc[data_set[DATE] == last_month, :]
        file_name = 'data/{}_recent.h5'.format(data_name)
    data_set = _filter_by_date_code(data_set, filtering_dataframe)
    data_set.reset_index(drop=True).to_dataframe().to_hdf(
        file_name, key='df', format='table', mode='w')
def save_filter(only_old_data: bool):
    """Build and save a value-screened data set under ``FILTER``.

    Companies must have a market cap above 10 billion KRW and pass the
    PER/PBR/PCR/PSR screens below
    (thresholds from http://pluspower.tistory.com/9).

    :param only_old_data: when True save the historical (training) set,
        otherwise save only the most recent month.
    """
    rolling_columns = [
        E_P, B_P, S_P, C_P, OP_P, GP_P, ROA, ROE, QROA, QROE, GP_A, ROIC,
        GP_S, SALESQOQ, GPQOQ, ROAQOQ, MOM6, MOM12, BETA_1D, VOL_5M,
        LIQ_RATIO, EQUITY_RATIO, DEBT_RATIO, FOREIGN_OWNERSHIP_RATIO
    ]
    portfolio = Portfolio()
    # Minimum market cap: 10 billion KRW
    portfolio = portfolio.loc[portfolio[MKTCAP] > 10000000000, :]
    # 2 < PER < 10
    portfolio = portfolio.loc[(portfolio[PER] < 10) & (portfolio[PER] > 2)]
    # 0.2 < PBR < 1.0
    portfolio = portfolio.loc[(portfolio[PBR] < 1) & (portfolio[PBR] > 0.2)]
    # 2 < PCR < 8
    portfolio = portfolio.loc[(portfolio[PCR] < 8) & (portfolio[PCR] > 2)]
    # 0 < PSR < 0.8 — the lower bound was documented but not enforced before.
    portfolio = portfolio.loc[(portfolio[PSR] < 0.8) & (portfolio[PSR] > 0)]
    save_data(only_old_data, portfolio, FILTER, rolling_columns)
import pandas as pd
import numpy as np
from ksif import Portfolio, columns
from ksif.core.columns import CODE, DATE, B_P, E_P, MOM12_1, GP_A, VOL_3M, RET_1
from tabulate import tabulate

# Column-name constants for the concept scores built below.
INFORMATION_RATIO = 'information_ratio'
ALL = 'all'
VALUE_MOMENTUM = 'value+momentum'
VOLATILITY = 'volatility'
QUALITY = 'quality'
MOMENTUM = 'momentum'
VALUE = 'value'
# Prefix of the columns produced by periodic_standardize; e.g. B_P is
# standardized into 'std_' + B_P (presumably 'std_<factor>' — see ksif).
std = 'std_'

pf = Portfolio()
# Universe: market cap >= 50 billion KRW and no missing values in any of the
# factors used below (forward return, value, momentum, quality, volatility).
universe = pf.loc[(pf[columns.MKTCAP] >= 50000000000) &
                  (~np.isnan(pf[RET_1])) &
                  (~np.isnan(pf[B_P])) &
                  (~np.isnan(pf[E_P])) &
                  (~np.isnan(pf[MOM12_1])) &
                  (~np.isnan(pf[GP_A])) &
                  (~np.isnan(pf[VOL_3M])), :]
# Cross-sectionally standardize each factor per period, then build the
# composite concept scores from the standardized columns.
universe = universe.periodic_standardize(factor=B_P)
universe = universe.periodic_standardize(factor=E_P)
# Value: average of standardized book-to-price and earnings-to-price.
universe[VALUE] = (universe[std + B_P] + universe[std + E_P]) / 2
universe = universe.periodic_standardize(factor=MOM12_1)
universe[MOMENTUM] = universe[std + MOM12_1]
universe = universe.periodic_standardize(factor=GP_A)
universe[QUALITY] = universe[std + GP_A]
universe = universe.periodic_standardize(factor=VOL_3M)
universe[VOLATILITY] = universe[std + VOL_3M]
# Combined value + momentum score.
universe[VALUE_MOMENTUM] = universe[VALUE] + universe[MOMENTUM]
# Scaler created at import time; shared by the script body below.
scaler = MinMaxScaler()

if __name__ == '__main__':
    # Columns kept in the final data set.
    columns = [
        DATE, CODE, RET_1, B_P, E_P, DIVP, S_P, C_P, ROE, ROA, ROIC, S_A,
        LIQ_RATIO, EQUITY_RATIO, ASSETSYOY, BETA_3M, MKTCAP, MOM1, MOM12,
        VOL_3M, TRADING_VOLUME_RATIO
    ]
    # Factor columns to be expanded into lagged (t .. t-7) features.
    rolling_columns = [
        B_P, E_P, DIVP, S_P, C_P, ROE, ROA, ROIC, S_A, LIQ_RATIO,
        EQUITY_RATIO, ASSETSYOY, BETA_3M, MKTCAP, MOM1, MOM12, VOL_3M,
        TRADING_VOLUME_RATIO
    ]
    pf = Portfolio()
    # Drop rows whose forward return RET_1 is not yet known (the last month).
    pf = pf.loc[~pd.isna(pf[RET_1]), :]
    months = sorted(pf[DATE].unique())
    result_columns = [DATE, CODE, RET_1]
    rolled_columns = []
    all_set = pf.reset_index(drop=True)
    for column in rolling_columns:
        # Lagged column names: '<factor>_t', '<factor>_t-1', ... '<factor>_t-7'.
        # NOTE(review): the loop body appears truncated in this chunk — the
        # names are built here but their use is not visible.
        t_0 = column + '_t'
        t_1 = column + '_t-1'
        t_2 = column + '_t-2'
        t_3 = column + '_t-3'
        t_4 = column + '_t-4'
        t_5 = column + '_t-5'
        t_6 = column + '_t-6'
        t_7 = column + '_t-7'
def compare_ensemble(methods, models, quantiles, start_number: int = 0, end_number: int = 9, step: int = 1,
                     to_csv: bool = True, show_plot: bool = False):
    """Compare ensemble back-tests across methods, models and quantiles.

    For every (method, quantile, model) combination this runs ``get_ensemble``
    and collects: the final-ensemble performance metrics, the Spearman rank
    correlation of each metric against the ensemble size, the prediction
    accuracies, and the average exchange/size composition of the largest
    ensemble portfolio.

    :param methods: iterable of ensemble method names (forwarded to ``get_ensemble``).
    :param models: iterable of model names to evaluate.
    :param quantiles: iterable of quantile counts to evaluate.
    :param start_number: first model number of the ensemble.
    :param end_number: last model number of the ensemble.
    :param step: ensemble size increment.
    :param to_csv: when True also write 'summary/comparison_result.csv'.
    :param show_plot: forwarded to ``get_ensemble``.
    :return: (DataFrame) one row per evaluated combination.
    """
    # Accumulators: one entry per evaluated (method, quantile, model).
    file_names = []
    CAGRs = []
    GAGR_rank_correlations = []  # NOTE(review): 'GAGR' looks like a typo for 'CAGR'.
    CAGR_rank_p_values = []
    IRs = []
    IR_rank_correlations = []
    IR_rank_p_values = []
    SRs = []
    SR_rank_correlations = []
    SR_rank_p_values = []
    MDDs = []
    alphas = []
    alpha_rank_correlations = []
    alpha_rank_p_values = []
    betas = []
    rigid_accuracies = []
    decile_accuracies = []
    quarter_accuracies = []
    half_accuracies = []
    kospi_larges = []
    kospi_middles = []
    kospi_smalls = []
    kosdaq_larges = []
    kosdaq_middles = []
    kosdaq_smalls = []

    # Classify every firm into exchange/size buckets by market-cap rank
    # within its exchange at each date.
    firms = Portfolio(include_holding=True, include_finance=True, include_managed=True,
                      include_suspended=True).loc[:, [DATE, CODE, MKTCAP, EXCHANGE]]
    firms[DATE] = pd.to_datetime(firms[DATE])
    firms[RANK] = firms[[DATE, EXCHANGE, MKTCAP]].groupby([DATE, EXCHANGE]).rank(ascending=False)
    # '유가증권시장' is the KOSPI market; '코스닥' is KOSDAQ.
    # Large: top 100 by market cap, middle: 101-300, small: the rest.
    firms[KOSPI_LARGE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (row[RANK] <= 100) else 0, axis=1)
    firms[KOSPI_MIDDLE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (100 < row[RANK] <= 300) else 0, axis=1)
    firms[KOSPI_SMALL] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '유가증권시장') and (300 < row[RANK]) else 0, axis=1)
    firms[KOSDAQ_LARGE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (row[RANK] <= 100) else 0, axis=1)
    firms[KOSDAQ_MIDDLE] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (100 < row[RANK] <= 300) else 0, axis=1)
    firms[KOSDAQ_SMALL] = firms.apply(
        lambda row: 1 if (row[EXCHANGE] == '코스닥') and (300 < row[RANK]) else 0, axis=1)
    firms = firms.loc[
        :, [DATE, CODE, KOSPI_LARGE, KOSPI_MIDDLE, KOSPI_SMALL, KOSDAQ_LARGE, KOSDAQ_MIDDLE, KOSDAQ_SMALL]
    ]

    for method in methods:
        for quantile in quantiles:
            for model in tqdm(models):
                ensemble_summary, ensemble_portfolios = get_ensemble(
                    method, model_name=model, start_number=start_number, end_number=end_number,
                    step=step, quantile=quantile, show_plot=show_plot
                )
                # get_ensemble returns (None, None) when an ensemble portfolio
                # is empty — skip that combination entirely.
                if ensemble_summary is None and ensemble_portfolios is None:
                    continue

                # Per-date composition of the largest (last) ensemble portfolio.
                ensemble_portfolio = pd.merge(ensemble_portfolios[-1], firms, on=[DATE, CODE])
                ensemble_portfolio_count = ensemble_portfolio[[DATE, CODE]].groupby(DATE).count()
                ensemble_portfolio_count.rename(columns={CODE: COUNT}, inplace=True)
                ensemble_portfolio_sum = ensemble_portfolio[[
                    DATE, KOSPI_LARGE, KOSPI_MIDDLE, KOSPI_SMALL, KOSDAQ_LARGE, KOSDAQ_MIDDLE, KOSDAQ_SMALL
                ]].groupby(DATE).sum()
                ensemble_portfolio_ratio = pd.merge(ensemble_portfolio_sum, ensemble_portfolio_count, on=DATE)
                # Convert the per-date bucket counts into ratios of the portfolio.
                ensemble_portfolio_ratio[KOSPI_LARGE] \
                    = ensemble_portfolio_ratio[KOSPI_LARGE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSPI_MIDDLE] \
                    = ensemble_portfolio_ratio[KOSPI_MIDDLE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSPI_SMALL] \
                    = ensemble_portfolio_ratio[KOSPI_SMALL] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_LARGE] \
                    = ensemble_portfolio_ratio[KOSDAQ_LARGE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_MIDDLE] \
                    = ensemble_portfolio_ratio[KOSDAQ_MIDDLE] / ensemble_portfolio_ratio[COUNT]
                ensemble_portfolio_ratio[KOSDAQ_SMALL] \
                    = ensemble_portfolio_ratio[KOSDAQ_SMALL] / ensemble_portfolio_ratio[COUNT]

                file_names.append(_get_file_name(method, model, quantile))
                # For each metric: keep the value of the largest ensemble and
                # the Spearman rank correlation of the metric against the
                # ensemble size (the summary index).
                CAGRs.append(ensemble_summary[CAGR].values[-1])
                CAGR_rankIC = spearmanr(ensemble_summary[CAGR].values, ensemble_summary[CAGR].index)
                GAGR_rank_correlations.append(CAGR_rankIC[0])
                CAGR_rank_p_values.append(CAGR_rankIC[1])
                IRs.append(ensemble_summary[IR].values[-1])
                IR_rankIC = spearmanr(ensemble_summary[IR].values, ensemble_summary[IR].index)
                IR_rank_correlations.append(IR_rankIC[0])
                IR_rank_p_values.append(IR_rankIC[1])
                SRs.append(ensemble_summary[SR].values[-1])
                SR_rankIC = spearmanr(ensemble_summary[SR].values, ensemble_summary[SR].index)
                SR_rank_correlations.append(SR_rankIC[0])
                SR_rank_p_values.append(SR_rankIC[1])
                MDDs.append(ensemble_summary[MDD].values[-1])
                alphas.append(ensemble_summary[FAMA_FRENCH_ALPHA].values[-1])
                alpha_rankIC = spearmanr(ensemble_summary[FAMA_FRENCH_ALPHA].values,
                                         ensemble_summary[FAMA_FRENCH_ALPHA].index)
                alpha_rank_correlations.append(alpha_rankIC[0])
                alpha_rank_p_values.append(alpha_rankIC[1])
                betas.append(ensemble_summary[FAMA_FRENCH_BETA].values[-1])
                rigid_accuracies.append(ensemble_summary[RIGID_ACCURACY].values[-1])
                decile_accuracies.append(ensemble_summary[DECILE_ACCURACY].values[-1])
                quarter_accuracies.append(ensemble_summary[QUARTER_ACCURACY].values[-1])
                half_accuracies.append(ensemble_summary[HALF_ACCURACY].values[-1])
                # Average the composition ratios over all dates.
                kospi_larges.append(ensemble_portfolio_ratio[KOSPI_LARGE].mean())
                kospi_middles.append(ensemble_portfolio_ratio[KOSPI_MIDDLE].mean())
                kospi_smalls.append(ensemble_portfolio_ratio[KOSPI_SMALL].mean())
                kosdaq_larges.append(ensemble_portfolio_ratio[KOSDAQ_LARGE].mean())
                kosdaq_middles.append(ensemble_portfolio_ratio[KOSDAQ_MIDDLE].mean())
                kosdaq_smalls.append(ensemble_portfolio_ratio[KOSDAQ_SMALL].mean())

    comparison_result = pd.DataFrame(data={
        'Model': file_names,
        'CAGR': CAGRs,
        'CAGR RC': GAGR_rank_correlations,
        'CAGR RC p-value': CAGR_rank_p_values,
        'IR': IRs,
        'IR RC': IR_rank_correlations,
        'IR RC p-value': IR_rank_p_values,
        'SR': SRs,
        'SR RC': SR_rank_correlations,
        'SR RC p-value': SR_rank_p_values,
        'FF alpha': alphas,
        'FF alpha RC': alpha_rank_correlations,
        'FF alpha RC p-value': alpha_rank_p_values,
        'FF betas': betas,
        'MDD': MDDs,
        'Rigid accuracy': rigid_accuracies,
        'Decile accuracy': decile_accuracies,
        'Quarter accuracy': quarter_accuracies,
        'Half accuracy': half_accuracies,
        'KOSPI Large': kospi_larges,
        'KOSPI Middle': kospi_middles,
        'KOSPI Small': kospi_smalls,
        'KOSDAQ Large': kosdaq_larges,
        'KOSDAQ Middle': kosdaq_middles,
        'KOSDAQ Small': kosdaq_smalls,
    })
    if to_csv:
        comparison_result.to_csv('summary/comparison_result.csv', index=False)
    return comparison_result
def get_ensemble(method: str, model_name: str, start_number: int = 0, end_number: int = 9, step: int = 1,
                 quantile: int = 40, show_plot=True):
    """
    Build cumulative ensembles of model predictions and evaluate each one.

    :param method: (str) ensemble method name; must be a key of METHODS.
    :param model_name: (str) name of the underlying model.
    :param start_number: (int) first model number included in the ensemble.
    :param end_number: (int) last model number included in the ensemble.
    :param step: (int) only every ``step``-th cumulative ensemble is evaluated.
    :param quantile: (int) number of quantiles; selection uses the top quantile.
    :param show_plot: (bool) when True, plot company counts and cumulative returns.

    :return ensemble_summary: (DataFrame) indexed by ensemble size
        PORTFOLIO_RETURN    | (float)
        ACTIVE_RETURN       | (float)
        ACTIVE_RISK         | (float)
        IR                  | (float)
        CAGR                | (float)
        RIGID_ACCURACY      | (float)
        DECILE_ACCURACY     | (float)
        QUARTER_ACCURACY    | (float)
        HALF_ACCURACY       | (float)
    :return ensemble_portfolios: ([Portfolio])
        DATE    | (datetime)
        CODE    | (str)
        RET_1   | (float)
    """
    # Check parameters
    assert method in METHODS, "method does not exist."
    assert end_number > start_number + 1, "end_number should be bigger than (start_number + 1)."
    assert step >= 1, "step should be a positive integer."
    assert quantile > 1, "quantile should be an integer bigger than 1."

    result_file_name = _get_file_name(method, model_name, quantile)

    predictions = _get_predictions(model_name, start_number, end_number)
    # Dispatch to the ensemble builder registered for this method.
    get_ensemble_predictions = GET_ENSEMBLE_PREDICTIONS[method]
    ensemble_predictions = get_ensemble_predictions(predictions, quantile)

    # Append actual returns
    ensemble_predictions = [pd.merge(ensemble_prediction, actual_returns, on=[DATE, CODE])
                            for ensemble_prediction in ensemble_predictions]

    # Cumulative ensemble: per-date company counts and cumulative returns,
    # sampled every ``step`` ensembles (columns are the ensemble sizes).
    ensemble_numbers = pd.DataFrame(index=ensemble_predictions[0][DATE].unique())
    ensemble_cumulative_returns = pd.DataFrame(index=ensemble_predictions[0][DATE].unique())
    for index, ensemble_prediction in enumerate(ensemble_predictions):
        ensemble_number = ensemble_prediction.groupby(by=[DATE])[CODE].count()
        ensemble_return = ensemble_prediction.groupby(by=[DATE])[RET_1].mean()
        ensemble_cumulative_return = _cumulate(ensemble_return)
        if (index + 1) % step == 0:
            ensemble_numbers[index + 1] = ensemble_number
            ensemble_cumulative_returns[index + 1] = ensemble_cumulative_return

    # Fill nan: dates with no selection get a count of 0, and cumulative
    # returns are forward-filled before defaulting to 0.
    ensemble_numbers.fillna(0, inplace=True)
    ensemble_cumulative_returns.fillna(method='ffill', inplace=True)
    ensemble_cumulative_returns.fillna(0, inplace=True)

    ensemble_portfolios = [Portfolio(ensemble_prediction)
                           for ensemble_prediction in ensemble_predictions[(step - 1)::step]]
    # Bail out when any sampled ensemble portfolio is empty.
    for ensemble_portfolio in ensemble_portfolios:
        if ensemble_portfolio.empty:
            return None, None

    # Evaluate each sampled ensemble portfolio.
    ensemble_outcomes = [ensemble_portfolio.outcome() for ensemble_portfolio in ensemble_portfolios]
    portfolio_returns = [ensemble_outcome[PORTFOLIO_RETURN] for ensemble_outcome in ensemble_outcomes]
    active_returns = [ensemble_outcome[ACTIVE_RETURN] for ensemble_outcome in ensemble_outcomes]
    active_risks = [ensemble_outcome[ACTIVE_RISK] for ensemble_outcome in ensemble_outcomes]
    information_ratios = [ensemble_outcome[IR] for ensemble_outcome in ensemble_outcomes]
    sharpe_ratios = [ensemble_outcome[SR] for ensemble_outcome in ensemble_outcomes]
    MDDs = [ensemble_outcome[MDD] for ensemble_outcome in ensemble_outcomes]
    alphas = [ensemble_outcome[FAMA_FRENCH_ALPHA] for ensemble_outcome in ensemble_outcomes]
    betas = [ensemble_outcome[FAMA_FRENCH_BETA] for ensemble_outcome in ensemble_outcomes]
    CAGRs = [ensemble_outcome[CAGR] for ensemble_outcome in ensemble_outcomes]
    # Accuracies at the selected quantile and at coarser splits (10/4/2).
    rigid_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, quantile)
                        for ensemble_portfolio in ensemble_portfolios]
    decile_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 10)
                         for ensemble_portfolio in ensemble_portfolios]
    quarter_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 4)
                          for ensemble_portfolio in ensemble_portfolios]
    half_accuracies = [_calculate_accuracy(ensemble_portfolio, predictions, 2)
                       for ensemble_portfolio in ensemble_portfolios]

    ensemble_summary = pd.DataFrame({
        PORTFOLIO_RETURN: portfolio_returns,
        ACTIVE_RETURN: active_returns,
        ACTIVE_RISK: active_risks,
        IR: information_ratios,
        SR: sharpe_ratios,
        MDD: MDDs,
        FAMA_FRENCH_ALPHA: alphas,
        FAMA_FRENCH_BETA: betas,
        CAGR: CAGRs,
        RIGID_ACCURACY: rigid_accuracies,
        DECILE_ACCURACY: decile_accuracies,
        QUARTER_ACCURACY: quarter_accuracies,
        HALF_ACCURACY: half_accuracies,
    }, index=ensemble_numbers.columns)
    ensemble_summary.to_csv('summary/' + result_file_name + '.csv')

    for ensemble_prediction in ensemble_predictions:
        ensemble_prediction[DATE] = pd.to_datetime(ensemble_prediction[DATE], format='%Y-%m-%d')

    # Plot
    if show_plot:
        fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8, 8))
        # Company number
        ensemble_numbers.plot(ax=axes[0], colormap='Blues')
        axes[0].set_title('{}:{}, Top {}-quantile'.format(method.title(), model_name, quantile))
        axes[0].set_xlabel('Date')
        axes[0].set_ylabel('# of companies')
        axes[0].legend(loc='upper left')
        # Cumulative return
        ensemble_cumulative_returns.plot(ax=axes[1], colormap='Blues')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Return')
        axes[1].legend(loc='upper left')
        # Information ratio
        # ensembles = ensemble_cumulative_returns.columns
        # trend_model = np.polyfit(ensembles, information_ratios, 1)
        # get_trend = np.poly1d(trend_model)
        # axes[2].plot(ensembles, information_ratios, 'black', ensembles, get_trend(ensembles), 'r--')
        # axes[2].set_ylim(0.3, 0.5)
        # axes[2].set_xlabel('# of ensembles')
        # axes[2].set_ylabel('Information ratio')
        plt.savefig('summary/' + result_file_name + '.png')
        fig.show()

    return ensemble_summary, ensemble_portfolios
# Ensemble method and column-name constants.
GEOMETRIC = 'geometric'
QUANTILE = 'quantile'
PREDICTED_RET_1 = 'predict_return_1'
ACTUAL_RANK = 'actual_rank'
PREDICTED_RANK = 'predicted_rank'
COUNT = 'count'
RANK = 'rank'
CORRECT = 'correct'
RIGID_ACCURACY = 'rigid_accuracy'
DECILE_ACCURACY = 'decile_accuracy'
QUARTER_ACCURACY = 'quarter_accuracy'
HALF_ACCURACY = 'half_accuracy'

# Module-level shared state: CD91 benchmark returns and actual forward
# returns, loaded once at import time.
pf = Portfolio()
CD91_returns = pf.get_benchmark(CD91)[BENCHMARK_RET_1]
CD91_returns = CD91_returns.dropna()
actual_returns = pf[[DATE, CODE, RET_1]]


def get_intersection_ensemble_predictions(predictions, quantile: int = 40):
    """
    Ensemble by intersecting the top-quantile picks of every prediction.

    NOTE(review): the function body continues beyond this chunk.

    :return ensemble_predictions:
        DATE | (datetime64)
        CODE | (str)
    """
    selected_predictions = _select_predictions(predictions, quantile, [DATE, CODE])
    # Intersection
# Derived column names; the 'std', 'rolling' and 'predicted' prefixes (and
# STD_VALUE) are defined earlier in this file, outside this chunk.
STD_MOMENTUM = std + MOMENTUM
STD_QUALITY = std + QUALITY
STD_VOLATILITY = std + VOLATILITY
ROLLING_RET_1 = rolling + RET_1
ROLLING_VALUE = rolling + STD_VALUE
ROLLING_MOMENTUM = rolling + STD_MOMENTUM
ROLLING_QUALITY = rolling + STD_QUALITY
ROLLING_VOLATILITY = rolling + STD_VOLATILITY
PRED_VALUE = predicted + VALUE
PRED_MOMENTUM = predicted + MOMENTUM
PRED_QUALITY = predicted + QUALITY
PRED_VOLATILITY = predicted + VOLATILITY

pf = Portfolio()
# Trading capital: traded fraction of market cap.
pf[TRADING_CAPITAL] = pf[TRADING_VOLUME_RATIO] * pf[MKTCAP]
# Cross-sectionally standardize returns and factors per period, then build
# the composite concept scores from the standardized columns.
pf = pf.periodic_standardize(factor=RET_1)
pf = pf.periodic_standardize(factor=B_P)
pf = pf.periodic_standardize(factor=E_P)
# Value: average of standardized book-to-price and earnings-to-price.
pf[STD_VALUE] = (pf[std + B_P] + pf[std + E_P]) / 2
pf = pf.periodic_standardize(factor=MOM12_1)
pf[STD_MOMENTUM] = pf[std + MOM12_1]
pf = pf.periodic_standardize(factor=GP_A)
pf[STD_QUALITY] = pf[std + GP_A]