Exemplo n.º 1
0
def make_appendix_table(assets=None):
    """Build and print the asset information table for the appendix.

    Reads every CSV file in ``../data``, outer-joins them on their parsed
    date index, and prints (through ``eu.latex_print``) one row per ticker
    with its security name, asset class, and the first and last dates for
    which the ticker has non-missing data.

    Parameters
    ----------
    assets: list of str, optional
        Tickers to include in the table. If None, every column found in
        the data files is included.

    Raises
    ------
    KeyError
        If a CSV file name is not one of the known asset classes, or a
        ticker is missing from ``../data/asset_mapping.json``.
    """
    import os  # Local import: the file's import block is not visible here.

    # The base name of each data file identifies the asset class of every
    # column it contains.
    class_by_file = {
        'fx': 'FX',
        'commodities': 'Commodity',
        'alternatives': 'Alternative',
        'equities': 'Equity',
        'fixed_income': 'Fixed Income',
        }

    def read_file(fid):
        """Read data file as DataFrame with columns sorted by name."""
        # NOTE: `infer_datetime_format` is deprecated in pandas >= 2.0; it is
        # kept so parsing on older pandas versions stays unchanged.
        df = pd.read_csv(
            fid,
            index_col=0,
            parse_dates=True,
            infer_datetime_format=True,
            )
        return df[sorted(df)]

    # Read each file exactly once, recording the asset class of its columns
    # while the frame is at hand.
    fids = glob('../data/*.csv')
    frames = []
    asset_class_map = {}
    for fid in fids:
        # os.path handles whichever separator glob returned on this platform.
        stem = os.path.splitext(os.path.basename(fid))[0]
        asset_class = class_by_file[stem]
        frame = read_file(fid)
        frames.append(frame)
        for asset in frame.columns:
            asset_class_map[asset] = asset_class
    df = pd.DataFrame().join(frames, how='outer')

    # Subset data to specified assets if provided.
    if assets is not None:
        df = df[assets].copy()
    tickers = list(df)

    # Read name conversions dict.
    with open('../data/asset_mapping.json', 'r') as fid:
        name_map = json.load(fid)

    table_data = defaultdict(list)
    for ticker in tickers:
        # Dates with data for this ticker; first/last give its coverage.
        dates = df[ticker].dropna().index
        table_data['Ticker'].append(ticker)
        table_data['Security Name'].append(name_map[ticker])
        table_data['Asset Class'].append(asset_class_map[ticker])
        table_data['Start Date'].append(dates[0].strftime('%m/%d/%Y'))
        table_data['End Date'].append(dates[-1].strftime('%m/%d/%Y'))

    table = pd.DataFrame(table_data)
    cap = 'Summary of studied assets.'
    col_fmt = 'lllccc'
    eu.latex_print(table, hide_index=True, col_fmt=col_fmt, caption=cap,
                   adjust=True, greeks=False)
Exemplo n.º 2
0
def make_algo_comparison_tables(year=2018, fdir='../figures', save=True):
    """Plot the MAE color tables and print the train/CV/test MAE table.

    Parameters
    ----------
    year: int
        Not used by this function; kept so existing callers keep working.
    fdir: str
        Directory handed to ``eu.save_fig`` and ``eu.latex_figure``.
    save: bool
        If True, each figure is saved through ``eu.save_fig``; otherwise it
        is displayed with ``plt.show()``.
    """
    # %%
    ess_train = CompareModels('essential', period='weekly', subset='train')
    ess_cv = CompareModels(stats='essential', period='weekly', subset='cv')
    ess_test = CompareModels(stats='essential', period='weekly', subset='test')
    ess_thresh = CompareModels(
        'essential_below_thresh',period='weekly', subset='train')
    non_ess = CompareModels('non_essential', period='weekly', subset='train')

    def save_or_show(fig_name):
        """Save the figure as `fig_name` when saving, else display it."""
        if save:
            eu.save_fig(fig_name, dir=fdir)
        else:
            plt.show()

    # %%
    # Essential stats.
    fig, axes=plt.subplots(1, 2, figsize=(18, 12))
    ess_train.plot_color_table(ax=axes[0], fontsize=12, prec=2)
    ess_cv.plot_color_table(ax=axes[1], fontsize=12, prec=2)
    plt.tight_layout()
    save_or_show('essential_MAE_color_table')

    ess_train.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    save_or_show('essential_train_MAE_table')

    ess_cv.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    save_or_show('essential_cv_MAE_table')
    if save:
        # Combine the saved train and CV tables into one LaTeX figure.
        cap = 'Mean Absolute Error (MAE) values for all essential stats '\
            'and studied algorithms.'
        eu.latex_figure(
            fids=['essential_train_MAE_table', 'essential_cv_MAE_table'],
            dir=fdir,
            caption=cap,
            subcaptions=['Training MAE.', 'Cross-Validation MAE.'],
            width=0.98,
            )

    ess_thresh.plot_color_table(figsize=(9, 12), fontsize=12)
    save_or_show('essential_thresh_MAE_table')

    non_ess.plot_color_table(figsize=(9, 12), fontsize=12, prec=3)
    save_or_show('nonessential_MAE_table')

    # %%
    # Make train/cv/test table and print to LaTeX.

    with open('../data/.models/weekly/optimal_models.json', 'r') as fid:
        opt_models = json.load(fid)

    def get_set_mae_list(subset_comparison, models=opt_models):
        """Return, per row of mae_df, the MAE of that row's optimal model."""
        return [
            row[models[row['Position']][row['Stat']]]
            for _, row in subset_comparison.mae_df.iterrows()
            ]

    df = ess_train.mae_df
    # Display names for the baseline projection functions; any other
    # algorithm name is shown unchanged.
    fmt_methods = {
        'MEAN': 'Mean',
        'MEDIAN': 'Median',
        'FLOOR': 'Min',
        'CEIL': 'Max',
        }
    algos = []
    for _, row in df.iterrows():
        # Look the model up in opt_models (loaded just above); no name
        # `models` exists in this scope.
        algo = opt_models[row['Position']][row['Stat']]
        algos.append(fmt_methods.get(algo, algo))

    # Show each position only on its first row; repeats become '{}'.
    pos = []
    for p in df['Position'].values:
        pos.append('{}' if p in pos else p)

    table = pd.DataFrame({
        'Position': pos,
        'Stat': df['Stat'],
        'Projection Function': algos,
        'Train': get_set_mae_list(ess_train),
        'CV': get_set_mae_list(ess_cv),
        'Test': get_set_mae_list(ess_test),
        })
    cap = 'MAE values for training, cross-validation, and test sets with ' \
        'selected optimal projection function for each stat.'
    eu.latex_print(table, hide_index=True, prec=2, col_fmt='lllcrrr',
                   caption=cap)
    # NOTE(review): the statements below use `q`, `resdf` and `name`, none of
    # which are defined in the visible code. This looks like the tail of a
    # separate function whose header is missing -- confirm before relying on it.
    # For each bucket in resdf['q']: mean of 'returns' times 12, standard
    # deviation of 'returns' times sqrt(12) (presumably annualizing monthly
    # data -- TODO confirm), and the mean of column `name`.
    for qtile in range(q):
        table[qtile]['r'] = resdf[resdf['q']==qtile]['returns'].mean()*12
        table[qtile]['v'] = resdf[resdf['q']==qtile]['returns'].std()*np.sqrt(12)
        table[qtile][name] = resdf[resdf['q']==qtile][name].mean()

    # One row per bucket; 'sr' is the return-to-volatility ratio.
    table = pd.DataFrame.from_dict(table, orient='index')
    table['sr'] = table['r']/table['v']
    table = table.reset_index()
    table = table[['index', 'r', 'v', 'sr', name]]
    cols = 'Quartile Return Volatility Sharpe'.split()
    cols += [name]
    table.columns = cols
    # Shift bucket labels from 0-based to 1-based for display.
    table['Quartile'] += 1
    # Scale return and volatility by 100 (presumably to percent).
    table[['Return', 'Volatility']] *= 100
    eu.latex_print(table, prec=2, hide_index=True)
# %%
def get_CAPM_weights(er, cov, gamma):
    """Set up a constrained return-versus-risk portfolio problem.

    NOTE(review): the visible code stops after constructing the problem;
    nothing is solved or returned here, so the function appears to be
    truncated. `cp` is presumably cvxpy -- confirm against the imports.

    Parameters
    ----------
    er:
        Expected returns, multiplied as ``w.T @ er`` with ``w`` of shape
        (n, 1) -- presumably an (n, 1) array, TODO confirm.
    cov:
        Matrix passed to ``cp.quad_form`` as the risk term (presumably the
        covariance matrix); ``cov.shape[0]`` sets the number of weights.
    gamma:
        Coefficient on the risk term; wrapped as a non-negative parameter.
    """
    n = cov.shape[0]
    w = cp.Variable((n, 1))
    # Rebinds the `gamma` argument to a non-negative cp.Parameter.
    gamma = cp.Parameter(nonneg=True, value=gamma)
    ret = w.T @ er
    risk = cp.quad_form(w, cov)
    # Weights sum to 1, each lies in [0, 0.1], and the return term is at
    # least 0.02. With a 0.1 cap and a total of 1, at least 10 assets are
    # required for the constraints to be satisfiable.
    constraints = [
        cp.sum(w) == 1,
        w <= 0.1,
        w >= 0,
        ret >= 0.02,
        ]
    # Maximize return penalized by gamma times risk.
    obj = cp.Maximize(ret - gamma*risk)
    prob = cp.Problem(obj, constraints)
Exemplo n.º 4
0
def train_simple_linear_regression(trainers, period, n_iters, save, verbose):
    """
    Train OLS linear regression model.

    Fits one regression per (position, essential stat) pair, optionally
    saves the fitted coefficients to CSV, and, when `verbose` is set,
    prints a LaTeX table of the rounded coefficients with significance
    stars.

    Parameters
    ----------
    trainers: dict
        Dict with positions as keys and TrainProjections class as values.
    period: {'weekly', 'season'}
        Period to train models over.
    n_iters: int
        Number of iterations to train models. Not used by this function.
    save: bool
        If True, save results into data/.models dir.
    verbose: bool
        If True, print progress bar, timing, and the LaTeX table to screen.
    """

    if verbose:
        print('\n\nSimple linear regresssion OLS model:')
        t0 = perf_counter()
    positions = trainers.keys()

    def fmt_pval(pvals):
        """Return *-style significance list for pvalues."""
        # More stars mean a smaller p-value, matching the table caption.
        sig = []
        for pval in pvals:
            if pval <= 0.001:
                sig.append('***')
            elif pval <= 0.01:
                sig.append('**')
            elif pval <= 0.05:
                sig.append('*')
            else:
                sig.append('')
        return sig

    # Get OLS results for each essential stat.
    rows = []
    dis = not verbose
    n = sum(len(trainer.essential_stats) for trainer in trainers.values())
    with tqdm(total=n, disable=dis) as pbar:
        for pos in positions:
            for i, stat in enumerate(trainers[pos].essential_stats):
                ols = trainers[pos].train_simple_linear_regression(stat)
                # Only the first row of each position shows its name.
                shown_pos = pos if i == 0 else ''
                # Find significance and rounded values.
                vals = [shown_pos, stat] + [f'{val}{sig}' for val, sig in \
                        zip(ols.params.round(2), fmt_pval(ols.pvalues))]
                cols = ['Position', 'Stat'] + list(ols.params.index)
                rows.append(pd.DataFrame([vals], columns=cols))

                if save:
                    fid = f'../data/.models/{period}/{pos}/' \
                        f'{stat.replace(" ", "_")}/OLS.csv'

                    save_df = pd.DataFrame(ols.params, columns=['vals'])
                    save_df.to_csv(fid)

                pbar.update(1)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat raises on an empty list, so fall back to an empty frame.
    if rows:
        df = pd.concat(rows, sort=False)
    else:
        df = pd.DataFrame(columns=['Position'])

    # Build and print table.
    col_fmt = 'l' * (len(list(df)) + 1)
    cap = 'Simple linear regression results for ensemble weighting of each '
    cap += 'source. Asterisks denote statistical significance of regression '
    cap += 'coefficients, (*) denoting a p-value less than 0.05, (**) for less '
    cap += 'than 0.01, and (***) for less than 0.001.'
    if verbose:
        print(f'Time to train: {(perf_counter() - t0) / 60:.1f} min.')
        eu.latex_print(df,
                       hide_index=True,
                       adjust=True,
                       col_fmt=col_fmt,
                       caption=cap)