def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200, n_boots=5000):
    """Bootstrap and plot the distribution of annualized portfolio returns.

    For each of the top ``pick_K`` predicted loans, repeatedly draws a
    simulated loan at random from the percentile bin matching that loan's
    predicted return, then computes the weighted-average net return of each
    bootstrap portfolio and annualizes it.

    Parameters
    ----------
    sim_lookup : pandas.DataFrame
        Simulated loans with columns 'net_ret', 'weights' and 'pred',
        assumed sorted so that consecutive rows fall in the same
        prediction-percentile bin.  # TODO confirm sort order with caller
    predictions : mapping/DataFrame
        Must provide 'returns' in percent for the candidate loans.
    pick_K : int
        Number of top-predicted loans forming the portfolio.
    n_bins : int
        Number of percentile bins over the simulated predictions.
    n_boots : int
        Number of bootstrap portfolios to draw.

    Returns
    -------
    matplotlib.figure.Figure
        Figure showing the KDE of the bootstrapped annual returns.
    """
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # Percentile bin edges over simulated predictions, ordered high-to-low so
    # that bin 0 corresponds to the best-predicted loans.
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)
    sim_samps_per_bin = len(sim_lookup) / float(n_bins)

    # Find bins of first pick_K points in prediction
    # (predictions['returns'] is in percent; bins are fractions).
    pred_bins = np.digitize(predictions['returns'] / 100., bins)

    # One random simulated loan per bin per bootstrap draw.  Cast the high
    # bound explicitly: randint truncates floats anyway, this makes it clear.
    boot_samps = sim_samps_per_bin * pred_bins[:pick_K] + \
        np.random.randint(0, int(sim_samps_per_bin), size=(n_boots, pick_K))
    boot_samps = boot_samps.astype(int)

    # Weighted-average net return of each bootstrap portfolio, annualized.
    # (Removed a dead `sim_returns = np.zeros(n_boots)` that was always
    # overwritten here.)
    sim_returns = np.sum(sim_net[boot_samps], axis=1) / \
        np.sum(sim_weights[boot_samps], axis=1)
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    # NOTE(review): sns.distplot is deprecated/removed in modern seaborn;
    # kdeplot is the successor if the environment is upgraded.
    sns.distplot(sim_returns, bins=100, hist=False, rug=False, ax=ax,
                 kde_kws={'color': 'k', 'lw': 3})
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200, n_boots=5000):
    """Plot a bootstrapped KDE of annualized portfolio returns.

    Draws ``n_boots`` bootstrap portfolios: for each of the ``pick_K``
    top-predicted loans, one simulated loan is sampled from the percentile
    bin matching its predicted return; each portfolio's weighted-average net
    return is then annualized and the resulting distribution plotted.
    Returns the matplotlib Figure.
    """
    net_vals = sim_lookup['net_ret'].values
    weight_vals = sim_lookup['weights'].values

    # Descending percentile edges over the simulated model predictions.
    edges = np.percentile(sim_lookup['pred'].values,
                          np.linspace(0, 100, n_bins)[::-1])
    samps_per_bin = len(sim_lookup) / float(n_bins)

    # Locate each predicted return (given in percent) among the edges.
    which_bin = np.digitize(predictions['returns'] / 100., edges)

    sim_returns = np.zeros(n_boots)
    # Random within-bin offsets for every (bootstrap, loan) pair.
    offsets = np.random.randint(0, samps_per_bin, size=(n_boots, pick_K))
    draw_idx = (samps_per_bin * which_bin[:pick_K] + offsets).astype(int)

    # Weighted-average net return per bootstrap portfolio, then annualize.
    sim_returns = np.sum(net_vals[draw_idx], axis=1) / np.sum(
        weight_vals[draw_idx], axis=1)
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    sns.distplot(sim_returns, bins=100, hist=False, rug=False, ax=ax,
                 kde_kws={'color': 'k', 'lw': 3})
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
#%% #load data data_name = 'all_loans_proc' LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d',]) #%% Set up list of predictors and their properties '''Store info for each predictor as a named tuple containing the col-name within the pandas dataframe, the full_name (human readable), and the type of normalization to apply to that feature.''' predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) #dict to create transformers for each specified type transformer_map = {'minMax':MinMaxScaler(), 'maxAbs':MaxAbsScaler(), 'standScal':StandardScaler(), 'log_minmax': LCM.log_minmax(), 'robScal':RobustScaler() } predictors = [ predictor('acc_now_delinq','num delinq accounts','maxAbs'), predictor('annual_inc','annual income','log_minmax'), predictor('collections_12_mths_ex_med','num recent collections','maxAbs'), predictor('cr_line_dur', 'duration cred line','standScal'), predictor('delinq_2yrs', 'num recent delinq','maxAbs'), predictor('desc_length', 'loan desc length','maxAbs'), predictor('dti', 'debt-income ratio','standScal'), predictor('emp_length', 'employment length','maxAbs'), predictor('funded_amnt','loan amount','maxAbs'), predictor('inq_last_6mths', 'num recent inqs','maxAbs'), predictor('int_rate', 'interest rate','maxAbs'),
#%% #load data data_name = 'all_loans_proc' LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d',]) #%% '''Store info for each predictor as a named tuple containing the col-name within the pandas dataframe, the full_name (human readable), and the type of normalization to apply to that feature.''' predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) transformer_map = {'minMax':MinMaxScaler(), 'maxAbs':MaxAbsScaler(), 'standScal':StandardScaler(), 'log_minmax': LCM.log_minmax(), 'robScal':RobustScaler()} predictors = [ predictor('acc_now_delinq','num delinq accounts','maxAbs'), predictor('annual_inc','annual income','log_minmax'), predictor('collections_12_mths_ex_med','num recent collections','maxAbs'), predictor('cr_line_dur', 'duration cred line','standScal'), predictor('delinq_2yrs', 'num recent delinq','maxAbs'), predictor('desc_length', 'loan desc length','maxAbs'), predictor('dti', 'debt-income ratio','standScal'), predictor('emp_length', 'employment length','maxAbs'), # predictor('funded_amnt','loan amount','maxAbs'), predictor('loan_amnt','loan amount','maxAbs'), predictor('inq_last_6mths', 'num recent inqs','maxAbs'), predictor('int_rate', 'interest rate','maxAbs'),
LD = pd.read_csv(data_dir + data_name, parse_dates=[ 'issue_d', ]) #%% Set up list of predictors and their properties '''Store info for each predictor as a named tuple containing the col-name within the pandas dataframe, the full_name (human readable), and the type of normalization to apply to that feature.''' predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) #dict to create transformers for each specified type transformer_map = { 'minMax': MinMaxScaler(), 'maxAbs': MaxAbsScaler(), 'standScal': StandardScaler(), 'log_minmax': LCM.log_minmax(), 'robScal': RobustScaler() } predictors = [ predictor('acc_now_delinq', 'num delinq accounts', 'maxAbs'), predictor('annual_inc', 'annual income', 'log_minmax'), predictor('collections_12_mths_ex_med', 'num recent collections', 'maxAbs'), predictor('cr_line_dur', 'duration cred line', 'standScal'), predictor('delinq_2yrs', 'num recent delinq', 'maxAbs'), predictor('desc_length', 'loan desc length', 'maxAbs'), predictor('dti', 'debt-income ratio', 'standScal'), predictor('emp_length', 'employment length', 'maxAbs'), predictor('funded_amnt', 'loan amount', 'maxAbs'), predictor('inq_last_6mths', 'num recent inqs', 'maxAbs'),