def comp_wrapper_addmodel(function, model_series, new_series='Mode', y=est.dd_inf(), dgp_series=est.dd_inf()): model_series[new_series] = {} for split in model_series['DGP'].keys(): model_series[new_series][split] = function(y=y['y'][split]) return model_series
def comp_wrapper_g(function, *g_series, wrapper_model=None, y=None, dgp_series=None, comp_kws=None):
    """Apply ``wrapper_model`` once per g function found in the series.

    Example: ``g_series`` as probabilities, ``function`` as predict from
    probabilities.

    Parameters
    ----------
    function : callable
        Forwarded to ``wrapper_model`` for each g.
    *g_series : dict
        One or more dicts keyed by g; the first series determines which g
        keys are iterated.
    wrapper_model : callable, optional
        Defaults to ``comp_wrapper_model`` (resolved at call time).
    y, dgp_series : dict, optional
        Sliced per g (``y[g]``, ``dgp_series[g]``) and forwarded; fresh
        ``est.dd_inf()`` when omitted.
    comp_kws : dict, optional
        Extra keyword arguments forwarded unchanged.

    Returns
    -------
    dict
        ``{g: wrapper_model(...)}`` for every g in the first series.
    """
    # FIX: the original defaults (`est.dd_inf()`, `comp_kws={}`) were
    # evaluated at definition time and shared across calls; resolve lazily.
    if wrapper_model is None:
        wrapper_model = comp_wrapper_model
    if y is None:
        y = est.dd_inf()
    if dgp_series is None:
        dgp_series = est.dd_inf()
    if comp_kws is None:
        comp_kws = {}
    comp = {}
    for g in g_series[0].keys():
        # Extract the g-slice from every input series.
        series = [serie[g] for serie in g_series]
        comp[g] = wrapper_model(function,
                                *series,
                                y=y[g],
                                dgp_series=dgp_series[g],
                                comp_kws=comp_kws)
    return comp
def comp_wrapper_par(function, *par_series, wrapper_model=None, y=None, dgp_series=None, comp_kws=None):
    """Apply ``wrapper_model`` once per parameter value found in the series.

    Example: ``g_series`` as probabilities, ``function`` as predict from
    probabilities.

    Parameters
    ----------
    function : callable
        Forwarded to ``wrapper_model`` for each parameter value.
    *par_series : dict
        One or more dicts keyed by parameter value; the first series
        determines which parameter keys are iterated.
    wrapper_model : callable, optional
        Defaults to ``comp_wrapper_g`` (resolved at call time).
    y, dgp_series : dict, optional
        Sliced per parameter (``y[par]``, ``dgp_series[par]``) and forwarded;
        fresh ``est.dd_inf()`` when omitted.
    comp_kws : dict, optional
        Extra keyword arguments forwarded unchanged.

    Returns
    -------
    dict
        ``{par: wrapper_model(...)}`` for every parameter in the first series.
    """
    # FIX: the original defaults (`est.dd_inf()`, `comp_kws={}`) were
    # evaluated at definition time and shared across calls; resolve lazily.
    if wrapper_model is None:
        wrapper_model = comp_wrapper_g
    if y is None:
        y = est.dd_inf()
    if dgp_series is None:
        dgp_series = est.dd_inf()
    if comp_kws is None:
        comp_kws = {}
    comp = {}
    for par in par_series[0].keys():  # Zero just picks first series in par_series
        series = [serie[par] for serie in par_series]
        comp[par] = wrapper_model(function,
                                  *series,
                                  y=y[par],
                                  dgp_series=dgp_series[par],
                                  comp_kws=comp_kws)
    return comp
def comp_wrapper_model(function, *model_series, y=None, dgp_series=None, comp_kws=None):
    """Apply ``function`` once per (model, split) pair found in the series.

    Example: ``function`` takes an average (so the result is an average) and
    the series are the probabilities for each model.

    Parameters
    ----------
    function : callable
        Called as ``function(*series, y=..., dgp_series=..., **comp_kws)``
        where ``series`` are the (model, split) slices of every input series.
    *model_series : dict
        One or more dicts keyed model -> split -> data; the first series
        determines which models and splits are iterated.
    y : dict, optional
        Outcome data keyed as ``y['y'][split]``; fresh ``est.dd_inf()`` when
        omitted.
    dgp_series : dict, optional
        DGP data keyed as ``dgp_series['DGP'][split]``; fresh
        ``est.dd_inf()`` when omitted.
    comp_kws : dict, optional
        Extra keyword arguments forwarded to ``function``.

    Returns
    -------
    dict
        ``{model: {split: function(...)}}``.
    """
    # FIX: the original defaults (`est.dd_inf()`, `comp_kws={}`) were
    # evaluated at definition time and shared across calls; resolve lazily.
    if y is None:
        y = est.dd_inf()
    if dgp_series is None:
        dgp_series = est.dd_inf()
    if comp_kws is None:
        comp_kws = {}
    comp = {}
    for model in model_series[0].keys():  # zero implies first series in variable number of series.
        comp[model] = {}  # Store a value for each input model
        for split in model_series[0][model].keys():  # Repeat for train and test
            # Extract the data for the specific (model, split) case.
            series = [serie[model][split] for serie in model_series]
            comp[model][split] = function(*series,  # Perform computation in input function
                                          y=y['y'][split],
                                          dgp_series=dgp_series['DGP'][split],
                                          **comp_kws)
    return comp
def table_wrapper_g_double(
        g_series1,
        g_series2,
        cell_function,
        extra_series1=est.dd_inf(),
        extra_series2=est.dd_inf(),  # Typically adds bootstrapper to cell function.
        g_functions=defaultdict(dict),
        g_subset=False,
        models=False,
        split1='Test',
        split2='Test',
        decimals=2,
        print_string=True,
        save_file=False,
        filename='table_wrapper_g',
        **kwargs):
    """Build a two-columns-per-model comparison table from two g-series.

    For each g function (row) and model, ``cell_function`` formats one cell
    from ``g_series1``/``extra_series1`` (column ``title1``, default 'Act.')
    and one from ``g_series2``/``extra_series2`` (column ``title2``, default
    'Obs.').  The result is a DataFrame with a (model, title) MultiIndex on
    the columns, optionally printed and/or written to
    ``tables\\<filename>.tex`` via ``table_to_latex_custom``.

    NOTE(review): the defaults ``est.dd_inf()`` / ``defaultdict(dict)`` are
    evaluated once at definition time and shared across calls — confirm this
    sharing is intended.
    """
    if g_subset == False:  # If not subset specified, print for all functions
        gs = g_series1.keys()
    else:
        gs = g_subset  # Print only figures for subset of g_functions
    if models == False:
        # Pick an arbitrary g to enumerate the models present.
        models = g_series1[np.random.choice(list(g_series1.keys()))].keys()
    printer1, printer2 = est.dd_inf(), est.dd_inf()
    for g in gs:
        if 'g_name' in g_functions[g].keys():  # Allow for "pretty" version of g.
            g_name = g_functions[g]['g_name']
        else:
            g_name = g
        for model in models:
            printer1[model][g_name] = cell_function(
                series=g_series1[g][model][split1],
                extra_series=extra_series1[g][model][split1],
                decimals=decimals)
            printer2[model][g_name] = cell_function(
                series=g_series2[g][model][split2],
                extra_series=extra_series2[g][model][split2],
                decimals=decimals)
            #str(np.round(np.mean(g_series[g][model][split]), decimals)) + str(' (') \
            # + str(np.round(np.std(g_series[g][model][split]), decimals+1))
    # Column titles are popped from kwargs so the remainder can be forwarded
    # to the LaTeX writer untouched.
    if 'title1' in kwargs.keys():
        title1 = kwargs['title1']
        del kwargs['title1']
    else:
        title1 = 'Act.'  # Actual model
    if 'title2' in kwargs.keys():
        title2 = kwargs['title2']
        del kwargs['title2']
    else:
        title2 = 'Obs.'  # Actual model
    table_stuff1 = {(model, title1): printer1[model] for model in printer1.keys()}
    table_stuff2 = {(model, title2): printer2[model] for model in printer2.keys()}
    table_stuff = {**table_stuff1, **table_stuff2}
    # table_stuff = table_stuff1.update(table_stuff2) #Updates table1 and returns None.
    # Reorder index so the two titles alternate within each model.
    # NOTE(review): ``MultiIndex.labels`` / ``set_labels(..., inplace=True)``
    # were removed in recent pandas (replaced by ``.codes`` /
    # ``.set_codes``) — confirm the pinned pandas version supports this.
    index = pd.MultiIndex.from_tuples(table_stuff.keys())
    index.set_labels(
        [
            [i for i in index.labels[0][0:len(models)] for _ in (0, 1)],  # duplicates first level in original order
            [0, 1] * len(models)
        ],
        inplace=True)  # Iterate (Act., Obs.)
    # Generate table
    table = pd.DataFrame(
        table_stuff,
        columns=index,
        # Row index: the g names, taken from an arbitrary model's dict.
        index=printer1[np.random.choice(list(printer1.keys()))].keys(),
    )
    if print_string == True:
        print(
            '---------------------------------------------------------------------'
        )
        print('TABLE: ' + filename)
        print(
            '---------------------------------------------------------------------'
        )
        print(table.round(decimals))
        print(
            '---------------------------------------------------------------------'
        )
    if save_file == True:
        with open(os.getcwd() + '\\tables\\' + '%s.tex' % filename, "w") as f:
            f.write(table_to_latex_custom(table, double_columns=True, **kwargs))
    return table
def table_wrapper_g(
        g_series,
        cell_function,
        extra_series=est.dd_inf(),  # Typically adds bootstrapper to cell function.
        g_functions=defaultdict(dict),
        g_subset=False,
        models=False,
        split='Test',
        decimals=2,
        print_string=True,
        save_file=False,
        filename='table_wrapper_g',
        transpose=False,
        cell_writer=write_cells_2line,
        **latex_kws):
    """Format a table of cells with one row per g function and one column per model.

    Every cell is produced by ``cell_function(series=..., extra_series=...,
    decimals=...)`` using the data at ``g_series[g][model][split]``.  The
    resulting DataFrame is optionally transposed, printed, and written to
    ``tables\\<filename>.tex`` via ``table_to_latex_custom``.
    """
    # Which g functions to tabulate: all of them, or only a requested subset.
    selected_gs = g_series.keys() if g_subset == False else g_subset
    if models == False:
        # Enumerate the models from an arbitrary g entry.
        some_g = np.random.choice(list(g_series.keys()))
        models = g_series[some_g].keys()
    cells = est.dd_inf()
    for g in selected_gs:
        # Allow a "pretty" display name for g when one was registered.
        label = g_functions[g].get('g_name', g)
        for model in models:
            cells[model][label] = cell_function(
                series=g_series[g][model][split],
                extra_series=extra_series[g][model][split],
                decimals=decimals)
    some_model = np.random.choice(list(cells.keys()))
    table = pd.DataFrame({m: cells[m] for m in cells.keys()},
                         columns=cells.keys(),
                         index=cells[some_model].keys())
    if transpose == True:
        table = table.transpose()
    if print_string == True:
        rule = '---------------------------------------------------------------------'
        print(rule)
        print('TABLE: ' + filename)
        print(rule)
        print(table.round(decimals))
        print(rule)
    if save_file == True:
        target = os.getcwd() + '\\tables\\' + '%s.tex' % filename
        with open(target, "w") as f:
            f.write(
                table_to_latex_custom(table,
                                      cell_writer=cell_writer,
                                      **latex_kws))
    return table
def comp_wrapper_parseries_g(function,
                             *par_series,
                             g_functions,
                             parameter_space,
                             wrapper_model=comp_wrapper_gseries,
                             summary_function=np.mean,
                             filename='V3',
                             mult_series=False,
                             comp_kws={},
                             **kwargs):
    """Summarize ``wrapper_model`` results over a parameter space into DataFrames.

    For each parameter in ``parameter_space``, runs ``wrapper_model`` (which
    receives ``filename + '_' + str(par)``), summarizes each
    (g, model, split) result with ``summary_function`` along axis 0, and
    appends one row per (g, split) — tagged with a 'Parameter' column — to
    the corresponding DataFrame.

    ``mult_series`` is either False (one DataFrame per (g, split)) or an int
    giving the number of per-variable sub-series (one DataFrame per
    (g, split, i)).  ``y`` and ``dgp_series`` may be supplied via ``kwargs``;
    otherwise fresh ``est.dd_inf()`` instances are used.

    NOTE(review): ``comp_kws={}`` is a shared mutable default, and
    ``pd.DataFrame.append`` was removed in pandas 2.0 (use ``pd.concat``) —
    confirm the pinned pandas version.
    """
    # Check for added series
    if 'dgp_series' in kwargs.keys():
        dgp_series = kwargs['dgp_series']
    else:
        dgp_series = est.dd_inf()
    if 'y' in kwargs.keys():
        y = kwargs['y']
    else:
        y = est.dd_inf()
    # Prepare dict with a dataframe for each g function
    comp = {}
    for g in g_functions.keys():
        comp[g] = {}
        for split in ('Train', 'Test'):
            if mult_series == False:
                comp[g][split] = pd.DataFrame()
            else:
                comp[g][split] = {}
                for i in range(0, mult_series):
                    comp[g][split][i] = pd.DataFrame()
    # Append rows to dataframe for each parameter
    for par in parameter_space:
        # Get computation for current parameter set
        temp = wrapper_model(function,
                             *par_series,
                             g_functions=g_functions,
                             filename=filename + '_' + str(par),
                             dgp_series=dgp_series,
                             y=y,
                             comp_kws=comp_kws)
        row = {}
        # Calculate results for each g function
        for g in temp.keys():
            row[g] = {}
            for split in temp[g][np.random.choice(list(
                    temp[g].keys()))].keys():  # Random model
                row[g][split] = {}
                if mult_series == False:
                    row[g][split]['Parameter'] = par
                else:
                    for i in range(0, mult_series):
                        row[g][split][i] = {}
                        row[g][split][i]['Parameter'] = par
                # Calculate a column with results for each model
                for model in temp[g].keys():
                    if mult_series == False:  # Check for number of columns in series
                        row[g][split][model] = summary_function(
                            temp[g][model][split], axis=0)
                    else:  # If more than one value, summarize for each variable (e.g. for each beta)
                        temp2 = summary_function(temp[g][model][split], axis=0)
                        for i in range(0, mult_series):
                            row[g][split][i][model] = temp2[i]
                # Append the result rows back into the dataframes
                if mult_series == False:
                    comp[g][split] = comp[g][split].append(row[g][split],
                                                           ignore_index=True)
                else:
                    for i in range(0, mult_series):
                        comp[g][split][i] = comp[g][split][i].append(
                            row[g][split][i], ignore_index=True)
    return comp
def comp_wrapper_parseries(function,
                           *par_series,
                           wrapper_model=comp_wrapper_g,
                           mult_series=False,
                           summary_function=np.mean,
                           load_individually=False,
                           filename='V3',
                           parameter_space=None,
                           comp_kws={},
                           **kwargs):
    """Summarize per-parameter results into DataFrames, optionally loading from disk.

    With ``load_individually=False`` the data is already in memory:
    ``par_series`` are dicts keyed by parameter and ``parameter_space`` is
    taken from the first series' keys.  With ``load_individually=True`` the
    series are *strings* naming pickle files under
    ``simulation_results\\single_iterations\\`` which are loaded per
    parameter (hence the added antics in the beginning).

    For each parameter, ``wrapper_model`` is run and each (g, model, split)
    result is summarized with ``summary_function`` along axis 0; one row per
    (g, split) — tagged with a 'Parameter' column — is appended to the
    corresponding DataFrame.  ``mult_series`` is False or an int count of
    per-variable sub-series (see ``comp_wrapper_parseries_g``).

    NOTE(review): ``comp_kws={}`` is a shared mutable default;
    ``pd.DataFrame.append`` was removed in pandas 2.0; and ``pickle.loads``
    on the result files is unsafe for untrusted input — confirm these files
    are always locally produced.
    """
    #NOTE: Series are now strings, which specify the file to be loaded.
    #Hence the added antics in the beginning.
    # Check for added series
    if 'dgp_series' in kwargs.keys():
        dgp_series = kwargs['dgp_series']
    else:
        dgp_series = est.dd_inf()
    if 'y' in kwargs.keys():
        y = kwargs['y']
    else:
        y = est.dd_inf()
    # Prepare to load relevant files
    if load_individually == False:
        parameter_space = par_series[0].keys()
    else:  # loads individually
        output = {}
        for i in range(0, len(par_series)):
            output[i] = par_series[i]
        # Non-default y / dgp_series are interpreted as file-name stubs.
        if dgp_series != est.dd_inf():
            output_dgp = dgp_series
        else:
            output_dgp = None
        if y != est.dd_inf():
            output_y = y
        else:
            output_y = None
    comp = {}
    # Prepare dict with a dataframe for each g function
    if load_individually == False:  # If data was compiled before
        for g in par_series[0][np.random.choice(list(
                par_series[0].keys()))].keys():
            comp[g] = {}
            for split in par_series[0][np.random.choice(
                    list(par_series[0].keys()))][g]['DGP'].keys():
                if mult_series == False:
                    comp[g][split] = pd.DataFrame()
                else:
                    comp[g][split] = {}
                    for i in range(0, mult_series):
                        comp[g][split][i] = pd.DataFrame()
    else:  # If data is in individual files, we need to load one to get the subsequent structure
        with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
            % (filename, output[0], np.random.choice(parameter_space)), "rb") as f:
            temp = pickle.loads(f.read())
        for g in temp.keys():
            comp[g] = {}
            for split in temp[g]['DGP'].keys():
                if mult_series == False:
                    comp[g][split] = pd.DataFrame()
                else:
                    comp[g][split] = {}
                    for i in range(0, mult_series):
                        comp[g][split][i] = pd.DataFrame()
    # Append rows to dataframe for each parameter
    for par in parameter_space:
        if load_individually == False:
            series = [serie[par] for serie in par_series]
            temp = wrapper_model(function,
                                 *series,
                                 y=y[par],
                                 dgp_series=dgp_series[par],
                                 comp_kws=comp_kws)
        else:
            # Load each input series (and optional dgp/y) for this parameter.
            series = []
            for i in range(0, len(par_series)):
                with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                    % (filename, output[i], par), "rb") as f:
                    series.append(pickle.loads(f.read()))
            if output_dgp != None:
                with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                    % (filename, output_dgp, par), "rb") as f:
                    dgp_series = pickle.loads(f.read())
            if output_y != None:
                with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                    % (filename, output_y, par), "rb") as f:
                    y = pickle.loads(f.read())
            temp = wrapper_model(function,
                                 *series,
                                 y=y,
                                 dgp_series=dgp_series,
                                 comp_kws=comp_kws)
        row = {}
        # Calculate results for each g function
        for g in temp.keys():
            row[g] = {}
            for split in temp[g][np.random.choice(list(
                    temp[g].keys()))].keys():  # Random model
                row[g][split] = {}
                if mult_series == False:
                    row[g][split]['Parameter'] = par
                else:
                    for i in range(0, mult_series):
                        row[g][split][i] = {}
                        row[g][split][i]['Parameter'] = par
                # Calculate a column with results for each model
                for model in temp[g].keys():
                    if mult_series == False:  # Check for number of columns in series
                        row[g][split][model] = summary_function(
                            temp[g][model][split], axis=0)
                    else:  # If more than one value, summarize for each variable (e.g. for each beta)
                        temp2 = summary_function(temp[g][model][split], axis=0)
                        for i in range(0, mult_series):
                            row[g][split][i][model] = temp2[i]
                # Append the result rows back into the dataframes
                if mult_series == False:
                    comp[g][split] = comp[g][split].append(row[g][split],
                                                           ignore_index=True)
                else:
                    for i in range(0, mult_series):
                        comp[g][split][i] = comp[g][split][i].append(
                            row[g][split][i], ignore_index=True)
    # del output, output_dgp, output_y, dgp_series, y
    return comp
def comp_wrapper_gseries(
        function,
        *g_series,
        g_functions,  # Requires g_functions, since series is strings
        wrapper_model=comp_wrapper_model,
        filename='V2',
        comp_kws={},
        **kwargs):
    """Run ``wrapper_model`` per g function, loading each series from pickle files.

    ``g_series`` entries are *strings* naming pickle files under
    ``simulation_results\\single_iterations\\`` (one file per g); likewise,
    ``y`` / ``dgp_series`` passed via ``kwargs`` may be strings naming files
    to load per g, or in-memory dicts (which are copied).

    Returns ``{g: wrapper_model(function, *loaded_series, y=...,
    dgp_series=..., comp_kws=...)}`` for every g in ``g_functions``.

    NOTE(review): ``comp_kws={}`` is a shared mutable default, and
    ``pickle.loads`` on the result files is unsafe for untrusted input —
    confirm these files are always locally produced.
    """
    #NOTE: Series are now strings, which specify the file to be loaded.
    #Hence the added antics in the beginning.
    # Check for added series
    if 'dgp_series' in kwargs.keys():
        if isinstance(kwargs['dgp_series'], str):
            dgp_series = kwargs[
                'dgp_series']  # String can't be copied (nor do we need to)
        else:
            dgp_series = kwargs['dgp_series'].copy()  # Copy, to avoid infinite dict
    else:
        dgp_series = est.dd_inf()
    if 'y' in kwargs.keys():
        if isinstance(kwargs['y'], str):
            y = kwargs['y']
        else:
            y = kwargs['y'].copy()
    else:
        y = est.dd_inf()
    # Prepare to load relevant files (original names are replaced for consistency)
    output = {}
    for i in range(0, len(g_series)):
        output[i] = g_series[i]
    # Non-default y / dgp_series are interpreted as file-name stubs.
    if dgp_series != est.dd_inf():
        output_dgp = dgp_series
    else:
        output_dgp = None
    if y != est.dd_inf():
        output_y = y
    else:
        output_y = None
    # Load and process data
    comp = {}
    for g in g_functions.keys():
        # Load data
        series = []
        for i in range(0, len(g_series)):
            with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                % (filename, g, output[i]), "rb") as f:
                series.append(pickle.loads(f.read()))
        # print(y)
        if output_dgp != None:
            with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                % (filename, g, output_dgp), "rb") as f:
                dgp_series = pickle.loads(f.read())
        if output_y != None:
            with open(os.getcwd() + '\\simulation_results\\single_iterations\\'+'%s_%s_%s.txt' \
                % (filename, g, output_y), "rb") as f:
                y = pickle.loads(f.read())
        # Perform computation below
        comp[g] = wrapper_model(function,
                                *series,
                                y=y,
                                dgp_series=dgp_series,
                                comp_kws=comp_kws)
    return comp
def smr_bootstrap_mrgeffs(boot_mrgeffs, **kwargs): return boot_mrgeffs[:, 0] # Calculate if parameters['bootstrap_averages'] == False: boot_expect_avg = comp_summary( output=boot_expect, summary_function=smr_bootstrap_expect_avg) boot_mrgeff_avg = comp_summary( output=boot_mrgeff, summary_function=smr_bootstrap_mrgeffs_avg) else: # Averages done in estimation boot_expect_avg = boot_expect boot_mrgeff_avg = comp_summary( output=boot_mrgeff, summary_function=smr_bootstrap_mrgeffs) else: boot_expect_avg = est.dd_inf() boot_mrgeff_avg = est.dd_inf() #Mrgeff data mrgeff_data = {} def comp_mrgeffdata(output, data, variable_iloc=0, specs=xs, estimators=estimators, parameters=parameters, **kwargs): comp = {} for spec in specs.keys(): comp[spec] = {}