def __init__(
    self,
    filepath_Y,
    num_params,
    gsa_func,
    gsa_label,
    write_dir,
    num_steps=10,
    **kwargs,
):
    self.filepath_Y = filepath_Y
    self.Y = read_hdf5_array(filepath_Y).flatten()
    self.iterations = self.Y.shape[0]
    self.num_params = num_params
    self.gsa_func = gsa_func
    self.gsa_label = gsa_label
    self.write_dir = write_dir
    self.make_dirs()
    self.num_steps = num_steps
    self.iterations_order = self.generate_iterations_order()
    (
        self.iterations_min,
        self.iterations_least_common_multiple,
    ) = self.generate_iterations_min_and_least_common_multiple(
        self.gsa_label, **kwargs
    )  # depends on gsa method
    (
        self.iterations_step,
        self.iterations_for_convergence,
    ) = self.generate_iterations_for_convergence()
    self.sampling_label = str(self.filepath_Y).split(".")[1]  # TODO there must be a better way
    self.seed = str(self.filepath_Y).split(".")[-2]
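# Illustrative note (hypothetical filename, not taken from this repo): the two
# ``split(".")`` calls above assume a dotted filename convention such as
# "Y.randomSampling.2000.12345.hdf5", which would yield
# ``sampling_label == "randomSampling"`` and ``seed == "12345"`` (as a string).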
def get_influential_Y_from_gsa(self, gsa_indices, num_influential, tag=None):
    """Run the model while only the most influential inputs vary, selected by sensitivity index values.

    Parameters
    ----------
    gsa_indices : array
        Sensitivity index values for all model inputs.
    num_influential : int
        Number of most influential inputs that keep varying.
    tag : str
        Tag used when saving the results.

    Returns
    -------
    influential_Y : array
        Model outputs when only the influential inputs vary.

    """
    assert num_influential <= self.num_params
    assert len(gsa_indices) == self.num_params
    filepath = self.create_model_output_inf_filepath(num_influential, tag)
    if filepath.exists():
        print("{} already exists".format(filepath.name))
        influential_Y = read_hdf5_array(filepath).flatten()
    else:
        # Inputs ranked below the top ``num_influential`` are fixed at their default values.
        non_influential_inds = np.argsort(gsa_indices)[::-1][num_influential:]
        non_influential_inds.sort()
        X_rescaled_inf = deepcopy(self.X_rescaled)
        X_rescaled_inf[:, non_influential_inds] = np.tile(
            self.default_x_rescaled[non_influential_inds], (self.iterations, 1)
        )
        influential_Y = self.model(X_rescaled_inf)
        write_hdf5_array(influential_Y, filepath)
    return influential_Y
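# Hypothetical usage sketch (the object name ``val`` and the index values are
# assumptions, not part of this module): with five model inputs, keep only the
# two inputs with the largest sensitivity indices varying.
#
#     gsa_indices = np.array([0.05, 0.40, 0.01, 0.30, 0.02])
#     influential_Y = val.get_influential_Y_from_gsa(
#         gsa_indices, num_influential=2, tag="total_order"
#     )
#
# Here inputs 1 and 3 keep varying, while inputs 0, 2 and 4 are pinned to
# ``default_x_rescaled``.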
def generate_Y_all_inputs_vary(self):
    """Run model when all inputs vary."""
    # Model output
    if not self.filepath_Y_all.exists():
        Y = self.model(self.X_rescaled)
        write_hdf5_array(Y, self.filepath_Y_all)
    else:
        # print("{} already exists".format(self.filepath_Y_all.name))
        Y = read_hdf5_array(self.filepath_Y_all).flatten()
    return Y
def generate_X_rescaled_all_inputs_vary(self):
    """Rescale unitcube samples when all model inputs vary."""
    if not self.filepath_X_rescaled_all.exists():
        # Unitcube samples
        np.random.seed(self.seed)
        X = np.random.rand(self.iterations, self.num_params)
        X_rescaled = self.model.rescale(X)
        write_hdf5_array(X_rescaled, self.filepath_X_rescaled_all)
    else:
        X_rescaled = read_hdf5_array(self.filepath_X_rescaled_all)
    return X_rescaled
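# Assumed call order (a sketch, attribute names on the left are assumptions):
# the rescaled samples are generated first, so that the cached ``X_rescaled``
# can be reused both by the all-inputs model run and by the
# ``get_influential_Y_*`` methods above.
#
#     self.X_rescaled = self.generate_X_rescaled_all_inputs_vary()
#     self.Y_all = self.generate_Y_all_inputs_vary()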
def get_influential_Y_from_parameter_choice(self, influential_inputs, tag=None):
    """Run the model while only an explicitly chosen set of influential inputs varies."""
    num_influential = len(influential_inputs)
    assert num_influential <= self.num_params
    filepath = self.create_model_output_inf_filepath(num_influential, tag)
    if filepath.exists():
        print("{} already exists".format(filepath.name))
        influential_Y = read_hdf5_array(filepath).flatten()
    else:
        non_influential_inds = np.setdiff1d(
            np.arange(self.num_params), influential_inputs
        )
        non_influential_inds.sort()
        X_rescaled_inf = deepcopy(self.X_rescaled)
        X_rescaled_inf[:, non_influential_inds] = np.tile(
            self.default_x_rescaled[non_influential_inds], (self.iterations, 1)
        )
        influential_Y = self.model(X_rescaled_inf)
        write_hdf5_array(influential_Y, filepath)
    return influential_Y
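# Hypothetical usage sketch (``val`` is an assumed object name): vary only
# inputs 1 and 3, chosen by hand rather than ranked by sensitivity indices.
#
#     influential_Y = val.get_influential_Y_from_parameter_choice(
#         influential_inputs=np.array([1, 3]), tag="manual_choice"
#     )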
"cf": uncertain_cf_params[uncertain_params_selected_where_dict["cf"]], } num_params = (len(uncertain_params["tech"]) + len(uncertain_params["bio"]) + len(uncertain_params["cf"])) filepath_Y = (write_dir / "arrays" / "si.Y.{}inf.{}.{}.lognormal{}.hdf5".format( num_params, iterations, seed, n_use_lognormal, )) print(filepath_Y.name) if filepath_Y.exists(): Y = read_hdf5_array(filepath_Y).flatten() else: model = LCAModelBase( demand, uncertain_method, uncertain_params, # uncertain_params_selected_where_dict, ) np.random.seed(seed) X = np.random.rand(iterations, num_params) Xr = model.rescale(X) Y = model(Xr) write_hdf5_array(Y, filepath_Y) Y_dict[n_use_lognormal] = Y if n_use_lognormal == -1: subplot_titles.append(r"$\text{All 408'741 inputs vary}$")
filepath_val_delt = (
    write_dir_arr / "validation.Y.60inf.2000.23467.DeltaIndexNr0.hdf5"
)
filepath_val_gain = write_dir_arr / "validation.Y.60inf.2000.23467.TotalGain.hdf5"
filepath_val_dict = {
    "all": filepath_val_all,
    "corr": filepath_val_corr,
    "salt": filepath_val_salt,
    "delt": filepath_val_delt,
    "xgbo": filepath_val_gain,
}

Y_dict, S_dict = {}, {}
Y_arr, S_arr = np.zeros((0, iterations)), np.zeros((0, num_params))
stability_dicts = []
for k in filepath_val_dict.keys():
    Y_dict[k] = read_hdf5_array(filepath_val_dict[k]).flatten()
    if k != "all":
        Y_arr = np.vstack([Y_arr, Y_dict[k]])
        S_dict[k] = read_pickle(filepath_gsa_dict[k][0])[filepath_gsa_dict[k][1]]
        S_arr = np.vstack([S_arr, S_dict[k]])
        stability_dict = read_pickle(filepath_stability_dict[k][0])
        stability_dicts.append(stability_dict)

S_sorted = np.sort(np.abs(S_dict["xgbo"]))[::-1]
th = (S_sorted[99] - S_sorted[1000]) / 4
bootstrap_ranking_tag = "paper1"
st = Robustness(
    stability_dicts,
    write_dir,
demand_act = [act for act in co if "Food" in act["name"]][0]
demand = {demand_act: 1}
method = ("IPCC 2013", "climate change", "GTP 100a")
lca = bw.LCA(demand, method)
lca.lci()
lca.lcia()
num_params = len(lca.tech_params[lca.tech_params["uncertainty_type"] > 1])
print(num_params)
seed = 923458
filename_Y_narrow = "validation.narrow.Y.{}.{}.{}.hdf5".format(
    iterations, num_params, seed
)
filepath_Y_narrow = path_setac / "arrays" / filename_Y_narrow
if filepath_Y_narrow.exists():
    narrow_Y = read_hdf5_array(filepath_Y_narrow).flatten()
else:
    tech_params_narrow = deepcopy(lca.tech_params)
    # Read params_yes from xgboost model
    filepath_params_yes_0 = path_merlin / "params_yes_0.pickle"
    with open(filepath_params_yes_0, "rb") as f:
        params_yes_0 = pickle.load(f)
    model_seed = 3333
    path_model_dir = path_setac / "regression" / "{}_model".format(model_seed)
    model, params_yes_xgboost, importance_dict = get_xgboost_params(
        path_model_dir, params_yes_0
    )
    params_yes = params_yes_xgboost[:num_params_narrow]
    # Halve the ``scale`` of the selected uncertain exchanges to narrow their
    # distributions. Index the field first: fancy indexing a structured array
    # returns a copy, so ``tech_params_narrow[params_yes]["scale"] = ...`` would
    # only write to a temporary and leave ``tech_params_narrow`` unchanged.
    tech_params_narrow["scale"][params_yes] = (
        tech_params_narrow["scale"][params_yes] / 2
    )
    lca_model = LCAModelSetac(demand, method, tech_params_narrow)
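    # Side note (illustrative sketch, not part of the original script): a minimal
    # check of the structured-array indexing behaviour referenced above.
    #
    #     arr = np.array([(1.0,), (2.0,)], dtype=[("scale", "f8")])
    #     arr[np.array([0])]["scale"] = 99.0   # writes to a copy, arr unchanged
    #     arr["scale"][np.array([0])] = 99.0   # writes to arr in place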
# --> So I have to hardcode....
# tag = 60
# filepath_influential_Y = (
#     write_dir / "arrays" / validation.create_influential_model_output_filepath(tag)
# )
# influential_Y = read_hdf5_array(filepath_influential_Y).flatten() - diff_mean[tag]
# # validation.plot_histogram_base_Y_influential_Y(
# #     influential_Y, tag=tag, save_fig=True, bin_min=bin_min, bin_max=bin_max
# # )

tag = 60
filepath_influential_Y = (
    write_dir / "arrays" / validation.create_influential_model_output_filepath(tag)
)
influential_Y = read_hdf5_array(filepath_influential_Y).flatten() - diff_mean[tag]
# validation.plot_histogram_base_Y_influential_Y(
#     influential_Y, tag=tag, save_fig=True, bin_min=bin_min, bin_max=bin_max
# )
validation.plot_correlation_base_Y_influential_Y(
    influential_Y, tag=tag, save_fig=True
)

# Narrow
# iterations = 2000
# num_params_narrow = 60
# scaling_factor = 8
# tag = "{}.div{}".format(num_params_narrow, scaling_factor)
# filename_Y_narrow = "validation.narrow.Y.{}.{}.div{}.{}.hdf5".format(
#     iterations, num_params_narrow, scaling_factor, seed