# NOTE(review): this chunk begins mid-file. The leading text up to the closing
# ''' is the tail of a triple-quoted (commented-out) feature list that opened
# in an earlier, unseen chunk — left byte-identical on purpose.
'lcl_ml_ens_mean_spatial_mean', 'w_up_time_max_ens_mean_of_90th'] '''

# Rescale the mid-level lapse rate in both copies of the feature table.
# NOTE(review): 2.67765 is presumably a layer-depth (km) to convert a
# temperature difference into K/km — confirm against the data pipeline.
data['X']['mid_level_lapse_rate_ens_mean_spatial_mean'] = data['X']['mid_level_lapse_rate_ens_mean_spatial_mean'] / 2.67765
X['mid_level_lapse_rate_ens_mean_spatial_mean'] = X['mid_level_lapse_rate_ens_mean_spatial_mean'] / 2.67765

# Load previously computed permutation-importance results for all models.
fname = join(perm_path, f'permutation_importance_all_models_{target}_{time}_training_{metric}{drop_opt}{perm_method}.nc')
explainer = InterpretToolkit(X=data['X'], y=data['targets'], estimator_output='probability',)
perm_results = explainer.load(fname)

#important_vars = perm_results['multipass_rankings__LogisticRegression'].values[:12]
#important_vars = ['low_level_lapse_rate_ens_mean_spatial_mean']
important_vars = ['mid_level_lapse_rate_ens_mean_spatial_mean']

# Readable display names and units for every single-pass-ranked feature.
all_vars = perm_results['singlepass_rankings__LogisticRegression'].values
display_feature_names = {f: to_readable_names([f])[0] for f in all_vars}
#display_feature_names = _fix_long_names(display_feature_names)
feature_units = {f: get_units(f) for f in all_vars}

# Plotting options: colour by interaction, overlay targets, both, or neither
# (the fall-through default matches the 'targets' case).
if option == 'interaction':
    interaction_index = 'auto'
    y = None
elif option == 'targets':
    interaction_index = None
    y = data['targets']
elif option == 'interaction_and_target':
    interaction_index = 'auto'
    y = data['targets']
else:
    interaction_index = None
    y = data['targets']
# NOTE(review): this chunk begins mid-statement — the leading `})` closes a
# call or dict literal opened in the previous, unseen chunk.
})

# Subsample the transformed examples with `rus` (presumably an imblearn
# RandomUnderSampler — confirm) before the expensive interpretation step.
examples_subset, target_subset = rus.fit_resample(
    examples_transformed,
    target_values_transformed,
)
#examples_subset = shap.sample(examples_transformed, 1000)

myInterpreter = InterpretToolkit(
    model=[model.steps[-1][1]],  # final estimator of the sklearn pipeline
    model_names=[model_name],
    examples=examples_subset,
    targets=target_subset,
    feature_names=feature_names)

# Human-readable names/units for plotting.
display_feature_names = {f: to_readable_names([f])[0] for f in feature_names}
display_feature_names = _fix_long_names(display_feature_names)
feature_units = {f: get_units(f) for f in feature_names}

# Append a date column so the subset matches original_feature_names;
# presumably this is the 'Run Date' column removed before transforming —
# confirm against the caller.
date_subset = date_col[:len(examples_subset)].reshape(len(examples_subset), 1)
examples_subset = np.concatenate((examples_subset, date_subset), axis=1)
examples_subset = pd.DataFrame(examples_subset, columns=original_feature_names)

# Invert the pipeline's scaler so downstream plots show physical values.
# NOTE(review): `!= None` should be `is not None` (left unchanged here).
if normalize_method != None:
    unnormalize = UnNormalize(model.steps[1][1], feature_names)
    feature_values = unnormalize._full_inverse_transform(examples_subset)
# Load the training data once and drop the date column from the feature list.
print('First load of the data...')
examples, target_values = _load_train_data(**parameters)
feature_names = list(examples.columns)
feature_names.remove('Run Date')

# Hand-picked environment features to analyse.
important_vars = [
    'cape_ml_ens_mean_spatial_mean',
    'cin_ml_ens_mean_spatial_mean',
    'lcl_ml_ens_mean_spatial_mean',
    'shear_v_0to6_ens_mean_spatial_mean',
    'srh_0to3_ens_mean_spatial_mean',
]

# {'w_up_time_max_ens_mean_of_90th': 'Updraft ($\\mu_e$ of P$_{90}$ of max$_t$)', 'uh_2to5_time_max_ens_mean_of_90th': '2-5 km UH ($\\mu_e$ of P$_{90}$ of max$_t$)', 'cape_ml_ens_mean_spatial_mean': 'ML CAPE ($\\mu_e$)'}

# Readable names with the trailing parenthesised qualifier (and the space
# before it) stripped: split('(')[0][:-1].
display_feature_names = {
    f: to_readable_names([f])[0].split('(')[0][:-1] for f in important_vars
}
print(display_feature_names)
display_feature_names = _fix_long_names(display_feature_names)
feature_units = {f: get_units(f) for f in important_vars}

ale_results = []
for i, model_name in enumerate(model_set):
    parameters['model_name'] = model_name
    calibrated_pipeline = _load_model(**parameters)
    # Unwrap the base pipeline from the calibration wrapper.
    model = calibrated_pipeline.calibrated_classifiers_[0].base_estimator
    # NOTE(review): chunk ends mid-call — just_transforms' argument list
    # continues in the next, unseen chunk; left open on purpose.
    examples_transformed, target_values_transformed = just_transforms(
myInterpreter = InterpretToolkit(examples=examples, targets=target_values)

# Load saved interpretation results for every ML model, one entry per target.
results = []
for target in targets:
    fnames = [
        get_fnames(model_name, target, time, drop_opt)
        for model_name in ml_models
    ]
    results.append(myInterpreter.load_results(fnames))

# Use one sample input file just to enumerate the predictor names
# (everything except the 'matched' verification variables), plus 'Run Date'.
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

readable_feature_names = {
    feature: to_readable_names([feature])[0] for feature in features
}
feature_colors = {
    feature: to_readable_names([feature])[1] for feature in features
}

# Readable labels for 2-D feature pairs, keyed 'a__b' (and, below, 'b__a').
adict = readable_feature_names
display_feature_names1 = {
    f'{f[0]}__{f[1]}': f'{adict[f[0]]} & {adict[f[1]]}'
    for f in list(itertools.combinations(features, r=2))
}
# NOTE(review): chunk ends mid-dict — the comprehension's `for` clause
# continues in the next, unseen chunk; left open on purpose.
display_feature_names2 = {
    f'{f[1]}__{f[0]}': f'{adict[f[0]]} & {adict[f[1]]}'
# Load the most important variables path = '/work/mflora/ML_DATA/permutation_importance' perm_imp_fname = join( path, f'permutation_importance_all_models_{target}_{time}_training_{metric}{drop_opt}{perm_method}.nc' ) explainer = InterpretToolkit() perm_imp_results = explainer.load(perm_imp_fname) important_vars = perm_imp_results[ f'{mode}_rankings__LogisticRegression'].values important_vars = important_vars[:n_vars] # Convert to pretty feature names readable_feature_names = { feature: to_readable_names([feature])[0] + f' ({get_units(feature)})' for feature in important_vars } parameters = { 'time': time, 'target': target, 'drop_opt': drop_opt, } X, y = _load_train_data(**parameters) n_panels = len(important_vars) fig, axes = base_plt.create_subplots(n_panels, figsize=(10, 6), sharey=True, n_columns=4,
# NOTE(review): this chunk begins inside an unseen function definition — the
# two indented lines below are the tail of a 2-D-ALE filename builder
# (cf. get_2d_ale, called further down).
    resample_method = resample_dict[time][target][model_name]
    return join(
        ale_path,
        f'ale_2d_results_{model_name}_{target}_{time}{drop_opt}{resample_method}.nc'
    )

########################################

# Enumerate predictor names from one sample input file (everything except
# the 'matched' verification variables), plus 'Run Date'.
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']
ds.close()

display_feature_names = {f: to_readable_names([f])[0] for f in features}
display_feature_names = _fix_long_names(display_feature_names)
###feature_units = {f: get_units(f)for f in features}

# Take the top-3 interacting feature pairs (keyed 'a__b') from the saved
# ALE-variance rankings, then load the matching 2-D ALE result files.
myInterpreter = InterpretToolkit()
fnames = [get_fnames(m, target, time, drop_opt) for m in model_names]
results = myInterpreter.load_results(fnames=fnames)
feature_names = results['ale_variance_interactions_rankings__LogisticRegression'].values
feature_names = feature_names[:3]
fnames = [get_2d_ale(m, target, time, drop_opt) for m in model_names]
ale_data = myInterpreter.load_results(fnames=fnames)
# Split the 'a__b' keys back into (a, b) feature tuples.
feature_names = [tuple(f.split('__')) for f in feature_names]
# NOTE(review): this chunk begins inside an unseen function definition — the
# indented return below is the tail of a permutation-importance filename builder.
    return join(path, f'permutation_importance_{atype}_{target}_{time}_{mode}_{metric}{drop_opt}{perm_method}{resample}.nc')

explainer = InterpretToolkit() #X=X,y=y)

# Load the saved permutation-importance results, one dataset per target.
results = []
for target in targets:
    fname = get_fnames(target, time, mode, metric, drop_opt, perm_method, resample)
    results.append(explainer.load(fname))

# NOTE(review): storing an estimator *name* under 'estimator_output' looks
# suspicious — elsewhere in this file estimator_output is 'probability';
# confirm intent.
results[0].attrs['estimator_output'] = ['LogisticRegression']

# Enumerate predictor names from one sample input file (everything except
# the 'matched' verification variables), plus 'Run Date'.
fname = '/work/mflora/ML_DATA/INPUT_DATA/20180501/PROBABILITY_OBJECTS_20180501-2330_10.nc'
ds = xr.open_dataset(fname)
features = [var for var in list(ds.data_vars) if 'matched' not in var] + ['Run Date']

readable_feature_names = {feature: to_readable_names([feature])[0] for feature in features}
feature_colors = {feature: to_readable_names([feature])[1] for feature in features}

p_values = None
# NOTE(review): chunk ends mid-call — plot_importance's argument list
# continues in the next, unseen chunk; left open on purpose.
fig = explainer.plot_importance(
    data=results,
    method=method,
    display_feature_names=readable_feature_names,
    feature_colors=feature_colors,
    num_vars_to_plot=num_vars_to_plot,
    rows=rows,
    columns=columns,
    plot_correlated_features=False,
    estimator_names=ml_models,
    p_values=p_values,