def csv_add_features(self, csvsrc, csvdest):
    """Augment a saved CSV with E900, log10, and normalized feature columns.

    csvsrc <str>: basename (no extension) of the source CSV under self.save_path
    csvdest <str>: basename (no extension) of the destination CSV
    """
    # E900 custom-feature configuration (Si and C are not used in E900)
    e900_kwargs = {'wt%s' % elem: 'wt_percent_%s' % elem
                   for elem in ('P', 'Ni', 'Cu', 'Mn')}
    e900_kwargs['fluencestr'] = 'fluence_n_cm2'
    e900_kwargs['tempC'] = 'temperature_C'
    e900_kwargs['prod_ID'] = 'product_id'
    afm_dict = {'DBTT.E900': dict(e900_kwargs)}
    param_dict = {'DBTT.E900': dict()}

    # Load the source dataframe from disk
    parser = DataParser()
    df = parser.import_data("%s.csv" % os.path.join(self.save_path, csvsrc))

    # Apply each additional-feature method and append its column
    for afm in afm_dict.keys():
        (feature_name, feature_data) = cf_help.get_custom_feature_data(
            class_method_str=afm,
            starting_dataframe=df,
            param_dict=dict(param_dict[afm]),
            addl_feature_method_kwargs=dict(afm_dict[afm]))
        df = FeatureIO(df).add_custom_features([feature_name], feature_data)

    # log10-transform fluence and flux
    for lkey in ('fluence_n_cm2', 'flux_n_cm2_sec'):
        log10_data = np.log10(df[lkey])
        df = FeatureIO(df).add_custom_features(["log(%s)" % lkey], log10_data)

    # Min-max scaling bounds for selected features
    norm_dict = dict()
    norm_dict['log(fluence_n_cm2)'] = {'smin': 17, 'smax': 25}
    norm_dict['log(flux_n_cm2_sec)'] = {'smin': 10, 'smax': 15}
    norm_dict['temperature_C'] = {'smin': 270, 'smax': 320}
    for elem in ("P", "C", "Cu", "Ni", "Mn", "Si"):
        # 1.717 is the maximum Mn atomic percent
        norm_dict["at_percent_%s" % elem] = {'smin': 0.0, 'smax': 1.717}

    for nkey in norm_dict.keys():
        fnorm = FeatureNormalization(df)
        scaled_feature = fnorm.minmax_scale_single_feature(
            nkey,
            smin=norm_dict[nkey]['smin'],
            smax=norm_dict[nkey]['smax'])
        df = FeatureIO(df).add_custom_features(["N(%s)" % nkey], scaled_feature)

    # Persist the augmented dataframe
    df.to_csv("%s.csv" % os.path.join(self.save_path, csvdest))
    return
def make_data(self):
    """Build a synthetic dataframe for testing.

    Populates self.dataframe with index, categorical, time, sine, linear,
    and target columns drawn from self.random_state, plus min-max
    normalized versions of the sine and linear features.
    """
    n_samples, n_features = 100, 5
    y = self.random_state.randn(n_samples)
    X = self.random_state.randn(n_samples, n_features)
    nidx = np.arange(0, n_samples)
    self.dataframe = pd.DataFrame(index=nidx)
    num_cat = self.random_state.randint(0, 4, n_samples)
    cats = ['A', 'B', 'C', 'D']
    str_cat = [cats[nc] for nc in num_cat]
    time = nidx * np.pi / 8.0
    sine_feature = np.sin(time) + X[:, 0]  # add noise
    linear_feature = 100 * time + 30.0 + X[:, 1]  # add noise
    y_feature = np.sin(time) + y / 10.0
    y_feature_error = X[:, 3] / X[:, 4] / 100.0  # add random error
    d_cols = dict()
    d_cols["num_idx"] = nidx
    d_cols["num_cat"] = num_cat
    d_cols["str_cat"] = str_cat
    d_cols["time"] = time
    d_cols["sine_feature"] = sine_feature
    d_cols["linear_feature"] = linear_feature
    d_cols["y_feature"] = y_feature
    d_cols["y_feature_error"] = y_feature_error
    # add columns in sorted-name order for deterministic layout
    cols = list(d_cols.keys())
    cols.sort()
    for col in cols:
        fio = FeatureIO(self.dataframe)
        self.dataframe = fio.add_custom_features([col], d_cols[col])
    fnorm = FeatureNormalization(self.dataframe)
    N_sine_feature = fnorm.minmax_scale_single_feature("sine_feature")
    N_linear_feature = fnorm.minmax_scale_single_feature("linear_feature")
    fio = FeatureIO(self.dataframe)
    self.dataframe = fio.add_custom_features(["N_sine_feature"], N_sine_feature)
    fio = FeatureIO(self.dataframe)
    # BUG FIX: the N_linear_feature column previously received the
    # N_sine_feature data, leaving N_linear_feature unused.
    self.dataframe = fio.add_custom_features(["N_linear_feature"], N_linear_feature)
    return
def get_afm_updated_dataset(self, indiv_df, indiv_params):
    """Update dataframe with additional feature methods """
    for afm, afm_params in indiv_params.items():
        # the model entry is handled separately elsewhere
        if afm == 'model':
            continue
        feature_name, feature_data = cf_help.get_custom_feature_data(
            afm,
            starting_dataframe=indiv_df,
            addl_feature_method_kwargs=dict(afm_params))
        # column is stored under the method name itself
        indiv_df = FeatureIO(indiv_df).add_custom_features([afm], feature_data)
    return indiv_df
def subtraction(self, col1="", col2="", num1="", num2="", **params):
    """Testing function.
        col1 <str>: first feature name
        col2 <str>: second feature name
        num1 <float>: number to multiply col1 by
        num2 <float>: number to subtract
    """
    # combine the two columns: col1 * num1 - col2 + num2
    new_data = (self.df[col1] * num1) - self.df[col2] + num2
    tagged_df = FeatureIO(self.df).add_custom_features(
        ["Subtraction_test"], new_data)
    # return the min-max normalized version of the combined column
    fnorm = FeatureNormalization(tagged_df)
    return fnorm.minmax_scale_single_feature("Subtraction_test")
def add_normalization(self, cname, verbose=0):
    """Add N(...) min-max normalized columns to the dataframe for cname.

    cname <str>: key into self.dfs selecting the dataframe to augment
    verbose <int>: unused verbosity flag (kept for interface compatibility)
    """
    df = self.dfs[cname]
    # Scaling bounds per feature; insertion order fixes the output column order.
    norm_dict = dict()
    norm_dict['log(fluence_n_cm2)'] = {'smin': 17, 'smax': 25}
    norm_dict['log(flux_n_cm2_sec)'] = {'smin': 10, 'smax': 15}
    # all effective-fluence columns share the same 17-25 window
    for pval in (10, 20, 26, 23, 8):
        norm_dict['log(eff fl 100p=%i)' % pval] = {'smin': 17, 'smax': 25}
    norm_dict['temperature_C'] = {'smin': 270, 'smax': 320}
    for elem in ["P", "C", "Cu", "Ni", "Mn", "Si"]:
        # 1.717 is the maximum Mn atomic percent
        norm_dict["at_percent_%s" % elem] = {'smin': 0.0, 'smax': 1.717}

    for nkey in norm_dict.keys():
        fnorm = FeatureNormalization(df)
        scaled_feature = fnorm.minmax_scale_single_feature(
            nkey,
            smin=norm_dict[nkey]['smin'],
            smax=norm_dict[nkey]['smax'])
        df = FeatureIO(df).add_custom_features(["N(%s)" % nkey], scaled_feature)

    self.dfs[cname] = df
    return
def calculate_EffectiveFluence(self, pvalue=0, ref_flux=3e10, flux_feature="",
                               fluence_feature="", scale_min=1e17,
                               scale_max=1e25, **params):
    """Calculate effective fluence """
    # effective fluence = fluence * (ref_flux / flux)^p, then log10
    effective_fluence = (self.df[fluence_feature]
                         * (ref_flux / self.df[flux_feature]) ** pvalue)
    log_efl = np.log10(effective_fluence)
    df_with_efl = FeatureIO(self.df).add_custom_features(["EFl"], log_efl)
    # normalize within the log10 of the given scaling window
    fnorm = FeatureNormalization(df_with_efl)
    return fnorm.minmax_scale_single_feature("EFl",
                                             smin=np.log10(scale_min),
                                             smax=np.log10(scale_max))
def add_feature(self, feature_name, feature_data):
    """Append a single named feature column to self.data."""
    self.data = FeatureIO(self.data).add_custom_features(
        [feature_name], feature_data)
    return
def add_prediction_sigma(self, prediction_data_sigma):
    """Store prediction uncertainty as the 'Prediction Sigma' column."""
    self.data = FeatureIO(self.data).add_custom_features(
        ["Prediction Sigma"], prediction_data_sigma)
    # keep a direct handle to the new column
    self.target_prediction_sigma = self.data["Prediction Sigma"]
    return
def add_prediction(self, prediction_data):
    """Store model predictions as the 'Prediction' column."""
    self.data = FeatureIO(self.data).add_custom_features(
        ["Prediction"], prediction_data)
    # keep a direct handle to the new column
    self.target_prediction = self.data["Prediction"]
    return