def csv_add_features(self, csvsrc, csvdest):
    """Augment a saved CSV with E900, log10, and normalized feature columns.

    csvsrc <str>: basename (no extension) of the source CSV under self.save_path
    csvdest <str>: basename (no extension) of the destination CSV
    """
    # E900 custom-feature configuration (Si and C are not used in E900)
    e900_kwargs = {'wt%s' % elem: 'wt_percent_%s' % elem
                   for elem in ('P', 'Ni', 'Cu', 'Mn')}
    e900_kwargs['fluencestr'] = 'fluence_n_cm2'
    e900_kwargs['tempC'] = 'temperature_C'
    e900_kwargs['prod_ID'] = 'product_id'
    afm_dict = {'DBTT.E900': dict(e900_kwargs)}
    param_dict = {'DBTT.E900': dict()}

    # Load the source dataframe from disk
    parser = DataParser()
    df = parser.import_data("%s.csv" % os.path.join(self.save_path, csvsrc))

    # Apply each additional-feature method and append its column
    for afm in afm_dict.keys():
        (feature_name, feature_data) = cf_help.get_custom_feature_data(
            class_method_str=afm,
            starting_dataframe=df,
            param_dict=dict(param_dict[afm]),
            addl_feature_method_kwargs=dict(afm_dict[afm]))
        df = FeatureIO(df).add_custom_features([feature_name], feature_data)

    # log10-transform fluence and flux
    for lkey in ('fluence_n_cm2', 'flux_n_cm2_sec'):
        log10_data = np.log10(df[lkey])
        df = FeatureIO(df).add_custom_features(["log(%s)" % lkey], log10_data)

    # Min-max scaling bounds for selected features
    norm_dict = dict()
    norm_dict['log(fluence_n_cm2)'] = {'smin': 17, 'smax': 25}
    norm_dict['log(flux_n_cm2_sec)'] = {'smin': 10, 'smax': 15}
    norm_dict['temperature_C'] = {'smin': 270, 'smax': 320}
    for elem in ("P", "C", "Cu", "Ni", "Mn", "Si"):
        # 1.717 is the maximum Mn atomic percent
        norm_dict["at_percent_%s" % elem] = {'smin': 0.0, 'smax': 1.717}

    for nkey in norm_dict.keys():
        fnorm = FeatureNormalization(df)
        scaled_feature = fnorm.minmax_scale_single_feature(
            nkey,
            smin=norm_dict[nkey]['smin'],
            smax=norm_dict[nkey]['smax'])
        df = FeatureIO(df).add_custom_features(["N(%s)" % nkey], scaled_feature)

    # Persist the augmented dataframe
    df.to_csv("%s.csv" % os.path.join(self.save_path, csvdest))
    return
def make_data(self):
    """Build a synthetic dataframe for testing.

    Populates self.dataframe with index, categorical, time, sine, linear,
    and target columns drawn from self.random_state, plus min-max
    normalized versions of the sine and linear features.
    """
    n_samples, n_features = 100, 5
    y = self.random_state.randn(n_samples)
    X = self.random_state.randn(n_samples, n_features)
    nidx = np.arange(0, n_samples)
    self.dataframe = pd.DataFrame(index=nidx)
    num_cat = self.random_state.randint(0, 4, n_samples)
    cats = ['A', 'B', 'C', 'D']
    str_cat = [cats[nc] for nc in num_cat]
    time = nidx * np.pi / 8.0
    sine_feature = np.sin(time) + X[:, 0]  # add noise
    linear_feature = 100 * time + 30.0 + X[:, 1]  # add noise
    y_feature = np.sin(time) + y / 10.0
    y_feature_error = X[:, 3] / X[:, 4] / 100.0  # add random error
    d_cols = dict()
    d_cols["num_idx"] = nidx
    d_cols["num_cat"] = num_cat
    d_cols["str_cat"] = str_cat
    d_cols["time"] = time
    d_cols["sine_feature"] = sine_feature
    d_cols["linear_feature"] = linear_feature
    d_cols["y_feature"] = y_feature
    d_cols["y_feature_error"] = y_feature_error
    # add columns in sorted-name order for deterministic layout
    cols = list(d_cols.keys())
    cols.sort()
    for col in cols:
        fio = FeatureIO(self.dataframe)
        self.dataframe = fio.add_custom_features([col], d_cols[col])
    fnorm = FeatureNormalization(self.dataframe)
    N_sine_feature = fnorm.minmax_scale_single_feature("sine_feature")
    N_linear_feature = fnorm.minmax_scale_single_feature("linear_feature")
    fio = FeatureIO(self.dataframe)
    self.dataframe = fio.add_custom_features(["N_sine_feature"], N_sine_feature)
    fio = FeatureIO(self.dataframe)
    # BUG FIX: the N_linear_feature column previously received the
    # N_sine_feature data, leaving N_linear_feature unused.
    self.dataframe = fio.add_custom_features(["N_linear_feature"], N_linear_feature)
    return
def get_afm_updated_dataset(self, indiv_df, indiv_params):
    """Update dataframe with additional feature methods """
    for afm, afm_params in indiv_params.items():
        # the model entry is handled separately elsewhere
        if afm == 'model':
            continue
        feature_name, feature_data = cf_help.get_custom_feature_data(
            afm,
            starting_dataframe=indiv_df,
            addl_feature_method_kwargs=dict(afm_params))
        # column is stored under the method name itself
        indiv_df = FeatureIO(indiv_df).add_custom_features([afm], feature_data)
    return indiv_df
def subtraction(self, col1="", col2="", num1="", num2="", **params):
    """Testing function.
        col1 <str>: first feature name
        col2 <str>: second feature name
        num1 <float>: number to multiply col1 by
        num2 <float>: number to subtract
    """
    # combine the two columns: col1 * num1 - col2 + num2
    new_data = (self.df[col1] * num1) - self.df[col2] + num2
    tagged_df = FeatureIO(self.df).add_custom_features(
        ["Subtraction_test"], new_data)
    # return the min-max normalized version of the combined column
    fnorm = FeatureNormalization(tagged_df)
    return fnorm.minmax_scale_single_feature("Subtraction_test")
def add_normalization(self, cname, verbose=0):
    """Add N(...) min-max normalized columns to the dataframe for cname.

    cname <str>: key into self.dfs selecting the dataframe to augment
    verbose <int>: unused verbosity flag (kept for interface compatibility)
    """
    df = self.dfs[cname]
    # Scaling bounds per feature; insertion order fixes the output column order.
    norm_dict = dict()
    norm_dict['log(fluence_n_cm2)'] = {'smin': 17, 'smax': 25}
    norm_dict['log(flux_n_cm2_sec)'] = {'smin': 10, 'smax': 15}
    # all effective-fluence columns share the same 17-25 window
    for pval in (10, 20, 26, 23, 8):
        norm_dict['log(eff fl 100p=%i)' % pval] = {'smin': 17, 'smax': 25}
    norm_dict['temperature_C'] = {'smin': 270, 'smax': 320}
    for elem in ["P", "C", "Cu", "Ni", "Mn", "Si"]:
        # 1.717 is the maximum Mn atomic percent
        norm_dict["at_percent_%s" % elem] = {'smin': 0.0, 'smax': 1.717}

    for nkey in norm_dict.keys():
        fnorm = FeatureNormalization(df)
        scaled_feature = fnorm.minmax_scale_single_feature(
            nkey,
            smin=norm_dict[nkey]['smin'],
            smax=norm_dict[nkey]['smax'])
        df = FeatureIO(df).add_custom_features(["N(%s)" % nkey], scaled_feature)

    self.dfs[cname] = df
    return
def calculate_EffectiveFluence(self, pvalue=0, ref_flux=3e10, flux_feature="",
                               fluence_feature="", scale_min=1e17,
                               scale_max=1e25, **params):
    """Calculate effective fluence """
    # effective fluence = fluence * (ref_flux / flux)^p, then log10
    effective_fluence = (self.df[fluence_feature]
                         * (ref_flux / self.df[flux_feature]) ** pvalue)
    log_efl = np.log10(effective_fluence)
    df_with_efl = FeatureIO(self.df).add_custom_features(["EFl"], log_efl)
    # normalize within the log10 of the given scaling window
    fnorm = FeatureNormalization(df_with_efl)
    return fnorm.minmax_scale_single_feature("EFl",
                                             smin=np.log10(scale_min),
                                             smax=np.log10(scale_max))
def add_feature(self, feature_name, feature_data):
    """Append a single named feature column to self.data."""
    self.data = FeatureIO(self.data).add_custom_features(
        [feature_name], feature_data)
    return
def add_prediction_sigma(self, prediction_data_sigma):
    """Store prediction uncertainty as the 'Prediction Sigma' column."""
    self.data = FeatureIO(self.data).add_custom_features(
        ["Prediction Sigma"], prediction_data_sigma)
    # keep a direct handle to the new column
    self.target_prediction_sigma = self.data["Prediction Sigma"]
    return
def add_prediction(self, prediction_data):
    """Store model predictions as the 'Prediction' column."""
    self.data = FeatureIO(self.data).add_custom_features(
        ["Prediction"], prediction_data)
    # keep a direct handle to the new column
    self.target_prediction = self.data["Prediction"]
    return