def OneHotOneColumn(self, data_container, feature_list):
    """One-hot encode the requested columns of a DataContainer.

    Args:
        data_container: Source container; its frame and feature names are
            read through ``GetFrame()`` and ``GetFeatureName()``.
        feature_list: Column names to expand with ``pd.get_dummies``.

    Returns:
        A new DataContainer whose frame is the one-hot encoded frame.

    Raises:
        AssertionError: If any entry of ``feature_list`` is not among the
            container's feature names (only when assertions are enabled).
    """
    frame = data_container.GetFrame()
    known_features = data_container.GetFeatureName()

    # Every requested column must be a known feature of the container.
    assert all(name in known_features for name in feature_list)

    encoded_container = DataContainer()
    encoded_container.SetFrame(pd.get_dummies(frame, columns=feature_list))
    return encoded_container
def _MergeClinical(self, dc, cli_df):
    """Merge a DataContainer with a dataframe of clinical data.

    The clinical frame is left-joined onto the container's frame using the
    index of both frames. A ``'label'`` column (or, when that one is absent,
    a ``'Label'`` column) is removed from the clinical data before merging.

    The caller's ``cli_df`` is never modified: the column is dropped on a
    copy instead of being deleted in place.

    Args:
        dc: DataContainer whose frame (``dc.GetFrame()``) is the left side
            of the merge.
        cli_df: pandas DataFrame of clinical data, aligned by index.

    Returns:
        A new DataContainer holding the merged frame, on which
        ``UpdateFrameByData()`` has been called.
    """
    clinical_columns = cli_df.columns.tolist()
    # DataFrame.drop returns a new frame, so the caller's dataframe keeps
    # its label column (the former `del cli_df[...]` removed it in place).
    if 'label' in clinical_columns:
        cli_df = cli_df.drop(columns=['label'])
    elif 'Label' in clinical_columns:
        cli_df = cli_df.drop(columns=['Label'])

    df = pd.merge(dc.GetFrame(), cli_df, how='left', left_index=True, right_index=True)

    merge_dc = DataContainer()
    merge_dc.SetFrame(df)
    merge_dc.UpdateFrameByData()
    return merge_dc
def Run(self, data_container, store_folder='', store_key=''):
    """Keep only the non-object-dtype columns of a DataContainer.

    Args:
        data_container: Source container; its frame is read via ``GetFrame()``.
        store_folder: When non-empty and an existing directory, the result
            is saved there as ``numeric_feature.csv`` and the kept feature
            names are written to ``feature_select_info.csv``.
        store_key: Not used by this method.

    Returns:
        A new DataContainer whose frame excludes every ``object`` column.
    """
    numeric_frame = data_container.GetFrame().select_dtypes(include=None, exclude=['object'])

    result = DataContainer()
    result.SetFrame(numeric_frame)

    # Nothing to persist unless a real output directory was supplied.
    if not store_folder or not os.path.isdir(store_folder):
        return result

    result.Save(os.path.join(store_folder, 'numeric_feature.csv'))
    SaveSelectInfo(
        result.GetFeatureName(),
        os.path.join(store_folder, 'feature_select_info.csv'),
        is_merge=False,
    )
    return result