def process_dataframe(df):
    """Split ``df`` into encoded inputs ``x`` and integer-coded targets ``y``.

    The ``attack_cat`` column becomes the target: every string listed in the
    module-level ``attack_cat_values`` is replaced by its position in that
    sequence.  All other columns form the inputs and are encoded according to
    the module-level ``params['encoder']``:

    * ``'ordinalencoder'``: ``OrdinalEncoder().fit_transform`` on the inputs.
    * ``'labelencoder'``: ``LabelEncoder().fit_transform`` on each column.
    * any other value: the ``proto``, ``state`` and ``service`` strings are
      replaced by their position in ``proto_values``, ``state_values`` and
      ``service_values``; then ``'standardscaler'``, ``'minmaxscaler01'`` or
      ``'minmaxscaler11'`` additionally fits and applies the matching scaler.
      Unrecognised values leave the integer-coded frame unscaled.

    The caller's ``df`` is not modified: the targets are computed into a new
    Series instead of being written back into ``df['attack_cat']``.

    Args:
        df: DataFrame holding an ``attack_cat`` column plus the feature
            columns (``proto``, ``state`` and ``service`` are required on the
            manual-encoding path).

    Returns:
        A tuple ``(x, y)``.  ``y`` is the encoded ``attack_cat`` Series; ``x``
        is whatever the selected encoder/scaler returns, or a DataFrame when
        no encoder/scaler object is applied.

    Raises:
        KeyError: if a required column or ``params['encoder']`` is missing.
    """
    encoder = params['encoder']

    # Assign x (inputs) and y (outputs) of the network.  ``replace`` and
    # ``drop`` both return new objects, so ``df`` itself stays untouched.
    y = _replace_by_position(df['attack_cat'], attack_cat_values)
    x = df.drop(columns='attack_cat')

    # ***** MULTIPLE ENCODER CHOICE *****
    if encoder == 'ordinalencoder':
        # Encode categorical features as an integer array
        x = OrdinalEncoder().fit_transform(x)
    elif encoder == 'labelencoder':
        # Encode each column with values between 0 and n_classes-1
        x = x.apply(LabelEncoder().fit_transform)
    else:
        # Replace string features with ints
        x['proto'] = _replace_by_position(x['proto'], proto_values)
        x['state'] = _replace_by_position(x['state'], state_values)
        x['service'] = _replace_by_position(x['service'], service_values)

        if encoder == "standardscaler":
            # Standardize by removing the mean and scaling to unit variance
            x = StandardScaler().fit_transform(x)
        elif encoder == "minmaxscaler01":
            # Scale each feature to the range [0, 1]
            x = MinMaxScaler(feature_range=(0, 1)).fit_transform(x)
        elif encoder == "minmaxscaler11":
            # Scale each feature to the range [-1, 1]
            x = MinMaxScaler(feature_range=(-1, 1)).fit_transform(x)

    return x, y


def _replace_by_position(column, values):
    """Return ``column`` with each entry of ``values`` replaced by its index.

    Entries of ``column`` that do not appear in ``values`` are kept as-is.
    """
    codes = {}
    for position, value in enumerate(values):
        # A repeated value keeps its first position, which is what replacing
        # the values one at a time, in order, would produce.
        codes.setdefault(value, position)
    if not codes:
        # Nothing to replace; avoid handing pandas an empty mapping.
        return column
    return column.replace(codes)
def process_dataframe(df):
    """Split ``df`` into encoded inputs ``x`` and integer-coded targets ``y``.

    Columns are first restricted to ``four_features``, ``eight_features`` or
    ``full_features`` depending on ``params['features_nb']`` (4, 8, or any
    other value).  The ``label`` column becomes the target, coded as:

    * 0: ``'normal.'`` / ``'normal'``
    * 1: names in ``entry_type['probe']``
    * 2: names in ``entry_type['dos']``
    * 3: names in ``entry_type['u2r']``
    * 4: names in ``entry_type['r2l']``

    Each name is matched both as listed and with its last character removed,
    so labels written with or without a trailing ``.`` are both recognised
    (the original comment notes this also works with NSL).  A ``difficulty``
    column, when selected, is dropped from the inputs.

    The inputs are then encoded according to ``params['encoder']``:

    * ``'ordinalencoder'``: ``OrdinalEncoder().fit_transform`` on the inputs.
    * ``'labelencoder'``: ``LabelEncoder().fit_transform`` on each column.
    * any other value: the ``service``, ``protocol_type`` and ``flag``
      strings (when those columns are selected) are replaced by their
      position in ``service_values``, ``protocol_type_values`` and
      ``flag_values``; then ``'standardscaler'``, ``'minmaxscaler01'`` or
      ``'minmaxscaler11'`` additionally fits and applies the matching scaler.
      Unrecognised values leave the integer-coded frame unscaled.

    The labels are computed into a new Series rather than assigned back into
    the column-selected frame, which avoids pandas' ``SettingWithCopyWarning``
    and never touches the caller's ``df``.

    Args:
        df: DataFrame containing at least the selected feature columns,
            including ``label``.

    Returns:
        A tuple ``(x, y)``.  ``y`` is the encoded ``label`` Series; ``x`` is
        whatever the selected encoder/scaler returns, or a DataFrame when no
        encoder/scaler object is applied.

    Raises:
        KeyError: if a selected column, ``params['encoder']`` or
            ``params['features_nb']`` is missing.
    """
    # Select 4, 8 or 41 features
    features_nb = params['features_nb']
    if features_nb == 4:
        features = four_features
    elif features_nb == 8:
        features = eight_features
    else:
        features = full_features
    selected = df[features]

    # Map connection-type strings to ints.  ``setdefault`` keeps the first
    # code assigned to a string, matching the precedence of replacing the
    # groups one after another (normal, probe, dos, u2r, r2l).
    label_codes = {'normal.': 0, 'normal': 0}
    for code, family in enumerate(('probe', 'dos', 'u2r', 'r2l'), start=1):
        for name in entry_type[family]:
            label_codes.setdefault(name, code)
            # Same name without its final character (the trailing dot).
            label_codes.setdefault(name[:-1], code)

    # Assign x (inputs) and y (outputs) of the network
    y = selected['label'].replace(label_codes)
    not_inputs = ['label']
    if "difficulty" in selected.columns:
        # For NSL KDD
        not_inputs.append('difficulty')
    x = selected.drop(columns=not_inputs)

    # ***** MULTIPLE ENCODER CHOICE *****
    encoder = params['encoder']
    if encoder == 'ordinalencoder':
        # Encode categorical features as an integer array
        x = OrdinalEncoder().fit_transform(x)
    elif encoder == 'labelencoder':
        # Encode each column with values between 0 and n_classes-1
        x = x.apply(LabelEncoder().fit_transform)
    else:
        # Replace string features with ints
        if 'service' in features:
            x['service'] = _replace_by_position(x['service'], service_values)
        if 'protocol_type' in features:
            x['protocol_type'] = _replace_by_position(
                x['protocol_type'], protocol_type_values)
        if 'flag' in features:
            x['flag'] = _replace_by_position(x['flag'], flag_values)

        if encoder == "standardscaler":
            # Standardize by removing the mean and scaling to unit variance
            x = StandardScaler().fit_transform(x)
        elif encoder == "minmaxscaler01":
            # Scale each feature to the range [0, 1]
            x = MinMaxScaler(feature_range=(0, 1)).fit_transform(x)
        elif encoder == "minmaxscaler11":
            # Scale each feature to the range [-1, 1]
            x = MinMaxScaler(feature_range=(-1, 1)).fit_transform(x)

    return x, y


def _replace_by_position(column, values):
    """Return ``column`` with each entry of ``values`` replaced by its index.

    Entries of ``column`` that do not appear in ``values`` are kept as-is.
    """
    codes = {}
    for position, value in enumerate(values):
        # A repeated value keeps its first position, which is what replacing
        # the values one at a time, in order, would produce.
        codes.setdefault(value, position)
    if not codes:
        # Nothing to replace; avoid handing pandas an empty mapping.
        return column
    return column.replace(codes)