Python OrdinalEncoder.apply示例

    def process_dataframe(df):
        """Split ``df`` into encoded inputs and integer-coded attack labels.

        Relies on names from the enclosing scope: ``params`` (its
        ``'encoder'`` entry selects the transformation), ``attack_cat_values``,
        ``proto_values``, ``state_values`` and ``service_values`` (ordered
        value lists whose positions become the integer codes).

        Note that the ``attack_cat`` column of ``df`` itself is overwritten
        with the integer codes, so the caller's frame is modified.

        Returns a tuple ``(x, y)``: ``x`` is the feature data after the
        selected transformation, ``y`` is the ``attack_cat`` column.
        """
        # Each attack category name becomes its position in
        # attack_cat_values.
        for code, category in enumerate(attack_cat_values):
            df['attack_cat'] = df['attack_cat'].replace([category], code)

        # Inputs are every column except the label; the label is the output.
        x = df.drop(columns='attack_cat')
        y = df['attack_cat']

        encoder = params['encoder']

        # Encode every feature column as an integer array in one pass.
        if encoder == 'ordinalencoder':
            return OrdinalEncoder().fit_transform(x), y

        # Encode each column independently with values 0..n_classes-1
        # (the encoder is refitted on every column by ``apply``).
        if encoder == 'labelencoder':
            return x.apply(LabelEncoder().fit_transform), y

        # Every other choice first turns the string features into ints using
        # the position of each value in its reference list.
        string_columns = (
            ('proto', proto_values),
            ('state', state_values),
            ('service', service_values),
        )
        for column, known_values in string_columns:
            for code, value in enumerate(known_values):
                x[column] = x[column].replace(value, code)

        if encoder == "standardscaler":
            # Remove the mean and scale to unit variance.
            scaler = StandardScaler()
        elif encoder == "minmaxscaler01":
            # Scale each feature to the range [0, 1].
            scaler = MinMaxScaler(feature_range=(0, 1))
        elif encoder == "minmaxscaler11":
            # Scale each feature to the range [-1, 1].
            scaler = MinMaxScaler(feature_range=(-1, 1))
        else:
            # Unrecognised encoder name: hand back the int-coded frame as is.
            return x, y

        return scaler.fit_transform(x), y

示例#2

显示文件

文件： kdd_processing.py 项目： sylvainlapeyrade/RNN_Intrusion-Detection_Keras

    def process_dataframe(df):
        """Select features, integer-code the labels and encode the inputs.

        Relies on names from the enclosing scope: ``params`` (entries
        ``'features_nb'`` and ``'encoder'``), the feature lists
        ``four_features`` / ``eight_features`` / ``full_features``, the
        ``entry_type`` mapping (keys ``'probe'``, ``'dos'``, ``'u2r'``,
        ``'r2l'``) and the ordered value lists ``service_values``,
        ``protocol_type_values`` and ``flag_values``.

        The caller's ``df`` is not modified: all work happens on a copy of
        the selected columns.

        Returns a tuple ``(x, y)``: ``x`` is the feature data after the
        selected transformation, ``y`` is the ``label`` column coded as
        0 (normal), 1 (probe), 2 (dos), 3 (u2r), 4 (r2l).
        """
        # Select 4, 8 or 41 features
        if params['features_nb'] == 4:
            features = four_features
        elif params['features_nb'] == 8:
            features = eight_features
        else:
            features = full_features

        # Take an explicit copy: assigning into the result of ``df[features]``
        # directly is what triggers pandas' SettingWithCopyWarning.
        df = df[features].copy()

        # Replace connexion type string with an int (also works with NSL).
        # Each name is matched both as listed and with its last character
        # dropped (presumably the trailing '.' - compare 'normal.'/'normal').
        df['label'] = df['label'].replace(['normal.', 'normal'], 0)
        attack_families = ('probe', 'dos', 'u2r', 'r2l')
        for code, family in enumerate(attack_families, start=1):
            for name in entry_type[family]:
                df['label'] = df['label'].replace([name, name[:-1]], code)

        # For NSL KDD
        if "difficulty" in df.columns:
            df = df.drop(columns='difficulty')

        # Assign x (inputs) and y (outputs) of the network
        y = df['label']
        x = df.drop(columns='label')

        encoder = params['encoder']

        # ***** MULTIPLE ENCODER CHOICE *****
        # Encode categorical features as an integer array
        if encoder == 'ordinalencoder':
            x = OrdinalEncoder().fit_transform(x)
        # Encode labels with value between 0 and n_classes-1.
        elif encoder == 'labelencoder':
            x = x.apply(LabelEncoder().fit_transform)
        else:
            # Replace string features with ints (position in the reference
            # list), but only for the columns that were actually selected.
            string_columns = (
                ('service', service_values),
                ('protocol_type', protocol_type_values),
                ('flag', flag_values),
            )
            for column, known_values in string_columns:
                if column not in features:
                    continue
                for code, value in enumerate(known_values):
                    x[column] = x[column].replace(value, code)

            # Standardize by removing the mean and scaling to unit variance
            if encoder == "standardscaler":
                x = StandardScaler().fit_transform(x)
            # Transforms features by scaling each feature to range [0, 1]
            elif encoder == "minmaxscaler01":
                x = MinMaxScaler(feature_range=(0, 1)).fit_transform(x)
            # Transforms features by scaling each feature to range [-1, 1]
            elif encoder == "minmaxscaler11":
                x = MinMaxScaler(feature_range=(-1, 1)).fit_transform(x)
            # Any other encoder name leaves x as the int-coded frame.
        return x, y