Example #1
def process_unsw(root='/home/naruto/NetLearner'):
    unsw.generate_dataset(False)
    raw_X_train = np.load('%s/UNSW/train_dataset.npy' % root)
    y_train = np.load('%s/UNSW/train_labels.npy' % root)
    raw_X_test = np.load('%s/UNSW/test_dataset.npy' % root)
    y_test = np.load('%s/UNSW/test_labels.npy' % root)
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
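
Note: min_max_scale and permutate_dataset come from netlearner.utils and are not shown in these examples. A minimal sketch of what they plausibly do, inferred only from how they are called here (the None argument skips the validation split; shuffling appears to be in place, since some callers ignore the return value):

import numpy as np

def min_max_scale(train, valid, test):
    # Fit min/max on the training split only, then map every split to [0, 1];
    # a None split (no validation set) passes through unchanged.
    lo, hi = train.min(axis=0), train.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)  # guard constant features
    scale = lambda d: d if d is None else (d - lo) / span
    return [scale(train), scale(valid), scale(test)]

def permutate_dataset(X, y):
    # Shuffle samples and labels with one shared permutation, in place,
    # so callers that ignore the return value still see shuffled data.
    perm = np.random.permutation(X.shape[0])
    X[:] = X[perm]
    y[:] = y[perm]
    return X, y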
Example #2
def process_unsw(root='SharedAutoEncoder/'):
    unsw.generate_dataset(one_hot_encode=True, root_dir=root)
    raw_X_train = np.load(root + 'UNSW/train_dataset.npy')
    y_train = np.load(root + 'UNSW/train_labels.npy')
    raw_X_test = np.load(root + 'UNSW/test_dataset.npy')
    y_test = np.load(root + 'UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
Example #3
def process_unsw():
    unsw.generate_dataset(True)
    raw_X_train = np.load('UNSW/train_dataset.npy')
    y_train = np.load('UNSW/train_labels.npy')
    raw_X_test = np.load('UNSW/test_dataset.npy')
    y_test = np.load('UNSW/test_labels.npy')
    [X_train, _, X_test] = min_max_scale(raw_X_train, None, raw_X_test)
    permutate_dataset(X_train, y_train)
    permutate_dataset(X_test, y_test)

    print('Training set', X_train.shape, y_train.shape)
    print('Test set', X_test.shape, y_test.shape)
    return {'X': X_train, 'y': y_train, 'X_test': X_test, 'y_test': y_test}
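
All three variants return the same dictionary, so downstream code does not care which preprocessing root was used. A hypothetical consumer (LogisticRegression is just a stand-in estimator; argmax collapses the one-hot labels these loaders produce):

import numpy as np
from sklearn.linear_model import LogisticRegression

data = process_unsw()
clf = LogisticRegression(max_iter=1000)
# The labels are one-hot encoded, so collapse them to class indices first.
clf.fit(data['X'], np.argmax(data['y'], axis=1))
print('test accuracy:',
      clf.score(data['X_test'], np.argmax(data['y_test'], axis=1)))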
Example #4
def load_unsw_dataset():
    unsw.generate_dataset(False, True, model_dir)
    raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
    train_labels = np.load(data_dir + 'train_labels.npy')
    raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
    test_labels = np.load(data_dir + 'test_labels.npy')
    # train_dataset, valid_dataset, test_dataset = min_max_normalize(
    #    raw_train_dataset, raw_valid_dataset, raw_test_dataset)
    # print('Min-Max normalizing dataset')
    train_dataset, _, test_dataset = standard_scale(raw_train_dataset,
                                                    None, raw_test_dataset)
    print('Mean normalizing dataset')
    print('Training set', train_dataset.shape, train_labels.shape)
    print('Test set', test_dataset.shape, test_labels.shape)
    return (train_dataset, train_labels, test_dataset, test_labels)
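
standard_scale is the z-score counterpart of min_max_scale ("mean normalizing" in the print statement above). A plausible minimal version, again fitting statistics on the training split only:

import numpy as np

def standard_scale(train, valid, test):
    # Zero-mean, unit-variance scaling with statistics from the training
    # split; a None split passes through, as with min_max_scale.
    mu, sigma = train.mean(axis=0), train.std(axis=0)
    sigma = np.where(sigma > 0, sigma, 1.0)  # guard constant features
    scale = lambda d: d if d is None else (d - mu) / sigma
    return scale(train), scale(valid), scale(test)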
Example #5
from __future__ import print_function
import numpy as np
import tensorflow as tf
from preprocess.unsw import generate_dataset
from netlearner.utils import hyperparameter_summary
from netlearner.utils import augment_quantiled, permutate_dataset
from netlearner.multilayer_perceptron import MultilayerPerceptron

generate_dataset(True)
raw_train_dataset = np.load('UNSW/train_dataset.npy')
train_labels = np.load('UNSW/train_labels.npy')
raw_valid_dataset = np.load('UNSW/valid_dataset.npy')
valid_labels = np.load('UNSW/valid_labels.npy')
raw_test_dataset = np.load('UNSW/test_dataset.npy')
test_labels = np.load('UNSW/test_labels.npy')

columns = np.array(list(range(1, 6)) + list(range(8, 16)) +
                   list(range(17, 19)) + list(range(23, 25)) + [26])
[train_dataset, valid_dataset, test_dataset] = augment_quantiled(
    raw_train_dataset, raw_valid_dataset, raw_test_dataset, columns)
permutate_dataset(train_dataset, train_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

num_samples, feature_size = train_dataset.shape
num_labels = train_labels.shape[1]
batch_size = 80
keep_prob = 0.80
beta = 0.00008
weights = [1.0, 1.0]
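
The snippet stops right after the hyperparameters, and the MultilayerPerceptron class itself is not shown. For orientation only, here is a generic Keras stand-in wired up with the same values; the 800-unit hidden layer is an assumption borrowed from example #6, and the epoch count is a placeholder:

from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.regularizers import l2

# Stand-in model, not the netlearner.multilayer_perceptron API.
model = Sequential([
    Dense(800, activation='relu', input_shape=(feature_size,),
          kernel_regularizer=l2(beta)),
    Dropout(1.0 - keep_prob),  # keep_prob is a keep rate; Keras wants a drop rate
    Dense(num_labels, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_dataset, train_labels, batch_size=batch_size, epochs=10,
          validation_data=(valid_dataset, valid_labels))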
Example #6
    0: 1.0,
    1: 8.0,
    2: 3.0,
    3: 3.0,
    4: 8.0,
    5: 3.0,
    6: 8.0,
    7: 16.0,
    8: 16.0,
    9: 3.0
}
weights = None
hidden_size = [800, 480]
fold = 5

unsw.generate_dataset(True, True, model_dir)
# raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
# raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
# X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
raw_X = np.load(data_dir + 'train_dataset.npy')
raw_X_test = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
y_flatten = np.argmax(y, axis=1)
X, _, X_test = min_max_scale(raw_X, None, raw_X_test)
print('Train dataset', X.shape, y.shape, y_flatten.shape)
print('Test dataset', X_test.shape, y_test.shape)

feature_size = X.shape[1]
num_samples, num_classes = y.shape
skf = StratifiedKFold(n_splits=fold)
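
StratifiedKFold splits on integer class labels, which is why y_flatten exists alongside the one-hot y. A sketch of the cross-validation loop that presumably follows:

# Split on the flattened integer labels, but index the one-hot labels
# for training; each fold gets a class-balanced holdout.
for fold_idx, (train_idx, valid_idx) in enumerate(skf.split(X, y_flatten)):
    X_fold, y_fold = X[train_idx], y[train_idx]
    X_hold, y_hold = X[valid_idx], y[valid_idx]
    print('Fold %d: train %s, holdout %s'
          % (fold_idx, X_fold.shape, X_hold.shape))
    # ...build and fit a fresh model on (X_fold, y_fold) here...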
Example #7
    il = Input(shape=(feature_size, ), name='input')
    h1 = Dense(encoder_size, activation='relu', name='h1')(il)
    h1 = Dropout(0.8)(h1)
    h2 = Dense(480, activation='sigmoid', name='h2')(h1)
    sm = Dense(num_classes, activation='softmax', name='output')(h2)
    mlp = Model(inputs=il, outputs=sm, name='sae_mlp')
    mlp.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])
    mlp.summary()
    mlp.get_layer('h1').set_weights(init_weights)
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'SparseAE/'
generate_dataset(True, True, model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'sae_mlp.h5'

raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
y_test = np.load(data_dir + 'test_labels.npy')
X, _, X_test = min_max_scale(raw_train_dataset, None, raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
num_samples, num_classes = y.shape
feature_size = X.shape[1]
encoder_size = 800
num_epoch = 160
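
After the pretrained model is saved, fine-tuning is a plain Keras load-and-fit. A sketch of that step (the batch size is a placeholder, and validation_data reuses the test split purely for monitoring; the original script may differ):

from keras.models import load_model

# Reload the MLP whose first layer was seeded by the sparse autoencoder,
# then fine-tune it end-to-end on the labelled training data.
mlp = load_model(pretrained_mlp_path)
mlp.fit(X, y, batch_size=100, epochs=num_epoch,
        validation_data=(X_test, y_test))
print('Test performance:', mlp.evaluate(X_test, y_test))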
Example #8
    model = Sequential()
    model.add(Embedding(vocabulary_dim, embedding_dim, input_length=1))
    model.add(Flatten())
    model.compile('rmsprop', 'mse')
    e_train = model.predict(X_train)
    e_valid = model.predict(X_valid)
    e_test = model.predict(X_test)
    print(e_train.shape)
    # print(np.amax(e_train, axis=0), np.amin(e_train, axis=0))
    print(e_test.shape)
    # print(np.amax(e_test, axis=0), np.amin(e_test, axis=0))
    return e_train, e_valid, e_test


generate_dataset(one_hot_encode=False)
raw_train = np.load('UNSW/train_dataset.npy')
y_train = np.load('UNSW/train_labels.npy')
raw_valid = np.load('UNSW/valid_dataset.npy')
y_valid = np.load('UNSW/valid_labels.npy')
raw_test = np.load('UNSW/test_dataset.npy')
y_test = np.load('UNSW/test_labels.npy')

train_cont = raw_train[:, :-3]
valid_cont = raw_valid[:, :-3]
test_cont = raw_test[:, :-3]
train_disc = raw_train[:, -3:]
valid_disc = raw_valid[:, -3:]
test_disc = raw_test[:, -3:]

print("Continuous dataset", train_cont.shape)
Example #9
    h1 = Dense(num_hidden_rbm, activation='sigmoid', name='h1')(input_layer)
    h1 = Dropout(0.8)(h1)
    h2 = Dense(480, activation='sigmoid', name='h2')(h1)
    sm = Dense(num_classes, activation='softmax', name='output')(h2)
    mlp = Model(inputs=input_layer, outputs=sm, name='rbm_mlp')
    mlp.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    mlp.summary()
    mlp.get_layer('h1').set_weights([rbm_w, rbm_b])
    mlp.save(pretrained_mlp_path)


os.environ['CUDA_VISIBLE_DEVICES'] = '1'
model_dir = 'RBM/'
generate_dataset(binary_label=True, one_hot_encoding=True, root_dir=model_dir)
data_dir = model_dir + 'UNSW/'
pretrained_mlp_path = data_dir + 'rbm_mlp.h5'

raw_train_dataset = np.load(data_dir + 'train_dataset.npy')
raw_valid_dataset = np.load(data_dir + 'valid_dataset.npy')
y = np.load(data_dir + 'train_labels.npy')
raw_test_dataset = np.load(data_dir + 'test_dataset.npy')
y_test = np.load(data_dir + 'test_labels.npy')
[X, X_valid, X_test] = min_max_scale(raw_train_dataset, raw_valid_dataset,
                                     raw_test_dataset)
X, y = permutate_dataset(X, y)
print('Training set', X.shape, y.shape)
print('Test set', X_test.shape)
(num_samples, num_classes) = y.shape
feature_size = X.shape[1]
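
The snippet ends before the fine-tuning step. A sketch of evaluating the RBM-initialized MLP with per-class metrics (epochs and batch size are placeholders, not values from the original script):

import numpy as np
from keras.models import load_model
from sklearn.metrics import classification_report

# Fine-tune the RBM-seeded MLP briefly, then report per-class precision
# and recall on the held-out test split.
mlp = load_model(pretrained_mlp_path)
mlp.fit(X, y, batch_size=100, epochs=20)
pred = np.argmax(mlp.predict(X_test), axis=1)
truth = np.argmax(y_test, axis=1)
print(classification_report(truth, pred))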