Example No. 1
def main(argv=None):

    print('Loading data')
    mnist = load_data()
    print('Finished!')
    print('Start training!')
    train(mnist)
Example No. 2
def main(argv=None):

    print('Loading data')
    x = load_data()
    print('Finished!')
    print('Starting training')
    train(x)
Example No. 3
import os
import pickle

# These callbacks ship with Keras; depending on the project's setup they may
# come from tensorflow.keras.callbacks instead.
from keras.callbacks import EarlyStopping, ModelCheckpoint


def train(root, res, neighbor, model_save_path, TEST_SPLIT):
    print(f'Training {neighbor} Neighbor CNN model with Resolution {res}')

    X_train, X_test, y_train, y_test = load_data(root, res, TEST_SPLIT,
                                                 neighbor)

    # Define model callbacks. Use replace() to drop the '.pkl' extension;
    # str.strip('.pkl') would strip those characters from both ends instead.
    model_save_path = os.path.join(model_save_path, res.replace('.pkl', ''),
                                   'Neighbor_%s.hdf5' % neighbor)
    checkpoint = ModelCheckpoint(model_save_path,
                                 monitor='val_loss',
                                 save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=10, verbose=1)
    model = CNN(neighbor)

    # training
    history = model.fit(X_train,
                        y_train,
                        validation_data=(X_test, y_test),
                        batch_size=256,
                        epochs=500,
                        shuffle=True,
                        callbacks=[checkpoint, earlystopper])

    # save training history
    history_path = '../history/%s/Neighbor_%s.pkl' % (res.replace('.pkl', ''),
                                                      neighbor)
    with open(history_path, 'wb') as f:
        pickle.dump(history.history, f)
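A minimal usage sketch for the train() function above; every argument value here (paths, resolution pickle name, neighbor count, split ratio) is an assumption for illustration, not taken from the original project.

if __name__ == '__main__':
    # Hypothetical arguments; adjust to the real dataset layout.
    train(root='../data',               # assumed dataset root
          res='res_64.pkl',             # assumed resolution pickle name
          neighbor=5,                   # assumed neighbor count
          model_save_path='../models',  # assumed checkpoint directory
          TEST_SPLIT=0.2)               # hold out 20% of samples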
Example No. 4
def main(argv=None):

    print('This is to build a simple RNN.')
    print('Loading data')
    x = load_data()
    print('Finished!')
    print('Starting training')
    train(x)
Example No. 5
def main(argv=None):

    print('This is to pretrain a shallow neural network using an autoencoder '
          'or a restricted Boltzmann machine.\n'
          'There are two options:\n'
          '1 -> pretrain_with_RBM\n'
          '2 -> pretrain_with_AE')
    print('Loading data')
    x = load_data()
    print('Finished!')
    print()
    option = 'pretrain_with_RBM'
    print('Starting training')
    train(x, option)
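The option string presumably selects the pretraining branch inside train(); a minimal sketch of what that dispatch could look like, where pretrain_with_RBM() and pretrain_with_AE() are assumed helpers that do not appear in this listing.

def train(x, option):
    # Dispatch on the option string chosen in main() above.
    if option == 'pretrain_with_RBM':
        return pretrain_with_RBM(x)  # assumed helper
    elif option == 'pretrain_with_AE':
        return pretrain_with_AE(x)   # assumed helper
    raise ValueError('unknown pretraining option: %s' % option)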
Example No. 6
def main():
    X_train, X_test, y_train, y_test = load_data(path, res, args.test_ratio)
    X_train, X_test, y_train, y_test = Preprocessing_DNN(
        X_train, X_test, y_train, y_test)

    # Training
    if args.action == 'train':
        if args.model_use == 'Linear':
            model = LinearRegression()
            model.fit(X_train, y_train)
            # path separator added explicitly, matching the DNN branch below
            with open(args.model_save_path + '/Linear_model.pik', 'wb') as f:
                pickle.dump(model, f)

        elif args.model_use == 'DNN':
            model = DNN()
            filepath = args.model_save_path + "/weights-improvement-{epoch:03d}-{loss:.3e}.hdf5"
            checkpoint = ModelCheckpoint(filepath,
                                         monitor='val_loss',
                                         save_best_only=False,
                                         period=1)
            history = model.fit(X_train,
                                y_train,
                                validation_data=(X_test, y_test),
                                shuffle=True,
                                callbacks=[checkpoint])

            # save history
            with open(args.history_save_path + args.model_name + '.pkl',
                      'wb') as f:
                pickle.dump(history.history, f)

    # Testing
    elif args.action == 'test':
        if args.model_use == 'Linear':
            with open(args.load_model_path, 'rb') as f:
                model = pickle.load(f)

        elif args.model_use == 'DNN':
            model = load_model(args.load_model_path)

        # keep the predictions instead of discarding the return value
        predictions = model.predict(X_test)
        print(predictions)
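This main() relies on a module-level args object that the listing omits. A plausible argparse setup covering the attributes used above; the flag names mirror the attribute accesses in the code, but the choices and defaults are assumptions.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--action', choices=['train', 'test'], default='train')
parser.add_argument('--model_use', choices=['Linear', 'DNN'], default='DNN')
parser.add_argument('--test_ratio', type=float, default=0.2)
parser.add_argument('--model_save_path', default='./models')
parser.add_argument('--history_save_path', default='./history/')
parser.add_argument('--model_name', default='DNN_run')
parser.add_argument('--load_model_path', default='./models/best.hdf5')
args = parser.parse_args()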
Example No. 7
def main(argv=None):
    x = load_data()
    retrain(x)
Example No. 8
import warnings

import matplotlib.pyplot as plt

from Preprocessing import load_data

warnings.filterwarnings("ignore")
plt.style.use('ggplot')

preproc_data, features_data, rcvcall_data = load_data(
    'train_2011_2012_2013.csv')

# Plot CSPL_RECEIVED_CALLS as a function of ASS_ASSIGNMENT

preproc_data1 = preproc_data.groupby(
    ["ASS_ASSIGNMENT"])['CSPL_RECEIVED_CALLS'].sum()
plt.figure()
preproc_data1.plot(kind="bar", x="ASS_ASSIGNMENT", y="CSPL_RECEIVED_CALLS")
plt.legend()
plt.show()

# Plot CSPL_RECEIVED_CALLS as a function of HOUR

preproc_data2 = preproc_data.groupby(["HOUR"])['CSPL_RECEIVED_CALLS'].sum()
plt.figure()
preproc_data2.plot(kind="bar", x="HOUR", y="CSPL_RECEIVED_CALLS")
plt.legend()
plt.show()
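The groupby/sum pattern used in both plots is easy to verify on toy data; a self-contained sketch with made-up values, where only the column names come from the script above.

import pandas as pd

toy = pd.DataFrame({
    'ASS_ASSIGNMENT': ['CAT', 'Tech', 'CAT'],
    'CSPL_RECEIVED_CALLS': [10, 5, 7],
})
# Total received calls per assignment, as in the first plot.
print(toy.groupby('ASS_ASSIGNMENT')['CSPL_RECEIVED_CALLS'].sum())
# -> CAT: 17, Tech: 5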
Example No. 9
def main(argv=None):
    x = load_data()
    evaluate(x)
Example No. 10
def main():
    # parse the raw data files first
    normal_file_raw = 'dataset/normalTrafficTraining.txt'
    anomaly_file_raw = 'dataset/anomalousTrafficTest.txt'
    normal_test_raw = 'dataset/normalTrafficTest.txt'

    normal_test_parse = 'dataset/normalRequestTest.txt'
    normal_file_parse = 'dataset/normalRequestTraining.txt'
    anomaly_file_parse = 'dataset/anomalousRequestTest.txt'

    # Parse the files to decode the URLs in the raw HTTP requests and write them in a proper format
    parse_file(normal_file_raw, normal_file_parse)
    parse_file(anomaly_file_raw, anomaly_file_parse)
    parse_file(normal_test_raw, normal_test_parse)

    # Convert each HTTP request into a string and collect them in lists.
    # NOTE: the parsed files are written under dataset/ above but read back
    # from ../input/ here; presumably the same files copied to that folder.
    X_train = to_string('../input/normalRequestTraining.txt')
    X_test_bad = to_string('../input/anomalousRequestTest.txt')
    X_test_good = to_string('../input/normalRequestTest.txt')

    # Label the good requests and bad requests
    # 0 --> good --> [1. 0.]
    # 1 --> bad -->  [0. 1.]
    y_train = [0] * len(X_train)
    y_bad = [1] * len(X_test_bad)
    y_good = [0] * len(X_test_good)

    # Put all the requests in the X and y lists
    y_unshuffled = y_bad + y_good + y_train
    X_unshuffled = X_test_bad + X_test_good + X_train

    # Shuffle the data
    X_shuffled, y_shuffled = shuffle(X_unshuffled, y_unshuffled)
    # use categorical output
    y_shuffled = to_categorical(y_shuffled)

    # set parameters:
    subset = None  # declared but unused in this snippet

    # Maximum length. Longer gets chopped. Shorter gets padded.
    maxlen = 1000

    # Model params
    # Filters for conv layers
    nb_filter = 64
    # Number of units in the dense layer
    dense_outputs = 64
    # Conv layer kernel size
    filter_kernels = [7, 7]
    # Number of units in the final output layer. Number of classes.
    cat_output = 2

    # Compile/fit params
    batch_size = 128
    nb_epoch = 20

    print('Loading data...')
    # Expect X to be a list of sentences and y to be category indices.
    (xt, yt), (x_test, y_test) = load_data(X_shuffled, y_shuffled)

    print('Creating vocab...')
    vocab, reverse_vocab, vocab_size, alphabet = create_vocab_set()

    print('Compile model...')
    model = create_model(filter_kernels, dense_outputs, maxlen, vocab_size,
                         nb_filter, cat_output)
    # Encode data
    xt = encode_data(xt, maxlen, vocab)
    x_test = encode_data(x_test, maxlen, vocab)

    print('Chars vocab: {}'.format(alphabet))
    print('Chars vocab size: {}'.format(vocab_size))
    print('X_train.shape: {}'.format(xt.shape))
    model.summary()

    print('Fit model...')
    patience = 5  # epochs with no improvement after which training stops
    history = fit_model(model, xt, yt, patience, batch_size, nb_epoch)

    print("Testing model...")
    score = test_model(x_test, y_test, batch_size)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # Graphs and data visualisation
    # Training accuracy vs. validation accuracy
    plt.figure(0, figsize=(10, 10))
    plt.plot(history.history['acc'], 'r')
    plt.plot(history.history['val_acc'], 'g')
    plt.xticks(np.arange(0, 20, 1.0))
    plt.xlabel("Num of Epochs")
    plt.ylabel("Accuracy")
    plt.title("Training Accuracy vs. Validation Accuracy")
    plt.legend(['train', 'validation'])

    # Training loss vs. validation loss (use a second figure so the
    # accuracy plot above is not overwritten)
    plt.figure(1, figsize=(10, 10))
    plt.plot(history.history['loss'], 'r')
    plt.plot(history.history['val_loss'], 'g')
    plt.xticks(np.arange(0, 20, 1.0))
    plt.yticks(np.arange(0, 0.5, 0.1))
    plt.xlabel("Num of Epochs")
    plt.ylabel("Loss")
    plt.title("Training Loss vs. Validation Loss")
    plt.legend(['train', 'validation'])

    # Confusion matrix (predicted vs. true classes)
    y_pred = model.predict(x_test)
    y_pred1 = (y_pred > 0.5)
    matrix = confusion_matrix(y_test.argmax(axis=1), y_pred1.argmax(axis=1))
    print(matrix)
    plt.matshow(matrix, cmap=plt.cm.gray)
    plt.show()

    # Row-normalize so each row shows the per-class recall
    row_sum = matrix.sum(axis=1, keepdims=True)
    norm_conf = matrix / row_sum
    print(norm_conf)
    plt.matshow(norm_conf, cmap=plt.cm.gray)
    plt.show()
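The final block row-normalizes the confusion matrix, which turns raw counts into per-class recall; a small self-contained check with made-up counts.

import numpy as np

# Made-up 2x2 confusion matrix: rows are true classes, columns predictions.
matrix = np.array([[90, 10],
                   [20, 80]])
row_sum = matrix.sum(axis=1, keepdims=True)  # [[100], [100]]
norm_conf = matrix / row_sum                 # each row sums to 1.0
print(norm_conf)
# [[0.9 0.1]
#  [0.2 0.8]]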