def create_random_sets():
    """Build freshly generated training and testing inputs.

    Reads the module-level ``set_size``, ``testing_set_size``,
    ``malware_ratio`` and ``total_features`` values. Each set is produced by
    ``onehot.generate_set`` and then handed to ``onehot.generate_input``
    (the one-hot encoding step, per the original comments).

    Returns:
        A 4-tuple ``(data, labels, test_data, test_labels)``: the training
        data and labels followed by the testing data and labels.
    """
    # Training portion: draw the set first, then encode it.
    print("Generating TRAINING set...")
    train_apps = onehot.generate_set(set_size, malware_ratio)
    print("Generating TRAINING input...")
    train_x, train_y = onehot.generate_input(train_apps, total_features)

    # Testing portion: same two steps, sized by testing_set_size.
    print("Generating TESTING set...")
    test_apps = onehot.generate_set(testing_set_size, malware_ratio)
    print("Generating TESTING input...")
    test_x, test_y = onehot.generate_input(test_apps, total_features)

    return train_x, train_y, test_x, test_y
def _ensure_set_file(path, set_size, malware_ratio, set_label):
    """Generate a set with ``onehot.generate_set`` and save it to *path* unless the file exists."""
    if os.path.isfile(path):
        return
    print("Creating data-labels...")
    print("Generating " + set_label + " set...")
    generated = onehot.generate_set(set_size, malware_ratio)
    with open(path, "w") as file:
        # One entry per line, matching what _read_set_file expects.
        file.writelines(str(item) + "\n" for item in generated)


def _read_set_file(path):
    """Return the entries stored one per line in *path*, without their newlines."""
    with open(path, "r") as file:
        # rstrip("\n") removes only the line terminator, so a final line
        # that lacks a trailing newline keeps its last character.
        return [line.rstrip("\n") for line in file]


def create_sets():
    """Load (creating on first use) the 8500-entry sets and encode them.

    If ``training_set_8500.txt`` or ``testing_set_8500.txt`` is missing, a set
    of 8500 entries with a malware ratio of 0.3 is generated via
    ``onehot.generate_set`` and written to that same file, one entry per
    line. Both files are then read back and passed to
    ``onehot.generate_input`` together with the module-level
    ``total_features``.

    Returns:
        A 4-tuple ``(data, labels, test_data, test_labels)``.

    Raises:
        OSError: If a set file cannot be written or read.
    """
    training_path = "training_set_8500.txt"
    testing_path = "testing_set_8500.txt"

    # Write to the very file that is checked and later read. Previously the
    # generated sets were saved as "*_set_1500.txt", so a first run overwrote
    # unrelated files and then failed to open the 8500 files.
    _ensure_set_file(training_path, 8500, 0.3, "TRAINING")
    _ensure_set_file(testing_path, 8500, 0.3, "TESTING")

    training_set = _read_set_file(training_path)
    testing_set = _read_set_file(testing_path)

    print("Generating TRAINING input...")
    data, labels = onehot.generate_input(
        training_set, total_features)  # perform one-hot encoding
    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    return data, labels, test_data, test_labels
示例#3
0
def create_random_sets(set_size=1500, malware_ratio=0.3):
    """Generate one set and return its encoded data and labels.

    Args:
        set_size: Size passed to ``onehot.generate_set``.
        malware_ratio: Malware ratio passed to ``onehot.generate_set``.

    Returns:
        The ``(test_data, test_labels)`` pair produced by
        ``onehot.generate_input`` for the generated set and the module-level
        ``total_features``.
    """
    print("Generating set...")
    random_apps = onehot.generate_set(set_size, malware_ratio)

    print("Generating input...")
    # Per the original comment, generate_input shuffles the set and performs
    # the one-hot encoding.
    encoded, targets = onehot.generate_input(random_apps, total_features)
    return encoded, targets
def create_sets():
    """Read the stored 1500-entry sets and encode them.

    ``training_set_1500.txt`` and ``testing_set_1500.txt`` are read as one
    entry per line; each list is then passed to ``onehot.generate_input``
    together with the module-level ``total_features``.

    Returns:
        A 4-tuple ``(data, labels, test_data, test_labels)``.

    Raises:
        OSError: If either set file cannot be opened.
    """
    # rstrip("\n") removes only the line terminator. The previous
    # ``line.strip()`` discarded its result and ``line[:-1]`` cut a real
    # character off a final line that had no trailing newline.
    with open("training_set_1500.txt", "r") as file:
        training_set = [line.rstrip("\n") for line in file]
    with open("testing_set_1500.txt", "r") as file:
        testing_set = [line.rstrip("\n") for line in file]

    print("Generating TRAINING input...")
    data, labels = onehot.generate_input(training_set, total_features)  # perform one-hot encoding
    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(testing_set, total_features)  # perform one-hot encoding
    return data, labels, test_data, test_labels
示例#5
0
def create_test_set():
    """Read the stored 1500-entry testing set and encode it.

    ``testing_set_1500.txt`` is read as one entry per line and the resulting
    list is passed to ``onehot.generate_input`` together with the
    module-level ``total_features``.

    Returns:
        The ``(test_data, test_labels)`` pair from ``onehot.generate_input``.

    Raises:
        OSError: If the set file cannot be opened.
    """
    # rstrip("\n") removes only the line terminator. The previous
    # ``line.strip()`` discarded its result and ``line[:-1]`` cut a real
    # character off a final line that had no trailing newline.
    with open("testing_set_1500.txt", "r") as file:
        testing_set = [line.rstrip("\n") for line in file]

    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    return test_data, test_labels
def create_set():
    """Load (creating on first use) the 200-entry testing set and encode it.

    If ``testing_set_200.txt`` does not exist, a set of 200 entries with a
    malware ratio of 0.5 is generated via ``onehot.generate_set`` and written
    to that file, one entry per line. The file is then read back and passed
    to ``onehot.generate_input`` together with the module-level
    ``total_features``.

    Returns:
        The ``(test_data, test_labels)`` pair from ``onehot.generate_input``.

    Raises:
        OSError: If the set file cannot be written or read.
    """
    set_path = "testing_set_200.txt"

    if not os.path.isfile(set_path):
        set_size = 200
        malware_ratio = 0.5
        print("Creating data-labels...")
        print("Generating TESTING set...")
        generated = onehot.generate_set(
            set_size, malware_ratio)  # generate random testing set
        with open(set_path, "w") as file:
            file.writelines(str(item) + "\n" for item in generated)

    # rstrip("\n") removes only the line terminator. The previous
    # ``line.strip()`` discarded its result and ``line[:-1]`` cut a real
    # character off a final line that had no trailing newline.
    with open(set_path, "r") as file:
        testing_set = [line.rstrip("\n") for line in file]

    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    return test_data, test_labels
示例#7
0
            "training_set_2000.txt", "r"
    ) as file:  # read training set file and append applications to list
        for line in file:
            line.strip()  # remove whitespace
            line = line[:-1]  # remove \n
            training_set.append(line)  # add item to list
    with open(
            "testing_set_2000.txt", "r"
    ) as file:  # read testing set file and append applications to list
        for line in file:
            line.strip()
            line = line[:-1]
            testing_set.append(line)

    print("Generating TRAINING input...")
    data, labels = onehot.generate_input(
        training_set, total_features)  # perform one-hot encoding
    print("Generating TESTING input...")
    test_data, test_labels = onehot.generate_input(
        testing_set, total_features)  # perform one-hot encoding
    tune_neural_network()
"""
# Use the above code for grid search if you have enough RAM: modify the tune_batch_epochs() method and comment out everything above.
import set_onehot_encoding as onehot
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense, Dropout
import os
# Module-level configuration values.
total_features = 545333  # total unique features
set_size = 2000  # set size that will be used to create random training and testing set
malware_ratio = 0.3  # malware ratio in the set size