def get_preprocessed_training_data(datasize=None,
                                   regenerate=False,
                                   withfull=False):
    """Load, or rebuild and cache, the preprocessed training data.

    When ``regenerate`` is false, the four cached pickle files stored in the
    same directory as this module are read and returned; ``datasize`` and
    ``withfull`` are not consulted on that path.

    When ``regenerate`` is true, the 20-percent training file under
    ``data/`` is loaded with ``model.load_dataframe``, passed through
    ``discretize_elems`` and then ``construct_gmms``, and both results are
    pickled next to this module. The full training file goes through the
    same steps (and is saved) only when ``withfull`` is true.

    Args:
        datasize: Forwarded unchanged to ``model.load_dataframe``.
        regenerate: If true, rebuild from the raw data files instead of
            reading the cached pickles. Interpreted by truthiness.
        withfull: When regenerating, also process and cache the full
            training file. Interpreted by truthiness.

    Returns:
        A 4-tuple ``(df_training_20, df_training_full, gmms_training_20,
        gmms_training_full)``. The two ``*_full`` entries are ``None`` when
        regenerating with ``withfull`` false.

    Raises:
        Any error raised by ``open`` or ``pickle`` (for example when a
        cache file is missing) propagates to the caller.
    """
    workpath = os.path.dirname(os.path.abspath(__file__))

    if not regenerate:
        # NOTE: all four cache files are required here, including the
        # "*_full" ones, which the regenerate path only writes when
        # ``withfull`` is true.
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # these cache files must come from a trusted source.
        cached = []
        for filename in ('df_training_20.pkl',
                         'df_training_full.pkl',
                         'gmms_training_20.pkl',
                         'gmms_training_full.pkl'):
            with open(os.path.join(workpath, filename), 'rb') as fh:
                cached.append(pickle.load(fh))
        return tuple(cached)

    datafile_20 = os.path.join(workpath, 'data', 'KDDTrain+_20Percent.txt')
    datafile_full = os.path.join(workpath, 'data', 'KDDTrain+.txt')

    headers, attacks = get_header_data()

    # Stay None unless the full data set is requested below.
    df_training_full = None
    gmms_training_full = None

    # print("...") with a single string behaves the same on Python 2 and 3.
    print("preprocessing training data for 20 percent...")
    df = model.load_dataframe(datafile_20, headers, datasize=datasize)
    print("descretization...")
    df_training_20 = discretize_elems(df, attacks)
    print("gmm fitting...")
    gmms_training_20 = construct_gmms(df_training_20, headers)

    if withfull:
        print("preprocessing training data total...")
        df = model.load_dataframe(datafile_full, headers, datasize=datasize)
        print("descretization...")
        df_training_full = discretize_elems(df, attacks)
        print("gmm fitting...")
        gmms_training_full = construct_gmms(df_training_full, headers)
    else:
        print("without full data")

    print("save to file...")
    to_save = [('df_training_20.pkl', df_training_20),
               ('gmms_training_20.pkl', gmms_training_20)]
    if withfull:
        to_save.append(('df_training_full.pkl', df_training_full))
        to_save.append(('gmms_training_full.pkl', gmms_training_full))
    for filename, obj in to_save:
        with open(os.path.join(workpath, filename), 'wb') as fh:
            # Protocol -1 selects the highest pickle protocol available.
            pickle.dump(obj, fh, -1)

    return (df_training_20, df_training_full,
            gmms_training_20, gmms_training_full)
# Example #2
0
def get_preprocessed_training_data(datasize=None, regenerate=False, withfull=False):
    """Return the preprocessed training frames and their fitted GMMs.

    Two modes, selected by ``regenerate``:

    * false (default): read the four cached ``*.pkl`` files located beside
      this module and return their contents. ``datasize`` and ``withfull``
      are ignored in this mode.
    * true: load ``data/KDDTrain+_20Percent.txt`` via
      ``model.load_dataframe``, run ``discretize_elems`` and
      ``construct_gmms`` on it, and pickle both results beside this module.
      If ``withfull`` is true, ``data/KDDTrain+.txt`` is processed and
      cached the same way.

    Args:
        datasize: Passed through to ``model.load_dataframe`` unchanged.
        regenerate: Truthy to rebuild from raw data rather than read the
            cache.
        withfull: Truthy to also process the full training file when
            regenerating.

    Returns:
        ``(df_training_20, df_training_full, gmms_training_20,
        gmms_training_full)``; the ``*_full`` values are ``None`` when
        regenerating without ``withfull``.

    Raises:
        Errors from ``open``/``pickle`` (e.g. a missing cache file) are not
        caught here.
    """
    workpath = os.path.dirname(os.path.abspath(__file__))

    if not regenerate:
        # NOTE: the "*_full" cache files are read unconditionally, yet the
        # regenerate mode only writes them when ``withfull`` is true.
        # NOTE(security): unpickling runs code embedded in the file; only
        # use cache files from a trusted source.
        names = ('df_training_20.pkl', 'df_training_full.pkl',
                 'gmms_training_20.pkl', 'gmms_training_full.pkl')
        loaded = []
        for name in names:
            with open(os.path.join(workpath, name), 'rb') as fh:
                loaded.append(pickle.load(fh))
        return tuple(loaded)

    data_dir = os.path.join(workpath, 'data')
    datafile_20 = os.path.join(data_dir, 'KDDTrain+_20Percent.txt')
    datafile_full = os.path.join(data_dir, 'KDDTrain+.txt')

    headers, attacks = get_header_data()

    # Defaults returned when the full data set is not processed.
    df_training_full = None
    gmms_training_full = None

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("preprocessing training data for 20 percent...")
    df = model.load_dataframe(datafile_20, headers, datasize=datasize)
    print("descretization...")
    df_training_20 = discretize_elems(df, attacks)
    print("gmm fitting...")
    gmms_training_20 = construct_gmms(df_training_20, headers)

    if withfull:
        print("preprocessing training data total...")
        df = model.load_dataframe(datafile_full, headers, datasize=datasize)
        print("descretization...")
        df_training_full = discretize_elems(df, attacks)
        print("gmm fitting...")
        gmms_training_full = construct_gmms(df_training_full, headers)
    else:
        print("without full data")

    print("save to file...")
    outputs = [('df_training_20.pkl', df_training_20),
               ('gmms_training_20.pkl', gmms_training_20)]
    if withfull:
        outputs.extend([('df_training_full.pkl', df_training_full),
                        ('gmms_training_full.pkl', gmms_training_full)])
    for name, obj in outputs:
        with open(os.path.join(workpath, name), 'wb') as fh:
            # -1 asks pickle for its highest available protocol.
            pickle.dump(obj, fh, -1)

    return df_training_20, df_training_full, gmms_training_20, gmms_training_full
# Example #3
0
def get_preprocessed_test_data(datasize=None, regenerate=False):
    """Load, or rebuild and cache, the preprocessed test data.

    When ``regenerate`` is false, the four cached pickle files stored in the
    same directory as this module are read and returned; ``datasize`` is
    not consulted on that path.

    When ``regenerate`` is true, ``data/KDDTest+.txt`` and
    ``data/KDDTest-21.txt`` are each loaded with ``model.load_dataframe``,
    passed through ``discretize_elems`` and then ``construct_gmms``, and
    the four results are pickled next to this module.

    Args:
        datasize: Forwarded unchanged to ``model.load_dataframe``.
        regenerate: If true, rebuild from the raw data files instead of
            reading the cached pickles. Interpreted by truthiness.

    Returns:
        A 4-tuple ``(df_test_plus, df_test_21, gmms_test_plus,
        gmms_test_21)``.

    Raises:
        Any error raised by ``open`` or ``pickle`` (for example when a
        cache file is missing) propagates to the caller.
    """
    workpath = os.path.dirname(os.path.abspath(__file__))

    if not regenerate:
        # NOTE(security): pickle.load executes arbitrary code from the file;
        # these cache files must come from a trusted source.
        cached = []
        for filename in ('df_test_plus.pkl',
                         'df_test_21.pkl',
                         'gmms_test_plus.pkl',
                         'gmms_test_21.pkl'):
            with open(os.path.join(workpath, filename), 'rb') as fh:
                cached.append(pickle.load(fh))
        return tuple(cached)

    datafile_plus = os.path.join(workpath, 'data', 'KDDTest+.txt')
    datafile_21 = os.path.join(workpath, 'data', 'KDDTest-21.txt')

    headers, attacks = get_header_data()

    # print("...") with a single string behaves the same on Python 2 and 3.
    print("preprocessing testing data plus...")
    df = model.load_dataframe(datafile_plus, headers, datasize=datasize)
    df_test_plus = discretize_elems(df, attacks)
    gmms_test_plus = construct_gmms(df_test_plus, headers)

    print("preprocessing testing data 21...")
    df = model.load_dataframe(datafile_21, headers, datasize=datasize)
    df_test_21 = discretize_elems(df, attacks)
    gmms_test_21 = construct_gmms(df_test_21, headers)

    print("save to file...")
    for filename, obj in (('df_test_plus.pkl', df_test_plus),
                          ('df_test_21.pkl', df_test_21),
                          ('gmms_test_plus.pkl', gmms_test_plus),
                          ('gmms_test_21.pkl', gmms_test_21)):
        with open(os.path.join(workpath, filename), 'wb') as fh:
            # Protocol -1 selects the highest pickle protocol available.
            pickle.dump(obj, fh, -1)

    return df_test_plus, df_test_21, gmms_test_plus, gmms_test_21
def get_preprocessed_test_data(datasize=None, regenerate=False):
    """Return the preprocessed test frames and their fitted GMMs.

    Two modes, selected by ``regenerate``:

    * false (default): read the four cached ``*.pkl`` files located beside
      this module and return their contents. ``datasize`` is ignored in
      this mode.
    * true: load ``data/KDDTest+.txt`` and ``data/KDDTest-21.txt`` via
      ``model.load_dataframe``, run ``discretize_elems`` and
      ``construct_gmms`` on each, and pickle the four results beside this
      module.

    Args:
        datasize: Passed through to ``model.load_dataframe`` unchanged.
        regenerate: Truthy to rebuild from raw data rather than read the
            cache.

    Returns:
        ``(df_test_plus, df_test_21, gmms_test_plus, gmms_test_21)``.

    Raises:
        Errors from ``open``/``pickle`` (e.g. a missing cache file) are not
        caught here.
    """
    workpath = os.path.dirname(os.path.abspath(__file__))

    if not regenerate:
        # NOTE(security): unpickling runs code embedded in the file; only
        # use cache files from a trusted source.
        names = ('df_test_plus.pkl', 'df_test_21.pkl',
                 'gmms_test_plus.pkl', 'gmms_test_21.pkl')
        loaded = []
        for name in names:
            with open(os.path.join(workpath, name), 'rb') as fh:
                loaded.append(pickle.load(fh))
        return tuple(loaded)

    data_dir = os.path.join(workpath, 'data')
    datafile_plus = os.path.join(data_dir, 'KDDTest+.txt')
    datafile_21 = os.path.join(data_dir, 'KDDTest-21.txt')

    headers, attacks = get_header_data()

    # Parenthesised single-argument print works on both Python 2 and 3.
    print("preprocessing testing data plus...")
    df = model.load_dataframe(datafile_plus, headers, datasize=datasize)
    df_test_plus = discretize_elems(df, attacks)
    gmms_test_plus = construct_gmms(df_test_plus, headers)

    print("preprocessing testing data 21...")
    df = model.load_dataframe(datafile_21, headers, datasize=datasize)
    df_test_21 = discretize_elems(df, attacks)
    gmms_test_21 = construct_gmms(df_test_21, headers)

    print("save to file...")
    outputs = [('df_test_plus.pkl', df_test_plus),
               ('df_test_21.pkl', df_test_21),
               ('gmms_test_plus.pkl', gmms_test_plus),
               ('gmms_test_21.pkl', gmms_test_21)]
    for name, obj in outputs:
        with open(os.path.join(workpath, name), 'wb') as fh:
            # -1 asks pickle for its highest available protocol.
            pickle.dump(obj, fh, -1)

    return df_test_plus, df_test_21, gmms_test_plus, gmms_test_21