Exemplo n.º 1
0
def getData(bfiles, AGDfile):
    """Build shuffled train/test feature and label arrays for the LSTM model.

    Domains are obtained from ``pontus.getTrainTestDomains``, turned into
    features by ``lstm_getAllFea`` and labelled 1 (DGA) or 0 (benign). The
    train set and the test set are each shuffled independently.

    NOTE(review): the ``bfiles`` and ``AGDfile`` arguments are currently
    ignored -- both names are rebound to hard-coded paths under
    ``../data_sets/`` before they are used. Confirm whether callers expect
    their arguments to be honoured.

    Returns:
        ``((train_features, train_labels), (test_features, test_labels))``
        where every element is a NumPy array.
    """

    def _label_vector(dga, benign):
        # DGA domains come first and get label 1; benign ones follow with 0.
        return np.concatenate((np.ones(len(dga)), np.zeros(len(benign))))

    def _shuffle_together(size, feats, labels):
        # A single random permutation is applied to both sequences so each
        # feature row stays aligned with its label.
        order = list(range(size))
        random.shuffle(order)
        return [feats[k] for k in order], [labels[k] for k in order]

    loader = pontus()
    root_dir = "../data_sets/"
    # Active data set. Earlier experiments used "Aleax" with AGD0.json, or
    # "Aleax2LD" with wordlist.json.
    bfiles = "{}{}".format(root_dir, 'yd_20180427')
    AGDfile = "{}AGD{}.json".format(root_dir, 0)

    train_dga, test_dga, train_benign, test_benign = loader.getTrainTestDomains(
        benignFile=bfiles, AGDfile=AGDfile)

    train_domains = train_dga + train_benign
    train_feats = lstm_getAllFea(train_domains)
    train_labels = _label_vector(train_dga, train_benign)

    test_domains = test_dga + test_benign
    test_feats = lstm_getAllFea(test_domains)
    test_labels = _label_vector(test_dga, test_benign)

    # Train is shuffled before test, so the random stream is consumed in a
    # fixed order.
    shuffled_train = _shuffle_together(
        len(train_domains), train_feats, train_labels)
    shuffled_test = _shuffle_together(
        len(test_domains), test_feats, test_labels)

    return ((np.array(shuffled_train[0]), np.array(shuffled_train[1])),
            (np.array(shuffled_test[0]), np.array(shuffled_test[1])))
Exemplo n.º 2
0
    def FANCI_expirement_process(self, n=755, m=28, c='gini'):
        """Train a FANCI random forest on one data set and print its metrics.

        Benign files are the entries of ``../data_sets/`` whose name contains
        ``"dx"``. The i-th of them (in sorted order) is paired with
        ``AGD<i>.json``; only ``i == 0`` is processed at the moment. Features
        are extracted with ``FANCI_features.extract_all_features``, a
        ``RandomForestClassifier`` is fitted on the training split, and
        accuracy, recall, precision and F1 on the test split are printed.

        Args:
            n: Passed to the classifier as ``n_estimators``.
            m: Passed to the classifier as ``max_features``.
            c: Passed to the classifier as ``criterion``.

        Raises:
            IndexError: If no file name in ``../data_sets/`` contains "dx".
        """
        p = pontus()
        root_dir = "../data_sets/"
        # os.listdir() returns names in arbitrary order. Sort them so the
        # benign file picked by index (and paired with AGD<i>.json) is the
        # same on every run and every machine.
        dx_bfiles = sorted(
            filename for filename in os.listdir(root_dir)
            if "dx" in filename)

        for i in range(1):
            bfiles = "{}{}".format(root_dir, dx_bfiles[i])
            AGDfile = "{}AGD{}.json".format(root_dir, i)
            trainDGADomain, testDGADomain, trainBenignDomain, testBenignDomain = p.getTrainTestDomains(
                benignFile=bfiles, AGDfile=AGDfile)

            # Label convention: 1 for DGA domains (listed first), 0 for benign.
            trainData = trainDGADomain + trainBenignDomain
            trainLabel = np.concatenate((np.ones(len(trainDGADomain)),
                                         np.zeros(len(trainBenignDomain))))
            testData = testDGADomain + testBenignDomain
            testLabel = np.concatenate(
                (np.ones(len(testDGADomain)), np.zeros(len(testBenignDomain))))

            train_features = FANCI_features.extract_all_features(trainData)

            clf = RandomForestClassifier(n_estimators=n,
                                         max_features=m,
                                         criterion=c)
            clf.fit(train_features, trainLabel)

            pred_features = FANCI_features.extract_all_features(testData)
            predict_result = clf.predict(pred_features)

            print("accuracy:{}\nrecall:{}\nprecision:{}\nf1-score:{}".format(
                accuracy_score(testLabel, predict_result),
                recall_score(testLabel, predict_result),
                precision_score(testLabel, predict_result),
                f1_score(testLabel, predict_result)))
Exemplo n.º 3
0
    def testAleax2LD(self):
        """Run the "pontus" prediction on 2LD domains unseen during training.

        The training split comes from ``pontus.getTrainTestDomains`` for
        ``yd_20180430`` / ``AGD5.json``; the test split it returns is
        discarded. Test domains are instead taken from the ``all2LDAGD``
        (DGA) and ``Aleax2LD`` (benign) files in ``../data_sets/``, one
        stripped line per entry, excluding anything present in the training
        split and keeping at most 20000 entries per class. Labels are 1 for
        DGA and 0 for benign.
        """
        loader = pontus()
        root_dir = "../data_sets/"
        benign_path = "{}{}".format(root_dir, "yd_20180430")
        agd_path = "{}AGD{}.json".format(root_dir, 5)
        # Only the training halves are used; the test halves are rebuilt
        # from separate files below.
        trainDGADomain, _, trainBenignDomain, _ = loader.getTrainTestDomains(
            benignFile=benign_path, AGDfile=agd_path, ratio=1)
        seen_dga = set(trainDGADomain)
        seen_benign = set(trainBenignDomain)

        with open(os.path.join(root_dir, "all2LDAGD"), "r") as handle:
            all_dga_2ld = {line.strip() for line in handle}

        with open(os.path.join(root_dir, "Aleax2LD"), "r") as handle:
            benign_2ld = {line.strip() for line in handle}

        # Drop training domains, then cap each class at 20000 entries.
        testDGADomain = list(all_dga_2ld - seen_dga)[:20000]
        testBenignDomain = list(benign_2ld - seen_benign)[:20000]

        train_domains = trainDGADomain + trainBenignDomain
        train_labels = np.concatenate(
            (np.ones(len(trainDGADomain)), np.zeros(len(trainBenignDomain))))

        test_domains = testDGADomain + testBenignDomain
        test_labels = np.concatenate(
            (np.ones(len(testDGADomain)), np.zeros(len(testBenignDomain))))

        # The equivalent "FANCI" run is currently disabled; only the
        # "pontus" method is evaluated.
        self.perdiction(train_domains, train_labels, test_domains,
                        test_labels, "pontus")
Exemplo n.º 4
0
 def __init__(self):
     """Instantiate ``pontus`` and keep it on ``self.pontus``."""
     helper = pontus()
     self.pontus = helper
Exemplo n.º 5
0
    )
    print(len(trainDGADomain))
    print(len(testDGADomain))
    print(len(trainBenignDomain))
    print(len(testBenignDomain))

    trainDomains = trainDGADomain + trainBenignDomain

    trainLabel_noshuffle = np.concatenate(
        (np.ones(len(trainDGADomain)), np.zeros(len(trainBenignDomain))))

    testDomains = testDGADomain + testBenignDomain
    testLabel = np.concatenate(
        (np.ones(len(testDGADomain)), np.zeros(len(testBenignDomain))))

    ppp = pontus.pontus()
    str_train_features = ppp.getDomainFeatures(trainDomains)
    # map_train_features=getDomanListFeature(trainDomains)

    train_features_noshuffle = str_train_features
    # np.append(str_train_features, map_train_features, axis=1)
    # print(str_train_features[0])
    # print(map_train_features[0])
    # print(train_features_noshuffle[0])

    index = [i for i in range(len(trainDomains))]
    random.shuffle(index)
    train_features = [train_features_noshuffle[i] for i in index]
    trainLabel = [trainLabel_noshuffle[i] for i in index]
    print("GBDT")
    clf = GradientBoostingClassifier(max_depth=24,