Example #1
    def run_linear_open_experiment(self, iterations=10, save=False):
        """
        Train a classifier on test data, obtain the best combination of
        parameters through a grid search cross-validation and test the
        classifier using a open-world split of the dataset. The results
        from the number of iterations are saved as pz files.

        :param iterations: number of runs (training/testing)
        :save: save predictions and labels if True
        """
        self.true_labels = np.array([])
        self.predictions = np.array([])
        for i in range(iterations):
            self.randomize_dataset_open_world()
            clf = GridSearchCV(svm.LinearSVC(), {'C': np.logspace(-3, 3, 7)})
            clf.fit(self.X_train, self.Y_train)
            out = clf.best_estimator_.decision_function(self.X_test)
            classes = clf.best_estimator_.classes_
            for scores in out:
                m = np.max(scores)
                # Reject the sample as unknown (label 99) when the top score is
                # small in magnitude compared to any of the other class scores.
                if (abs(m / scores) < 0.5).any():
                    self.predictions = np.append(self.predictions, 99)
                else:
                    p = classes[np.where(scores == m)]
                    self.predictions = np.append(self.predictions, p)
            self.true_labels = np.append(self.true_labels, self.Y_test)

        if save:
            pz.save(self.predictions, "mca_predictions_open.pz")
            pz.save(self.true_labels, "mca_true_labels_open.pz")
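As a quick reference, the saved open-world outputs could later be evaluated along these lines. This is only a sketch: it assumes the pz module exposes a load() counterpart to pz.save(), which is not shown in these examples.

from sklearn.metrics import accuracy_score
import pz  # assumption: pz.load() mirrors pz.save()

# Restore predictions and labels written by run_linear_open_experiment.
predictions = pz.load("mca_predictions_open.pz")
true_labels = pz.load("mca_true_labels_open.pz")

# Label 99 marks samples rejected as "unknown" in the open-world split.
accepted = predictions != 99
print("[*] Rejected as unknown: {}".format((~accepted).sum()))
print("[*] Accuracy on accepted samples: {:.3f}".format(
    accuracy_score(true_labels[accepted], predictions[accepted])))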
Example #2
    def run_linear_closed_experiment(self, iterations=10, save=False):
        """
        Train a classifier on test data, obtain the best combination of
        parameters through a grid search cross-validation and test the
        classifier using a closed-world split of the dataset. The results
        from the number of iterations are saved as pz files.

        :param iterations: number of runs (training/testing)
        :save: save predictions and labels if True
        """
        self.true_labels = np.array([])
        self.predictions = np.array([])
        for i in range(iterations):
            print("[*] Iteration {0}".format(i))
            print("[*] Randomizing dataset...")
            self.randomize_dataset_closed_world()
            clf = GridSearchCV(svm.LinearSVC(), {'C': np.logspace(-3, 3, 7)})
            print "[*] Training..."
            clf.fit(self.X_train, self.Y_train)
            print("[*] Testing...")
            out = clf.best_estimator_.predict(self.X_test)
            self.predictions = np.append(self.predictions, out)
            self.true_labels = np.append(self.true_labels, self.Y_test)

        if save:
            print "[*] Predictions saved."
            pz.save(self.predictions, "mca_predictions_closed.pz")
            pz.save(self.true_labels, "mca_true_labels_closed.pz")
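For the closed-world results, a confusion matrix is usually the more informative summary. A minimal sketch, again assuming a hypothetical pz.load() counterpart:

from sklearn.metrics import accuracy_score, confusion_matrix
import pz  # assumption: pz.load() mirrors pz.save()

predictions = pz.load("mca_predictions_closed.pz")
true_labels = pz.load("mca_true_labels_closed.pz")

print("[*] Accuracy: {:.3f}".format(accuracy_score(true_labels, predictions)))
print(confusion_matrix(true_labels, predictions))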
Example #3
    def run_linear_experiment(self, rocs_filename, iterations=10):
        """
        Run a classification experiment by running several iterations.
        In each iteration data is randomized, a linear svm classifier
        is trained and evaluated using cross-validation over a the 
        cost parameter in the range np.logspace(-3, 3, 7). The best
        classifier is used for testing and a ROC curve is computed
        and saved as property and locally.

        :param rocs_filename: the file to save all rocs computed
        :param iterations: number of runs (training/testing)
        """
        for i in range(iterations):
            print("[*] Iteration {0}".format(i))
            print("[*] Randomizing dataset...")
            self.randomize_dataset()
            clf = GridSearchCV(svm.LinearSVC(), {'C': np.logspace(-3, 3, 7)})
            print "[*] Training..."
            clf.fit(self.X_train, self.Y_train)
            print("[*] Testing...")
            out = clf.best_estimator_.decision_function(self.X_test)
            roc = ev.compute_roc(np.float32(out.flatten()),
                                 np.float32(self.Y_test))
            self.rocs.append(roc)
            print "[*] ROC saved."
        pz.save(self.rocs, rocs_filename)
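The ev.compute_roc helper is not part of these snippets; as a rough stand-in for a binary labelling, an equivalent curve can be derived with scikit-learn (a sketch, not the project's own evaluation code):

from sklearn.metrics import roc_curve, auc

def compute_roc_sklearn(scores, labels):
    """Return (fpr, tpr, area) for binary labels and real-valued decision scores."""
    fpr, tpr, _ = roc_curve(labels, scores)
    return fpr, tpr, auc(fpr, tpr)

# Example usage with the decision_function output of the best estimator:
# fpr, tpr, area = compute_roc_sklearn(out.flatten(), Y_test)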
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', help='directory containing the apk files')
    args = parser.parse_args()
    if args.directory:
        # progressbar
        file_count = count_file(args.directory, '.apk')
        pbar = use_progressbar('networkxifying call graph...', file_count)
        pbar.start()
        progress = 0

        for parent, dirnames, filenames in os.walk(args.directory):
            for filename in filenames:
                if filename.endswith('.apk'):
                    # print(os.path.join(parent, filename))
                    cg, graphdir = generate(os.path.join(parent, filename))
                    fcg = networkxify(cg)
                    h = os.path.splitext(filename)[0]
                    # fnx already includes graphdir, so save directly to it.
                    fnx = os.path.join(graphdir, "{}.pz".format(h))
                    pz.save(fcg, fnx)

                    # progressbar
                    progress += 1
                    pbar.update(progress)

        # progressbar
        pbar.finish()
    else:
        parser.print_help()
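Each saved call graph can later be restored for inspection; a minimal sketch, assuming a pz.load() counterpart exists and using a placeholder file name:

import pz  # assumption: pz.load() mirrors pz.save()

# "example.pz" is a placeholder for one of the graph files written above.
fcg = pz.load("example.pz")
print("[*] Nodes: {}, edges: {}".format(fcg.number_of_nodes(),
                                        fcg.number_of_edges()))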
Example #5
def save_data(X, m, Y, filenames):
    '''Store pz objects for the data matrix, the feature maximum,
        the labels and the names of the original samples so that they
        can be reused in a new experiment without the need to extract
        all features again'''
    print('[SC] Saving labels, data matrix and file names...')
    pz.save(X, 'X.pz')
    pz.save(m, 'maximum.pz')
    pz.save(Y, 'Y.pz')
    pz.save(filenames, 'filenames.pz')
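The matching load step for a later experiment would look roughly like this (assuming, as above, a pz.load() counterpart):

import pz  # assumption: pz.load() mirrors pz.save()

# Restore the data matrix, feature maximum, labels and sample names
# without re-extracting features.
X = pz.load("X.pz")
m = pz.load("maximum.pz")
Y = pz.load("Y.pz")
filenames = pz.load("filenames.pz")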
Example #6
    def save_data(self):
        """ Store pz objects for the data matrix, the labels and
            the names of the original samples so that they can be used
            in a new experiment without the need to extract all
            features again
        """
        print("[*] Saving labels, data matrix and file names...")
        pz.save(self.X, "X.pz")
        pz.save(self.Y, "Y.pz")
        pz.save(self.fnames, "fnames.pz")