示例#1
0
    def delete_sets(self):
        """Remove this instance's cached pickle file, warning when it is absent."""
        target = self.pickle_path

        # Nothing to do if the pickle was never written (or already removed).
        if not os.path.isfile(target):
            print_warning("Unable to remove %s. File does not exist." % target)
            return

        print_notice("Removing %s" % target)
        os.remove(target)
示例#2
0
def create_popular_features(dataset, sets, language):
    """Fit a PopularFeatures filter per vulnerability type and pickle it.

    For each vulnerability type configured under ``[dataset] Vulnerabilities``,
    the training-set samples for *language* are transformed in parallel batches
    and incrementally fed to a ``PopularFeatures(num_features=200)`` instance,
    which is then pickled to the features file for (*dataset*, *language*,
    vuln_type). Types whose features file already exists are skipped.

    Args:
        dataset:  Dataset identifier used to build the output filename.
        sets:     Dict holding 'flaw_dict' and 'training_set' entries keyed
                  by language and vulnerability type.
        language: Language key into *sets*.
    """
    # Local import: keeps this fix self-contained without touching the
    # (not visible here) module-level import block.
    from itertools import islice

    mark_whole_path = config.get_boolean('dataset', 'MarkWholePathVulnerable')
    flaw_dict = sets['flaw_dict'][language]
    num_processes = 100

    with Pool(processes=num_processes) as pool:
        for vuln_type in config.get_list('dataset', 'Vulnerabilities'):
            filename = get_features_filename(dataset, language, vuln_type)

            if not os.path.isfile(filename):
                f = transform_file(flaw_dict[vuln_type], mark_whole_path)
                set_type = 'training_set'

                counter = 0

                total = len(sets[set_type][language][vuln_type])
                generator = iter(sets[set_type][language][vuln_type])

                pf = PopularFeatures(num_features=200)

                while True:
                    # islice yields a (possibly short) final batch and stops
                    # cleanly.  The previous genexp-with-next() form let
                    # StopIteration escape a generator expression, which
                    # PEP 479 (Python 3.7+) turns into a RuntimeError when
                    # the set size is not a multiple of num_processes.
                    next_elements = list(islice(generator, num_processes))
                    counter += len(next_elements)

                    if not next_elements:
                        break

                    start = timeit.default_timer()
                    res = pool.map(f, next_elements)

                    for df in res:
                        if df is None:
                            continue

                        if not all(x in df.columns.values for x in ['file_name', 'line', 'vulnerable', 'tainted']):
                            print_warning("Could not find the right columns in data frame. Ignoring.")
                            continue

                        # We drop these columns so our feature filter can ignore them
                        df.drop(['file_name', 'line', 'vulnerable', 'tainted'], axis=1, inplace=True)

                        pf.partial_fit(df)

                    print_notice(
                        "%s %s %s: %d/%d (run took %.2f secs)" % (language, vuln_type, set_type, counter, total,
                                                                  timeit.default_timer() - start))

                with open(filename, 'wb') as pickle_file:
                    # Protocol version 4 supports large objects (> 4GB)
                    pickle.dump(pf, pickle_file, protocol=4)

            else:
                print_notice("Pickle file %s already created" % filename)
示例#3
0
    def f(file):
        """Parse *file* into a CFG and transform it to a feature data frame.

        Returns the transformed frame, or None when graph construction or
        transformation fails.
        """
        # TODO: Fix the IndexError and RecursionError
        try:
            graph = cfg.create_graph(os.path.dirname(file), file)
        except (SyntaxError, IndexError, RecursionError):
            print_warning("Syntax error in file %s" % file)
            return None

        try:
            return transform_graph(graph, flaw_dict, mark_whole_path, feature_filter)
        except RecursionError:
            print_warning("Maximum recursion depth exceeded (%s)" % file)
            return None
示例#4
0
def delete_transforms():
    """Delete all transform and feature pickle files; warn when none exist."""
    removed_any = False

    for dataset in ['NVD', 'SAMATE']:
        for language in config.get_list('dataset', 'Languages'):
            for vuln_type in config.get_list('dataset', 'Vulnerabilities'):
                # Both the transform and the features pickle are candidates
                # for removal for this (dataset, language, vuln_type) triple.
                candidates = [
                    get_transform_filename(dataset, language, vuln_type),
                    get_features_filename(dataset, language, vuln_type),
                ]

                for path in candidates:
                    if not os.path.isfile(path):
                        continue
                    print_notice("Removing %s" % path)
                    os.remove(path)
                    removed_any = True

    if not removed_any:
        print_warning("Could not find any transform files to remove.")