def delete_sets(self):
    """Delete this instance's pickled sets file.

    Removes the file at ``self.pickle_path`` if it exists; otherwise
    emits a warning and leaves the filesystem untouched.
    """
    pkl = self.pickle_path
    if not os.path.isfile(pkl):
        print_warning("Unable to remove %s. File does not exist." % pkl)
        return
    print_notice("Removing %s" % pkl)
    os.remove(pkl)
def create_popular_features(dataset, sets, language):
    """Fit a ``PopularFeatures`` selector for every vulnerability type and pickle it.

    For each configured vulnerability type, the training-set files are run
    through ``transform_file`` in a process pool (batches of ``num_processes``),
    the resulting data frames are stripped of their metadata columns, and a
    ``PopularFeatures(num_features=200)`` selector is incrementally fitted via
    ``partial_fit``. The fitted selector is pickled to the per-type features
    file. Types whose features file already exists are skipped.

    Args:
        dataset: Dataset identifier used to build the features filename.
        sets: Mapping holding ``'flaw_dict'`` and ``'training_set'`` entries
            keyed by language and vulnerability type (schema assumed from
            usage here — confirm against the caller).
        language: Language key into ``sets``.
    """
    # Local import so the (unseen) file header need not change.
    from itertools import islice

    mark_whole_path = config.get_boolean('dataset', 'MarkWholePathVulnerable')
    flaw_dict = sets['flaw_dict'][language]
    num_processes = 100
    with Pool(processes=num_processes) as pool:
        for vuln_type in config.get_list('dataset', 'Vulnerabilities'):
            filename = get_features_filename(dataset, language, vuln_type)
            if os.path.isfile(filename):
                print_notice("Pickle file %s already created" % filename)
                continue
            f = transform_file(flaw_dict[vuln_type], mark_whole_path)
            set_type = 'training_set'
            counter = 0
            total = len(sets[set_type][language][vuln_type])
            generator = iter(sets[set_type][language][vuln_type])
            pf = PopularFeatures(num_features=200)
            while True:
                # BUG FIX: the original batching,
                #     list(next(generator) for _ in range(num_processes))
                # raises RuntimeError on Python 3.7+ (PEP 479) when the
                # generator is exhausted mid-batch, so the clean
                # "empty batch -> break" exit was unreachable. islice()
                # stops cleanly at the end of the iterator instead.
                next_elements = list(islice(generator, num_processes))
                if not next_elements:
                    break
                counter += len(next_elements)
                start = timeit.default_timer()
                res = pool.map(f, next_elements)
                for df in res:
                    if df is None:
                        continue
                    meta_cols = ['file_name', 'line', 'vulnerable', 'tainted']
                    if not all(x in df.columns.values for x in meta_cols):
                        print_warning("Could not find the right columns in data frame. Ignoring.")
                        continue
                    # We drop these columns so our feature filter can ignore them
                    df.drop(meta_cols, axis=1, inplace=True)
                    pf.partial_fit(df)
                print_notice(
                    "%s %s %s: %d/%d (run took %.2f secs)" % (
                        language, vuln_type, set_type, counter, total,
                        timeit.default_timer() - start))
            with open(filename, 'wb') as pickle_file:
                # Protocol version 4 supports large objects (> 4GB)
                pickle.dump(pf, pickle_file, protocol=4)
def f(file):
    """Build the control-flow graph for *file* and transform it into a data frame.

    Returns the transformed data frame, or None when the file cannot be
    parsed or the transformation exceeds the recursion limit.
    """
    try:
        graph = cfg.create_graph(os.path.dirname(file), file)
    except (SyntaxError, IndexError, RecursionError):
        # TODO: Fix the IndexError and RecursionError
        print_warning("Syntax error in file %s" % file)
        return None
    try:
        return transform_graph(graph, flaw_dict, mark_whole_path, feature_filter)
    except RecursionError:
        print_warning("Maximum recursion depth exceeded (%s)" % file)
        return None
def delete_transforms():
    """Remove all transform and feature pickle files.

    Walks every (dataset, language, vulnerability-type) combination for the
    NVD and SAMATE datasets, deleting each existing transform/features file.
    Warns when nothing was found to remove.
    """
    removed_any = False
    for dataset in ['NVD', 'SAMATE']:
        for language in config.get_list('dataset', 'Languages'):
            for vuln_type in config.get_list('dataset', 'Vulnerabilities'):
                candidates = (
                    get_transform_filename(dataset, language, vuln_type),
                    get_features_filename(dataset, language, vuln_type),
                )
                for path in candidates:
                    if os.path.isfile(path):
                        print_notice("Removing %s" % path)
                        os.remove(path)
                        removed_any = True
    if not removed_any:
        print_warning("Could not find any transform files to remove.")