# Walk the benign WordPress corpus and store one feature dict per PHP file in
# the module-level ``f_dict`` (keyed by file path), tagged with label 1.
rootdir_bening = os.path.join("inputs", "benign", "wordpress")

# Running total of the line counts already attributed to earlier files.
# NOTE(review): subtracting it below suggests FeatureExtractor reports a
# cumulative "number_of_lines" across files -- confirm against the extractor.
prev = 0

for root, subFolders1, files in os.walk(rootdir_bening):
    for _file in files:
        if not _file.endswith(".php"):
            continue

        # os.path.join keeps the separator consistent with rootdir_bening.
        file_path = os.path.join(root, _file)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(file_path)

        # NOTE: a step that appended a closing "?>" to files not already
        # ending with one used to live here; it was disabled (commented out).

        fe = FeatureExtractor(file_path)
        try:
            feature_dict = fe.extract_features()
            feature_dict["label"] = 1
            f_dict[file_path] = feature_dict

            # Turn the reported count into a per-file count, remember it in
            # the running total, then add one to the stored value.
            feature_dict["number_of_lines"] -= prev
            prev += feature_dict["number_of_lines"]
            feature_dict["number_of_lines"] += 1

            # Derived per-line ratios; float() forces true division on
            # Python 2.
            chars = feature_dict["length_in_characters"]
            lines = feature_dict["number_of_lines"]
            feature_dict["characters_per_line"] = float(chars) / lines

            number_of_comments = feature_dict["number_of_comments"]
            feature_dict["average_comments_per_line"] = (
                float(number_of_comments) / lines)
        except Exception:
            # Best effort: drop the partial entry and carry on with the next
            # file.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt / SystemExit still stop the run.
            f_dict.pop(file_path, None)
            print("Something went wrong with file {}".format(file_path))
def extract_features(filepath,
                     config_path='config_files/feature_extraction.json'):
    """Extract features from the file at ``filepath``.

    A new ``FeatureExtractor`` is built from ``config_path`` on every call
    and its ``extract_features`` method is applied to ``filepath``.

    Args:
        filepath: Path handed to ``FeatureExtractor.extract_features``.
        config_path: Value passed to the ``FeatureExtractor`` constructor.
            Defaults to the path that was previously hard-coded, so existing
            callers are unaffected.

    Returns:
        Whatever ``FeatureExtractor.extract_features`` returns for
        ``filepath``.
    """
    feature_extractor = FeatureExtractor(config_path)
    return feature_extractor.extract_features(filepath)