def process_results():
    result_folders = get_first_layer_folders("." + os.sep + "api_results")
    csv_cnt = 0
    csv_file_cnt = 0
    for res_folder in result_folders:
        csv_files = get_first_layer_files(res_folder, html=False)
        csv_file_cnt += len(csv_files)
        api_sum = 0
        for csv_file in csv_files:
            # print(csv_file)
            with open(csv_file, "r", encoding="utf-8") as f:
                lines = f.readlines()
            csv_name = csv_file.split(os.sep)[-1][:-4]
            sum_cnt = 0
            general_cnt = 0
            for line in lines:
                if "," in line:
                    sum_cnt = sum_cnt + 1
                if "logevent" in line or "GeneralLogEvent" in line or "trackEvent" in line or \
                        "GeneralUserProperty" in line:
                    general_cnt = general_cnt + 1
            api_sum += sum_cnt
            if sum_cnt > 0 and general_cnt > 0:
                csv_cnt = csv_cnt + 1
            # print("CSV_Name=" + csv_name + ", Privacy API=" + str(sum_cnt) + ", General API=" + str(general_cnt))
            # print(csv_name + "," + str(sum_cnt) + "," + str(general_cnt))
            # loss = loss + (sum_cnt - general_cnt) / sum_cnt
        if api_sum == 0:
            print("!!!=" + res_folder)
    print("CSV File Count=" + str(csv_file_cnt))
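# get_first_layer_folders and get_first_layer_files are called throughout this section
# but not defined in it. A minimal sketch consistent with the call sites, assuming the
# html flag keeps only .html files when True and every file when False (both signatures
# are inferred, not taken from the project's actual helpers):
import os

def get_first_layer_folders(target_folder):
    # Immediate sub-folders only, no recursion.
    return [os.path.join(target_folder, name)
            for name in os.listdir(target_folder)
            if os.path.isdir(os.path.join(target_folder, name))]

def get_first_layer_files(target_folder, html=True):
    # Immediate files only; filter to .html pages when html is True.
    files = [os.path.join(target_folder, name)
             for name in os.listdir(target_folder)
             if os.path.isfile(os.path.join(target_folder, name))]
    if html:
        files = [f for f in files if f.endswith(".html")]
    return files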
def run(self):
    get_sensitive_keywords()
    print(self.sensitive_keywords)
    sum_items = 0
    sum_tp = 0
    sum_fp = 0
    # Re-wrap stdout so non-ASCII keywords print correctly on Windows consoles.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
    api_folders = get_first_layer_files(
        "E:\\Lab Work\\dataset_science\\API_Docs\\Countly_API_Docs_test\\")
    sum_acs = 0
    for api_folder in api_folders:
        logger.info("Processing Folder:" + str(api_folder))
        (items, tp, fp, acs) = self.process_api(api_folder)
        sum_acs = sum_acs + acs
        sum_items = sum_items + items
        sum_tp = sum_tp + tp
        sum_fp = sum_fp + fp
    print("SUM=" + str(sum_acs))
    # print("SUM_ITEMS=" + str(sum_items))
    # print("SUM_TP=" + str(sum_tp))
    # print("SUM_FP=" + str(sum_fp))
    print("-----Sensitive Results-----")
    for sensitive_result in self.sensitive_results:
        print(sensitive_result)
    # logger.info("API Count=" + str(api_cnt))
    logger.info("Sensitive API Count=" + str(len(self.sensitive_results)))
def run(self):
    get_sensitive_keywords()
    api_folders = get_first_layer_files(Config.target_folder)
    for api_folder in api_folders:
        logger.info("Processing Folder:" + str(api_folder))
        self.process_api(api_folder)
    print("-----Sensitive Results-----")
    print("APIs Count=" + str(len(self.apis)))
    print("Sensitive APIs Count=" + str(len(self.sensitive_results)))
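# Config is referenced above but not defined in this section. A minimal sketch,
# assuming target_folder is a plain class attribute pointing at the folder of API
# docs to scan (the attribute name comes from the call site; the value is a
# placeholder, not from the original code):
import os

class Config:
    target_folder = "." + os.sep + "api_docs"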
def parse_dex_folder(target_folder):
    logger.info("Dex Folder=" + target_folder)
    files = get_first_layer_files(target_folder, False)
    # print(len(files))
    for file in files:
        try:
            parser = DexFileParser(file)
            parser.run()
            parser.print_results()
            # parser.print_to_csv()
        except Exception as e:
            print(e)
def process_api(self, folder):
    tp = 0
    fp = 0
    files_list = get_first_layer_files(folder)
    for file in files_list:
        # The class name is the part of the file name before the first space.
        self.processing_class = file.split(os.sep)[-1].split(" ")[0]
        with open(file, encoding='utf-8') as f:
            soup = BeautifulSoup(f, features='html.parser')
        tag_list = soup.find_all()
        c_tp, c_fp = self.get_privacy(tag_list)
        tp = tp + c_tp
        fp = fp + c_fp
    return tp, fp
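# get_privacy is defined elsewhere; it scans the parsed Javadoc tags and returns
# (true positives, false positives) against labeled ground truth. A minimal standalone
# sketch of the keyword-matching part only (the function name and logic below are
# assumptions for illustration, not the project's actual implementation):
def match_sensitive_keywords(tag_list, sensitive_keywords):
    # Collect the tags whose visible text mentions any sensitive keyword.
    matched = []
    for tag in tag_list:
        text = tag.get_text().lower()
        if any(keyword.lower() in text for keyword in sensitive_keywords):
            matched.append(tag)
    return matched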
def process_javadoc_package_folder(target_folder):
    file_list = get_first_layer_files(target_folder, html=False)
    java_doc_folders = []
    new_folder_path = target_folder + os.sep + "new_sdks"
    for file in file_list:
        if ".jar" not in file:
            continue
        # print("File=" + file)
        sdk_name = file.split(os.sep)[-1][:-4]
        # print("SDK_Name=" + sdk_name)
        folder_path = file[0:file.rfind(os.sep)]
        # print("Folder_Path=" + folder_path)
        extract_path = folder_path + os.sep + sdk_name
        # print("Extract_Path=" + extract_path)
        with zipfile.ZipFile(file, "r") as zip_file:
            zip_file.extractall(extract_path)
        # Do not shadow the outer file_list while we are still iterating over it.
        extracted_files = get_all_files(extract_path, html=True)
        # print(extract_path)
        # Skip Javadoc framework pages that do not describe a single class.
        black_list = [
            "allclasses", "constant-values", "deprecated-list",
            "help-doc", "index", "overview-", "package-"
        ]
        html_list = []
        for html_file in extracted_files:
            file_name = html_file.split(os.sep)[-1]
            neglect = False
            for prefix in black_list:
                if file_name.startswith(prefix):
                    neglect = True
                    break
            if neglect:
                continue
            html_list.append(html_file)
        new_sdk_folder = new_folder_path + os.sep + sdk_name + "_new"
        next_folder = new_sdk_folder + os.sep + "All"
        # print("new_folder=" + new_sdk_folder)
        # print("next_folder=" + next_folder)
        if not os.path.exists(new_folder_path):
            os.mkdir(new_folder_path)
        if not os.path.exists(new_sdk_folder):
            os.mkdir(new_sdk_folder)
        if not os.path.exists(next_folder):
            os.mkdir(next_folder)
        for html_file in html_list:
            html_name = html_file.split(os.sep)[-1]
            copyfile(html_file, next_folder + os.sep + html_name)
        java_doc_folders.append(new_sdk_folder)
    parse_historical_javadoc_folder_(
        new_folder_path.split(os.sep)[-2], new_folder_path)
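# get_all_files is not defined in this section. A minimal sketch consistent with the
# call above, assuming it walks the tree recursively and, with html=True, keeps only
# .html files (the signature is inferred from the call site):
import os

def get_all_files(target_folder, html=False):
    results = []
    for root, _dirs, names in os.walk(target_folder):
        for name in names:
            if html and not name.endswith(".html"):
                continue
            results.append(os.path.join(root, name))
    return results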
def process_api(self, folder):
    tp = 0
    fp = 0
    files_list = get_first_layer_files(folder)
    for file in files_list:
        self.processing_class = file.split(os.sep)[-1].split(" ")[0]
        # We should consider the full class name here.
        # logger.info("Processing Class=" + self.processing_class)
        try:
            # Try gb18030 first; fall back to UTF-8 if decoding fails.
            with open(file, encoding="gb18030") as f:
                soup = BeautifulSoup(f, features='html.parser')
        except Exception:
            with open(file, encoding="utf-8") as f:
                soup = BeautifulSoup(f, features='html.parser')
        tag_list = soup.find_all()
        c_tp, c_fp = self.get_privacy(tag_list)
        tp = tp + c_tp
        fp = fp + c_fp
    return tp, fp
def parse_jar_folder(target_folder):
    # logger.info("Jar Folder=" + target_folder)
    jar_files = get_first_layer_files(target_folder, False)
    for jar_file in jar_files:
        if not jar_file.endswith(".jar"):
            continue
        jar_name = jar_file.split(os.sep)[-1]
        print(jar_name)
        # Strip the ".jar" extension to get the SDK name.
        sdk_name = jar_name[:-4]
        # logger.info("Processing File=" + jar_file)
        try:
            parser = DexFileParser(sdk_name, jar_file)
            parser.run()
            parser.print_results()
            parser.print_to_csv()
        except Exception as e:
            print(jar_name + " meets exception!")
            print(e)
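# Example driver, assuming the SDK jars sit in a local "jars" folder (the path is a
# placeholder for illustration, not from the original code):
if __name__ == "__main__":
    parse_jar_folder("." + os.sep + "jars")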