def parse_edb_cve(self, url, item, html):
    """Build an ``EdbRecord`` from a fetched exploit page and store it.

    Args:
        url: Address of the exploit page; stored as the record's URL.
        item: Seven-element sequence unpacked as ``(raw_id, title, author,
            type, platform, remote_port, published)``.
        html: Markup of the exploit page, wrapped in ``HTML`` so it can be
            queried with the XPath expressions in ``element_xpath``.

    The CVE field is ``'N/A'`` when the page yields no CVE numbers or when
    extracting them raises. The finished record is passed to
    ``self.insert_record``.
    """
    edb_html = HTML(html=html)
    (raw_id, edb_title, edb_author, edb_type, edb_platform, edb_rport,
     edb_published) = item
    edb_id = "EDB-{}".format(raw_id)
    edb_url = url
    edb_verified = get_val(edb_html.xpath(element_xpath['edb_verified']))

    # Bind the fallback first: previously a page without CVE entries left
    # ``edb_cve`` undefined and building the record raised UnboundLocalError.
    edb_cve = 'N/A'
    try:
        edb_cve_num = [
            i.strip() for i in edb_html.xpath(element_xpath['edb_cve'])
        ]
        if edb_cve_num:
            edb_cve = ','.join(
                "CVE-{}".format(cve_id) for cve_id in edb_cve_num)
            tqdm.write("Detected {} <--> {}".format(edb_id, edb_cve))
    except Exception:
        # Best effort: a malformed CVE section must not lose the record.
        edb_cve = 'N/A'

    # The verification markup contains 'mdi-close' for unverified exploits.
    if 'mdi-close' in edb_verified:
        edb_verified = 'Unverified'
    else:
        edb_verified = 'Verified'

    edb_exploit_raw_url = 'https://www.exploit-db.com/raw/{}'.format(raw_id)

    # The page gives a site-relative link; make it absolute when present.
    edb_vulnerable_app_url = get_val(
        edb_html.xpath(element_xpath['edb_vulnerable_app_url']))
    if edb_vulnerable_app_url != "":
        edb_vulnerable_app_url = (
            'https://www.exploit-db.com' + edb_vulnerable_app_url)

    edb_collect_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    exploit_record = EdbRecord(
        edb_id=edb_id,
        edb_title=edb_title,
        edb_url=edb_url,
        edb_author=edb_author,
        edb_cve=edb_cve,
        edb_type=edb_type,
        edb_platform=edb_platform,
        edb_remote_ports=edb_rport,
        edb_verified=edb_verified,
        edb_vulnerable_app_url=edb_vulnerable_app_url,
        edb_exploit_raw_url=edb_exploit_raw_url,
        edb_published=edb_published,
        edb_collect_date=edb_collect_date)
    self.insert_record(exploit_record)
def categorize(self, check_list):
    """Split exploit rows into ready-made entries and pages still to fetch.

    The EDB-to-CVE map is downloaded from ``EDB_MAP_URL``; if the download
    or the JSON decoding fails, the map is rebuilt with
    ``self.recover_map_from_db()`` instead.

    Args:
        check_list: Iterable of eight-element rows unpacked as ``(id, file,
            title, published, author, platform, type, remote_port)``.

    Returns:
        A ``(parsed_list, target_url_list, target_edb_list)`` tuple.
        ``parsed_list`` holds rows whose id is in the map, with the CVE
        value included. The other two lists are index-aligned: the page URL
        and the matching row data for exploits that are not in the map.
        Rows already known to ``self.edb_dao`` are skipped.
    """
    logging.info("Categorizing the Exploits...")
    parsed_list = []
    target_url_list = []
    target_edb_list = []

    try:
        # A timeout keeps a stalled connection from hanging the whole run;
        # on expiry the DB fallback below takes over.
        raw_map = requests.get(EDB_MAP_URL, headers=self.headers, timeout=60)
        edb_cve_map = ujson.loads(raw_map.text)
    except Exception as err:
        logging.warning(
            "Could not load the EDB/CVE map from %s (%s); "
            "recovering it from the DB", EDB_MAP_URL, err)
        edb_cve_map = self.recover_map_from_db()

    for row in tqdm(check_list):
        (e_id, _e_file, e_title, e_published, e_author, e_platform, e_type,
         e_rport) = row
        e_url = "https://www.exploit-db.com/exploits/" + e_id

        if self.edb_dao.exist("EDB-{}".format(e_id)):
            # TODO: update of records existing in the DB
            continue

        if e_id in edb_cve_map:
            try:
                e_cve = get_val(edb_cve_map[e_id])
            except KeyError:
                # NOTE(review): the lookup itself cannot miss after the
                # membership test; kept in case get_val raises KeyError.
                e_cve = 'N/A'
            parsed_list.append([
                e_id, e_title, e_url, e_author, e_cve, e_type, e_platform,
                e_rport, e_published
            ])
        else:
            target_url_list.append(e_url)
            target_edb_list.append([
                e_id, e_title, e_author, e_type, e_platform, e_rport,
                e_published
            ])

    return parsed_list, target_url_list, target_edb_list
def parse_msf_module_local(self, target_file):
    """Parse a local Metasploit module file and store it as an ``MsfRecord``.

    The file is read in full. The first ``initialize ... end`` span is cut
    out with a regex, and the metadata fields (title, description,
    references, platforms, disclosure date) are extracted from that span
    with further regexes. Remote ports are derived from the whole source by
    ``self.optimize_remote_port``. The record is handed to
    ``self.insert_record``.

    Args:
        target_file: Path of the module source file. The part of the path
            from the first occurrence of ``"modules"`` onwards becomes the
            module name.
    """
    regex_pattern = {
        'module_info':
        r"initialize[\s\S]*?end\n",
        'module_title':
        r"['|\"]Name['|\"][ |\t|\S]+['|\"|\)]",
        'module_describe':
        r"['|\"]Description['|\"][\s\S]*?['|\"|\)],\n|['|\"]Description['|\"][^\}]+},\n",
        # Not used yet, see the author-parsing TODO below.
        'module_authors':
        r"['|\"]Author['|\"][^\]]+\],\n|['|\"]Author['|\"][ |\t|\S]+['|\"|\)|\]],\n",
        'module_cve':
        r"['|\"]CVE['|\"],\s['|\"]\d{4}-\d+['|\"]",
        'module_edb':
        r"['|\"]EDB['|\"],\s['|\"]\d+['|\"]",
        'module_cwe':
        r"['|\"]CWE['|\"],\s['|\"]\d+['|\"]",
        'module_bid':
        r"['|\"]BID['|\"],\s['|\"]\d+['|\"]",
        'module_zdi':
        r"['|\"]ZDI['|\"],\s['|\"]\d{2}-\d+['|\"]",
        'module_msb':
        r"['|\"]MSB['|\"],\s['|\"]MS\d{2}-\d+['|\"]",
        'module_osvdb':
        r"['|\"]OSVDB['|\"],\s['|\"]\d+['|\"]",
        'module_wpvdb':
        r"['|\"]WPVDB['|\"],\s['|\"]\d+['|\"]",
        'module_uscert':
        r"['|\"]US-CERT-VU['|\"],\s['|\"]\S+['|\"]",
        'module_packet':
        r"['|\"]PACKETSTORM['|\"],\s['|\"]\S+['|\"]",
        'module_ref_url':
        r"['|\"]URL['|\"],\s['|\"]\S+['|\"]",
        'module_platforms_fmt':
        r"['|\"]Platform['|\"][ |\t]+=>[ |\t]%+[^\}]+},\n",
        'module_platforms':
        r"['|\"]Platform['|\"][ |\t|\S]+['|\"|\)|\]],\n|['|\"]Platform['|\"][^\}]+},\n",
        'module_disclosure_date':
        r"['|\"]DisclosureDate['|\"][ |\t|\S]+['|\"],*\n",
    }

    # Read everything up front and release the handle immediately, so a
    # failure in the parsing below can no longer leak the open file.
    with open(target_file, "r") as file_obj:
        source_code = file_obj.read()

    update_info_code = get_val(
        re.findall(regex_pattern['module_info'], source_code))

    def first_match(key):
        # Result of get_val over every match of the named pattern inside
        # the initialize block.
        return get_val(re.findall(regex_pattern[key], update_info_code))

    def ref_ids(key):
        # Cleaned reference ids for one reference kind, split on commas.
        return self.optimize_ref_id(first_match(key)).split(",")

    module_name = target_file[target_file.find("modules"):]
    module_class = module_name.split(PATH_SPLIT)[1]
    module_url = f"https://www.rapid7.com/db/modules/{module_name}".replace(
        "exploits", "exploit").replace(".rb", "")

    module_title = self.optimize_title(first_match('module_title'))
    # Collapse all runs of whitespace in the description to single spaces.
    module_describe = ' '.join(
        self.optimize_describe(first_match('module_describe')).split())

    # TODO: Efficient author's parsing method (would use the
    # 'module_authors' pattern above).

    module_cve = self.optimize_ref_id(first_match('module_cve'))
    module_edb = self.optimize_ref_id(first_match('module_edb'))

    module_ref_list = (
        ref_ids('module_cwe') + ref_ids('module_bid') +
        ref_ids('module_zdi') + ref_ids('module_msb') +
        ref_ids('module_osvdb') + ref_ids('module_wpvdb') +
        ref_ids('module_uscert') + ref_ids('module_packet') +
        self.optimize_ref_url(first_match('module_ref_url')))
    # Reference kinds without a match contribute empty strings; drop them.
    module_ref_list = [ref for ref in module_ref_list if ref != '']
    module_references = get_val(module_ref_list)

    # Try the ``=> %...{...}`` platform form first, then the general one.
    try:
        module_platforms = self.optimize_platforms(
            re.findall(regex_pattern['module_platforms_fmt'],
                       update_info_code)[0])
    except IndexError:
        try:
            module_platforms = self.optimize_platforms(
                re.findall(regex_pattern['module_platforms'],
                           update_info_code)[0])
        except IndexError:
            module_platforms = ""

    module_remote_ports = self.optimize_remote_port(source_code)
    module_disclosure_date = self.optimize_disclosure_date(
        first_match('module_disclosure_date'))
    module_collect_date = time.strftime("%Y-%m-%d %H:%M:%S",
                                        time.localtime())

    msf_record = MsfRecord(module_name=module_name,
                           module_class=module_class,
                           module_title=module_title,
                           module_url=module_url,
                           module_describe=module_describe,
                           module_cve=module_cve,
                           module_edb=module_edb,
                           module_references=module_references,
                           module_platforms=module_platforms,
                           module_remote_ports=module_remote_ports,
                           module_disclosure_date=module_disclosure_date,
                           module_collect_date=module_collect_date)
    self.insert_record(msf_record)
def parse_edb_cve(self, edb_item):
    """Fetch one exploit page, scrape its metadata and store the record.

    Args:
        edb_item: Mapping whose ``'id'`` entry is the numeric exploit id
            used to build the page URL.

    If the id cannot be read back from the fetched page, a placeholder
    ``EdbRecord`` carrying only the requested id is stored and the method
    returns without scraping further. Otherwise a full record is built and
    passed to ``self.insert_record``. The CVE field is ``'N/A'`` when the
    page lists no usable CVE number or when extraction fails.
    """
    raw_id = edb_item['id']
    edb_id = "EDB-{}".format(raw_id)
    edb_url = "https://www.exploit-db.com/exploits/{}/".format(raw_id)
    page = self.request(edb_url)

    try:
        raw_id = page.html.xpath(
            element_xpath['edb_id'])[0].strip(':').strip()
        edb_id = "EDB-{}".format(raw_id)
    except Exception:
        logging.error("Request error, maybe record have been removed")
        # NOTE(review): the placeholder is keyed by the bare id, not the
        # "EDB-<id>" form used for full records; confirm this is intended.
        exploit_record = EdbRecord(edb_id=raw_id)
        self.insert_record(exploit_record)
        # Stop here: the page is unusable. Previously execution fell
        # through and tried to scrape it anyway.
        return

    edb_title = get_val(page.html.xpath(element_xpath['edb_title']))
    edb_author = get_val(page.html.xpath(element_xpath['edb_author']))

    edb_cve = 'N/A'
    try:
        edb_cve_num = [
            i.strip() for i in page.html.xpath(element_xpath['edb_cve'])
        ]
        # The old guard compared the whole list to '' and 'N/A', which is
        # always true; filter each entry so placeholders are not turned
        # into bogus "CVE-N/A" or "CVE-" ids.
        edb_cve_num = [
            cve_id for cve_id in edb_cve_num if cve_id not in ('', 'N/A')
        ]
        if edb_cve_num:
            edb_cve = ','.join(
                "CVE-{}".format(cve_id) for cve_id in edb_cve_num)
    except Exception:
        # Best effort: a malformed CVE section must not lose the record.
        edb_cve = 'N/A'

    edb_type = get_val(page.html.xpath(element_xpath['edb_type']))
    edb_platform = get_val(page.html.xpath(element_xpath['edb_platform']))

    # The verification markup contains 'mdi-close' for unverified exploits.
    edb_verified = get_val(page.html.xpath(element_xpath['edb_verified']))
    if 'mdi-close' in edb_verified:
        edb_verified = 'Unverified'
    else:
        edb_verified = 'Verified'

    edb_exploit_raw_url = 'https://www.exploit-db.com/raw/{}'.format(raw_id)

    # The page gives a site-relative link; make it absolute when present.
    edb_vulnerable_app_url = get_val(
        page.html.xpath(element_xpath['edb_vulnerable_app_url']))
    if edb_vulnerable_app_url != "":
        edb_vulnerable_app_url = (
            'https://www.exploit-db.com' + edb_vulnerable_app_url)

    edb_published = page.html.xpath(
        element_xpath['edb_published'])[0].strip(':').strip()
    edb_collect_date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    exploit_record = EdbRecord(
        edb_id=edb_id,
        edb_title=edb_title,
        edb_url=edb_url,
        edb_author=edb_author,
        edb_cve=edb_cve,
        edb_type=edb_type,
        edb_platform=edb_platform,
        edb_remote_ports="",
        edb_verified=edb_verified,
        edb_vulnerable_app_url=edb_vulnerable_app_url,
        edb_exploit_raw_url=edb_exploit_raw_url,
        edb_published=edb_published,
        edb_collect_date=edb_collect_date)
    self.insert_record(exploit_record)
def parse_msf_module(self, item):
    """Scrape one Rapid7 module page and store it as an ``MsfRecord``.

    Args:
        item: Site-relative path of the module page; it is appended to
            ``https://www.rapid7.com``.

    When the response status is not 200, a placeholder record named after
    ``item[11:]`` is added through ``self.msf_dao`` and the method returns.
    Otherwise the page is queried with XPath, CVE/EDB identifiers are moved
    out of the reference text into their own fields, and the finished
    record is passed to ``self.insert_record``.
    """
    url = "https://www.rapid7.com{}".format(item)
    page = self.request(url)
    if page.status_code != 200:
        # presumably item[11:] drops a fixed "/db/modules"-style prefix;
        # TODO confirm against the caller.
        msf_record = MsfRecord(module_name=item[11:])
        self.msf_dao.add(msf_record)
        # Stop here: an error page has none of the elements queried below.
        # Previously execution fell through and tried to scrape it.
        return

    element_xpath = {
        'module_title':
        '//div[@class="vulndb__detail-main"]/h3/text()',
        'module_url':
        '/html/head/link[@rel="canonical"]/@href',
        'module_devlink':
        '//section[contains(@class,"vulndb__solution")]/ul/li[1]/a/@href',
        'module_describe':
        '//div[contains(@class,"vulndb__detail-content")]/p/text()',
        'module_authors':
        '//div[contains(@class,"vulndb__detail-content")]/ul/li/text()',
        'module_references':
        '//section[contains(@class,"vulndb__references")]/ul/li//text()',
        'module_platforms':
        '//div[contains(@class,"vulndb__detail-content")]/p[2]/text()',
        'module_architectures':
        '//div[contains(@class,"vulndb__detail-content")]/p[3]/text()',
    }

    module_url = get_val(page.html.xpath(element_xpath["module_url"]))
    module_devlink = get_val(
        page.html.xpath(element_xpath["module_devlink"]))
    # NOTE(review): 60 looks like the length of a fixed source-link prefix;
    # confirm against the actual link format.
    module_name = module_devlink[60:]
    module_title = get_val(page.html.xpath(element_xpath["module_title"]))
    # Collapse all runs of whitespace in the first description paragraph.
    module_describe = ' '.join(
        page.html.xpath(element_xpath["module_describe"])[0].split())
    module_authors = get_val(
        page.html.xpath(element_xpath["module_authors"]))
    module_references = get_val(
        page.html.xpath(element_xpath["module_references"]))

    module_cve = ""
    module_edb = ""
    # Extracting CVEs&EDBs from reference information
    if module_references is not None:
        numbering_list = re.findall(r"CVE-\d{4}-\d+|EDB-\d+",
                                    module_references)
        # Remove the identifiers (and a directly following comma) from the
        # free-text references so they are not stored twice.
        module_references = re.sub(r"CVE-\d{4}-\d+,?|EDB-\d+,?", "",
                                   module_references)
        # Every match starts with either "CVE-" or "EDB-".
        module_cve = ','.join(
            ref for ref in numbering_list if ref.startswith("CVE"))
        module_edb = ','.join(
            ref for ref in numbering_list if ref.startswith("EDB"))

    module_platforms = get_val(
        page.html.xpath(element_xpath["module_platforms"]))
    module_architectures = get_val(
        page.html.xpath(element_xpath["module_architectures"]))

    modified_date = self.get_modified_date(module_name)
    module_update_date = parser.parse(modified_date).strftime(
        "%Y-%m-%d %H:%M:%S")
    module_collect_date = time.strftime("%Y-%m-%d %H:%M:%S",
                                        time.localtime())

    msf_record = MsfRecord(module_name=module_name,
                           module_title=module_title,
                           module_url=module_url,
                           module_describe=module_describe,
                           module_authors=module_authors,
                           module_cve=module_cve,
                           module_edb=module_edb,
                           module_references=module_references,
                           module_platforms=module_platforms,
                           module_architectures=module_architectures,
                           module_update_date=module_update_date,
                           module_collect_date=module_collect_date)
    self.insert_record(msf_record)