def __init__(self):
    self.cfg = read_cfg("redis")
    self.rj = Client(
        host=self.cfg.get("ip"),
        port=self.cfg.get("port"),
        decoder=RedisJsonDecoder(),
        decode_responses=True,
    )
    self.logger = logger.myLogger("Redis")
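# A minimal readiness-check sketch built only on the wrapper's get_rj()
# accessor and the client's ping(), both of which appear elsewhere in this
# codebase; wait_for_redis itself is a hypothetical helper.
import time

def wait_for_redis(redis_wrapper, retries=20, delay=0.5):
    for _ in range(retries):
        try:
            if redis_wrapper.get_rj().ping():
                return True
        except Exception:
            pass
        time.sleep(delay)
    return False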
def update():
    log = logger.myLogger("Scraper")
    log.info('Now downloading and extracting files...')
    download_and_unzip()
    log.info('Getting mitre refs...')
    scrape_mitre()
    log.info('Getting descriptions...')
    parse_description_file()
    log.info('Parsing files...')
    parse_folder()
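# update() is a plain function, so a minimal entry point could look like
# this (a sketch; the project may wire it up differently):
if __name__ == '__main__':
    update()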
def parse_folder():
    # Relies on module-level globals: path, exploitdb, dictionary, scrapers, mongodb
    for root, dirs, files in os.walk(path, topdown=True):
        for name in files:
            filename = os.path.join(root, name)
            with open(filename, encoding='utf-8') as f:
                exploit = f.read()
            exploit_type = root.split('/')[-1]
            name1, ext = os.path.splitext(name)
            platform_edb = None
            description_edb = None
            date = None
            # Enrich with Exploit-DB metadata when the file is indexed there
            edb = exploitdb.find_one({"filename": name})
            if edb is not None:
                description_edb = edb['title']
                platform_edb = edb['platform']
                date = edb['date']
            # Keep per-extension statistics
            if dictionary.get(ext) is not None:
                dictionary[ext]['total'] += 1
            else:
                obj = {"total": 1, "filename": filename}
                dictionary[ext] = obj
            if ext == '.rb':
                # Search for a 'Metasploit' occurrence
                metasploit = re.findall('(?:class Metasploit|msf/core)', exploit)
                if metasploit:
                    ext = '.metasploit'
            parser = scrapers.get(ext)
            if not parser:
                continue
            scraper = parser(filename, name1, exploit_type, description_edb,
                             platform_edb, exploit, mongodb, date)
            scraper.parse_infos()
            scraper.logger.info(f"Done parsing {filename}")
    logger.myLogger("Scraper").info("Done all files!")
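# parse_folder() dispatches on file extension through the module-level
# 'scrapers' mapping, whose contents are not shown here. A hypothetical
# shape, with made-up class names, would be:
scrapers = {
    '.rb': RubyScraper,          # plain Ruby exploits
    '.metasploit': MsfScraper,   # .rb files re-tagged as Metasploit modules
    '.py': PythonScraper,
    '.txt': TextScraper,
}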
def __init__(self, url_list):
    self.url_list = list(set(url_list))
    self.parsed_url = []
    self.logger = logger.myLogger("Queuer")
    self.parsed_domains = []
    self.current_domain = ''
    self.blacklist = [
        'instagram', 'facebook', 'twitter', 'flickr', 'linkedin',
        'whatsapp', 'pinterest', 'www.wordpress.com', 'hbo', 'netflix',
        'amazon', 'premiumcoding', 'javascript', 'oembed', 'wikipedia',
        'fonts', 'google', 'bing', 'yahoo'
    ]
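# A sketch of how this blacklist could be applied before queueing a URL.
# Entries like 'javascript' and 'oembed' suggest a substring match on the
# whole URL rather than the hostname alone; is_blacklisted is a hypothetical
# helper, not necessarily how the project does it.
def is_blacklisted(url, blacklist):
    return any(entry in url.lower() for entry in blacklist)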
def __init__(self, domain, collection, data=None):
    self.domain = domain
    self.data = data
    self.collection = collection
    self.vulns_by_cms = []
    self.vulns_by_cms_and_plugs = []
    # Candidate vulnerabilities bucketed by confidence
    self.vulns = {
        "true_vulns": [],
        "almost_true": [],
        "probable_vulns": [],
        "possible_vulns": []
    }
    self.logger = logger.myLogger('Checker')
    self.logger.info('Initiating checker...')
def __init__(self, filename, name, exploit_type, title, platform, exploit,
             mongoclient, date, ext=None):
    self.exploit = exploit
    self.filename = filename
    self.name = name
    self.exploit_type = exploit_type
    self.title = title
    self.platform = platform
    self.client = mongoclient
    self.date = date
    if mongoclient is not None:
        self.db = mongoclient.get_db()
        self.collection = self.db['cve_refs']
        self.parsed_col = self.db['parse_exploit']
    self.ext = ext
    self.logger = logger.myLogger('Scraper')
    # Useful regexes for extracting affected version ranges
    # Bounded range, e.g. '4.7 <= 4.9'
    self.between = r'((?:[\dx]+\.?)+\s*(?:-\d+)?)(\s*<=?\s*)((?:[\dx]+\.?)+\s*(?:-(?:[\dx]+\.?)+)?)'
    # Bare version, optionally after an arch tag (x64/x32/x86), not directly
    # preceded by a comparison operator, dash, dot, digit, or whitespace
    self.single = r'(?:x64|x32|x86|(?<![<=>\-\s\.\d]))\s*((?:(?:\d+\.?)+(?:[\dx]+)?)(?:\s*?)(?:-(?:[\dx]+\.?)+)?)(?!\w)'
    # Upper bound only, e.g. '<= 2.5'
    self.small = r'(<=?)\s*((?:[\dx]+\.?)+\s*(?:-(?:[\dx]+\.?)+)?)'
    # Lower bound only, e.g. '>= 1.0'
    self.big = r'(>=?)\s*((?:[\dx]+\.?)+\s*(?:-(?:[\dx]+\.?)+)?)'
    # Accumulator for everything extracted from one exploit file
    self.extracted_object = {
        "refs": [],
        "description": [],
        "vversion": [],
        "name": [],
        "targets": []
    }
    # Capture 'Source:' headers (optionally behind a '//' comment) together
    # with the few free-text lines that follow them
    if self.exploit is None:
        self.source_at_begin = []
    else:
        self.source_at_begin = regex.findall(
            r'^(?:\/\/\s+)?[Ss]ource\s*:\s*(.*)\s+(.*)\s+(.*)\s+([^#]+?)\n',
            self.exploit, flags=regex.M)
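# A quick sanity check of the 'between' pattern above, shown as a sketch;
# it uses the third-party 'regex' module this class already relies on, and
# the sample title is made up.
import regex

between = r'((?:[\dx]+\.?)+\s*(?:-\d+)?)(\s*<=?\s*)((?:[\dx]+\.?)+\s*(?:-(?:[\dx]+\.?)+)?)'
print(regex.findall(between, "MyPlugin 4.7 <= 4.9.1 - SQL Injection"))
# -> [('4.7 ', '<= ', '4.9.1 ')]  (lower bound, operator, upper bound;
#    surrounding whitespace is captured along with the groups)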
def __init__(self, urls):
    self.db = mongodb.get_db()
    self.collection = self.db["cves"]
    self.domains = {}
    self.myQueuer = Queuer(urls)
    self.exploits = {}
    self.myRedis = Redis()
    self.myCheckers = {}
    self.myChecker = None
    self.logger = logger.myLogger("Crawler")
    self.logger.info("Initializing Crawler...")
    self.logger.info(f"Redis at {self.myRedis.get_rj()}")
    # Block until the Redis server answers a ping
    ping = False
    self.logger.info('Waiting for Redis...')
    while not ping:
        try:
            ping = self.myRedis.get_rj().ping()
        except Exception:
            pass
        time.sleep(0.5)
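# Typical construction, as a sketch (the class name Crawler is inferred from
# the log messages; the seed URL is illustrative):
crawler = Crawler(["https://example.com"])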