def enqueueAndFlush(self):
    """Flush the buffered links to a ``Queuer`` while holding ``self._lock``.

    When ``self.linkbuffer`` is empty nothing happens. Otherwise the buffer
    is handed to a new ``Queuer`` and enqueued, ``self.creationTime`` is
    reset to the current time, the buffer is replaced by a fresh empty
    list, and a new ``Throttler`` is asked to throttle.
    """
    with self._lock:
        # Nothing buffered: leave the timestamp and the buffer untouched.
        if len(self.linkbuffer) == 0:
            return

        Queuer(self.linkbuffer).enqueue()
        self.creationTime = time.time()
        # Rebind rather than clear, so the list handed to the Queuer above
        # is not emptied behind its back.
        self.linkbuffer = []
        Throttler().throttle()
def __init__(self, urls):
    """Set up storage handles, the URL queue and caches, then wait for Redis.

    Args:
        urls: Passed unchanged to ``Queuer``; presumably the seed URLs to
            crawl -- confirm against ``Queuer``.

    The constructor blocks until ``self.myRedis.get_rj().ping()`` returns a
    truthy value, polling every half second.
    """
    self.db = mongodb.get_db()
    self.collection = self.db["cves"]
    self.domains = {}
    self.myQueuer = Queuer(urls)
    self.exploits = {}
    self.myRedis = Redis()
    self.myCheckers = {}
    self.myChecker = None
    self.logger = logger.myLogger("Crawler")
    self.logger.info("Initializing Crawler...")
    self.logger.info(f"Redis at {self.myRedis.get_rj()}")
    self.logger.info('Waiting for Redis...')

    ping = False
    while not ping:
        try:
            ping = self.myRedis.get_rj().ping()
        except Exception:
            # Redis is not reachable yet; keep polling. Catching Exception
            # (not a bare except) lets KeyboardInterrupt / SystemExit
            # abort the wait instead of being swallowed.
            pass
        time.sleep(0.5)
def leaveQueue(self, roomId, clientId):
    """Remove ``clientId`` from the queue of ``roomId`` and broadcast the update.

    Delegates the removal to ``Queuer.leaveQueue`` and then calls
    ``self._broadcastUpdateQueue(roomId)``; the value returned by the
    broadcast is not used.
    """
    Queuer.leaveQueue(self, roomId, clientId)
    self._broadcastUpdateQueue(roomId)
def popQueue(self, roomId):
    """Pop the head of the queue of ``roomId`` and broadcast the update.

    Returns:
        Whatever ``Queuer.popQueue`` returned for this room.
    """
    head = Queuer.popQueue(self, roomId)
    # Broadcast only after the pop has completed; the value returned by
    # the broadcast is not used.
    self._broadcastUpdateQueue(roomId)
    return head
def enterQueue(self, roomId, clientId):
    """Add ``clientId`` to the queue of ``roomId`` and broadcast the update.

    Delegates the insertion to ``Queuer.enterQueue`` and then calls
    ``self._broadcastUpdateQueue(roomId)``; the value returned by the
    broadcast is not used.
    """
    Queuer.enterQueue(self, roomId, clientId)
    self._broadcastUpdateQueue(roomId)
def removeQueue(self, roomId):
    """Delete the queue of ``roomId`` and broadcast its removal.

    Raises:
        AssertionError: if ``roomId`` is not a key of ``self.queue``
            (only while assertions are enabled).
    """
    # Precondition: the room must currently have a queue.
    assert roomId in self.queue.keys()
    Queuer.removeQueue(self, roomId)
    # The value returned by the broadcast is not used.
    self._broadcastRemoveQueue(roomId)
def addQueue(self, roomId):
    """Create the queue for ``roomId`` and broadcast it with the person's name.

    After ``Queuer.addQueue`` has run, the ``person`` row whose id equals
    ``roomId`` is queried. Two callbacks are chained on the object returned
    by ``runQuery`` (presumably a Twisted Deferred -- confirm): the first
    builds ``"<first_name> <last_name>"`` from the first row, the second is
    ``self._broadcastNewQueue``, registered with ``roomId`` as an extra
    argument.
    """
    Queuer.addQueue(self, roomId)

    def _full_name(rows):
        # Columns are (id, first_name, last_name); only the first row is used.
        person = rows[0]
        return person[1] + " " + person[2]

    query = "select id, first_name, last_name from person where id = %s"
    pending = self.db.db.runQuery(query, (roomId,))
    pending.addCallback(_full_name)
    pending.addCallback(self._broadcastNewQueue, roomId)
def __init__(self, clientList):
    """Initialise the ``Queuer`` base state, then store clients and a DB handle.

    Args:
        clientList: Stored as ``self.clients`` without copying.
    """
    # Base-class state first, so the attributes below are set on an
    # already-initialised Queuer.
    Queuer.__init__(self)
    self.clients = clientList
    self.db = DBConnect()
class Crawler(object):
    """Crawl queued URLs and collect vulnerabilities and malware per domain.

    For every URL popped from the ``Queuer`` the crawler first asks
    ``check_file`` whether the URL points to a malicious or legit file.
    Otherwise it fetches the page, pushes the extracted content back to the
    queue, fingerprints the domain once (``extract_infos``) and gathers
    vulnerabilities through a per-domain ``Checker``, using Redis as a cache.

    Attributes set by ``__init__``:
        domains:    ``{domain: {"data": <fingerprint>, "is_parsed": bool}}``.
        exploits:   ``{domain: {"true_vulns": [...], "almost_true": [...],
                    "probable_vulns": [...], "possible_vulns": [...],
                    "malware": [...]}}``.
        myCheckers: ``{domain: Checker}`` cache; ``myChecker`` is the one
                    currently in use.
    """

    # Browser-like headers sent with every outgoing HTTP request.
    REQUEST_HEADERS = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36'
    }
    # Index-page extensions probed when the bare domain cannot be fetched.
    INDEX_EXTENSIONS = ('html', 'htm', 'php', 'asp', 'aspx')
    # Vulnerability buckets copied from the Checker into ``self.exploits``.
    VULN_LEVELS = ("true_vulns", "almost_true", "probable_vulns",
                   "possible_vulns")

    def __init__(self, urls):
        """Set up storage handles, the URL queue and caches, then wait for Redis.

        Args:
            urls: Passed unchanged to ``Queuer``.

        Blocks until ``self.myRedis.get_rj().ping()`` returns a truthy
        value, polling every half second.
        """
        self.db = mongodb.get_db()
        self.collection = self.db["cves"]
        self.domains = {}
        self.myQueuer = Queuer(urls)
        self.exploits = {}
        self.myRedis = Redis()
        self.myCheckers = {}
        self.myChecker = None
        self.logger = logger.myLogger("Crawler")
        self.logger.info("Initializing Crawler...")
        self.logger.info(f"Redis at {self.myRedis.get_rj()}")
        self.logger.info('Waiting for Redis...')

        ping = False
        while not ping:
            try:
                ping = self.myRedis.get_rj().ping()
            except Exception:
                # Redis is not reachable yet; keep polling. Not a bare
                # ``except`` so Ctrl-C / SystemExit can abort the wait.
                pass
            time.sleep(0.5)

    def get_index_page(self, domain):
        """Fetch the landing page of ``domain``.

        The bare ``http://<domain>`` URL is tried first. Only if that
        request raises are ``/index.<ext>`` candidates probed, one per
        entry of ``INDEX_EXTENSIONS``.

        Returns:
            ``(headers, text)`` of the first response with status 200, or
            ``(None, None)`` when no such response was obtained.
        """
        try:
            resp = requests.get(url='http://' + domain,
                                headers=self.REQUEST_HEADERS)
            if resp.status_code == 200:
                return resp.headers, resp.text
        except Exception:
            for ext in self.INDEX_EXTENSIONS:
                try:
                    resp = requests.get(url='http://' + domain +
                                        f'/index.{ext}',
                                        headers=self.REQUEST_HEADERS)
                except Exception:
                    # A failing candidate must not escape from inside the
                    # handler; move on to the next extension.
                    continue
                if resp.status_code == 200:
                    return resp.headers, resp.text
        return None, None

    def get_checker(self, domain):
        """Make ``self.myChecker`` the Checker of ``domain``, creating it once."""
        if domain not in self.myCheckers:
            self.myCheckers[domain] = Checker(domain, self.collection)
        self.myChecker = self.myCheckers[domain]

    def crawl(self):
        """Process the queue until it is empty, then log a summary.

        Per-URL failures while fetching or analysing a page are logged and
        do not stop the crawl.
        """
        while not self.myQueuer.empty():
            url = self.myQueuer.pop()
            self.logger.info(f'Url to check: {url}')

            domain = urlparse(url).netloc
            if domain not in self.myQueuer.parsed_domains:
                self.myQueuer.current_domain = domain
                self.get_checker(domain)
            elif domain != self.myQueuer.current_domain:
                continue

            # URLs without a network location are grouped under 'Paths'.
            if not domain:
                domain = 'Paths'
            if domain not in self.exploits:
                self.exploits[domain] = {
                    "true_vulns": [],
                    "almost_true": [],
                    "probable_vulns": [],
                    "possible_vulns": [],
                    "malware": []
                }
            if domain not in self.domains:
                self.domains[domain] = {"data": {}, "is_parsed": False}

            myurl = url if url.startswith('http') else "http://" + url

            # Reset per URL: if the prediction raises, the name would
            # otherwise be unbound (first URL) or hold the previous URL's
            # verdict.
            malw_or_not = None
            try:
                malw_or_not = check_file.get_prediction_from_single_pe(myurl)
            except Exception as e:
                self.logger.error(e)
            if malw_or_not is not None:
                # A tuple verdict means malware; its second item is recorded.
                if isinstance(malw_or_not, tuple):
                    self.exploits[domain]["malware"].append(malw_or_not[1])
                    self.logger.info(
                        f'Found a \033[91mMalware\033[0m file in {myurl}!')
                else:
                    self.logger.info(
                        f'Found a \033[32mLegit\033[0m file in {myurl}!')
                continue

            data_about_domain = {}
            to_url = False
            # NOTE(review): for a URL with an empty netloc the records above
            # were created under 'Paths', while ``current_domain`` may be
            # ''; the lookups below can then fail -- confirm intended
            # behaviour with the Queuer.
            domain = self.myQueuer.current_domain
            try:
                resp = requests.get(myurl, headers=self.REQUEST_HEADERS)
                response = extract(resp.content)
                if '/tag/' in myurl or '/feed' in myurl:
                    self.myQueuer.parsed_url.append(myurl)
                    continue
                self.myQueuer.push(response)

                domain_record = self.domains.get(domain)
                if not domain_record["is_parsed"]:
                    if not domain_record["data"]:
                        headers, data = self.get_index_page(domain)
                        if headers is None:
                            # No index page: fingerprint the page just
                            # fetched instead.
                            headers = resp.headers
                            data = resp.text
                        data_about_domain = extract_infos(headers, data)
                        domain_record["data"] = data_about_domain
                    else:
                        data_about_domain = domain_record["data"]

                    # With the 'Default' CMS value the checks are done
                    # against the URL path instead (see below).
                    if data_about_domain['cms'] == 'Default':
                        to_url = True
                    if not to_url:
                        self.myChecker.set_data(data_about_domain)
                        full = self.myRedis.get_redis_full(data_about_domain)
                        if full:
                            self.myChecker.update_vulns_from_redis(full)
                        else:
                            just_cms = self.myRedis.get_redis_just_cms(
                                data_about_domain)
                            if just_cms:
                                self.myChecker.update_vulns_just_cms(just_cms)
                            else:
                                self.myChecker.check_details()
                        self.extract_vulns(
                            data_about_domain=data_about_domain,
                            domain=domain)
            except Exception as e:
                self.logger.error(e)
                traceback.print_tb(e.__traceback__)

            if to_url:
                self.extract_vulns(url=url,
                                   data_about_domain=None,
                                   domain=domain)
            self.myQueuer.parsed_url.append(url)
            # Drop entries that are merely bucket names, not findings.
            self.exploits[domain]["possible_vulns"] = [
                item for item in self.exploits[domain]["possible_vulns"]
                if item not in self.VULN_LEVELS
            ]

        self._log_summary()

    def _log_summary(self):
        """Log the de-duplicated findings of every domain, then ``self.domains``."""
        for domain, found in self.exploits.items():
            self.logger.info(domain)
            self.logger.info("True Vulns")
            self.logger.info(list(set(found["true_vulns"])))
            self.logger.info("Almost true Vulns")
            self.logger.info(list(set(found["almost_true"])))
            self.logger.info("Probable Vulns")
            self.logger.info(list(set(found["probable_vulns"])))
            self.logger.info("Possible Vulns")
            self.logger.info(str(list(set(found["possible_vulns"]))))
            self.logger.info("Malware found")
            self.logger.info(list(set(found["malware"])))
        self.logger.info(self.domains)

    def extract_vulns(self, data_about_domain=None, url=None, domain=None):
        """Copy the current Checker's findings into ``self.exploits``.

        Args:
            data_about_domain: Domain fingerprint forwarded to
                ``update_redis`` (may be ``None``).
            url: When given, its path is first checked with
                ``self.myChecker.check_path``.
            domain: Key into ``self.exploits`` / ``self.domains``. Defaults
                to ``self.myQueuer.current_domain``; the method previously
                read an undefined name ``domain`` here.

        Marks the domain as parsed and refreshes the Redis cache.
        """
        if domain is None:
            domain = self.myQueuer.current_domain
        if url is not None:
            self.myChecker.check_path(urlparse(url).path)
        vulns = self.myChecker.get_all_vulns()
        for level in self.VULN_LEVELS:
            self.exploits[domain][level].extend(vulns[level])
        self.domains[domain]["is_parsed"] = True
        self.update_redis(data_about_domain)

    def update_redis(self, data_about_domain):
        """Store the current Checker's findings in both Redis caches."""
        exploits_full = self.myChecker.get_vulns_by_cms_and_plug()
        self.myRedis.update_redis_full(data_about_domain, exploits_full)
        exploits_just_cms = self.myChecker.get_vulns_by_cms()
        self.myRedis.update_redis_just_cms(data_about_domain,
                                           exploits_just_cms)