def __init__(self, maximum_size, window_percentage=1):
    """Initialise the bookkeeping for a window + probation/protected cache.

    Args:
        maximum_size: Total capacity; forwarded to the parent constructor
            and used to size the ``CMS`` instance.
        window_percentage: Share of ``maximum_size`` (in percent) reserved
            for the window segment. Defaults to 1.
    """
    super().__init__(maximum_size)

    self.data = {}
    self.cms = CMS(maximum_size)

    # One sentinel node per segment.
    # NOTE(review): presumably each is the head of a linked list - confirm
    # against Node's definition.
    self.sentinel_window = Node()     # LRU
    self.sentinel_probation = Node()  # SLRU
    self.sentinel_protected = Node()  # SLRU

    # Capacity split: the window takes `window_percentage` percent of the
    # total, and the protected segment takes 4/5 of whatever remains.
    # NOTE(review): self.maximum_size is presumably set by the parent
    # constructor - confirm.
    self.max_window_size = (self.maximum_size * window_percentage) // 100
    main_capacity = self.maximum_size - self.max_window_size
    self.max_protected = main_capacity * 4 // 5

    # Current occupancy counters start at zero.
    self.size_window = self.size_protected = 0
def __init__(self):
    """Create the frequency-tracking state with all counters at zero."""
    self.cms = CMS(5000)

    # Running total and number of samples for the "hinter" statistic.
    self.hinter_sum = self.hinter_count = 0

    self.freqs = Counter()  # Alternatively use SpaceSaving
# Response headers copied into the "sec_headers" section of the report.
_SECURITY_HEADERS = (
    "X-XSS-Protection",
    "X-Frame-Options",
    "X-Content-Type-Options",
    "Content-Security-Policy",
    "Strict-Transport-Security",
    "Public-Key-Pins",
)

# Response headers always present in the "cache_headers" section.
_CACHE_HEADERS = (
    "Pragma",
    "Cache-Control",
    "ETag",
    "Vary",
    "Age",
    "Expires",
    "Warning",
)

# Link categories that get their own section in the report.
_LINK_TYPES = ("internal", "subdomain", "external")


def _collect_links(soup):
    """Group the page's ``<a href>`` targets by the type pg.href_filter reports."""
    links = {kind: {} for kind in _LINK_TYPES}
    for anchor in soup.find_all('a'):
        href = anchor.get('href')
        if href is None:
            continue

        href_dic = pg.href_filter(href)
        # Equality (not identity) is kept on purpose: href_filter is defined
        # elsewhere and the original code skipped anything equal to False.
        if href_dic == False:  # noqa: E712
            continue

        href_text = anchor.get_text().strip()
        kind = href_dic["type"]
        if kind in links:
            links[kind][href_dic["href"]] = href_text
        else:
            print("Undefined type of href - %s" % href_text)
    return links


def _collect_headers(headers):
    """Return the ``(security, cache)`` header dicts taken from *headers*."""
    security = {name: headers.get(name) for name in _SECURITY_HEADERS}
    cache = {name: headers.get(name) for name in _CACHE_HEADERS}
    # Also pick up any other header whose name contains "cache",
    # compared case-insensitively.
    for name in headers:
        if "cache" in name.lower():
            cache[name] = headers.get(name)
    return security, cache


def entry_point():
    """Fetch the page at ``pg.url``, analyse it and write a JSON report.

    The report contains the page's links grouped by type, the detected CMS
    and its version, selected security and cache response headers, and the
    domain's IP and netname. It is written to
    ``<domain>_<YYYY-mm-dd_HH-MM>.json`` in the current directory, and a
    short summary is passed to ``con_report``.

    When the script is run with exactly three argv entries and the third is
    ``"debug"``, the raw page source is also saved next to the report with
    an ``.html`` extension.

    Returns:
        False if the fetched page is empty, True once the report is written.
    """
    # ---- Stage 1: parameters -------------------------------------------
    main_obj = pg.url_parse(pg.url)
    pg.domain = main_obj["domain"]
    pg.scheme = main_obj["scheme"]

    # Output files share a "<domain>_<timestamp>" prefix.
    file_prefix = "_".join([pg.domain, time.strftime("%Y-%m-%d_%H-%M")])
    file_out = file_prefix + ".json"

    # ---- Stage 2: page source ------------------------------------------
    pg.get_webpage()
    if not pg.data:
        print('[!] Web page is empty\n')
        return False

    # Save the page source in debug mode.
    if len(sys.argv) == 3 and sys.argv[2] == "debug":
        write_file(pg.data, file_prefix + ".html")

    # ---- Stage 3: build the report -------------------------------------
    soup = BeautifulSoup(pg.data, 'html.parser')

    report = {"basic_url": pg.url}
    report.update(_collect_links(soup))

    # CMS detection; the version is only looked up when a CMS was found.
    cms_obj = CMS(pg.data, pg.domain, pg.scheme)
    cms_obj.cms_detect()
    if cms_obj.cms != "undefined":
        cms_obj.get_version()
    report["cms"] = cms_obj.cms
    report["cms_version"] = cms_obj.version

    report["sec_headers"], report["cache_headers"] = _collect_headers(
        pg.headers)

    # IP and whois; the netname is only looked up when the IP resolved.
    dmn_obj = Domain(pg.domain)
    dmn_obj.get_ip()
    if dmn_obj.ip != "undefined":
        dmn_obj.get_netname()
    report["domain_ip"] = dmn_obj.ip
    report["domain_netname"] = dmn_obj.whois

    # ---- Stage 4: output -----------------------------------------------
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 instead of relying on the platform default.
    with open(file_out, 'w', encoding='utf-8') as json_file:
        json.dump(report, json_file, ensure_ascii=False, sort_keys=True,
                  indent=0)

    # Short summary for the console.
    console_report = {
        "filename": file_out,
        "internal": len(report["internal"]),
        "subdomain": len(report["subdomain"]),
        "external": len(report["external"]),
        "cms": report["cms"],
        "cms_version": report["cms_version"],
    }
    con_report(console_report)
    return True