def does_everything_exist(self) -> bool:
    '''
    Check that the base directory, the URL list and the log file exist.

    The checks are delegated to SkimUitls.check_dir_exists and
    SkimUitls.check_file_exists (per the original note these helpers are
    expected to create anything missing - not verifiable from here).

    Returns:
        True when all three checks report success, False when any check
        fails or any error is raised along the way.
    '''
    try:
        util = skim_utils.SkimUitls()
        # Diagnostic output: echo the paths being checked and each result.
        print(str(self.basepath))
        print(self.path_to_urls)
        print(str(self.logfile))
        base = util.check_dir_exists(self.basepath)
        print(str(base))
        urls = util.check_file_exists(self.path_to_urls)
        print(str(urls))
        log = util.check_file_exists(self.logfile)
        print(str(log))

        checks = (("basepath", base), ("path_to_urls", urls), ("logfile", log))
        missing = [name for name, ok in checks if not ok]
        if missing:
            # Name the failing checks so the message below is actionable.
            raise IOError("missing: " + ", ".join(missing))
        return True
    except IOError as i:
        print(
            "IO Error! - controller.does_everything_exist.Checking_files_creating_if_not_exists: "
            + str(i))
    except Exception as e:
        print("Error! - controller.does_everything_exist: " + str(e))
    # Every failure path ends here; honour the declared bool return type.
    return False
def is_internet_available(self) -> bool:
    '''
    Check that internet connectivity is available.

    Returns:
        The truthiness of SkimUitls.test_internet(), or False when the
        check itself raises.
    '''
    try:
        return bool(skim_utils.SkimUitls().test_internet())
    except Exception as e:
        print("Error! in SkimController.is_internet_available: " + str(e))
        # A failed probe is reported as "not available" rather than None.
        return False
def checker(self, line: str) -> bool:
    '''
    Tell whether any element of self.dynamic_content occurs in "line".

    Args:
        line: text to search; it is converted with str() first.

    Returns:
        True if at least one element of self.dynamic_content is a
        substring of the line, False otherwise (including when an error
        is raised while checking).
    '''
    try:
        text = str(line)
        return any(marker in text for marker in self.dynamic_content)
    except Exception as e:
        print("Error! in Hasher.checker: " + str(e))
        return False
def clean_and_print_banner(self) -> bool:
    '''
    Clean up and print the banner.

    Removes orphan files under self.basepath, clears the screen and then
    shows the banner, in that order.

    Returns:
        True when every step completed, False when any step raised.
    '''
    try:
        import toolbag
        import skim_conf

        utils = skim_utils.SkimUitls()
        conf = skim_conf.Skim_conf()
        tb = toolbag.Toolbag()

        utils.remove_orphan_files(self.basepath)
        tb.clear_screen()
        conf.show_banner()
        return True
    except Exception as e:
        print("Error! in SkimController.clean_and_print_banner: " + str(e))
        return False
def main():
    '''
    Execution flow is controlled from here.

    Steps: verify connectivity, verify the required directory and files,
    clean up and show the banner, load the domain list and hand it to
    controller.parallelize. Exits with status 1 when either
    precondition check fails.
    '''
    import sys

    controller = SkimController()
    lint = skim_utils.SkimUitls().lint

    # The method must be *called*: a bound method object is always truthy,
    # so testing it without parentheses silently disables this check.
    if not controller.is_internet_available():
        lint("\nError! - Check internet connectivity\n")
        sys.exit(1)

    # check dirs and files exists, if not create them.
    if not controller.does_everything_exist():
        lint("\nIO Error! - File does not exist.")
        sys.exit(1)

    import skim_reader_io
    reader = skim_reader_io.SkimReader().fetch_domain_list
    controller.clean_and_print_banner()
    list_of_domains = reader(controller.path_to_urls)
    controller.parallelize(list_of_domains)
def print_counts(self):
    '''
    Print the number of sites in each category.

    The total comes from self.get_number_of_domains(); every other figure
    comes from self.count_sites() applied to that category's file name.
    Any error is reported with print() and swallowed.
    '''
    try:
        # NOTE(review): skim_controller and skim_reader_io are not used
        # below; they are imported only to keep import behaviour unchanged.
        import skim_controller
        import skim_utils
        import skim_reader_io

        emit = skim_utils.SkimUitls().lint
        border = "***************************************"
        per_file_rows = (
            ("Sites up: ", "up.txt"),
            ("Sites not responding: ", "not_responding.txt"),
            ("Sharepoint Sites: ", "sharepoint.txt"),
            ("Wordpress Sites: ", "wordpress.txt"),
            ("Drupal Sites: ", "drupal.txt"),
            ("Joomla Sites: ", "joomla.txt"),
        )

        emit(border)
        emit("Total Sites: " + str(self.get_number_of_domains()))
        for label, filename in per_file_rows:
            emit(label + str(self.count_sites(filename)))
        emit(border)
    except Exception as e:
        print("Error! in Cleaner.print_counts(): " + str(e))
def fetch_domain_list(self, dir: str) -> List:
    '''
    Get the list of domains to scan, from file.

    Each non-blank line of the file is stripped and prefixed with
    "http://", the result is filtered through self.whitelist_sieve, then
    shuffled with Toolbag.shuffler and returned.

    Args:
        dir: path of the file holding one domain per line (the name
            shadows the builtin but is kept for caller compatibility).

    Returns:
        The whitelisted, shuffled list of URLs, or an empty list when
        anything goes wrong (the error is printed).
    '''
    try:
        import toolbag
        import skim_utils
        import time

        tools = toolbag.Toolbag()
        lint = skim_utils.SkimUitls().lint
        col = tools.color

        # Blank lines are skipped so they do not turn into bare "http://"
        # entries; the with-block closes the file on its own.
        with open(str(dir), "r") as url_file:
            url_list = [
                "http://" + entry.strip()
                for entry in url_file
                if entry.strip()
            ]

        lint(
            col("Number of domains before whitelisting: " + str(len(url_list)),
                "yellow"))
        time.sleep(.7)  # brief pause so the count stays readable on screen

        clean_list = self.whitelist_sieve(url_list)
        clean_list = tools.shuffler(clean_list)
        lint(
            col("Number of domains after whitelisting: " + str(len(clean_list)),
                "yellow") + "\n\n")
        time.sleep(.5)
        return clean_list
    except Exception as e:
        print("Error! in SkimController.fetch_domain_list: " + str(e))
        # Keep the declared list return type so callers can still iterate.
        return []
def send_request(self, url) -> bool:
    '''
    Take url, send an HTTP GET request, then classify the site according
    to the response or to the error that was raised.

    On a 200 response the url is written to the "up" category, the body is
    passed to SkimUitls.manage_content and the CMS filters are tried in
    order (SharePoint, WordPress, Drupal, Joomla), stopping at the first
    one that reports a match. On an error, a message is sent to lint and
    the url is written to a category chosen from the exception type and,
    for some types, from text found in the exception message.

    Args:
        url: address to request; converted with str().

    Returns:
        True once the outcome has been handled.
    '''
    url = str(url)

    def record(message, category):
        # Report "url + message" through lint and file url under category.
        skim_utils.SkimUitls().lint(url + message)
        skim_writer_io.Skim_writer_io().writer(url, category)
        return True

    try:
        import toolbag
        import skim_cms_filter
        import skim_controller

        headers = toolbag.Toolbag().get_headers("ie")
        writer = skim_writer_io.Skim_writer_io().writer
        http_timeout = skim_controller.SkimController().http_timeout
        utils = skim_utils.SkimUitls()
        cms = skim_cms_filter.SkimCmsFilter()

        res = requests.get(url, headers=headers, timeout=http_timeout)
        if res.status_code != 200:
            # Raises HTTPError for 4xx/5xx, handled further down.
            res.raise_for_status()
            # NOTE(review): other non-200 responses that do not raise are
            # not written to any category - confirm this is intended.
            return True

        writer(url, "up")
        utils.manage_content(url, res.text)
        # short circuit, if one CMS then not another, in order of probability
        if not (cms.is_it_sharepoint(url, res.headers)
                or cms.is_it_wordpress(url, res.headers)
                or cms.is_it_drupal(url, res.headers, res.text)):
            cms.is_it_joomla(url, res.headers, res.text)
        #Uncomment to show performance stats
        #utils.lint(str(url) + " Perf Manage Content: " + str(utils.perf_manage_content))
        #utils.lint(str(url) + ": Perf Drupal: " + str(cms.perf_is_it_drupal))
        #utils.lint(str(url) + ": Perf Wordpress: " + str(cms.perf_is_it_wordpress))
        #utils.lint(str(url) + ": Perf_sharepoint: " + str(cms.perf_is_it_sharepoint))
        #utils.lint(str(url) + ": Perf_Joomla: " + str(cms.perf_is_it_joomla))
        return True
    except requests.ConnectTimeout as ct:
        detail = str(ct)
        if "SSL" in detail:
            return record(" - SSL Error!!! " + detail + "\n", "ssl")
        return record(" - Connect Timeout!!! " + detail + "\n", "not_responding")
    except requests.ConnectionError as c:
        detail = str(c)
        # Was "SSSLError", which can never occur in the message.
        if "SSLError" in detail:
            return record(" - SSL Error !!! \n", "ssl")
        if "503" in detail:
            return record(" - 503 Transaction error!! \n", "up")
        if "Max retries exceeded with url" in detail:
            return record(" - site not responding\n", "not_responding")
        return record(" - Connection Error !!! " + "\n", "not_responding")
    except requests.ReadTimeout as rt:
        return record(" !!! Read Timeout!!! " + str(rt) + "\n", "investigate")
    except requests.URLRequired as ur:
        return record(" !!! URLRequired Error!!! " + str(ur) + "\n", "investigate")
    except requests.TooManyRedirects as tmr:
        return record(" !!! TooManyRedirects Error!!! " + str(tmr) + "\n",
                      "investigate")
    except requests.RequestsDependencyWarning as d:
        return record(" !!! RequestsDependencyWarning Error!!! " + str(d) + "\n",
                      "investigate")
    except requests.HTTPError as h:
        detail = str(h)
        # (text looked for in the error, label to report, category); the
        # first matching row wins, so the order is significant.
        http_rules = (
            ("Authorization Required", " !!! 401 Auth required !!! ", "up"),
            ("Client Error: Forbidden for url", " !!! 403 Forbidden !!! ", "up"),
            # No trailing space after "url": the message continues with ":".
            ("404 Client Error: Not Found for url", " !!! 404 Not Found !!! ",
             "not_responding"),
            ("500 Server Error: INTERNAL SERVER ERROR",
             " !!! 500 INTERNAL SERVER ERROR !!! ", "up"),
            ("503 Server Error: Your transaction has failed.",
             " !!! 503 Transaction Failed !!! ", "up"),
        )
        for needle, label, category in http_rules:
            if needle in detail:
                return record(label + detail + "\n", category)
        return record(" !!! HTTPError !!! " + detail + "\n", "http_errors")
    except requests.Timeout as t:
        return record(" !!!Timeout!!! " + str(t) + "\n", "investigate")
    except requests.RequestException as r:
        detail = str(r)
        if "URL has an invalid label" in detail:
            return record(" - URL has an invalid label\n", "up")
        return record(" !!! request exception!!!! " + detail + "\n", "investigate")
    except Exception as e:
        return record(" !!! Investigate !!!" + str(e) + "\n", "investigate")