def __init__(self, project_id, connection, thread_semaphore,
             database_semaphore, url):
    """
    Parameters:
    ===========
    :param project_id: The id of the project this scan belongs to
    :param connection: MySQL database connection object
    :param thread_semaphore: This semaphore is used to control the running threads
    :param database_semaphore: This semaphore is used to control the threads
     which add information to the database
    :param url: The url for which the information is to be gathered
    :return: None
    """
    self.__project_id = project_id
    self.__connection = connection
    self.__thread_semaphore = thread_semaphore
    self.__database_semaphore = database_semaphore
    self.__url = url
    # get the ip address of the url
    with ThreadPoolExecutor(max_workers=1) as executor:
        ip = executor.submit(URL().get_ip, self.__url)
        self.__ip = ip.result()
    # get the headers of the HEAD request (issue it once and reuse the result)
    head_response = URL().get_head_request(
        url=self.__url, user_agent=UserAgent.get_user_agent())
    self.__headers = head_response.headers if head_response is not None else ""
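# A minimal sketch of an ip lookup in the spirit of URL().get_ip, assuming it
# resolves the url's host through the standard library; the project's helper
# may add caching or error handling. The function name is illustrative.
import socket
from urllib.parse import urlparse

def get_ip_sketch(url):
    host = urlparse(url).netloc.split(":")[0]  # drop any explicit port
    return socket.gethostbyname(host)

# Example: get_ip_sketch("http://example.com/") returns the resolved
# IPv4 address as a string.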
def __get_programming_language(self):
    """
    Try to get the programming language used by the target from the
    response headers; fall back to the session cookie name.
    :return: None
    """
    self.__thread_semaphore.acquire()
    try:
        self.__programming_language_used = self.__headers['X-Powered-By']
    except KeyError:
        self.__programming_language_used = None
    except Exception as e:
        print(e)
        self.__programming_language_used = None
    # If the headers didn't reveal the programming language,
    # try to infer it from the session cookie name
    if self.__programming_language_used is None:
        r = URL().get_request(url=self.__url,
                              user_agent=UserAgent.get_user_agent())
        session_id = (requests.utils.dict_from_cookiejar(r.cookies)
                      if r is not None else {})
        # session_id contains the session cookies of the targeted url
        if "PHPSESSID" in session_id:
            self.__programming_language_used = "PHP"
        elif "JSESSIONID" in session_id:
            self.__programming_language_used = "J2EE"
        elif "ASP.NET_SessionId" in session_id:
            self.__programming_language_used = "ASP.NET"
        elif "CFID" in session_id and "CFTOKEN" in session_id:
            self.__programming_language_used = "COLDFUSION"
        else:
            self.__programming_language_used = "None"
    self.__thread_semaphore.release()
def __check_numerical_vulnerability(self):
    """
    Description:
    ------------
    This method is used to check for a numerical SQL vulnerability
    in the given url.

    See:
    ----
    Numerical Vulnerability in references.txt
    :return: None
    """
    self.__thread_semaphore.acquire()
    payloaded_urls = Query.add_one(self.__url)
    for payloaded_url in payloaded_urls:
        r = URL().get_request(url=payloaded_url,
                              user_agent=UserAgent.get_user_agent())
        if r is not None:
            new_soup_object = BeautifulSoup(r.content, "html.parser")
            # An identical page for the arithmetic payload means the
            # parameter reaches the query unquoted
            if self.__soup_object == new_soup_object:
                print("[+] NUMERICAL VULNERABILITY FOUND")
                print("[+] PAYLOAD: ", payloaded_url)
                SivaDB.update_analysis(
                    connection=self.__connection,
                    database_semaphore=self.__database_semaphore,
                    project_id=self.__project_id,
                    method="GET",
                    source=self.__url,
                    payload=payloaded_url,
                    description="NUMERICAL VULNERABILITY")
    self.__thread_semaphore.release()
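# A minimal sketch of what a payload generator like Query.add_one could do,
# assuming it rewrites each numeric query value n as "(n+1)-1" so that a
# backend which evaluates the arithmetic returns the same page. The helper
# name and exact behaviour are assumptions for illustration, not the
# project's actual implementation.
from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def add_one_sketch(url):
    parts = urlparse(url)
    params = parse_qsl(parts.query)
    payloaded_urls = []
    for index, (key, value) in enumerate(params):
        if value.isdigit():
            mutated = list(params)
            # "6-1" evaluates back to 5 only if the value reaches SQL unquoted
            mutated[index] = (key, "{}-1".format(int(value) + 1))
            payloaded_urls.append(
                urlunparse(parts._replace(query=urlencode(mutated))))
    return payloaded_urls

# Example: add_one_sketch("http://example.com/item.php?id=5")
# -> ["http://example.com/item.php?id=6-1"]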
def __check_escape_sequence_vulnerability(self):
    """
    Description:
    ------------
    Append a single quote (') to each query parameter to check whether
    an SQL escape-sequence vulnerability occurs
    :return: None
    """
    # Append ' to every individual parameter and store the payloaded urls
    self.__thread_semaphore.acquire()
    payloaded_urls = Query().append_payload_to_all_queries(url=self.__url,
                                                           payload="'")
    for payloaded_url in payloaded_urls:
        print(payloaded_url)
        r = URL().get_request(url=payloaded_url,
                              user_agent=UserAgent.get_user_agent())
        if r is not None:
            new_soup_object = BeautifulSoup(r.content, "html.parser")
            # Now compare both soup objects
            SQLErrorIdentifier(project_id=self.__project_id,
                               thread_semaphore=self.__thread_semaphore,
                               database_semaphore=self.__database_semaphore,
                               original_soup_object=self.__soup_object,
                               payloaded_soup_object=new_soup_object,
                               original_url=self.__url,
                               payloaded_url=payloaded_url,
                               connection=self.__connection,
                               poc_object=self.__poc_object)
    self.__thread_semaphore.release()
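# A minimal sketch of the kind of comparison a class like SQLErrorIdentifier
# could perform: look for well-known database error signatures that appear in
# the payloaded page but not in the original one. The signature list and the
# function name are illustrative assumptions, not the project's actual code.
SQL_ERROR_SIGNATURES = (
    "you have an error in your sql syntax",   # MySQL
    "unclosed quotation mark",                # SQL Server
    "pg::syntaxerror",                        # PostgreSQL
    "ora-00933",                              # Oracle
)

def looks_sql_vulnerable_sketch(original_soup, payloaded_soup):
    original_text = original_soup.get_text().lower()
    payloaded_text = payloaded_soup.get_text().lower()
    return any(sig in payloaded_text and sig not in original_text
               for sig in SQL_ERROR_SIGNATURES)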
def run(self):
    self.__requests_object = URL().get_request(
        url=self.__url, user_agent=UserAgent.get_user_agent())
    if self.__requests_object is None:
        return
    self.__soup_object = BeautifulSoup(self.__requests_object.content,
                                       "html.parser")
    # By now we have both the requests object and the soup object
    # ================== SQL Injection Test ====================
    sqli_thread = Thread(target=self.check_sql_injection)
    sqli_thread.start()
    # ================= HTML VULNERABILITIES ===================
    self.check_html_vulnerabilities()
def __set_response_time_of_fastest_website(self):
    """
    Description:
    ------------
    This method calculates the response time of the fastest website
    :return: None
    """
    start_time = time.time()
    r = URL().get_request(url=self.__fastest_website,
                          user_agent=UserAgent.get_user_agent())
    end_time = time.time()
    if r is not None:
        self.__response_time_of_fastest_website = end_time - start_time
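# requests already records the time between sending a request and the arrival
# of the response headers, so a sketch that avoids manual wall-clock timing
# could rely on response.elapsed (a datetime.timedelta) instead. The function
# name and parameters here are illustrative assumptions.
import requests

def response_time_sketch(url, user_agent):
    r = requests.get(url, headers={"User-Agent": user_agent}, timeout=10)
    return r.elapsed.total_seconds()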
def __check_programming_language(self, url):
    """
    Description:
    ============
    This method tries its best to determine the programming language
    used to build the website.

    Notes:
    ======
    This method heavily uses the URL class from the url package
    :return: None
    """
    self.__thread_semaphore.acquire()
    print("[+] ANALYSING PROGRAMMING LANGUAGE")
    # File extensions of the popular languages used to build websites
    language_names = {
        ".php": "PHP",
        ".jsp": "JSP",
        ".asp": "ASP",
        ".aspx": "ASPX",
        ".py": "PYTHON",
        ".pl": "PERL"
    }
    user_agent = UserAgent.get_user_agent()
    r = URL().get_request(url=url, user_agent=user_agent)
    if r is not None:
        soup = BeautifulSoup(r.content, "html.parser")
        language_found = False
        for anchor in soup.find_all("a"):
            try:
                partial_url = anchor.get("href")
                if partial_url is None:
                    continue
                if "http" not in partial_url:
                    new_url = URL.join_urls(url, partial_url)
                else:
                    new_url = partial_url if URL.is_same_domain(
                        url, partial_url) else ""
                file_name = URL.get_file_name(new_url)
                for extension in language_names:
                    if extension in file_name:
                        self.__programming_language_used = language_names[extension]
                        # Update the detected language in the database
                        InfoGatheringPhaseOneDatabase.update_programming_language(
                            self.__database_semaphore, self.__connection,
                            self.__project_id, self.__programming_language_used)
                        language_found = True
                        break
                if language_found:
                    break
            except Exception:
                pass
    self.__thread_semaphore.release()
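# A minimal sketch, assuming URL.get_file_name simply extracts the last path
# component of a url; the project's real helper may differ. The function name
# is illustrative.
from urllib.parse import urlparse

def get_file_name_sketch(url):
    # "http://example.com/blog/index.php?id=2" -> "index.php"
    return urlparse(url).path.rsplit("/", 1)[-1]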
def crawl(self, url):
    """
    Description:
    ------------
    This will crawl the urls completely
    :param url: The url to be crawled
    :return: None
    """
    start_time = time.time()
    r = URL().get_request(url=url, user_agent=UserAgent.get_user_agent())
    end_time = time.time()
    total_time = end_time - start_time
    self.__bob_object.predict(total_time)
    if r is not None:
        soup = BeautifulSoup(r.content, "html.parser")
        # At this stage we have got the BeautifulSoup object
        # First find all the href links
        for anchor in soup.find_all("a"):
            try:
                partial_url = anchor.get("href")
                url_to_be_scanned = None  # the url we will scan
                # Check whether the href is absolute or partial
                if "http" in partial_url:
                    if URL.is_same_domain(self.__base_url, partial_url):
                        if partial_url not in self.__crawled_urls:
                            self.__urls.put(partial_url)
                            self.__crawled_urls.append(partial_url)
                            url_to_be_scanned = partial_url
                else:
                    full_url = URL.join_urls(self.__base_url, partial_url)
                    if full_url not in self.__crawled_urls:
                        self.__urls.put(full_url)
                        self.__crawled_urls.append(full_url)
                        url_to_be_scanned = full_url
                # Run a simple scan on the url
                if url_to_be_scanned is not None:
                    print("[i] CURRENTLY SCANNING [GET]: ", url_to_be_scanned)
                    SimpleScan(project_id=self.__project_id,
                               thread_semaphore=self.__thread_semaphore,
                               database_semaphore=self.__database_semaphore,
                               url=url_to_be_scanned,
                               connection=self.__connection,
                               poc_object=self.__poc_object)
            except Exception as e:
                print("[-] EXCEPTION OCCURRED ", e)
    while not self.__urls.empty():
        self.crawl(self.__urls.get())
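# A minimal sketch of a same-domain test like URL.is_same_domain, assuming it
# compares the network location of both urls; the project's version may be
# more permissive (e.g. treating subdomains as the same site).
from urllib.parse import urlparse

def is_same_domain_sketch(url_a, url_b):
    return urlparse(url_a).netloc.lower() == urlparse(url_b).netloc.lower()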
def add_if_page_found(self, url):
    """
    Description:
    ------------
    This adds the information to the database if an admin page is found
    :param url: The url to be added to the database
    :return: None
    """
    r = URL().get_head_request(url=url,
                               user_agent=UserAgent.get_user_agent())
    try:
        if r.status_code == 200:
            if url not in self.__admin_pages:
                self.__admin_pages.append(url)
    except AttributeError:
        # r is None when the HEAD request failed
        pass
    self.__thread_semaphore.release()
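# A minimal sketch of the probe above using requests directly, assuming a
# page "exists" when the HEAD request answers 200; the function name and
# parameters are illustrative assumptions.
import requests

def admin_page_exists_sketch(url, user_agent):
    try:
        r = requests.head(url, headers={"User-Agent": user_agent}, timeout=10)
        return r.status_code == 200
    except requests.RequestException:
        return False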
def __init__(self, project_id, url, thread_semaphore, database_semaphore,
             soup_object, connection, poc_object):
    self.__project_id = project_id
    self.__url = url
    self.__thread_semaphore = thread_semaphore
    self.__database_semaphore = database_semaphore
    self.__connection = connection
    self.__poc_object = poc_object
    # NOTE: self.__soup_object is the original unaltered BeautifulSoup object
    if soup_object is not None:
        self.__soup_object = soup_object
    else:
        r = URL().get_request(url=self.__url,
                              user_agent=UserAgent.get_user_agent())
        self.__soup_object = (BeautifulSoup(r.content, "html.parser")
                              if r is not None else None)
    if URL.is_query_present(self.__url):
        self.__check_escape_sequence_vulnerability()
        self.__check_numerical_vulnerability()
def __get_robots(self):
    """
    Description:
    ------------
    This method is used to get the robots.txt file from the remote server
    :return: None
    """
    self.__thread_semaphore.acquire()
    robots_url = URL.join_urls(self.__url, "/robots.txt")
    print("[+] GETTING ROBOTS.TXT AT ", robots_url)
    # Probe robots.txt itself, not the base url
    r = URL().get_head_request(url=robots_url,
                               user_agent=UserAgent.get_user_agent())
    if r is not None:
        if r.status_code == 200:
            robots_file_location = ("projects/project-" +
                                    str(self.__project_id) + "/robots.txt")
            File.download_file(local_file_location=robots_file_location,
                               remote_file_location=robots_url)
        else:
            print("[-] NO robots.txt FOUND ON THE SERVER")
    self.__thread_semaphore.release()
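# A minimal sketch, assuming File.download_file streams the remote file to
# disk with requests; the parameter names are taken from the call above,
# everything else is an assumption about the helper's behaviour.
import requests

def download_file_sketch(local_file_location, remote_file_location):
    r = requests.get(remote_file_location, stream=True, timeout=10)
    with open(local_file_location, "wb") as fh:
        for chunk in r.iter_content(chunk_size=8192):
            fh.write(chunk)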