def test_standard_numeric_sorting(self):
    """
    Tests the standard numeric sorting.

    The fixtures expect subjects whose first label is a number to come
    out in numeric order ("10" after "3"), for bare domains as well as
    for their URL counterparts.
    """

    shuffled = [
        "1.example.com",
        "2.example.com",
        "3.example.com",
        "11.example.com",
        "12.example.com",
        "10.example.com",
    ]
    ordered = [
        "1.example.com",
        "2.example.com",
        "3.example.com",
        "10.example.com",
        "11.example.com",
        "12.example.com",
    ]

    # The URL variants are derived from the very same fixtures.
    shuffled_urls = ["http://" + subject for subject in shuffled]
    ordered_urls = ["http://" + subject for subject in ordered]

    self.assertEqual(ordered, List(shuffled).custom_format(Sort.standard))
    self.assertEqual(
        ordered_urls, List(shuffled_urls).custom_format(Sort.standard)
    )
def _json_print(self):  # pragma: no cover
    """
    Management of the json template.

    Writes :code:`self.data_to_print` into :code:`self.output` as JSON.
    If the output file already exists, its (list) content is extended
    with the new data and sorted before being written back.

    :raise Exception:
        When no output is given or when the existing output does not
        contain a list.
    """

    if not self.output:
        # Nothing to write to.
        raise Exception("Empty output given.")

    if not PyFunceble.path.isfile(self.output):
        # First write: the data are saved as they are.
        #
        # Note: this method is only expected to be called with a list
        # formatted self.data_to_print, so no check is done here.
        Dict(self.data_to_print).to_json(self.output)
        return

    # The output already exists: we merge into what is already there.
    existing = Dict().from_json(self.file_output_instance.read())

    if not isinstance(existing, list):
        raise Exception("Output not correctly formatted.")

    existing.extend(self.data_to_print)

    # The standard sorting is always applied first ...
    merged = List(existing).custom_format(Sort.standard)

    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        # ... and the hierarchical one on top of it when activated.
        merged = List(merged).custom_format(Sort.hierarchical)

    Dict(merged).to_json(self.output)
def test_merge(self):
    """
    Test List().merge().

    Each case is made of the list to merge into :code:`self.main_list`,
    the extra positional arguments given to :code:`merge()` and the
    expected outcome. Cases are checked in order.
    """

    cases = [
        (
            ["hello", "world", 5, {"world": "hello"}],
            (),
            ["hello", "world", 5, {"hello": "world", "world": "hello"}],
        ),
        (
            ["hello", "world", 5, {"world": "hello"}],
            (False,),
            [
                "hello",
                "world",
                5,
                {"hello": "world"},
                [1, 2, 3],
                {"world": "hello"},
            ],
        ),
        (
            ["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]],
            (),
            ["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]],
        ),
        (
            ["hello", "world", 5, {"hello": "you!"}, [1, 2, 4, 5]],
            (False,),
            [
                "hello",
                "world",
                5,
                {"hello": "world"},
                [1, 2, 3],
                {"hello": "you!"},
                [1, 2, 4, 5],
            ],
        ),
    ]

    for to_merge, extra_arguments, expected in cases:
        actual = List(self.main_list).merge(to_merge, *extra_arguments)

        self.assertEqual(expected, actual)
def __sort_generated_files(cls):
    """
    Sort the content of all files we generated.

    Every file found under the output directory is rewritten with its
    first 3 lines untouched and the remaining lines sorted. JSON files,
    :code:`.keep` and :code:`.gitignore` are left alone.
    """

    output_root = (
        PyFunceble.OUTPUT_DIRECTORY + PyFunceble.OUTPUTS["parent_directory"]
    )

    for root, _, files in PyFunceble.walk(output_root):
        for filename in files:
            if filename.endswith(".json") or filename in [".keep", ".gitignore"]:
                # Not a file whose lines we have to sort.
                continue

            target = File(
                "{0}{1}{2}".format(root, PyFunceble.directory_separator, filename)
            )
            lines = target.read().splitlines()

            # The first 3 lines are kept in place, only the rest is sorted.
            head, body = lines[:3], lines[3:]

            if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
                sorting = Sort.hierarchical
            else:
                sorting = Sort.standard

            formatted = List(body).custom_format(sorting)

            target.write("\n".join(head + formatted), overwrite=True)
def file(self):
    """
    Manage the case that need to test each domain of a given file path.

    Note:
        1 domain per line.
    """

    extracted = self._extract_domain_from_file()

    AutoContinue().restore()

    if PyFunceble.CONFIGURATION["adblock"]:
        list_to_test = self.adblock_decode(extracted)
    else:
        list_to_test = [self._format_domain(line) for line in extracted]

    PyFunceble.Clean(list_to_test)

    if PyFunceble.CONFIGURATION["inactive_database"]:
        Database().to_test()

        tested_file = PyFunceble.CONFIGURATION["file_to_test"]
        inactive_db = PyFunceble.CONFIGURATION["inactive_db"]

        # Subjects from the inactive database which have to be retested
        # along with the content of the file (if any).
        queued = None

        if tested_file in inactive_db and "to_test" in inactive_db[tested_file]:
            queued = inactive_db[tested_file]["to_test"]

        if queued:
            list_to_test.extend(queued)

    # Those entries are never tested.
    regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$"  # pylint: disable=line-too-long

    kept = Regex(list_to_test, regex_delete).not_matching_list()
    list_to_test = List(kept).format()

    if PyFunceble.CONFIGURATION["filter"]:
        # Only the subjects matching the given filter are kept.
        matching = Regex(
            list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
        ).matching_list()
        list_to_test = List(matching).format()

    # We resume after what was already tested.
    remaining = list_to_test[PyFunceble.CONFIGURATION["counter"]["number"]["tested"] :]

    # NOTE(review): this raises IndexError when the list is empty,
    # nothing handles that case here.
    last_subject = list_to_test[-1]

    for subject in remaining:
        self.domain(subject, last_subject)
def list_of_mined(cls):
    """
    Provide the list of mined so they can be added to the list queue.

    :return:
        The list of mined domains or URL. An empty list is given when
        the mining is deactivated or when nothing was mined for the
        file we are testing.
    :rtype: list
    """

    if not PyFunceble.CONFIGURATION["mining"]:
        # The mining is deactivated.
        return []

    tested_file = PyFunceble.INTERN["file_to_test"]

    if tested_file not in PyFunceble.INTERN["mined"]:
        # Nothing was mined for the file we are testing.
        return []

    collected = []

    # Each index of the mined database of the file holds a list of
    # mined subjects: we gather them all.
    for mined_subjects in PyFunceble.INTERN["mined"][tested_file].values():
        collected.extend(mined_subjects)

    return List(collected).format()
def file_url(self):
    """
    Manage the case that we have to test a file

    .. note::
        1 URL per line.

    :return:
        The result of :code:`self.url` for each non empty URL to test,
        or an empty list (after printing a message) when there is
        nothing to test.
    :rtype: list
    """

    # We get, format, clean the list of URL to test.
    list_to_test = self._file_list_to_test_filtering()

    # We keep the current state of the list in case the filtering
    # against the database leaves us with nothing.
    not_filtered = list_to_test

    try:
        # We remove the elements which are in the database from what
        # is left to test.
        list_to_test = List(
            list(
                set(
                    list_to_test[PyFunceble.INTERN["counter"]["number"]["tested"] :]
                )
                - set(PyFunceble.INTERN["flatten_inactive_db"])
            )
        ).format()

        # Raises IndexError when nothing is left after the filtering.
        _ = list_to_test[-1]
    except IndexError:
        # We fall back to the list which includes the elements from
        # the database.
        list_to_test = not_filtered[
            PyFunceble.INTERN["counter"]["number"]["tested"] :
        ]

    del not_filtered

    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        # The hierarchical sorting is desired by the user.
        list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

    if not list_to_test:
        # A comprehension over an empty list never evaluates its body,
        # so indexing [-1] inside of it can not be used to detect the
        # emptiness: we have to check it explicitly in order to inform
        # the user.
        print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
        return []

    try:
        last_url = list_to_test[-1]

        # We test each URL from the list to test.
        return [self.url(x, last_url) for x in list_to_test if x]
    except IndexError:
        # Kept from the previous implementation: an IndexError which
        # escapes self.url is reported the same way instead of being
        # propagated.
        print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
        return None
def _get_list_to_of_subjects_to_test_from_file(
        self, file_object):  # pragma: no cover
    """
    Give a file object, we construct/get the list of subject to test.

    :param file_object:
        The iterable to read the lines from. It is given as is to
        :code:`AdBlock` or iterated line by line.

    :return:
        An iterator which gives the subjects to test followed by the
        subjects to retest from the inactive database.
    :rtype: chain
    """

    to_retest_inactive_db = self.inactive_db.get_to_retest()

    if PyFunceble.CONFIGURATION["adblock"]:
        # The adblock decoding never uses a pool of processes, so we do
        # not spawn one for it even if the multiprocessing is activated.
        formatted_subjects = set(AdBlock(file_object).decode())
    elif PyFunceble.CONFIGURATION["multiprocess"]:
        with Pool(PyFunceble.CONFIGURATION["maximal_processes"]) as pool:
            formatted_subjects = set(pool.map(self._format_line, file_object))
    else:
        formatted_subjects = {self._format_line(x) for x in file_object}

    # We do not want to test again what was already handled, and the
    # subjects to retest are appended on their own at the end.
    subjects_to_test = (
        formatted_subjects
        - self.autocontinue.get_already_tested()
        - self.inactive_db.get_already_tested()
        - to_retest_inactive_db
    )

    # If the filtering leaves nothing, we fall back to everything we
    # extracted from the file.
    subjects_to_test = list(subjects_to_test or formatted_subjects)

    if not PyFunceble.CONFIGURATION["multiprocess"]:
        # The sorting is only applied outside of the multiprocessing mode.
        if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
            sorting = Sort.hierarchical
        else:
            sorting = Sort.standard

        subjects_to_test = List(subjects_to_test).custom_format(sorting)

    return chain(subjects_to_test, to_retest_inactive_db)
def test_hierarchical_sorting(self):
    """
    Tests hierarchical sorting.

    Checked against :code:`self.data_list` and against its URL
    counterpart :code:`self.data_url_list`.
    """

    sorted_subjects = [
        "google.com",
        "adservice.google.com",
        "hello_world.google.com",
        "s0-2mdn-net.l.google.com",
        "ssl-google-analytics.l.google.com",
        "www-google-analytics.l.google.com",
        "googleadservices.com",
        "pagead2.googleadservices.com",
        "partner.googleadservices.com",
        "www.googleadservices.com",
        "google-analytics.com",
        "ssl.google-analytics.com",
        "www.google-analytics.com",
        "chart.googleapis.com",
        "ad-creatives-public.commondatastorage.googleapis.com",
        "imasdk.googleapis.com",
        "ade.googlesyndication.com",
        "pagead2.googlesyndication.com",
        "tpc.googlesyndication.com",
        "www.googletagmanager.com",
        "www.googletagservices.com",
        "redirector.googlevideo.com",
        "0.gravatar.com",
        "1.gravatar.com",
        "hello",
    ]

    self.assertEqual(
        sorted_subjects, List(self.data_list).custom_format(Sort.hierarchical)
    )

    # The URLs are expected in the same order as the bare subjects.
    sorted_urls = ["https://" + subject for subject in sorted_subjects]

    self.assertEqual(
        sorted_urls, List(self.data_url_list).custom_format(Sort.hierarchical)
    )
def file(self):
    """
    Manage the case that need to test each domain of a given file path.

    .. note::
        1 domain per line.
    """

    # We get, format, filter, clean the list to test.
    list_to_test = self._file_list_to_test_filtering()

    if PyFunceble.CONFIGURATION["idna_conversion"]:
        # The conversion to IDNA is desired.
        list_to_test = domain2idna(list_to_test)

    # We pick the sorting the user asked for.
    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        sorting = Sort.hierarchical
    else:
        sorting = Sort.standard

    list_to_test = List(list_to_test).custom_format(sorting)

    try:
        # We resume after what was already tested.
        remaining = list_to_test[
            PyFunceble.CONFIGURATION["counter"]["number"]["tested"] :
        ]

        # An empty list raises IndexError here, before any test runs.
        last_subject = list_to_test[-1]

        for subject in remaining:
            self.domain(subject, last_subject)
    except IndexError:
        # We print a message on screen.
        print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
def _add_to_test(self, to_add):
    """
    Add an element or a list of element into
    :code:`PyFunceble.INTERN['inactive_db'][PyFunceble.INTERN["file_to_test"]]['to_test']`.

    Nothing is done when the database subsystem is deactivated.
    Otherwise the :code:`to_test` index is formatted and the database
    is backed up.

    :param to_add: The domain, IP or URL to add.
    :type to_add: str|list
    """

    if not PyFunceble.CONFIGURATION["inactive_database"]:
        # The database subsystem is deactivated.
        return

    if not isinstance(to_add, list):
        # A single element is handled like a list of one element.
        to_add = [to_add]

    tested_file = PyFunceble.INTERN["file_to_test"]

    # We get the database of the file we are testing, and we create it
    # if it does not exist yet.
    file_database = PyFunceble.INTERN["inactive_db"].setdefault(tested_file, {})

    if "to_test" in file_database:
        file_database["to_test"].extend(to_add)
    else:
        file_database["to_test"] = to_add

    # We format the list to test in order to avoid duplicate.
    file_database["to_test"] = List(file_database["to_test"]).format()

    # And we finally backup the database.
    self._backup()
def _add(self, to_add):
    """
    Add the currently mined information to the mined "database".

    Nothing is done when the mining is deactivated. Otherwise each
    index of the given dict is merged into the database of the file we
    are testing, formatted, and everything is backed up.

    :param to_add: The element to add.
    :type to_add: dict
    """

    if not PyFunceble.CONFIGURATION["mining"]:
        # The mining is deactivated.
        return

    tested_file = PyFunceble.INTERN["file_to_test"]

    # We get the mined database of the file we are testing, and we
    # initiate it if it does not exist yet.
    file_mined = PyFunceble.INTERN["mined"].setdefault(tested_file, {})

    for element, mined_subjects in to_add.items():
        if element in file_mined:
            # The element is already known: we extend it.
            file_mined[element].extend(mined_subjects)
        else:
            # The element is not known yet: we initiate it.
            file_mined[element] = mined_subjects

        # We format the added information in order to avoid duplicate.
        file_mined[element] = List(file_mined[element]).format()

    # We backup everything.
    self._backup()
def _extensions(self, line): """ Extract the extension from the given line. :param line: The line from the official public suffix repository. :type line: str """ # We strip the parsed line. line = line.strip() if not line.startswith("//") and "." in line: # * The parsed line is not a commented line. # and # * There is a point in the parsed line. line = line.encode("idna").decode("utf-8") if line.startswith("*."): # The parsed line start with `*.`. # We remove the first two characters. line = line[2:] # We we split the points and we get the last element. # Explanation: The idea behind this action is to # always get the extension. extension = line.split(".")[-1] if extension in self.public_suffix_db: # The extension is alrady in our database. # We update the content of the 1st level TDL with # the content of the suffix. # In between, we format so that we ensure that there is no # duplicate in the database index content. self.public_suffix_db[extension] = List( self.public_suffix_db[extension] + [line]).format() else: # The extension is not already in our database. # We append the currently formatted extension and the line content. self.public_suffix_db.update({extension: [line]})
def __generate_complements(self):  # pragma: no cover
    """
    Generate the complements from the given list of tested.

    For each tested domain (which is not a subdomain) the counterpart
    with, respectively without, the :code:`www.` prefix is generated.

    :return: The complements which were not already tested.
    :rtype: set
    """

    # We get the list of domains we are going to work with.
    domains = []

    for _, subjects in self.get_already_tested():
        for subject in subjects:
            if PyFunceble.Check(subject).is_subdomain():
                continue

            if PyFunceble.Check(subject).is_domain():
                domains.append(subject)

    # We generate the one without "www." if "www." is given.
    stripped = [domain[4:] for domain in domains if domain.startswith("www.")]
    domains.extend(stripped)

    # We generate the one with "www." if "www." is not given.
    # Note: this also covers the ones we just stripped.
    prefixed = [
        "www.{0}".format(domain)
        for domain in domains
        if not domain.startswith("www.")
    ]
    domains.extend(prefixed)

    # We remove the already tested subjects.
    complements = set(List(domains).format())

    return complements - self.get_already_tested()
def _format_adblock_decoded(cls, to_format, result=None):
    """
    Format the extracted adblock line before passing it to the system.

    Arguments:
        - to_format: str
            The extracted line from the file.
        - result: None or list
            The list of extracted domain.

    Returns: list
        The list of extracted domains.
    """

    result = result or []

    for data in List(to_format).format():
        if not data:
            continue

        # We look for the first separator (in that exact order) which
        # is present in the currently read data.
        separator = next((char for char in "#,~!|" if char in data), None)

        if separator is not None:
            # NOTE(review): we return right away, which means that the
            # elements which follow `data` in `to_format` are not read.
            return cls._format_adblock_decoded(data.split(separator), result)

        if ExpirationDate.is_domain_valid(data) or ExpirationDate.is_ip_valid(
            data
        ):
            result.append(data)

    return result
def url_file(self):
    """
    Manage the case that we have to test a file

    Note:
        1 URL per line.
    """

    list_to_test = self._extract_domain_from_file()

    AutoContinue().restore()
    PyFunceble.Clean(list_to_test)

    if PyFunceble.CONFIGURATION["inactive_database"]:
        Database().to_test()

        tested_file = PyFunceble.CONFIGURATION["file_to_test"]
        inactive_db = PyFunceble.CONFIGURATION["inactive_db"]

        # URLs from the inactive database which have to be retested
        # along with the content of the file (if any).
        queued = None

        if tested_file in inactive_db and "to_test" in inactive_db[tested_file]:
            queued = inactive_db[tested_file]["to_test"]

        if queued:
            list_to_test.extend(queued)

    if PyFunceble.CONFIGURATION["filter"]:
        # Only the URLs matching the given filter are kept.
        matching = Regex(
            list_to_test, PyFunceble.CONFIGURATION["filter"], escape=True
        ).matching_list()
        list_to_test = List(matching).format()

    # We resume after what was already tested.
    remaining = list_to_test[PyFunceble.CONFIGURATION["counter"]["number"]["tested"] :]

    # NOTE(review): this raises IndexError when the list is empty,
    # nothing handles that case here.
    last_url = list_to_test[-1]

    for url in remaining:
        self.url(url, last_url)
def test_hierarchical_numeric_sorting(self):
    """
    Tests the hierarchical numeric sorting.

    The fixtures expect each numeric subdomain to be directly followed
    by its own subdomain, with the numeric labels in numeric order.
    """

    shuffled = [
        "1.example.com",
        "2.example.com",
        "3.example.com",
        "11.example.com",
        "12.example.com",
        "10.example.com",
        "hello.1.example.com",
        "hello.2.example.com",
        "hello.3.example.com",
        "hello.11.example.com",
        "hello.12.example.com",
        "hello.10.example.com",
    ]
    ordered = [
        "1.example.com",
        "hello.1.example.com",
        "2.example.com",
        "hello.2.example.com",
        "3.example.com",
        "hello.3.example.com",
        "10.example.com",
        "hello.10.example.com",
        "11.example.com",
        "hello.11.example.com",
        "12.example.com",
        "hello.12.example.com",
    ]

    self.assertEqual(ordered, List(shuffled).custom_format(Sort.hierarchical))
def adblock_decode(self, list_to_test):
    """
    Convert the adblock format into a readable format which is understood
    by the system.

    Argument:
        - list_to_test: list
            The read content of the given file.

    Returns: list
        The list of domain to test.
    """

    # Captures what follows `||` up to the first `/`, `$` or `^`.
    domain_regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"
    # Captures what precedes the first run of `#`, as long as it
    # contains a point.
    element_regex = r"(.*\..*)(?:#{1,}.*)"

    decoded = []

    for line in list_to_test:
        domain_match = Regex(
            line, domain_regex, return_data=True, rematch=True, group=0
        ).match()
        element_match = Regex(
            line, element_regex, return_data=True, rematch=True, group=0
        ).match()

        if domain_match:
            decoded.extend(domain_match)

        if element_match:
            # This extraction needs to be splitted and validated first.
            formatted = self._format_adblock_decoded(element_match)
            decoded.extend(List(formatted).format())

    return decoded
def __get_or_generate_complements_json(self):  # pragma: no cover
    """
    Get or generate the complements while working with
    as JSON formatted database.

    :return:
        The saved complements if there are some, the freshly generated
        ones otherwise.

    NOTE(review): the freshly generated complements are returned as a
    set whereas the saved ones are returned as stored (saved as a list
    here) - confirm that the callers handle both.
    """

    if "complements" in self.database[self.filename]:
        # We get the complements we still have to test.
        return self.database[self.filename]["complements"]

    # The complements are not saved.

    # We get the list of domains we are going to work with.
    domains = []

    for _, subjects in self.get_already_tested():
        for subject in subjects:
            if PyFunceble.Check(subject).is_subdomain():
                continue

            if PyFunceble.Check(subject).is_domain():
                domains.append(subject)

    # We generate the one without "www." if "www." is given.
    stripped = [domain[4:] for domain in domains if domain.startswith("www.")]
    domains.extend(stripped)

    # We generate the one with "www." if "www." is not given.
    prefixed = [
        "www.{0}".format(domain)
        for domain in domains
        if not domain.startswith("www.")
    ]
    domains.extend(prefixed)

    # We remove the already tested subjects.
    complements = set(List(domains).format()) - self.get_already_tested()

    # We save the constructed list of complements.
    self.database[self.filename]["complements"] = list(complements)
    self.save()

    return complements
def _format_decoded(self, to_format, result=None):  # pragma: no cover
    """
    Format the extracted adblock line before passing it to the system.

    :param str to_format: The extracted line from the file.

    :param list result: A list of the result of this method.

    :return: The list of domains or IP to test.
    :rtype: list
    """

    # We start from an empty list if no result is given.
    result = result or []

    for data in List(to_format).format():
        if not data:
            # The currently read line is empty.
            continue

        # We look for the first separator (in that exact order) which
        # is present in the currently read line.
        separator = next((char for char in "^#,!|" if char in data), None)

        if separator is not None:
            # We recall this method with the current result state and
            # the splitted data.
            return self._format_decoded(data.split(separator), result)

        base = self._extract_base(data)

        # We create an instance of the checker.
        checker = Check(base)

        if not base:
            # Nothing was extracted.
            continue

        if checker.is_domain() or checker.is_ipv4():
            # The extracted base is a valid domain or a valid IP.
            result.append(base)
            continue

        # Neither a valid domain nor a valid IP: we try to get the
        # url base.
        url_base = checker.is_url(return_base=True)

        if url_base:
            result.append(url_base)

    return result
def decode(self):
    """
    Decode/extract the domains to test from the adblock formated file.

    :return: The list of domains to test.
    :rtype: list
    """

    # Captures what follows `||` up to the first `/`, `$` or `^`.
    regex = r"^(?:.*\|\|)([^\/\$\^]{1,}).*$"

    # Captures the quoted value of an attribute selector
    # (e.g. `##[href^="..."]`).
    regex_v3 = (
        r"(?:#+(?:[a-z]+?)?\[[a-z]+(?:\^|\*)\=(?:\'|\"))(.*\..*)(?:(?:\'|\")\])"
    )

    # Captures what is between a leading and a trailing `|`.
    regex_v4 = r"^\|(.*\..*)\|$"

    # The options for which the extracted elements are always kept.
    accepted_options = ("third-party", "script", "popup", "xmlhttprequest")

    decoded = []

    for line in self.to_format:
        # Note: the order of the extractions is the historical one
        # (first, fourth, third).
        rematch = Regex(
            line, regex, return_data=True, rematch=True, group=0
        ).match()
        rematch_v4 = Regex(
            line, regex_v4, return_data=True, rematch=True, group=0
        ).match()
        rematch_v3 = Regex(
            line, regex_v3, return_data=True, rematch=True, group=0
        ).match()

        if rematch and self.options_separator not in line:
            # No option to take care of: we keep the extracted elements.
            decoded.extend(self._extract_base(rematch))
        elif rematch:
            options = line.split(self.options_separator)[-1].split(
                self.option_separator
            )

            if not options[-1] or any(
                option in options for option in accepted_options
            ):
                decoded.extend(self._extract_base(rematch))

            extra = self._handle_options(options)

            if extra and isinstance(extra, list):  # pragma: no cover
                # The options gave us more elements to work with.
                extra.extend(self._extract_base(rematch))
                decoded.extend(self._extract_base(extra))
            elif extra:
                decoded.extend(self._extract_base(rematch))

        if rematch_v4:
            # The fourth extraction was successful.
            decoded.extend(List(self._format_decoded(rematch_v4)).format())

        if rematch_v3:
            # The third extraction was successful.
            decoded.extend(List(self._format_decoded(rematch_v3)).format())

    return List(decoded).format()
def _merge(self):
    """
    Merge the real database with the older one which
    has already been set into :code:`PyFunceble.INTERN["inactive_db"]`

    Nothing is done when the database subsystem is deactivated.
    """

    if not PyFunceble.CONFIGURATION["inactive_database"]:
        # The database subsystem is deactivated.
        return

    # We get the content of the database file.
    file_content = Dict().from_json(File(self.inactive_db_path).read())

    for top_key, top_content in file_content.items():
        if top_key not in PyFunceble.INTERN["inactive_db"]:
            # The top key is not known yet: we take it as it is.
            PyFunceble.INTERN["inactive_db"][top_key] = top_content
            continue

        # The top key is already known: we merge its lower keys.
        known = PyFunceble.INTERN["inactive_db"][top_key]

        for low_key, low_content in top_content.items():
            if low_key not in known:  # pragma: no cover
                # The lower key is not known yet: we take it as it is.
                known[low_key] = low_content
                continue

            # The lower key is already known: we extend it and we
            # format the outcome to ensure that there is no duplicate.
            known[low_key].extend(low_content)
            known[low_key] = List(known[low_key]).format()
def _file_list_to_test_filtering(self):
    """
    Unify the way we work before testing file contents.

    :return: The formatted, filtered and sorted list to test.
    :rtype: list
    """

    extracted = self._extract_domain_from_file()

    # We save the original list to test globally.
    # Note: the very same list object is extended with the mined
    # subjects just below.
    PyFunceble.INTERN["extracted_list_to_test"] = extracted

    mined = Mining().list_of_mined()

    if mined:
        extracted.extend(mined)

    # We generate the directory structure.
    DirectoryStructure()

    # We restore the data from the last session if it does exist.
    AutoContinue().restore()

    if PyFunceble.CONFIGURATION["adblock"]:
        # The adblock decoder is activated.
        list_to_test = AdBlock(extracted).decode()
    else:
        list_to_test = [self._format_domain(line) for line in extracted]

    # We clean the output directory if it is needed.
    PyFunceble.Clean(list_to_test)

    # We set the start time.
    ExecutionTime("start")

    # We get the list we have to test in the current session
    # (from the database).
    Inactive().to_test()

    if PyFunceble.CONFIGURATION["inactive_database"]:
        tested_file = PyFunceble.INTERN["file_to_test"]
        inactive_db = PyFunceble.INTERN["inactive_db"]

        # What the database asks us to test again (if any).
        queued = None

        if tested_file in inactive_db and "to_test" in inactive_db[tested_file]:
            queued = inactive_db[tested_file]["to_test"]

        if queued:
            list_to_test.extend(queued)

    # Those entries are never tested.
    regex_delete = r"localhost$|localdomain$|local$|broadcasthost$|0\.0\.0\.0$|allhosts$|allnodes$|allrouters$|localnet$|loopback$|mcastprefix$|ip6-mcastprefix$|ip6-localhost$|ip6-loopback$|ip6-allnodes$|ip6-allrouters$|ip6-localnet$"  # pylint: disable=line-too-long

    # We load the flatten version of the database.
    PyFunceble.INTERN["flatten_inactive_db"] = Inactive().content()

    unfiltered = list_to_test

    try:
        # We drop the undesired entries along with what is already
        # into the database.
        desired = set(Regex(list_to_test, regex_delete).not_matching_list())
        list_to_test = List(
            list(desired - set(PyFunceble.INTERN["flatten_inactive_db"]))
        ).format()

        # Raises IndexError when nothing is left.
        _ = list_to_test[-1]
    except IndexError:
        # We test without the database removing.
        list_to_test = List(
            Regex(unfiltered, regex_delete).not_matching_list()
        ).format()

    del unfiltered

    if PyFunceble.CONFIGURATION["filter"]:
        # Only the elements which match the given filter are kept.
        matching = Regex(
            list_to_test, PyFunceble.CONFIGURATION["filter"], escape=False
        ).matching_list()
        list_to_test = List(matching).format()

    # The standard sorting is always applied ...
    list_to_test = List(list(list_to_test)).custom_format(Sort.standard)

    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        # ... and the hierarchical one on top of it when desired.
        list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

    return list_to_test
def test_standard_sorting(self):
    """
    Tests standard sorting.

    Checked against :code:`self.data_list` and against its URL
    counterpart :code:`self.data_url_list`.
    """

    sorted_subjects = [
        "0.gravatar.com",
        "1.gravatar.com",
        "ad-creatives-public.commondatastorage.googleapis.com",
        "ade.googlesyndication.com",
        "adservice.google.com",
        "chart.googleapis.com",
        "googleadservices.com",
        "google-analytics.com",
        "google.com",
        "hello",
        "hello_world.google.com",
        "imasdk.googleapis.com",
        "pagead2.googleadservices.com",
        "pagead2.googlesyndication.com",
        "partner.googleadservices.com",
        "redirector.googlevideo.com",
        "s0-2mdn-net.l.google.com",
        "ssl.google-analytics.com",
        "ssl-google-analytics.l.google.com",
        "tpc.googlesyndication.com",
        "www.googleadservices.com",
        "www.google-analytics.com",
        "www-google-analytics.l.google.com",
        "www.googletagmanager.com",
        "www.googletagservices.com",
    ]

    self.assertEqual(
        sorted_subjects, List(self.data_list).custom_format(Sort.standard)
    )

    # The expected URLs are the expected subjects, in the same order,
    # behind the https scheme.
    sorted_urls = [f"https://{subject}" for subject in sorted_subjects]

    self.assertEqual(
        sorted_urls, List(self.data_url_list).custom_format(Sort.standard)
    )
def file(self):
    """
    Manage the case that need to test each domain of a given file path.

    .. note::
        1 domain per line.

    :return:
        The result of :code:`self.domain` for each non empty subject to
        test, or an empty list (after printing a message) when there is
        nothing to test.
    :rtype: list
    """

    # We get, format, filter, clean the list to test.
    list_to_test = self._file_list_to_test_filtering()

    if PyFunceble.CONFIGURATION["idna_conversion"]:
        # We have to convert domains to idna.
        list_to_test = domain2idna(list_to_test)

    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        # The hierarchical sorting is desired by the user.
        list_to_test = List(list_to_test).custom_format(Sort.hierarchical)
    else:
        # The hierarchical sorting is not desired by the user.
        list_to_test = List(list_to_test).custom_format(Sort.standard)

    # We keep the current state of the list in case the filtering
    # against the database leaves us with nothing.
    not_filtered = list_to_test

    try:
        # We remove the elements which are in the database from what
        # is left to test.
        list_to_test = List(
            list(
                set(
                    list_to_test[PyFunceble.INTERN["counter"]["number"]["tested"] :]
                )
                - set(PyFunceble.INTERN["flatten_inactive_db"])
            )
        ).format()

        # Raises IndexError when nothing is left after the filtering.
        _ = list_to_test[-1]
    except IndexError:
        # We fall back to the list which includes the elements from
        # the database.
        list_to_test = not_filtered[
            PyFunceble.INTERN["counter"]["number"]["tested"] :
        ]

    del not_filtered

    if PyFunceble.CONFIGURATION["hierarchical_sorting"]:
        # The filtering went through a set, so the order has to be
        # rebuilt when the hierarchical sorting is desired.
        list_to_test = List(list(list_to_test)).custom_format(Sort.hierarchical)

    if not list_to_test:
        # A comprehension over an empty list never evaluates its body,
        # so indexing [-1] inside of it can not be used to detect the
        # emptiness: we have to check it explicitly in order to inform
        # the user.
        print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
        return []

    try:
        last_subject = list_to_test[-1]

        # We test each element of the list to test.
        return [self.domain(x, last_subject) for x in list_to_test if x]
    except IndexError:
        # Kept from the previous implementation: an IndexError which
        # escapes self.domain is reported the same way instead of being
        # propagated.
        print(PyFunceble.Fore.CYAN + PyFunceble.Style.BRIGHT + "Nothing to test.")
        return None