def setUp(self) -> None:
    """
    Prepares the helper and the sample lists shared by the tests.
    """

    # Strings only; contains a duplicate ("hello") and an empty string.
    self.str_test_subject = list(
        ("hello", "world", "", "!", " ", "hello", "world!", "Hello")
    )

    # Heterogeneous entries: a tuple, strings, a dict, numbers, None and
    # an empty string.
    mixed = [("Hello", "World"), "Hello", {"hello": "world"}, "World"]
    mixed += [1, 4.5, None, ""]
    self.mixed_test_subject = mixed

    self.helper = ListHelper()
def test_list_all_subdirectories(self) -> None:
    """
    Tests the method which let us list all subdirectories.
    """

    base = self.temp_path.name
    names = [secrets.token_hex(6) for _ in range(10)]
    depths = (1, 2, 3)

    # For every random name, create <name>, <name>/<name> and
    # <name>/<name>/<name> below the temporary directory.
    for name in names:
        for depth in depths:
            self.helper.set_path(
                os.path.join(base, *([name] * depth))
            ).create()

    self.helper.set_path(base)

    every_directory = [
        os.path.join(base, *([name] * depth))
        for depth in depths
        for name in names
    ]
    expected = ListHelper(every_directory).remove_duplicates().sort().subject

    self.assertEqual(expected, self.helper.list_all_subdirectories())
def https(self) -> Optional[List[str]]:
    """
    Request the chosen record through the https protocol.

    Nameservers are tried one after the other; the answers collected so
    far are returned without duplicates.
    """

    self.lookup_record.used_protocol = "HTTPS"

    answers = []
    candidates = self._mix_order(self.nameservers.get_nameservers())

    for nameserver in candidates:
        PyFunceble.facility.Logger.debug(
            "Started to query information of %r from %r",
            self.subject,
            nameserver,
        )

        try:
            response = dns.query.https(
                self.query_message, nameserver, timeout=self.query_timeout
            )
            found = self._get_result_from_response(response)

            if found:
                answers.extend(found)
                self.lookup_record.nameserver = nameserver

                PyFunceble.facility.Logger.debug(
                    "Successfully queried information of %r from %r.",
                    self.subject,
                    nameserver,
                )

                if not self.trust_server:  # pragma: no cover: Per case.
                    break

            if self.trust_server:  # pragma: no cover: Per case.
                break
        except (
            # Example: Resource temporarily unavailable.
            dns.exception.Timeout,
            socket.error,
            # Example: got a response from XXX instead of XXX.
            dns.query.UnexpectedSource,
            # Example: A DNS query response does not respond to the
            # question asked.
            dns.query.BadResponse,
        ):
            pass
        except ValueError:
            # Example: Input is malformed.
            break

        # Only reached when the loop was not left above.
        PyFunceble.facility.Logger.debug(
            "Unsuccessfully queried information of %r from %r. Sleeping %fs.",
            self.subject,
            nameserver,
            self.delay,
        )

        time.sleep(self.delay)

    return ListHelper(answers).remove_duplicates().subject
def start(self, max_workers: Optional[int] = None):
    """
    Starts the generation of the dataset file.

    :param max_workers:
        The maximal number of workers given to the thread pool which
        parses the downloaded lines.
    """

    lines = DownloadHelper(self.UPSTREAM_LINK).download_text().split("\n")

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max_workers
    ) as pool:
        for parsed in pool.map(self.parse_line, lines):
            for extension, suffixes in parsed.items():
                if extension in self.database:
                    self.database[extension].extend(suffixes)
                else:
                    self.database[extension] = suffixes

                PyFunceble.facility.Logger.debug(
                    "Got: extension: %r ; suffixes: %r.", extension, suffixes
                )

    # Normalize every entry: no duplicates, no empty strings, sorted.
    for extension in self.database:
        cleaner = ListHelper(self.database[extension])
        cleaner = cleaner.remove_duplicates().remove_empty().sort()
        self.database[extension] = cleaner.subject

    DictHelper(self.database).to_json_file(self.destination)

    return self
def test_set_subject_through_init(self) -> None:
    """
    Tests that a subject given through the class constructor is the one
    exposed by the :code:`subject` attribute.
    """

    subject = self.mixed_test_subject

    self.assertEqual(subject, ListHelper(subject).subject)
def list_all_subdirectories(self) -> List[str]:
    """
    Provides the list of all subdirectories of the current path.

    An empty list is provided when the current path does not exist.
    """

    found = []

    if self.exists():
        # os.walk yields every directory level; join each reported
        # directory name with the level it was found in.
        found = [
            os.path.join(parent, name)
            for parent, names, _ in os.walk(self.path)
            for name in names
        ]

    return ListHelper(found).remove_duplicates().sort().subject
def test_list_all_files(self) -> None:
    """
    Tests the method which let us list all files.
    """

    base = self.temp_path.name
    names = [secrets.token_hex(6) for _ in range(10)]
    filename = secrets.token_hex(6)
    depths = (1, 2, 3)

    # For every random name, create <name>, <name>/<name> and
    # <name>/<name>/<name>, each of them holding one file.
    for name in names:
        for depth in depths:
            self.helper.set_path(
                os.path.join(base, *([name] * depth))
            ).create()

            with open(
                os.path.join(self.helper.path, filename),
                "w",
                encoding="utf-8",
            ) as file_stream:
                file_stream.write("Hello")

    self.helper.set_path(base)

    every_file = [
        os.path.join(base, *([name] * depth), filename)
        for depth in depths
        for name in names
    ]
    expected = ListHelper(every_file).sort().subject

    self.assertEqual(expected, self.helper.list_all_files())
def get_converted(self) -> List[str]:
    """
    Provides the converted data.

    Every decoder is applied to the data to convert and their outputs
    are merged into a sorted list. Nothing is decoded when the stripped
    data should be ignored.
    """

    decoded = set()

    if not self.should_be_ignored(self.data_to_convert.strip()):
        # Same call order as before: v1, v2, v3, v5, v6 and then v4.
        decoders = (
            self._decode_v1,
            self._decode_v2,
            self._decode_v3,
            self._decode_v5,
            self._decode_v6,
            self._decode_v4,
        )

        for decoder in decoders:
            decoded.update(decoder(self.data_to_convert))

    return ListHelper(list(decoded)).sort().subject
def list_all_files(self) -> List[str]:
    """
    Lists all files of the current path.

    The files located directly inside the current path are listed along
    with the files of each of its subdirectories. An empty list is
    provided when the current path does not exist.

    :return:
        The sorted list of the found files.
    """

    result = []

    if self.exists():
        # The current path itself has to be scanned too. Previously only
        # the entries of list_all_subdirectories() were scanned, so files
        # stored directly inside the current path were never reported.
        for directory in [self.path, *self.list_all_subdirectories()]:
            for element in os.listdir(directory):
                possible_element = os.path.join(directory, element)

                if os.path.isfile(possible_element):
                    result.append(possible_element)

    return ListHelper(result).sort().subject
class TestListHelper(unittest.TestCase):
    """
    Provides the tests of our list helper.
    """

    def setUp(self) -> None:
        """
        Prepares the helper and the sample lists shared by the tests.
        """

        # Strings only; contains a duplicate ("hello") and an empty string.
        self.str_test_subject = list(
            ("hello", "world", "", "!", " ", "hello", "world!", "Hello")
        )

        # Heterogeneous entries: a tuple, strings, a dict, numbers, None
        # and an empty string.
        mixed = [("Hello", "World"), "Hello", {"hello": "world"}, "World"]
        mixed += [1, 4.5, None, ""]
        self.mixed_test_subject = mixed

        self.helper = ListHelper()

    def tearDown(self) -> None:
        """
        Drops the sample lists created for the tests.
        """

        del self.mixed_test_subject
        del self.str_test_subject

    def test_set_subject_return(self) -> None:
        """
        Tests that the method which let us set the subject to work with
        answers with a helper instance.
        """

        self.assertIsInstance(
            self.helper.set_subject(self.mixed_test_subject), ListHelper
        )

    def test_set_subject_method(self) -> None:
        """
        Tests the method which let us set the subject to work with.
        """

        expected = list(self.mixed_test_subject)

        self.helper.set_subject(self.mixed_test_subject)

        self.assertEqual(expected, self.helper.subject)

    def test_set_subject_attribute(self) -> None:
        """
        Tests the overwriting of the :code:`subject` attribute.
        """

        expected = list(self.mixed_test_subject)

        self.helper.subject = self.mixed_test_subject

        self.assertEqual(expected, self.helper.subject)

    def test_set_subject_through_init(self) -> None:
        """
        Tests that a subject given through the class constructor is the
        one exposed by the :code:`subject` attribute.
        """

        subject = self.mixed_test_subject

        self.assertEqual(subject, ListHelper(subject).subject)

    def test_set_subject_not_list(self) -> None:
        """
        Tests the response of the method which let us set the subject for
        the case that the given subject is not a list.
        """

        not_a_list = {"Hello": "World"}

        with self.assertRaises(TypeError):
            self.helper.set_subject(not_a_list)

    def test_remove_empty(self) -> None:
        """
        Tests the method which let us remove the empty strings from a
        given list.
        """

        # Strings only.
        strings = copy.deepcopy(self.str_test_subject)
        self.assertEqual(
            ["hello", "world", "!", " ", "hello", "world!", "Hello"],
            self.helper.set_subject(strings).remove_empty().subject,
        )

        # Mixed types: only the empty string is expected to go away.
        mixed = copy.deepcopy(self.mixed_test_subject)
        self.assertEqual(
            [
                ("Hello", "World"),
                "Hello",
                {"hello": "world"},
                "World",
                1,
                4.5,
                None,
            ],
            self.helper.set_subject(mixed).remove_empty().subject,
        )

    def test_remove_duplicates(self) -> None:
        """
        Tests the method which let us remove the duplicates from a given
        list.
        """

        strings = copy.deepcopy(self.str_test_subject)

        self.assertEqual(
            ["hello", "world", "", "!", " ", "world!", "Hello"],
            self.helper.set_subject(strings).remove_duplicates().subject,
        )

    def test_sort(self) -> None:
        """
        Tests the method which let us sort a given list.
        """

        strings = copy.deepcopy(self.str_test_subject)

        self.assertEqual(
            ["", " ", "!", "hello", "hello", "Hello", "world", "world!"],
            self.helper.set_subject(strings).sort().subject,
        )

    def test_sort_reverse(self) -> None:
        """
        Tests the method which let us sort a given list in reverse order.
        """

        strings = copy.deepcopy(self.str_test_subject)

        self.assertEqual(
            ["world!", "world", "hello", "hello", "Hello", "!", " ", ""],
            self.helper.set_subject(strings).sort(reverse=True).subject,
        )

    def test_custom_sort(self) -> None:
        """
        Tests the method which let us sort a given list with a custom
        method.
        """

        def last_character(value):
            """
            Provides the last character of the given value, or the value
            itself when it is empty.
            """

            return value[-1] if value else value

        strings = copy.deepcopy(self.str_test_subject)

        self.assertEqual(
            ["", " ", "!", "world!", "world", "hello", "hello", "Hello"],
            self.helper.set_subject(strings).custom_sort(last_character).subject,
        )
def get_subjects_from_line(
    line: str,
    checker_type: str,
    *,
    adblock_inputline2subject: Optional[AdblockInputLine2Subject] = None,
    wildcard2subject: Optional[Wildcard2Subject] = None,
    rpz_policy2subject: Optional[RPZPolicy2Subject] = None,
    rpz_inputline2subject: Optional[RPZInputLine2Subject] = None,
    inputline2subject: Optional[InputLine2Subject] = None,
    subject2complements: Optional[Subject2Complements] = None,
    url2netloc: Optional[Url2Netloc] = None,
    cidr2subject: Optional[CIDR2Subject] = None,
) -> List[str]:
    """
    Provides the list of subject to test.

    :param line:
        The line to decode.
    :param checker_type:
        The type of checker in use. Unless it is :code:`syntax` (case
        insensitive), the network location of each subject is lowercased.

    Each converter argument is optional; a fresh instance is created for
    any converter that is not given.
    """

    # Fall back to fresh converters for everything that was not supplied.
    if adblock_inputline2subject is None:
        adblock_inputline2subject = AdblockInputLine2Subject()

    if wildcard2subject is None:
        wildcard2subject = Wildcard2Subject()

    if rpz_policy2subject is None:
        rpz_policy2subject = RPZPolicy2Subject()

    if rpz_inputline2subject is None:
        rpz_inputline2subject = RPZInputLine2Subject()

    if inputline2subject is None:
        inputline2subject = InputLine2Subject()

    if subject2complements is None:
        subject2complements = Subject2Complements()

    if url2netloc is None:
        url2netloc = Url2Netloc()

    if cidr2subject is None:
        cidr2subject = CIDR2Subject()

    subjects = []
    decoding = PyFunceble.storage.CONFIGURATION.cli_decoding

    # Exactly one decoding mode is applied; the first enabled one wins.
    if decoding.adblock:
        converter = adblock_inputline2subject.set_aggressive(
            bool(decoding.adblock_aggressive)
        )
        subjects.extend(converter.set_data_to_convert(line).get_converted())
    elif decoding.wildcard:
        subjects.append(
            wildcard2subject.set_data_to_convert(line).get_converted()
        )
    elif decoding.rpz:
        policies = rpz_inputline2subject.set_data_to_convert(
            line
        ).get_converted()

        for policy in policies:
            subjects.append(
                rpz_policy2subject.set_data_to_convert(policy).get_converted()
            )
    else:
        subjects.extend(
            inputline2subject.set_data_to_convert(line).get_converted()
        )

    testing = PyFunceble.storage.CONFIGURATION.cli_testing

    if testing.complements:
        # Collected aside first, so that only the decoded subjects (and
        # not the complements themselves) get complemented.
        complements = []

        for subject in subjects:
            complements.extend(
                subject2complements.set_data_to_convert(subject).get_converted()
            )

        subjects.extend(complements)

    if testing.cidr_expand:
        expanded = []

        for subject in subjects:
            expanded.extend(
                cidr2subject.set_data_to_convert(subject).get_converted()
            )

        subjects = expanded

    if checker_type.lower() != "syntax":
        for position, subject in enumerate(subjects):
            if subject:
                netloc = url2netloc.set_data_to_convert(subject).get_converted()
                subjects[position] = subject.replace(netloc, netloc.lower())

    return ListHelper(subjects).remove_duplicates().remove_empty().subject
def process_file_sorting(
    cls,
    file: str,
    remove_duplicates: bool = True,
    write_header: bool = True,
    sorting_key: Any = None,
) -> None:
    """
    Process the sorting of the given file.

    The idea is to split the file piece by piece and at the end join all
    sorted files. For that job, we create a temporary directory which will
    store the temporary files.

    :param file:
        The file to sort.
    :param remove_duplicates:
        Activates the deletion of duplicates.
    :param write_header:
        Activates the writing of the PyFunceble related header.

        .. warning::
            When this is set to :py:class:`True`, we assume that the header
            itself was already given. Meaning that the first 2 commented
            lines will be excluded from the sorting and regenerated.
    :param sorting_key:
        The sorting key to apply while sorting.

        This is the lambda/function that goes into the :code:`key` argument
        of the :py:class:`sorted` function.

        When nothing is given, the key provided by
        :code:`get_best_sorting_key` is used.
    """

    # pylint: disable=too-many-locals,too-many-statements

    def merge_files(files: List[TextIOWrapper]) -> Generator[str, None, None]:
        """
        Merges the given files and yield each "lines" of the merged file.

        Each given file is expected to be already sorted with the sorting
        key. Every file is closed as soon as it is exhausted.

        :param files:
            The files to merge.
        """

        heap = []

        def push_next_line(index, iterator, stream) -> None:
            """
            Pushes the next line of the given stream onto the heap, or
            closes the stream when it has no line left.
            """

            try:
                value = next(iterator)
            except StopIteration:
                stream.close()
                return

            # The index breaks ties between equal keys, so that the
            # iterator and the stream never get compared.
            heapq.heappush(
                heap, (sorting_key(value), index, value, iterator, stream)
            )

        for index, stream in enumerate(files):
            push_next_line(index, iter(stream), stream)

        previous = None
        comment_count = 0
        max_comment_count = 2

        while heap:
            _, index, value, iterator, stream = heapq.heappop(heap)

            ignore = bool(remove_duplicates and value == previous)

            if (
                write_header
                and comment_count < max_comment_count
                and value.startswith("#")
            ):
                # Only the first commented lines (the previous header) are
                # dropped. The counter of skipped comments is the one that
                # has to grow; growing the limit instead would drop every
                # commented line of the file.
                ignore = True
                comment_count += 1

            if not ignore:
                yield value
                previous = value

            push_next_line(index, iterator, stream)

    if not sorting_key:
        sorting_key = get_best_sorting_key()

    file_helper = FileHelper(file)
    sorted_files = []
    temp_directory = tempfile.TemporaryDirectory()

    try:
        temporary_output_file = os.path.join(
            temp_directory.name, secrets.token_hex(6)
        )

        PyFunceble.facility.Logger.info("Started sort of %r.", file)

        with file_helper.open(
            "r", encoding="utf-8", buffering=cls.FILE_BUFFER_SIZE
        ) as file_stream:
            while True:
                # Sort the file chunk by chunk, one temporary file each.
                to_sort = list(islice(file_stream, cls.MAX_LINES))

                if not to_sort:
                    break

                new_file = open(
                    os.path.join(temp_directory.name, secrets.token_hex(6)),
                    "w+",
                    encoding="utf-8",
                    buffering=cls.FILE_BUFFER_SIZE,
                )
                # Registered right away so that it gets closed even when
                # the writing below fails.
                sorted_files.append(new_file)

                new_file.writelines(
                    ListHelper(to_sort)
                    .remove_duplicates()
                    .custom_sort(key_method=sorting_key)
                    .subject
                )
                new_file.flush()
                # Rewind, the merge reads the chunk from its beginning.
                new_file.seek(0)

        with open(
            temporary_output_file, "w", cls.FILE_BUFFER_SIZE, encoding="utf-8"
        ) as file_stream:
            if write_header:
                file_stream.write(FilePrinter.STD_FILE_GENERATION)
                file_stream.write(FilePrinter.get_generation_date_line())
                file_stream.write("\n\n")

            file_stream.writelines(merge_files(sorted_files))

        FileHelper(temporary_output_file).move(file)

        PyFunceble.facility.Logger.info("Finished sort of %r.", file)
    finally:
        # Release every chunk file (closing an already closed file is a
        # no-op) and the temporary directory, even when the sorting failed.
        for stream in sorted_files:
            stream.close()

        temp_directory.cleanup()