def migrate(self) -> "WhoisRecordIDNASubjectMigrator":
    """
    Backfills the :code:`idna_subject` column of every WHOIS record which
    does not have one yet, then commits the whole batch.

    :return:
        The migrator itself (fluent style).
    """

    # pylint: disable=singleton-comparison
    interrupted = False

    # NOTE: `== None` (not `is None`) is required by SQLAlchemy to
    # generate an `IS NULL` filter — hence the pylint disable above.
    rows_to_fix = self.db_session.query(WhoisRecord).filter(
        WhoisRecord.idna_subject == None
    )

    for row in rows_to_fix:
        # Stop early when running under CI and the time budget is gone.
        if (
            self.continuous_integration
            and self.continuous_integration.is_time_exceeded()
        ):
            interrupted = True
            break

        PyFunceble.facility.Logger.info(
            "Started to fix idna_subject field of %r", row.subject
        )

        row.idna_subject = domain2idna.domain2idna(row.subject)
        self.db_session.add(row)

        if self.print_action_to_stdout:
            print_single_line()

        PyFunceble.facility.Logger.info(
            "Finished to fix idna_subject field of %r", row.subject
        )

    # Persist whatever was converted — even after an early CI exit.
    self.db_session.commit()

    if not interrupted:
        self.done = True

    return self
def migrate(self) -> "FileClenupMigratorBase":
    """
    Provides the migrator (itself).

    Deletes the legacy source file this migrator is responsible for and
    marks the migration as done.

    :return:
        The migrator itself (fluent style).
    """

    FileHelper(self.source_file).delete()

    PyFunceble.facility.Logger.debug("Deleted: %r", self.source_file)

    self.done = True

    if self.print_action_to_stdout:
        print_single_line()

    # Fixed: the return annotation promises the migrator itself and every
    # sibling migrator ends with `return self`, but this method previously
    # implicitly returned None — breaking any fluent/chained caller.
    return self
def migrate(self) -> "MigratorBase":
    """
    Provides the migrator (itself).

    Removes the columns listed in :code:`self.TO_DELETE` from the CSV
    file located at :code:`self.source_file` by rewriting the file
    through a temporary file.
    """

    file_helper = FileHelper(self.source_file)

    if file_helper.exists():
        # Only peek at the header line: if none of the obsolete columns
        # appear there, nothing has to be rewritten at all.
        with file_helper.open("r", encoding="utf-8") as file_stream:
            first_line = next(file_stream)

        if any(x in first_line for x in self.TO_DELETE):
            # delete=False: the file must survive the handle so it can be
            # moved over the source file by name below.
            # NOTE(review): neither this handle nor `file_handler` below is
            # explicitly closed — confirm this is acceptable (it would
            # break the final move on platforms with mandatory locks).
            temp_destination = tempfile.NamedTemporaryFile(
                "a+", newline="", encoding="utf-8", delete=False
            )

            file_handler = file_helper.open(newline="")
            reader = csv.DictReader(file_handler)
            writer = csv.DictWriter(
                temp_destination,
                # Keep every known field except the obsolete ones.
                fieldnames=[x for x in self.FIELDS if x not in self.TO_DELETE],
            )
            writer.writeheader()

            keys_found = False
            for row in reader:
                row = dict(row)
                for key in self.TO_DELETE:
                    if key in row:
                        del row[key]
                        keys_found = True

                # If the very first data row carried none of the obsolete
                # keys there is nothing to strip: stop rewriting.
                # NOTE(review): the temporary file is still moved over the
                # source below even after this break — confirm this cannot
                # truncate a partially-consumed file.
                if not keys_found:
                    break

                writer.writerow(row)

                if self.print_action_to_stdout:
                    print_single_line()

            temp_destination.seek(0)
            # Atomically (from the caller's point of view) replace the
            # source file with the rewritten copy.
            FileHelper(temp_destination.name).move(self.source_file)

    # NOTE(review): unlike the sibling migrators, this one does not
    # `return self` despite its return annotation — chained callers get
    # None; confirm whether that is intentional.
    self.done = True
def print_central_messages(check_force_update: bool = False) -> None:
    """
    Collect all possible messages from upstream and downstream and print them.

    :param check_force_update:
        Whether the forced-update handler should run first.
    """

    upstream_version = get_upstream_version()

    if check_force_update:
        handle_force_update(upstream_version)

    # Run the version handlers in order; each handler short-circuits the
    # remaining ones as soon as it reports that it handled the version.
    if not handle_deprecated_version(upstream_version):
        if not handle_greater_version(upstream_version):
            handle_older_version(upstream_version)

    handle_messages(upstream_version)

    for extra_message in PyFunceble.cli.storage.EXTRA_MESSAGES:
        print_single_line(extra_message, force=True)
def run_stdout_printer(self, test_result: CheckerStatusBase) -> None:
    """
    Runs the stdout printer (if necessary).

    :param test_result:
        The test result dataset to print.
    """

    quiet = PyFunceble.storage.CONFIGURATION.cli_testing.display_mode.quiet

    # In quiet mode — or for statuses that should not be displayed — only
    # a single progress character is printed.
    if quiet or not self.should_we_print_status_to_stdout(test_result.status):
        print_single_line()
        return

    self.stdout_printer.template_to_use = get_template_to_use()

    # The header is printed once per session only.
    if not self.header_already_printed:
        self.stdout_printer.print_header()
        self.header_already_printed = True

    self.stdout_printer.set_dataset(test_result.to_dict()).print_interpolated_line()
def migrate(self) -> "InactiveJSON2CSVMigrator":
    """
    Starts the migration.

    Walks the legacy JSON inactive database line by line (without a JSON
    parser), decodes each entry into :code:`dataset` and pushes it into
    the new dataset object. The source file is deleted once it has been
    fully processed.
    """

    file_helper = FileHelper(self.source_file)

    if file_helper.exists():
        self.dataset.set_authorized(True)

        # Working buffer, progressively (re)filled while walking the file.
        dataset = {
            "idna_subject": None,
            "status": None,
            "status_source": None,
            "checker_type": "AVAILABILITY",
            "destination": None,
            "source": None,
            "tested_at": None,
            "session_id": None,
        }

        # Only delete the source file when the whole file was processed.
        delete_file = True

        with file_helper.open("r", encoding="utf-8") as file_stream:
            for line in file_stream:
                # Stop early when running under CI and the time budget is
                # exceeded; keep the file so a later run can resume.
                if (
                    self.continuous_integration
                    and self.continuous_integration.is_time_exceeded()
                ):
                    delete_file = False
                    break

                # Strip the JSON decoration so only `key: value` remains.
                line = (
                    line.strip()
                    .replace('"', "")
                    .replace(",", "")
                    .replace(
                        "{",
                        "",
                    )
                    .replace("}", "")
                )

                if ":" not in line:
                    continue

                # Split at the LAST colon so subjects containing colons
                # do not break the unpacking.
                index, value = [x.strip() for x in line.rsplit(":", 1)]

                if not value:
                    # An empty value means `index` is structural:
                    # either a timestamp key or the source-file key.
                    if index.isdigit():
                        dataset["tested_at"] = datetime.datetime.fromtimestamp(
                            float(index)
                        ).isoformat()
                    else:
                        dataset["source"] = os.path.abspath(index)
                        dataset["destination"] = get_destination_from_origin(
                            dataset["source"]
                        )
                    continue

                # `subject: status` pair: this is a full record — decode
                # and flush it.
                dataset["idna_subject"] = domain2idna.domain2idna(index)
                dataset["status"] = value

                if not dataset["tested_at"]:
                    # No timestamp seen yet: fall back to "now".
                    dataset["tested_at"] = datetime.datetime.utcnow().isoformat()

                PyFunceble.facility.Logger.debug("Decoded dataset:\n%r.", dataset)

                self.dataset.update(dataset)

                if self.print_action_to_stdout:
                    print_single_line()

                PyFunceble.facility.Logger.info(
                    "Added %r into %r", dataset["idna_subject"], self.dataset
                )

        if delete_file:
            file_helper.delete()
            self.done = True

    return self
def start(self, print_dots: bool = False) -> "FilePreloader":
    """
    Starts the pre-loading of the currently set file path.

    Hashes the source file, decides (from the stored description) whether
    a full re-read is needed, and feeds every subject of the file into the
    continue dataset so the actual test run can start instantly.

    :param print_dots:
        Whether a progress character should be printed per update.
    """

    self.__load_description()

    broken = False
    file_helper = FileHelper(self.protocol["subject"])

    # Record the current hash of the file; compared against the previously
    # stored hash below to detect file changes.
    self.__description[self.__matching_index]["hash"] = HashHelper().hash_file(
        file_helper.path
    )

    if isinstance(self.continue_dataset, CSVContinueDataset):
        # CSV-backed dataset: point it at the output directory first.
        self.continue_dataset.set_base_directory(self.protocol["output_dir"])

    # NOTE(review): nesting reconstructed — the cleanup block below is
    # assumed to apply to every dataset type (the TypeError fallback
    # suggests differing cleanup() signatures); confirm against upstream.
    if (
        self.__description[self.__matching_index]["checker_type"]
        != self.protocol["checker_type"]
        or self.__description[self.__matching_index]["subject_type"]
        != self.protocol["subject_type"]
    ):
        try:
            self.continue_dataset.cleanup()
        except TypeError:
            # Some dataset implementations require the session id.
            self.continue_dataset.cleanup(session_id=self.protocol["session_id"])

    if (
        self.__description[self.__matching_index]["previous_hash"]
        and self.__description[self.__matching_index]["hash"]
        != self.__description[self.__matching_index]["previous_hash"]
    ):
        # Forces the reading of each lines because there is literally no
        # way to know where something has been changed.
        self.__description[self.__matching_index]["line_number"] = 1

    if (
        self.__description[self.__matching_index]["checker_type"]
        != self.protocol["checker_type"]
        or self.__description[self.__matching_index]["subject_type"]
        != self.protocol["subject_type"]
        or self.__description[self.__matching_index]["hash"]
        != self.__description[self.__matching_index]["previous_hash"]
    ):
        try:
            with file_helper.open("r", encoding="utf-8") as file_stream:
                line_num = 1

                for line in file_stream:
                    # Skip lines already pre-loaded by a previous run.
                    if (
                        line_num
                        < self.__description[self.__matching_index]["line_number"]
                    ):
                        line_num += 1
                        continue

                    # Stop early when the CI time budget is exceeded; the
                    # stored line_number lets a later run resume here.
                    if (
                        self.continuous_integration
                        and self.continuous_integration.is_time_exceeded()
                    ):
                        broken = True
                        break

                    line = line.strip()

                    # RPZ files: the SOA record defines the policy origin.
                    if self.rpz_policy2subject and "SOA" in line:
                        self.rpz_policy2subject.set_soa(line.split()[0])

                    for subject in get_subjects_from_line(
                        line,
                        self.checker_type,
                        adblock_inputline2subject=self.adblock_inputline2subject,
                        wildcard2subject=self.wildcard2subject,
                        rpz_policy2subject=self.rpz_policy2subject,
                        rpz_inputline2subject=self.rpz_inputline2subject,
                        inputline2subject=self.inputline2subject,
                        subject2complements=self.subject2complements,
                        url2netloc=self.url2netloc,
                        cidr2subject=self.cidr2subject,
                    ):
                        to_send = copy.deepcopy(self.protocol)
                        to_send["subject"] = subject
                        to_send["idna_subject"] = domain2idna(subject)
                        # Backdate ~20 years so the entry is always
                        # considered "due for testing".
                        to_send["tested_at"] = datetime.utcnow() - timedelta(
                            days=365.25 * 20
                        )

                        # "I": already known as inactive — skipped.
                        if self.inactive_dataset.exists(to_send):
                            print_single_line("I")
                            continue

                        # "X": ignored by the core ignore procedure.
                        if TesterWorker.should_be_ignored(
                            subject=to_send["idna_subject"]
                        ):
                            print_single_line("X")
                            continue

                        self.continue_dataset.update(to_send, ignore_if_exist=True)

                        if print_dots:
                            print_single_line()

                    self.__description[self.__matching_index]["line_number"] += 1
                    line_num += 1
        except KeyboardInterrupt as exception:
            # Persist progress before propagating the interruption.
            self.__save_description()
            raise exception

    if not broken:
        # Only remember the new hash when the whole file was processed.
        self.__description[self.__matching_index]["previous_hash"] = self.__description[
            self.__matching_index
        ]["hash"]

    self.__save_description()

    return self
def target(self, consumed: dict) -> Optional[Tuple[Any, ...]]:
    """
    This is the target that is run to process something.
    This method should return a result which will be sent to the output
    queue.

    :param consumed:
        The test dataset (dict) to process.

    :return:
        A ``(test_dataset, result)`` tuple to forward, or :code:`None`
        when the dataset was skipped or ignored.
    """

    if not isinstance(consumed, dict):
        PyFunceble.facility.Logger.debug(
            "Skipping latest dataset because consumed data was not "
            "a dictionnary."
        )
        return None

    # Just for human brain.
    test_dataset = consumed

    if self.should_be_ignored(test_dataset["idna_subject"]):
        # X means that it was ignored because of our core ignore procedure.
        print_single_line("X")
        return None

    # Optional per-test cooldown (configured in seconds).
    if PyFunceble.storage.CONFIGURATION.cli_testing.cooldown_time > 0:
        PyFunceble.facility.Logger.info(
            "Sleeping: %rs for our own safety :-)",
            PyFunceble.storage.CONFIGURATION.cli_testing.cooldown_time,
        )
        # Apply cooldowntime.
        time.sleep(PyFunceble.storage.CONFIGURATION.cli_testing.cooldown_time)
        PyFunceble.facility.Logger.info(
            "Slept: %rs for our own safety :-)",
            PyFunceble.storage.CONFIGURATION.cli_testing.cooldown_time,
        )

    # Single (ad-hoc) tests bypass the continue/inactive bookkeeping.
    if test_dataset["type"] != "single":
        if test_dataset["output_dir"] and "from_preload" not in test_dataset:
            if isinstance(self.continue_dataset, CSVContinueDataset):
                # CSV-backed dataset: point it at the output directory.
                self.continue_dataset.set_base_directory(test_dataset["output_dir"])

            if self.continue_dataset.exists(test_dataset):
                # A means that it was ignored because of the continue
                # logic.
                PyFunceble.facility.Logger.info(
                    "Ignoring %r because it was already tested previously "
                    "(continue).",
                    test_dataset["idna_subject"],
                )
                PyFunceble.cli.utils.stdout.print_single_line("A")
                return None

        if "from_inactive" not in test_dataset and self.inactive_dataset.exists(
            test_dataset
        ):
            # "I" means that it was ignored because of the inactive (db)
            # logic.
            PyFunceble.facility.Logger.info(
                "Ignoring %r because it was already tested previously "
                "(inactive).",
                test_dataset["idna_subject"],
            )
            PyFunceble.cli.utils.stdout.print_single_line("I")
            # Forwarded (not None) so the inactive entry can be re-handled
            # downstream.
            return test_dataset, "ignored_inactive"

    PyFunceble.facility.Logger.info(
        "Started test of %r.",
        test_dataset["idna_subject"],
    )

    self._init_testing_object(
        test_dataset["subject_type"], test_dataset["checker_type"]
    )

    # Run the actual check (fluent interface) and collect its status.
    result = (
        self.testing_object.set_subject(test_dataset["idna_subject"])
        .query_status()
        .get_status()
    )

    PyFunceble.facility.Logger.info(
        "Successfully handled %r.",
        test_dataset["idna_subject"],
    )

    PyFunceble.facility.Logger.debug("Got status:\n%r.", result)

    return test_dataset, result
def migrate(self) -> "FileAndStatusMigrator":
    """
    Migrates the legacy ``pyfunceble_file`` / ``pyfunceble_status`` tables
    into the new inactive and continue datasets, deleting migrated rows as
    it goes and dropping both tables once everything was transferred.
    """

    inactive_statuses = (
        PyFunceble.storage.STATUS.down,
        PyFunceble.storage.STATUS.invalid,
    )

    inactive_dataset = PyFunceble.cli.utils.testing.get_inactive_dataset_object()
    continue_dataset = PyFunceble.cli.utils.testing.get_continue_databaset_object()

    # Only drop the legacy tables when every row was migrated.
    drop_table = True

    for file_info in self.get_rows("SELECT * from pyfunceble_file"):
        # Stop early when running under CI and the time budget is gone;
        # the remaining rows will be migrated by a later run.
        if (
            self.continuous_integration
            and self.continuous_integration.is_time_exceeded()
        ):
            drop_table = False
            break

        destination = get_destination_from_origin(file_info["path"])

        # NOTE(review): ids come from our own database rows, but these
        # f-string-built queries are still unparameterized SQL — confirm
        # the ids can never be attacker-controlled.
        for status in self.get_rows(
            f"SELECT * from pyfunceble_status WHERE file_id = {file_info['id']}"
        ):
            if (
                self.continuous_integration
                and self.continuous_integration.is_time_exceeded()
            ):
                drop_table = False
                break

            to_send = {
                "idna_subject": domain2idna.domain2idna(status["tested"]),
                "checker_type": "AVAILABILITY",
                "destination": destination,
                "source": file_info["path"],
                "tested_at": status["tested_at"],
                "session_id": None,
            }

            # DOWN/INVALID rows additionally land in the inactive dataset.
            if status["status"] in inactive_statuses:
                inactive_dataset.update(to_send)

                if self.print_action_to_stdout:
                    print_single_line()

                PyFunceble.facility.Logger.debug("Dataset: %r", to_send)

            # Every migrated row lands in the continue dataset.
            continue_dataset.update(to_send)

            if self.print_action_to_stdout:
                print_single_line()

            PyFunceble.facility.Logger.info(
                "Added %r into %r", to_send["idna_subject"], continue_dataset
            )

            # pylint: disable=line-too-long
            self.db_session.execute(
                f"DELETE from pyfunceble_status WHERE id = {status['id']}"
            )
            self.db_session.commit()

            PyFunceble.facility.Logger.debug(
                "Deleted from pyfunceble_status: \n%r", status
            )

        if drop_table:
            # pylint: disable=line-too-long
            self.db_session.execute(
                f"DELETE from pyfunceble_file WHERE id = {file_info['id']}"
            )
            self.db_session.commit()

            PyFunceble.facility.Logger.debug(
                "Deleted from pyfunceble_file: \n%r", file_info
            )
        else:
            PyFunceble.facility.Logger.debug(
                "Not deleted from pyfunceble_file (not authorized): \n%r", file_info
            )

    if drop_table:
        self.db_session.execute("DROP TABLE pyfunceble_file")
        self.db_session.commit()

        PyFunceble.facility.Logger.debug("Deleted pyfunceble_file table.")

        self.db_session.execute("DROP TABLE pyfunceble_status")
        self.db_session.commit()

        PyFunceble.facility.Logger.debug("Deleted pyfunceble_status table.")

        self.done = True
    else:
        PyFunceble.facility.Logger.debug(
            "No table deleted. Reason: not authorized."
        )

    return self
def migrate(self) -> "WhoisJSON2CSVMigrator":
    """
    Provides the migration logic.

    Reads the legacy JSON WHOIS database line by line (without a JSON
    parser), decodes each subject / epoch / expiration_date entry and
    pushes the decoded dataset into the new dataset object. The source
    file is deleted once it was fully processed.
    """

    file_helper = FileHelper(self.source_file)

    if file_helper.exists():
        self.dataset.set_authorized(True)

        # Working buffer, progressively (re)filled while walking the file.
        dataset = {
            "subject": None,
            "idna_subject": None,
            "expiration_date": None,
            "epoch": None,
        }

        # Only delete the source file when the whole file was processed.
        delete_file = True

        with file_helper.open("r", encoding="utf-8") as file_stream:
            for line in file_stream:
                # Stop early when running under CI and the time budget is
                # exceeded; keep the file so a later run can resume.
                if (
                    self.continuous_integration
                    and self.continuous_integration.is_time_exceeded()
                ):
                    delete_file = False
                    break

                # Strip the JSON decoration so only `key: value` remains.
                line = (
                    line.strip()
                    .replace('"', "")
                    .replace(",", "")
                    .replace("{", "")
                    .replace("}", "")
                )

                if ":" not in line:
                    continue

                # Fixed: split at the LAST colon only — consistent with
                # the sibling inactive-DB migrator. The previous unbounded
                # line.split(":") raised ValueError ("too many values to
                # unpack") as soon as the subject or the value contained
                # an extra colon.
                index, value = [x.strip() for x in line.rsplit(":", 1)]

                if not value:
                    # An empty value means `index` is a new subject.
                    dataset["subject"], dataset["idna_subject"] = (
                        index,
                        domain2idna.domain2idna(index),
                    )
                    continue

                if index == "epoch":
                    dataset["epoch"] = float(value)
                elif index == "expiration_date":
                    dataset["expiration_date"] = value
                elif index == "state":
                    # "state" is the last key of a record: flush it.
                    PyFunceble.facility.Logger.debug(
                        "Decoded dataset:\n%r.", dataset
                    )

                    self.dataset.update(dataset)

                    if self.print_action_to_stdout:
                        print_single_line()

                    PyFunceble.facility.Logger.info(
                        "Added %r into %r", dataset["idna_subject"], self.dataset
                    )

        if delete_file:
            file_helper.delete()
            self.done = True

    return self
def target(self, consumed: Tuple[dict, CheckerStatusBase]) -> None:
    """
    Mines new subjects out of an ACTIVE test result and requeues them.

    :param consumed:
        A ``(test_dataset, test_result)`` tuple as produced by the tester
        worker.

    :return:
        Always :code:`None` — mined datasets are added to the output
        queue manually.
    """

    if not isinstance(consumed, tuple) or not isinstance(
        consumed[1], CheckerStatusBase
    ):
        PyFunceble.facility.Logger.info(
            "Skipping latest dataset because consumed data was not a tuple."
        )
        return None

    # Just for human brain.
    test_dataset, test_result = consumed

    # Never mine a dataset that was itself produced by mining — this
    # prevents an endless requeue loop.
    if "from_miner" in test_dataset:
        PyFunceble.facility.Logger.info(
            "Skipping dataset because it comes from the mining mechanism."
        )
        return None

    if test_result.status in self.INACTIVE_STATUSES:
        PyFunceble.facility.Logger.info(
            "Skipping dataset because status is not active."
        )
        return None

    if test_dataset["subject_type"] == "domain":
        # Build a fetchable URL out of the bare domain.
        subject = f"http://{test_result.idna_subject}:80"
    else:
        # Assuming it's already a URL.
        subject = test_result.idna_subject

    # "M" marks a mined dataset in the progress output.
    print_single_line("M")

    self.add_to_output_queue("pyfunceble")
    self.share_waiting_message()

    mined = self.mine_from(subject)

    for url in mined:
        # Requeue a copy of the original dataset, flagged so it is not
        # mined again.
        to_send = copy.deepcopy(test_dataset)
        to_send["from_miner"] = True

        if test_dataset["subject_type"] == "domain":
            # Reduce the mined URL back to its hostname (strip any port).
            netloc = self.url2netloc.set_data_to_convert(url).get_converted()

            if ":" in netloc:
                netloc = netloc[: netloc.find(":")]

            to_send["subject"] = netloc
            to_send["idna_subject"] = domain2idna(netloc)
        else:
            # Normalize the trailing slash to match the tested subject.
            if not test_result.idna_subject.endswith("/") and url.endswith("/"):
                url = url[:-1]

            to_send["subject"] = url
            to_send["idna_subject"] = domain2idna(url)

        # Don't requeue the subject we just tested.
        if to_send["idna_subject"] == test_result.idna_subject:
            PyFunceble.facility.Logger.info(
                "Skipping %r because found in test result.", url
            )
            continue

        self.add_to_output_queue(to_send)

    # Returning None because we manually add into the queue.
    return None