def run(self):
    """Consume notifications from the query until receiving fails.

    Every iteration waits for a ``[metadata, data]`` pair from
    ``self.query.get()`` and pauses for 0.1 seconds afterwards.

    * For the transfer types ``QUERY_NEXT_METADATA`` and
      ``STREAM_METADATA`` the announced file is opened below
      ``self.basepath`` and read once completely; the content is
      discarded.
    * For every other transfer type the file path, the metadata and the
      first 100 characters of the stringified data are printed.

    The loop ends as soon as receiving (or the pause following it) raises
    an exception.
    """
    metadata_only_types = ("QUERY_NEXT_METADATA", "STREAM_METADATA")

    while True:
        try:
            self.log.debug("Worker-%s: waiting", self.identifier)
            [metadata, data] = self.query.get()
            time.sleep(0.1)
        except Exception:
            # A failing get() is the only way out of this loop. Record the
            # reason instead of ending silently, so that an unexpected
            # failure can be told apart from a regular shutdown.
            self.log.debug("Worker-%s: receiving failed, leaving loop",
                           self.identifier, exc_info=True)
            break

        if self.transfer_type in metadata_only_types:
            self.log.debug("Worker-%s: metadata %s",
                           self.identifier, metadata["filename"])

            filepath = generate_filepath(self.basepath, metadata)
            self.log.debug("Worker-%s: filepath %s",
                           self.identifier, filepath)

            # Binary mode: the content is only read, never interpreted, and
            # text mode would raise UnicodeDecodeError on non-text payloads.
            with open(filepath, "rb") as file_descriptor:
                file_descriptor.read()
            self.log.debug("Worker-%s: file %s read",
                           self.identifier, filepath)
        else:
            print("filepath", generate_filepath(self.basepath, metadata))
            print("metadata", metadata)
            print("data", str(data)[:100])
def get_metadata(self, targets, metadata):
    """Implementation of the abstract method get_metadata.

    Sets ``self.source_file`` and ``self.target_file`` and, if there are
    targets, extends ``metadata`` in place with the file size, the
    modification and creation time of the source file, the configured
    chunk size and the confirmation setting.

    Args:
        targets (list): The target list this file is supposed to go.
        metadata (dict): The dictionary with the metadata to extend.

    Raises:
        Exception: Whatever reading the file status or filling the
            dictionary raised; it is logged and re-raised unchanged.
    """

    # Build source file
    self.source_file = generate_filepath(metadata["source_path"], metadata)

    # Build target file
    # if local_target is not set (== None) generate_filepath returns None
    self.target_file = generate_filepath(self.config_df["local_target"],
                                         metadata)

    if not targets:
        return

    try:
        # A single stat call yields size and both time stamps from the same
        # snapshot of the file; three separate lookups could each observe a
        # different state if the file changes in between.
        file_stat = os.stat(self.source_file)
        filesize = file_stat.st_size
        file_mod_time = file_stat.st_mtime
        file_create_time = file_stat.st_ctime
    except Exception:
        self.log.error("Unable to create metadata dictionary.")
        raise

    try:
        self.log.debug("create metadata for source file...")
        # metadata = {
        #        "filename"       : ...,
        #        "source_path"     : ...,  # in unix format
        #        "relative_path"   : ...,  # in unix format
        #        "filesize"       : ...,
        #        "file_mod_time"    : ...,
        #        "file_create_time" : ...,
        #        "chunksize"      : ...
        #        }
        if self.is_windows:
            # Paths are sent with forward slashes.
            # TODO use pathlib here instead
            metadata["source_path"] = (
                metadata["source_path"].replace("\\", "/"))
            metadata["relative_path"] = (
                metadata["relative_path"].replace("\\", "/"))

        metadata["filesize"] = filesize
        metadata["file_mod_time"] = file_mod_time
        metadata["file_create_time"] = file_create_time
        metadata["chunksize"] = self.config_df["chunksize"]

        # The confirmation topic is only passed on when the cleaner is used
        # and removal is configured to wait for a confirmation.
        if (self.config_df["use_cleaner"]
                and self.config_df["remove_data"] == "with_confirmation"):
            metadata["confirmation_required"] = (
                self.confirmation_topic.decode())
        else:
            metadata["confirmation_required"] = False

        self.log.debug("metadata = %s", metadata)
    except Exception:
        self.log.error("Unable to assemble multi-part message.")
        raise
def get_metadata(self, targets, metadata):
    """Implementation of the abstract method get_metadata.

    Fetches the next metadata/data pair from ``self.transfer`` and stores
    it in ``self.metadata_r`` and ``self.data_r``. The source and target
    file are derived from the received metadata, not from the ``metadata``
    argument, which is only used to detect a mismatch. If there are
    targets, entries missing in the received metadata are reported and,
    except for the file size, filled with defaults.

    Args:
        targets (list): The target list this file is supposed to go.
        metadata (dict): The metadata the received ones are compared to.
    """
    # pylint: disable=attribute-defined-outside-init

    timeout = 10000

    # Get new data
    received = self.transfer.get(timeout)
    self.metadata_r, self.data_r = received

    identifying_keys = ("relative_path", "source_path", "filename")
    if any(metadata[key] != self.metadata_r[key]
           for key in identifying_keys):
        self.log.error("Received metadata do not match data")

    # Use received data to prevent mismatch of metadata and data
    # TODO handle case if file type requested by target does not match

    # Build source file
    source_base = self.metadata_r["source_path"]
    self.source_file = generate_filepath(source_base, self.metadata_r)

    # Build target file
    # if local_target is not set (== None) generate_filepath returns None
    target_base = self.config_df["local_target"]
    self.target_file = generate_filepath(target_base, self.metadata_r)

    # Nothing to extend without targets
    if not targets:
        return

    # The file size is only reported as missing, no default is set
    if "filesize" not in self.metadata_r:
        self.log.error("Received metadata do not contain 'filesize'")

    if "file_mod_time" not in self.metadata_r:
        self.log.error("Received metadata do not contain "
                       "'file_mod_time'. Setting it to current time")
        self.metadata_r["file_mod_time"] = time.time()

    if "file_create_time" not in self.metadata_r:
        self.log.error("Received metadata do not contain "
                       "'file_create_time'. Setting it to current "
                       "time")
        self.metadata_r["file_create_time"] = time.time()

    if "chunksize" not in self.metadata_r:
        self.log.error("Received metadata do not contain 'chunksize'. "
                       "Setting it to locally configured one")
        self.metadata_r["chunksize"] = self.config_df["chunksize"]
def get_metadata(self, targets, metadata):
    """Implementation of the abstract method get_metadata.

    Sets ``self.source_file`` and, if there are targets, fills ``metadata``
    in place with placeholder values: no file size, no chunk size and the
    current time as modification and creation time.

    Args:
        targets (list): The target list this file is supposed to go.
        metadata (dict): The dictionary with the metadata to extend.
    """

    # Build source file
    source_base = metadata["source_path"]
    self.source_file = generate_filepath(source_base, metadata)

    # Without targets the metadata stay untouched
    if not targets:
        return

    try:
        self.log.debug("create metadata for source file...")
        # Entries set here:
        #     "filesize"         -> None
        #     "file_mod_time"    -> current time
        #     "file_create_time" -> current time
        #     "chunksize"        -> None
        metadata["filesize"] = None
        metadata["file_mod_time"] = time.time()
        metadata["file_create_time"] = time.time()
        metadata["chunksize"] = None

        self.log.debug("metadata = %s", metadata)
    except Exception:
        self.log.error("Unable to assemble multi-part message.")
        raise
def run_plugin_thread(plugin_name, plugin_config, target_dir, data_queue,
                      log, event):
    """
    Import a plugin, set it up and feed it queue items until told to stop

    This function is meant to run on a thread of its own, so that a slow or
    blocking plugin cannot hold up the main thread. If the plugin cannot be
    imported, instantiated or set up, the error is logged and the function
    returns without processing anything. A failure while processing a
    single item is logged and the loop carries on with the next item.

    Parameters
    ----------
    plugin_name: str
        The name of the module in the plugin directory
    plugin_config: dict
        The plugin configuration options
    target_dir: str
        The local part of the target directory
    data_queue: queue.Queue
        The queue instance used for message passing
    log: logging.Logger
        A logger instance
    event: threading.Event
        Event instance for receiving the stop signal at shutdown
    """
    try:
        module = import_module("plugins." + plugin_name)
        plugin = module.Plugin(plugin_config)
        plugin.setup()
        log.info("Loading '%s' plugin", plugin_name)
    except Exception:
        log.error("Could not load '%s' plugin", plugin_name, exc_info=True)
        return

    while not event.is_set():
        # Wait with a timeout so the stop event is checked regularly
        try:
            item = data_queue.get(timeout=0.5)
        except queue.Empty:
            continue

        try:
            metadata, payload = item
            local_path = generate_filepath(target_dir, metadata)
            plugin.process(local_path=local_path,
                           metadata=metadata,
                           data=payload)
        except Exception:
            log.error("Processing data with '%s' plugin failed.",
                      plugin_name, exc_info=True)

        # The item counts as handled, no matter whether processing worked
        data_queue.task_done()

    try:
        plugin.stop()
    except Exception:
        log.error("Error while stopping '%s' plugin", plugin_name,
                  exc_info=True)
def get_metadata(self, targets, metadata):
    """Implementation of the abstract method get_metadata.

    Sets ``self.source_file`` and ``self.target_file`` and stores the
    configured chunk size in ``metadata``. If there are targets,
    ``metadata`` is additionally extended in place with the current time
    as modification and creation time and with the confirmation setting.

    Args:
        targets (list): The target list this file is supposed to go.
        metadata (dict): The dictionary with the metadata to extend.
    """
    # pylint: disable=attribute-defined-outside-init

    # no normpath used because that would transform http://...
    # into http:/...
    source_parts = (metadata["source_path"],
                    metadata["relative_path"],
                    metadata["filename"])
    self.source_file = os.path.join(*source_parts)

    # Build target file
    # if local_target is not set (== None) generate_filepath returns None
    target_base = self.config_df["local_target"]
    self.target_file = generate_filepath(target_base, metadata)

    # The chunk size is set no matter whether there are targets
    metadata["chunksize"] = self.config_df["chunksize"]

    if not targets:
        return

    try:
        self.log.debug("create metadata for source file...")
        # Entries set here:
        #     "file_mod_time"         -> current time
        #     "file_create_time"      -> current time
        #     "confirmation_required" -> confirmation topic or False
        metadata["file_mod_time"] = time.time()
        metadata["file_create_time"] = time.time()

        wait_for_confirmation = (
            self.config_df["remove_data"] == "with_confirmation"
        )
        metadata["confirmation_required"] = (
            self.confirmation_topic.decode()
            if wait_for_confirmation
            else False
        )

        self.log.debug("metadata = %s", metadata)
    except Exception:
        self.log.error("Unable to assemble multi-part message.",
                       exc_info=True)
        raise
def get_metadata(self, targets, metadata):
    """Implementation of the abstract method get_metadata.

    Sets ``self.source_file`` and ``self.target_file`` and, if there are
    targets, fills ``metadata`` in place with a file size of 0, two fixed
    time stamps and the configured chunk size.

    Args:
        targets (list): The target list this file is supposed to go.
        metadata (dict): The dictionary with the metadata to extend.
    """
    # pylint: disable=attribute-defined-outside-init

    # Build source file
    source_base = metadata["source_path"]
    self.source_file = generate_filepath(source_base, metadata)

    # Build target file
    # if local_target is not set (== None) generate_filepath returns None
    target_base = self.config_df["local_target"]
    self.target_file = generate_filepath(target_base, metadata)

    # Nothing to extend without targets
    if not targets:
        return

    # Extends metadata with fixed values
    metadata["filesize"] = 0
    metadata["file_mod_time"] = 1481734310.6207027
    metadata["file_create_time"] = 1481734310.6207028
    metadata["chunksize"] = self.config_df["chunksize"]
def main():
    """Connects to hidra and request metadata.

    Parses the command line for the signal host and the target host (both
    default to the fully qualified name of the local host), initiates and
    starts a ``QUERY_NEXT_METADATA`` transfer for port 50101 on the target
    host, and then prints the target file path for every metadata set
    received.

    The receive loop has no regular exit; it ends when ``query.get()`` (or
    anything else inside the loop) raises. The transfer is stopped exactly
    once in that case and the exception is propagated to the caller.
    """

    parser = argparse.ArgumentParser()

    parser.add_argument("--signal_host",
                        type=str,
                        help="Host where HiDRA is running",
                        default=socket.getfqdn())
    parser.add_argument("--target_host",
                        type=str,
                        help="Host where the data should be send to",
                        default=socket.getfqdn())

    arguments = parser.parse_args()

    # One target: [host, port, priority]
    # NOTE(review): meaning of the third entry assumed - confirm
    targets = [[arguments.target_host, "50101", 0]]
    base_target_path = os.path.join(BASE_DIR, "data", "target")

    print("\n==== TEST: Query for the newest filename ====\n")

    query = Transfer("QUERY_NEXT_METADATA", arguments.signal_host)

    query.initiate(targets)

    query.start()

    try:
        while True:
            # Only the metadata are of interest here
            [metadata, _] = query.get()

            print()
            print(generate_filepath(base_target_path, metadata))
            print()
    finally:
        # Runs on every way out of the loop, so no additional stop() is
        # needed where the exception occurs.
        query.stop()

        print("\n==== TEST END: Query for the newest filename ====\n")