def __new__(cls,
            queue_sim_datacopy_new_file,
            queue_datacopy_ceph_request_hash_for_new_file,
            queue_datacopy_ceph_answer_hash_for_new_file,
            queue_datacopy_backend_new_file_and_hash,
            event_datacopy_backend_get_index,
            queue_datacopy_backend_index_data,
            event_datacopy_ceph_update_index,
            queue_datacopy_ceph_filename_and_hash,
            event_data_manager_shutdown,
            lock_datacopy_ceph_filename_and_hash
            ):
    cl.info("Starting LocalDataManager")

    if not cls._instance:
        cls._instance = cls
        cls.__init__(cls,
                     queue_sim_datacopy_new_file,
                     queue_datacopy_ceph_request_hash_for_new_file,
                     queue_datacopy_ceph_answer_hash_for_new_file,
                     queue_datacopy_backend_new_file_and_hash,
                     event_datacopy_backend_get_index,
                     queue_datacopy_backend_index_data,
                     event_datacopy_ceph_update_index,
                     queue_datacopy_ceph_filename_and_hash,
                     event_data_manager_shutdown,
                     lock_datacopy_ceph_filename_and_hash
                     )

    return cls._instance
async def _index_updater_coro(cls):
    """
    Read the queue for new things to do.

    """
    while True:

        new_files = list()

        if (cls._queue_datacopy_ceph_filename_and_hash.qsize() > 0):

            with cls._lock_datacopy_ceph_filename_and_hash:

                while True:
                    try:
                        new_file_dict = (
                            cls._queue_datacopy_ceph_filename_and_hash.get(
                                block=False))
                    except queue.Empty:
                        break
                    else:
                        new_files.append(new_file_dict)

            if len(new_files) > 0:
                cl.verbose("Got {} files from queue".format(len(new_files)))

            for new_file_dict in new_files:
                namespace = new_file_dict["namespace"]
                key = new_file_dict["key"]
                sha1sum = new_file_dict["sha1sum"]
                cls.add_file(namespace, key, sha1sum)

            cl.verbose("Done adding files to index")

        # check once per second; things should appear in large chunks anyway
        await asyncio.sleep(1)
def stop(self):
    cl.info("Shutdown client")
    self._loop.stop()

    for task in self.tasks:
        task.cancel()
        with suppress(asyncio.CancelledError):
            self._loop.run_until_complete(task)

    # pending = asyncio.Task.all_tasks()
    # for task in pending:
    #     task.cancel()
    #     with suppress(asyncio.CancelledError):
    #         self._loop.run_until_complete(task)
    #         print(task)
    # # for task in pending:
    # #     print(task)

    self._loop.close()

    # clear when shutdown complete
    self._shutdown_client_event.clear()

    cl.info("Client shutdown complete")
def _reset(cls):
    """
    Delete the instance.

    """
    cl.debug("Resetting LocalDataManager")
    cls._instance = None
    cls._hashset = set()
    cls._local_copy = dict()
    del cls
def get_namespaces(ceph_conf, ceph_pool, ceph_user):
    """
    Use rados on the command line to parse all namespaces.

    """
    ceph_conf = str(pathlib.Path(ceph_conf))

    # a set can not have duplicates
    namespaces = set()

    cwd = pathlib.Path(__file__).parent.parent
    pathlib.Path(cwd / ".radosoutput").mkdir(exist_ok=True)
    rados_file = cwd / ".radosoutput" / "radosoutput.txt"

    # delete an existing output file first
    try:
        rados_file.unlink()
    except FileNotFoundError:
        pass

    try:
        rados_cmd = [
            "rados",
            "-p", "{}".format(ceph_pool),
            "ls", "{}".format(str(rados_file)),
            "--user", "{}".format(ceph_user),
            "--keyring", "{}".format(ceph_conf),
            "--all"
        ]
        result = subprocess.call(rados_cmd)
    except FileNotFoundError:
        cl.warning("call to command line failed -- file not found")
        return None

    if result != 0:
        cl.warning("call to command line failed -- non zero exit code")
        return None

    if not rados_file.exists():
        cl.warning("no data for parsing present")
        return None

    cl.verbose("parsing data in .radosoutput/radosoutput.txt")

    with open(str(rados_file), 'r') as rf:
        while True:
            line = rf.readline()
            if line == "":
                break

            pts = line.split("\t")      # split on tab

            # len == 2 -> this line has a namespace column
            if len(pts) == 2:
                if pts[0] != "":
                    namespaces.add(pts[0])

    cl.verbose("got {} namespaces".format(len(namespaces)))

    return namespaces
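# Illustrative example of the file parsed above (names are hypothetical):
# the rados listing written to .radosoutput/radosoutput.txt contains one
# object per line, with the namespace and the object name separated by a
# tab; objects without a namespace have an empty first column, so only
# lines whose first column is non-empty contribute a namespace:
#
#     sim_run_1<TAB>universe.fo.ta.nodes@0.1
#     sim_run_1<TAB>universe.fo.ta.nodal.temperature@0.1
#     <TAB>object_without_namespace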
def setup_logging(logging_level):
    """
    Set up the loggers.

    """
    cl(logging_level)           # set up core logging
    cl.info("Started Core logging with level '{}'".format(logging_level))

    sl(logging_level)           # set up simulation logging
    sl.info("Started Simulation logging with level '{}'".format(logging_level))

    bl(logging_level)           # set up backend logging
    bl.info("Started Backend logging with level '{}'".format(logging_level))
async def watch_file_request_queue(self, reader, writer):
    """
    Watch the file request queue in a separate executor.

    """
    file_request_in_queue = await self._loop.run_in_executor(
        None, self.file_request_event_executor)

    if not file_request_in_queue:
        return None

    cl.info("Requested file data and hash")
    await self.send_connection(reader, writer, file_request_in_queue)
def _queue_reader_executor(self, pattern=None):
    """
    Read the queues in a separate executor.

    """
    while True:

        if self._event_shutdown_process.is_set():
            cl.debug("Ceph connection shutdown event is set")
            return None

        # default pattern; not recommended, but if nothing is provided we do this
        if not pattern:
            pattern = [
                {"queue": self._queue_ceph_task_data, "blocking_time": 1e-1},
                {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
                {"queue": self._queue_ceph_task_index_namespace, "blocking_time": 0},
                {"queue": self._queue_ceph_task_index, "blocking_time": 0}
            ]

        override_blocking = False

        for i, q in enumerate(pattern):
            try:
                if override_blocking or q["blocking_time"] == 0:
                    new_ceph_task = q["queue"].get(False)
                else:
                    new_ceph_task = q["queue"].get(True, q["blocking_time"])
            except queue.Empty:
                # block on the first queue again if we have gotten nothing
                # from all queues
                if pattern[i] == pattern[-1]:
                    # cl.verbose("override_blocking = False")
                    override_blocking = False
            else:
                # if we got something from a non priority queue we could
                # speed through this a bit faster
                if (i >= 1):
                    cl.verbose("override_blocking = True")
                    override_blocking = True
                return new_ceph_task
async def check_ack(self, reader):
    """
    Check for ack or nack.

    """
    ck = await reader.read(8)

    try:
        ck = ck.decode("UTF-8")
    except Exception as e:
        cl.error("An exception occurred: {}".format(e))
    else:
        if ck.lower() == "ack":
            return True

    return False
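# Minimal sketch of the ack/nack counterpart (an assumption for illustration;
# the project's actual send_ack/send_nack are not shown in this excerpt):
# check_ack above reads up to 8 bytes and compares them case-insensitively to
# "ack", so the sender only has to write the plain string and flush.
async def _example_send_ack(writer, positive=True):
    # write "ack" or "nack" and flush the transport
    writer.write(("ack" if positive else "nack").encode("UTF-8"))
    await writer.drain()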
async def read_new_file(self, reader, writer):
    """
    Read a new file from the server.

    """
    res = await self.read_data(reader, writer)
    if not res:
        await self.send_nack(writer)
        return

    # send a final ack
    await self.send_ack(writer)

    cl.info("New file from server: {}".format(res))
    self._new_file_receive_queue.put(res)
def _calc_and_write_objhash(self, objname):
    """
    Calculate the objhash and write it to the object tags on the cluster.

    """
    cl.debug("Calculating hash for {}".format(objname))
    objval = self._get_objval(objname)
    objhash = hashlib.sha1(objval).hexdigest()

    try:
        self._ioctx.set_xattr(objname, "sha1sum", objhash.encode())
    except AttributeError:
        # objhash can not be encoded
        pass

    return objhash
async def _periodic_index_update_coro(cls):
    """
    Update the index periodically.

    """
    # wait for other processes to get their stuff together
    await asyncio.sleep(5)

    # this messes with the tracking of datasets
    # while True:
    #     cl.info("Updating index")
    #     cls._event_datacopy_ceph_update_index.set()
    #     await asyncio.sleep(600)    # wait 10 minutes

    cl.info("Updating index")
    cls._event_datacopy_ceph_update_index.set()
async def watch_index_events(self, reader, writer):
    """
    Watch index events in a separate executor.

    """
    get_index_event = await self._loop.run_in_executor(
        None, self.index_event_executor)

    if not get_index_event:
        return None

    cl.info("Index request received")
    index = await self.get_index(reader, writer)
    self._index_data_queue.put(index)
    # self._index_pipe_remote.send(index)
    # self._index_avail_event.set()

    return True                 # something other than None
async def _file_request_connection_coro(self):
    """
    Handle file requests from the server.

    """
    while True:

        try:
            # open a connection
            cl.info("Attempting to open a connection for requesting files")
            file_request_reader, file_request_writer = (
                await asyncio.open_connection(
                    self._host, self._port, loop=self._loop)
            )

        except (OSError, asyncio.TimeoutError):
            cl.info("Can't establish a connection to request files, "
                    "waiting a bit")
            await asyncio.sleep(3.5)

        else:
            # perform a handshake for this connection
            task_handshake_request = {"task": "file_requests"}
            await self.send_connection(
                file_request_reader, file_request_writer,
                task_handshake_request
            )

            self._file_request_connection_active = True

            self._cancel_file_request_executor_event = threading.Event()

            file_request_connection_watchdog = self._loop.create_task(
                self._watch_file_request_connection(
                    file_request_reader, file_request_writer))

            try:
                while not file_request_reader.at_eof():
                    watch_file_request_queue_task = self._loop.create_task(
                        self.watch_file_request_queue(
                            file_request_reader, file_request_writer)
                    )
                    await watch_file_request_queue_task

            except Exception as e:
                cl.error("Exception in requests: {}".format(e))

            finally:
                file_request_writer.close()
                self._file_request_connection_active = False
                cl.info("Request connection closed")
async def _index_connection_coro(self):
    """
    Handle index requests.

    """
    while True:

        try:
            # open a connection
            cl.info("Attempting to open a connection for receiving the index")
            index_request_reader, index_request_writer = (
                await asyncio.open_connection(
                    self._host, self._port, loop=self._loop)
            )

        except (OSError, asyncio.TimeoutError):
            cl.info("Can't establish a connection to receive the index, "
                    "waiting a bit")
            await asyncio.sleep(3.5)

        else:
            # perform a handshake for this connection
            task_handshake = {"task": "index"}
            await self.send_connection(
                index_request_reader, index_request_writer, task_handshake)

            self._index_connection_active = True

            self._cancel_index_executor_event = threading.Event()

            index_connection_watchdog = self._loop.create_task(
                self._watch_index_connection(
                    index_request_reader, index_request_writer))

            try:
                while not index_request_reader.at_eof():
                    index_event_watch_task = self._loop.create_task(
                        self.watch_index_events(
                            index_request_reader, index_request_writer)
                    )
                    await index_event_watch_task

            except Exception as e:
                cl.error("Exception in index: {}".format(e))

            finally:
                index_request_writer.close()
                self._index_connection_active = False
                cl.info("Index connection closed")
def read_index_for_namespace(self, task_info):
    """
    Generate the index for a namespace.

    Returns a dictionary containing the namespace and its object attributes.

    """
    namespace = task_info["namespace"]

    cl.verbose("Reading index for namespace {}".format(namespace))

    self._set_namespace(namespace)
    index = self._get_index()

    return_dict = dict()
    return_dict["namespace"] = namespace
    return_dict["index"] = index

    self._unset_namespace()

    return return_dict
def __del__(self):
    """
    Close and shut down the connection.

    """
    try:
        self._ioctx.close()
        cl.debug("Ceph IO context closed")
    except Exception:
        cl.debug("Could not close ceph IO context")

    try:
        self._cluster.shutdown()
        cl.debug("Cluster access shut down")
    except Exception:
        cl.debug("Could not shut down cluster access")
async def _file_answer_connection_coro(self):
    """
    Handle answers to file requests from the server.

    """
    while True:

        try:
            # open a connection
            cl.info("Attempting to open a connection for receiving "
                    "requested files")
            file_answer_reader, file_answer_writer = (
                await asyncio.open_connection(
                    self._host, self._port, loop=self._loop)
            )

        except (OSError, asyncio.TimeoutError):
            cl.info("Can't establish a connection to receive requested "
                    "files, waiting a bit")
            await asyncio.sleep(3.5)

        else:
            # perform a handshake for this connection
            task_handshake_answer = {"task": "file_answers"}
            await self.send_connection(
                file_answer_reader, file_answer_writer, task_handshake_answer)

            self._file_answer_connection_active = True

            try:
                while not file_answer_reader.at_eof():
                    watch_file_request_server_answer_task = (
                        self._loop.create_task(
                            self.watch_file_request_server_answer(
                                file_answer_reader, file_answer_writer)
                        )
                    )
                    await watch_file_request_server_answer_task

            except Exception as e:
                cl.error("Exception in requests: {}".format(e))

            finally:
                file_answer_writer.close()
                self._file_answer_connection_active = False
                cl.info("Answer connection closed")
async def _new_file_information_connection_coro(self):
    """
    Handle information about new files from the server.

    """
    # try to maintain the connection
    while True:

        try:
            # open a connection
            cl.info("Attempting to open a connection for receiving "
                    "new file information")
            new_file_reader, new_file_writer = (
                await asyncio.open_connection(
                    self._host, self._port, loop=self._loop)
            )

        except (OSError, asyncio.TimeoutError):
            cl.info("Can't establish a connection to receive new file "
                    "information, waiting a bit")
            await asyncio.sleep(3.5)

        else:
            # perform a handshake on this connection
            task_handshake = {"task": "new_file_message"}
            await self.send_connection(
                new_file_reader, new_file_writer, task_handshake)

            self._new_file_information_connection_active = True

            try:
                while not new_file_reader.at_eof():
                    read_connection_task = self._loop.create_task(
                        self.read_new_file(new_file_reader, new_file_writer)
                    )
                    await read_connection_task

            except Exception as e:
                cl.error("Exception in new_files: {}".format(e))

            finally:
                new_file_writer.close()
                self._new_file_information_connection_active = False
                cl.info("New file connection closed")
async def read_data(self, reader, writer):
    """
    Read data from the connection.

    NOTE: Do not forget to send an ACK or NACK after using this method.
    Otherwise the connection might hang up.

        await self.send_ack(writer)
        await self.send_nack(writer)

    """
    # read the length prefix (up to 1 kB)
    length_b = await reader.read(1024)

    if reader.at_eof():
        return

    try:
        # try to parse it as an int (the length of the data to follow)
        length = struct.unpack("L", length_b)[0]
    except Exception as e:
        # if something goes wrong send a nack and start anew
        await self.send_nack(writer)
        cl.error("An exception occurred: {}".format(e))
        raise
    else:
        # otherwise send the ack
        await self.send_ack(writer)

    try:
        # try to read exactly the length of the data
        data = await reader.readexactly(length)
        res = data.decode("UTF-8")
        res = json.loads(res)
    except json.decoder.JSONDecodeError:
        # if we can not parse the json send a nack and start from the
        # beginning
        cl.debug("Parsing {} as json failed".format(res))
        await self.send_nack(writer)
        raise
    except Exception as e:
        # if ANYTHING else goes wrong send a nack and start from the
        # beginning
        await self.send_nack(writer)
        cl.error("An exception occurred: {}".format(e))
        raise
    else:
        # otherwise return the received data
        return res
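# Illustrative sketch (not part of the original module) of the wire format
# that read_data above expects from its peer: a struct-packed length prefix,
# an ack from the receiver, then the JSON payload, followed by a final
# ack/nack. The function name is hypothetical; the project's actual
# send_connection implementation is not shown in this excerpt.
async def _example_send_length_prefixed_json(reader, writer, dictionary):
    payload = json.dumps(dictionary).encode("UTF-8")

    # announce the payload length first (struct "L", matching read_data)
    writer.write(struct.pack("L", len(payload)))
    await writer.drain()

    # the receiver acks the length before the payload is sent
    ack = (await reader.read(8)).decode("UTF-8")
    if ack.lower() != "ack":
        return False

    # send the payload; the receiver parses it as JSON and acks once more
    writer.write(payload)
    await writer.drain()

    ack = (await reader.read(8)).decode("UTF-8")
    return ack.lower() == "ack"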
def __init__(cls,
             queue_sim_datacopy_new_file,
             queue_datacopy_ceph_request_hash_for_new_file,
             queue_datacopy_ceph_answer_hash_for_new_file,
             queue_datacopy_backend_new_file_and_hash,
             event_datacopy_backend_get_index,
             queue_datacopy_backend_index_data,
             event_datacopy_ceph_update_index,
             queue_datacopy_ceph_filename_and_hash,
             event_data_manager_shutdown,
             lock_datacopy_ceph_filename_and_hash
             ):
    # receive new file information from the simulation
    cls._queue_sim_datacopy_new_file = queue_sim_datacopy_new_file

    # request a hash for the file
    cls._queue_datacopy_ceph_request_hash_for_new_file = \
        queue_datacopy_ceph_request_hash_for_new_file
    cls._queue_datacopy_ceph_answer_hash_for_new_file = \
        queue_datacopy_ceph_answer_hash_for_new_file

    # forward file and hash to the backend
    cls._queue_datacopy_backend_new_file_and_hash = \
        queue_datacopy_backend_new_file_and_hash

    # serve index requests from the backend
    cls._event_datacopy_backend_get_index = event_datacopy_backend_get_index
    cls._queue_datacopy_backend_index_data = queue_datacopy_backend_index_data

    # request the index from the ceph cluster
    cls._event_datacopy_ceph_update_index = event_datacopy_ceph_update_index
    cls._queue_datacopy_ceph_filename_and_hash = \
        queue_datacopy_ceph_filename_and_hash

    # shutdown event
    cls._event_data_manager_shutdown = event_data_manager_shutdown

    # index queue lock
    cls._lock_datacopy_ceph_filename_and_hash = \
        lock_datacopy_ceph_filename_and_hash

    try:
        # asyncio: watch the queues and the shutdown event
        cls._loop = asyncio.get_event_loop()

        # task for reading the queues
        cls._queue_reader_task = cls._loop.create_task(
            cls._queue_reader_coro(cls))

        # tasks for updating the index
        cls._index_updater_task = cls._loop.create_task(
            cls._index_updater_coro(cls))
        cls._periodic_index_update_task = cls._loop.create_task(
            cls._periodic_index_update_coro(cls))

        tasks = [
            cls._queue_reader_task,
            cls._index_updater_task,
            cls._periodic_index_update_task
        ]

        cls._loop.run_until_complete(asyncio.wait(tasks))

        # stop the event loop
        cls._loop.call_soon_threadsafe(cls._loop.stop)

        cls.__del__()

        cl.debug("Shutdown of local data manager process complete")

    except KeyboardInterrupt:
        # Ctrl C passes quietly
        pass
def add_file(cls, namespace, key, sha1sum):
    # # this can take on the order of microseconds
    # cl.debug("Adding file {}/{}/{}".format(namespace, key, sha1sum))

    hashed_key = hash(str("{}\t{}".format(namespace, key)))

    if hashed_key not in cls._hashset:

        simtype = None
        to_parse = None
        field_type = None
        fieldname = None
        skintype = None
        elemtype = None

        try:
            string = key.split("universe.fo.")[1]
            objects, timestep = string.split("@")
            objects_definition = objects.split(".")

            simtype = objects_definition[0]
            # if the simtype is not ta or ma the file format is not supported
            if simtype not in ["ta", "ma"]:
                raise ValueError

            # parse mesh or field; only a field has a field_type
            if objects_definition[1] in ["nodal", "elemental"]:
                to_parse = "field"
            else:
                to_parse = "mesh"

            if to_parse == "field":
                usage = objects_definition[1]
                fieldname = objects_definition[2]
                try:
                    elemtype = objects_definition[3]
                except IndexError:
                    pass

            elif to_parse == "mesh":
                usage = objects_definition[1]

                if usage == "nodes":
                    pass
                elif usage == "elements":
                    elemtype = objects_definition[2]
                elif usage == "skin":
                    skintype = objects_definition[2]
                    elemtype = objects_definition[3]
                elif usage == "elementactivationbitmap":
                    elemtype = objects_definition[2]
                elif usage == "elset":
                    fieldname = objects_definition[2]
                    elemtype = objects_definition[3]
                elif usage == "nset":
                    fieldname = objects_definition[2]
                elif usage == "boundingbox":
                    pass
                else:
                    cl.debug_warning(
                        "Can not add file {}/{}".format(namespace, key))
                    return

        except Exception:
            # YOU SHALL NOT PARSE
            cl.debug_warning("Can not add file {}/{}".format(namespace, key))
            return

        try:
            cls._hashset.add(hashed_key)

            # create the nested dictionary entries as needed
            if namespace not in cls._local_copy:
                cls._local_copy[namespace] = {}
            if timestep not in cls._local_copy[namespace]:
                cls._local_copy[namespace][timestep] = {}
            if simtype not in cls._local_copy[namespace][timestep]:
                cls._local_copy[namespace][timestep][simtype] = {}
            if usage not in cls._local_copy[namespace][timestep][simtype]:
                cls._local_copy[namespace][timestep][simtype][usage] = {}

            if usage in ["nodes", "boundingbox"]:
                i_entry = cls._local_copy[namespace][timestep][simtype][usage]

            if usage in ["elements", "elementactivationbitmap"]:
                if elemtype not in cls._local_copy[namespace][timestep][simtype][usage]:
                    cls._local_copy[namespace][timestep][simtype][usage][elemtype] = {}
                i_entry = cls._local_copy[namespace][timestep][simtype][usage][elemtype]

            if usage == "skin":
                if skintype not in cls._local_copy[namespace][timestep][simtype][usage]:
                    cls._local_copy[namespace][timestep][simtype][usage][skintype] = {}
                if elemtype not in cls._local_copy[namespace][timestep][simtype][usage][skintype]:
                    cls._local_copy[namespace][timestep][simtype][usage][skintype][elemtype] = {}
                i_entry = cls._local_copy[namespace][timestep][simtype][usage][skintype][elemtype]

            if usage in ["elemental", "elset"]:
                if fieldname not in cls._local_copy[namespace][timestep][simtype][usage]:
                    cls._local_copy[namespace][timestep][simtype][usage][fieldname] = {}
                if elemtype not in cls._local_copy[namespace][timestep][simtype][usage][fieldname]:
                    cls._local_copy[namespace][timestep][simtype][usage][fieldname][elemtype] = {}
                i_entry = cls._local_copy[namespace][timestep][simtype][usage][fieldname][elemtype]

            if usage in ["nodal", "nset"]:
                if fieldname not in cls._local_copy[namespace][timestep][simtype][usage]:
                    cls._local_copy[namespace][timestep][simtype][usage][fieldname] = {}
                i_entry = cls._local_copy[namespace][timestep][simtype][usage][fieldname]

            i_entry['object_key'] = key
            i_entry['sha1sum'] = sha1sum

        except Exception:
            pass
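# Illustrative example of the layout built above (the field name is made up
# for this sketch): an object key such as
#
#     universe.fo.ta.nodal.temperature@0.1
#
# is parsed as simtype="ta", usage="nodal", fieldname="temperature" and
# timestep="0.1", and ends up in the local copy as
#
#     _local_copy[namespace]["0.1"]["ta"]["nodal"]["temperature"] = {
#         "object_key": "universe.fo.ta.nodal.temperature@0.1",
#         "sha1sum": sha1sum
#     }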
async def _queue_reader_coro(self, pattern=None):
    """
    Read the queues for new things to do.

    """
    # select a priority pattern and parse the queues based on that; we do this
    # because it is very difficult to get a fast priority queue when
    # multiprocessing is involved
    #
    data_pattern = [
        {"queue": self._queue_ceph_task_data, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        # {"queue": self._queue_ceph_task_index_hashes, "blocking_time": 0}
    ]
    #
    hashes_pattern = [
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0},
        # {"queue": self._queue_ceph_task_index_hashes, "blocking_time": 0}
    ]
    #
    index_namespaces_pattern = [
        {"queue": self._queue_ceph_task_index_namespace, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0}
    ]
    #
    index_pattern = [
        {"queue": self._queue_ceph_task_index, "blocking_time": 1e-1},
        {"queue": self._queue_ceph_task_hashes, "blocking_time": 0},
        {"queue": self._queue_ceph_task_data, "blocking_time": 0}
    ]

    if pattern == "data":
        queue_pattern = data_pattern
    elif pattern == "hashes":
        queue_pattern = hashes_pattern
    # elif pattern == "index_hashes":
    #     queue_pattern = index_hashes_pattern
    elif pattern == "index_namespaces":
        queue_pattern = index_namespaces_pattern
    elif pattern == "index":
        queue_pattern = index_pattern
    else:
        cl.verbose_warning(
            "Pattern {} not found, assigning None".format(pattern))
        queue_pattern = None

    while True:

        new_task = await self._loop.run_in_executor(
            None, functools.partial(
                self._queue_reader_executor, pattern=queue_pattern))

        if not new_task:
            # return None when we want to stop
            return None

        try:
            task = new_task["task"]
            task_info = new_task["task_info"]
        except KeyError:
            cl.warning("Could not read task dictionary {}".format(new_task))
        else:
            if (task == "read_object_value"):
                cl.debug("Reading object value, task_info = {}".format(
                    task_info))
                object_value_dict = self.read_everything_for_object(task_info)
                self._queue_object_data.put(object_value_dict)

            if (task == "read_object_hash"):
                cl.debug("Reading object hash, task_info = {}".format(
                    task_info))
                object_value_dict = self.read_hash_for_object(task_info)
                self._queue_object_hash.put(object_value_dict)

            if (task == "read_object_tags"):
                cl.debug("Reading object tags, task_info = {}".format(
                    task_info))
                object_value_dict = self.read_tags_for_object(task_info)
                self._queue_object_tags.put(object_value_dict)

            if (task == "read_namespace_index"):
                cl.debug("Reading namespace index, task_info = {}".format(
                    task_info))
                namespace_index_dict = self.read_index_for_namespace(task_info)
                self._queue_namespace_index.put(namespace_index_dict)

            if (task == "read_index"):
                cl.debug("Reading index, task_info = {}".format(task_info))
                index_dict = self.read_index(task_info)
                self._queue_index.put(index_dict)

                # drain the index request queue; we just finished updating
                # and don't need to do it again for a while
                while True:
                    try:
                        self._queue_ceph_task_index.get(block=False)
                    except queue.Empty:
                        break
def start_tasks(args):
    """
    Start the main tasks.

    """
    cl.debug("Starting program tasks")

    ceph_conf = pathlib.Path(args.config)
    ceph_pool = args.pool
    ceph_user = args.user

    host = ""
    simulation_port = args.simulation_port
    backend_port = args.backend_port

    # create all necessary queues, pipes and events for inter process
    # communication

    #
    # inter process communication for registering new files
    #
    # a queue for sending information about new files from the simulation to
    # the data copy process
    queue_sim_datacopy_new_file = multiprocessing.Queue()
    #
    # a queue for requesting the hash for a new file from the ceph cluster
    queue_datacopy_ceph_request_hash_for_new_file = multiprocessing.Queue()
    #
    # a queue for answering the request for a hash for a new file from the
    # ceph cluster; contains the name and the hash
    queue_datacopy_ceph_answer_hash_for_new_file = multiprocessing.Queue()
    #
    # a queue for sending the name and hash of a new file to the backend
    # manager
    queue_datacopy_backend_new_file_and_hash = multiprocessing.Queue()

    #
    # inter process communication for requesting files from the ceph cluster
    #
    # a queue for sending a request for a file to the ceph manager
    queue_backend_ceph_request_file = multiprocessing.Queue()
    #
    # a queue for answering the request for a file with the file name,
    # contents and hash
    queue_backend_ceph_answer_file_name_contents_hash = multiprocessing.Queue()

    #
    # inter process communication for requesting the index for the backend
    # manager from the data copy
    #
    # an event for requesting the index for the backend from the data copy
    event_datacopy_backend_get_index = multiprocessing.Event()
    #
    # a queue for returning the requested index
    queue_datacopy_backend_index_data = multiprocessing.Queue()

    #
    # inter process communication for requesting the index for the data
    # manager from the ceph cluster
    #
    # an event for requesting the index for the data copy from the ceph
    # cluster
    event_datacopy_ceph_update_index = multiprocessing.Event()
    #
    # a queue for updating the local datacopy with these names and hashes
    queue_datacopy_ceph_filename_and_hash = multiprocessing.Queue()
    #
    # a lock for queue_datacopy_ceph_filename_and_hash
    lock_datacopy_ceph_filename_and_hash = multiprocessing.Lock()

    #
    # inter process communication for shutting down processes
    #
    # an event for shutting down the backend manager
    event_backend_manager_shutdown = multiprocessing.Event()
    #
    # an event for shutting down the ceph manager
    event_ceph_shutdown = multiprocessing.Event()
    #
    # an event for shutting down the local data manager
    event_data_manager_shutdown = multiprocessing.Event()

    # threads would probably have done it as well, but no time to change now
    #
    localdata_manager = multiprocessing.Process(
        target=LocalDataManager,
        args=(
            queue_sim_datacopy_new_file,
            queue_datacopy_ceph_request_hash_for_new_file,
            queue_datacopy_ceph_answer_hash_for_new_file,
            queue_datacopy_backend_new_file_and_hash,
            event_datacopy_backend_get_index,
            queue_datacopy_backend_index_data,
            event_datacopy_ceph_update_index,
            queue_datacopy_ceph_filename_and_hash,
            event_data_manager_shutdown,
            lock_datacopy_ceph_filename_and_hash
        )
    )
    simulation_manager = multiprocessing.Process(
        target=SimulationManager,
        args=(
            host,
            simulation_port,
            queue_sim_datacopy_new_file,
        )
    )
    backend_manager = multiprocessing.Process(
        target=BackendManager,
        args=(
            host,
            backend_port,
            queue_datacopy_backend_new_file_and_hash,
            event_datacopy_backend_get_index,
            queue_datacopy_backend_index_data,
            queue_backend_ceph_request_file,
            queue_backend_ceph_answer_file_name_contents_hash,
            event_backend_manager_shutdown
        )
    )
    ceph_manager = multiprocessing.Process(
        target=CephManager,
        args=(
            ceph_conf,
            ceph_pool,
            ceph_user,
            event_ceph_shutdown,
            queue_datacopy_ceph_request_hash_for_new_file,
            queue_datacopy_ceph_answer_hash_for_new_file,
            queue_backend_ceph_request_file,
            queue_backend_ceph_answer_file_name_contents_hash,
            event_datacopy_ceph_update_index,
            queue_datacopy_ceph_filename_and_hash,
            lock_datacopy_ceph_filename_and_hash
        )
    )

    try:
        localdata_manager.start()
        backend_manager.start()
        simulation_manager.start()
        ceph_manager.start()

        localdata_manager.join()
        backend_manager.join()
        simulation_manager.join()
        ceph_manager.join()

    except KeyboardInterrupt:
        print()
        cl.info('Detected KeyboardInterrupt -- Shutting down')
        event_backend_manager_shutdown.set()
        event_ceph_shutdown.set()
        # event_data_manager_shutdown.set()
        time.sleep(.1)      # give the processes some time to flush it all out

    finally:
        localdata_manager.terminate()
        backend_manager.terminate()
        simulation_manager.terminate()
        ceph_manager.terminate()
async def _ceph_task_coro(self):
    """
    Loop over all the possible task queues for ceph.

    """
    try:
        # seems sane: 100 checks per second
        # set to 0 if we need speed
        loop_throttle_time = 1e-2

        while True:

            # check for ceph shutdown
            if self._event_ceph_shutdown.is_set():
                break

            ################################################################
            # first go through all the tasks and give them to the processes
            #
            # index request
            if self._event_datacopy_ceph_update_index.is_set():
                self._event_datacopy_ceph_update_index.clear()
                task = {
                    "task": "read_index",
                    "task_info": {}
                }
                self._queue_ceph_process_new_task_index.put(task)

            # request for the hash of a file
            try:
                hash_request = (
                    self._queue_datacopy_ceph_request_hash_for_new_file.get(
                        block=False))
                namespace = hash_request["namespace"]
                key = hash_request["key"]
                task = {
                    "task": "read_object_hash",
                    "task_info": {
                        "namespace": namespace,
                        "object": key
                    }
                }
                self._queue_ceph_process_new_task_hashes.put(task)
                # self._queue_ceph_process_new_task.put(task)
            except queue.Empty:
                pass

            # request for everything of a file
            try:
                file_request = (
                    self._queue_backend_ceph_request_file.get(block=False))
                namespace = file_request["namespace"]
                key = file_request["key"]
                task = {
                    "task": "read_object_value",
                    "task_info": {
                        "namespace": namespace,
                        "object": key
                    }
                }
                self._queue_ceph_process_new_task_data.put(task)
            except queue.Empty:
                pass

            ################################################################
            # then go through everything that the processes have done and
            # return that to the other managers
            #
            # get the index and dump it into the data copy
            try:
                fresh_index = self._queue_ceph_process_index.get(
                    block=False)["index"]
            except queue.Empty:
                # turn rate throttling back on
                if not loop_throttle_time:
                    cl.verbose("Throttling loop again (index is updated)")
                    loop_throttle_time = 1e-2
            else:
                # disable rate throttling so we can get the whole index
                # quickly
                cl.verbose("Unthrottling loop (updating index)")
                loop_throttle_time = 0

                with self._lock_datacopy_ceph_filename_and_hash:    # LOCK
                    for namespace_index in fresh_index:
                        namespace = namespace_index["namespace"]
                        for obj in namespace_index["index"].keys():
                            tags = namespace_index["index"][obj]
                            try:
                                sha1sum = tags["sha1sum"]
                            except KeyError:
                                sha1sum = ""
                            ns_name_hash = {
                                "namespace": namespace,
                                "key": obj,
                                "sha1sum": sha1sum
                            }
                            self._queue_datacopy_ceph_filename_and_hash.put(
                                ns_name_hash)

            # get the hash for an object
            try:
                obj_hash = self._queue_ceph_process_object_hash.get(
                    block=False)
                new_file_dict = dict()
                new_file_dict["namespace"] = obj_hash["namespace"]
                new_file_dict["key"] = obj_hash["object"]
                new_file_dict["sha1sum"] = obj_hash["tags"]["sha1sum"]
                self._queue_datacopy_ceph_answer_hash_for_new_file.put(
                    new_file_dict)
            except queue.Empty:
                pass

            # get everything for an object
            try:
                obj_everything = self._queue_ceph_process_object_data.get(
                    block=False)
                self._queue_backend_ceph_answer_file_name_contents_hash.put(
                    obj_everything)
            except queue.Empty:
                pass

            ################################################################

            await asyncio.sleep(loop_throttle_time)     # rate throttling

    finally:
        # shut down the ceph connections
        self._event_ceph_process_shutdown.set()
        time.sleep(.1)
        for conn in self._conns:
            conn.terminate()
def __init__(
        self,
        ceph_config,
        ceph_pool,
        pool_user,
        task_pattern,                    # pattern to follow when doing tasks
        queue_ceph_task,                 # queue for receiving things to do
        queue_ceph_task_data,            # queue for tasks that retrieve data (contents and hashes)
        queue_ceph_task_hashes,          # queue for tasks that retrieve hashes (externally)
        queue_ceph_task_index_namespace, # queue for retrieving the index of a namespace
        queue_ceph_task_index,           # queue for retrieving the index (this starts a series of
                                         # events: getting the namespaces, then the files in every
                                         # namespace and then the respective hashes)
        event_shutdown_process,          # when this event is set the connection will be closed
        queue_index,                     # return queue for the index
        queue_namespace_index,           # return queue for the index of a namespace
        queue_object_tags,               # return queue for object tags
        queue_object_data,               # return queue for object data (with tags)
        queue_object_hash                # return queue for object hashes
):
    """
    Initialize the connection.

    """
    self._conffile = str(pathlib.Path(ceph_config))
    self._target_pool = ceph_pool
    self._rados_id = pool_user

    self._task_pattern = task_pattern

    self._queue_ceph_task = queue_ceph_task
    self._queue_ceph_task_data = queue_ceph_task_data
    self._queue_ceph_task_hashes = queue_ceph_task_hashes
    self._queue_ceph_task_index = queue_ceph_task_index
    self._queue_ceph_task_index_namespace = queue_ceph_task_index_namespace

    self._event_shutdown_process = event_shutdown_process

    self._queue_index = queue_index
    self._queue_namespace_index = queue_namespace_index
    self._queue_object_tags = queue_object_tags
    self._queue_object_data = queue_object_data
    self._queue_object_hash = queue_object_hash

    # connect to the cluster
    self._cluster = rados.Rados(
        conffile=self._conffile, rados_id=self._rados_id)
    self._cluster.connect()

    # try opening an IO context
    try:
        self._ioctx = self._cluster.open_ioctx(self._target_pool)
    except Exception as ex:
        cl.error("Exception occurred: {}".format(ex))
        raise

    try:
        # asyncio: watch the queues and the shutdown event
        self._loop = asyncio.get_event_loop()

        # task for reading the queue
        self._queue_reader_task = self._loop.create_task(
            self._queue_reader_coro(self._task_pattern))

        self._loop.run_until_complete(self._queue_reader_task)

        # stop the event loop
        self._loop.call_soon_threadsafe(self._loop.stop)

        self.__del__()

        cl.debug("Shutdown of ceph_connection process complete")

    except KeyboardInterrupt:
        # Ctrl C passes quietly
        pass
def __init__(
        self,
        host,
        port,
        new_file_receive_queue,
        get_index_event,
        index_data_queue,
        # index_avail_event,
        # index_pipe_remote,
        file_name_request_client_queue,
        file_contents_name_hash_client_queue,
        shutdown_client_event
):
    cl.info("Client init")

    self._host = host
    self._port = port

    # set up queues and events
    self._new_file_receive_queue = new_file_receive_queue
    self._get_index_event = get_index_event
    self._index_data_queue = index_data_queue
    # self._index_avail_event = index_avail_event
    # self._index_pipe_remote = index_pipe_remote
    self._file_name_request_client_queue = file_name_request_client_queue
    self._file_contents_name_hash_client_queue = (
        file_contents_name_hash_client_queue)
    self._shutdown_client_event = shutdown_client_event

    self._index_connection_active = False
    self._file_request_connection_active = False
    self._file_answer_connection_active = False
    self._new_file_information_connection_active = False

    self._loop = asyncio.get_event_loop()

    # create tasks for the individual connections
    new_file_information_connection_task = self._loop.create_task(
        self._new_file_information_connection_coro())
    index_connection_task = self._loop.create_task(
        self._index_connection_coro())
    file_request_connection_task = self._loop.create_task(
        self._file_request_connection_coro())
    file_answer_connection_task = self._loop.create_task(
        self._file_answer_connection_coro())

    # manage the queue cleanup when there are no active connections
    queue_cleanup_task = self._loop.create_task(
        self._queue_cleanup_coro())

    # shutdown_watch_task = self._loop.create_task(
    #     self._watch_shutdown_event_coro())

    self.tasks = [
        index_connection_task,
        file_request_connection_task,
        file_answer_connection_task,
        new_file_information_connection_task,
        queue_cleanup_task  # ,
        # shutdown_watch_task
    ]

    try:
        # start the tasks
        self._loop.run_until_complete(asyncio.wait(self.tasks))

        if self._shutdown_client_event.wait():
            self.stop()

    except KeyboardInterrupt:
        self.stop()