def load_all_processes(self):
    """
    Will detect all pickles in the running directory and will try to load
    them up into Processes. As soon as a pickle is considered for loading,
    a lock is placed on it, which is not released until the process is
    destroyed. This is necessary to prevent another thread from loading
    up the same process.

    A pickle that is locked or raises an IOError is skipped silently. A
    pickle that fails to load for any other ordinary reason is logged and
    then deleted from the running directory.

    :return: a list of Process instances
    """
    processes = []
    for f in glob.glob(path.join(self._running_directory, "*.pickle")):
        try:
            process = self.create_from_file_and_persist(f)
        except (portalocker.LockException, IOError):
            # Locked or unreadable: leave the file alone and move on.
            continue
        except Exception:
            # Deliberately not BaseException: KeyboardInterrupt/SystemExit
            # raised while loading must propagate instead of being treated
            # as a corrupt checkpoint, which would delete a valid pickle.
            LOGGER.warning("Failed to load checkpoint '{}' (deleting)\n{}"
                           .format(f, traceback.format_exc()))
            try:
                os.remove(f)
            except OSError:
                # Best effort: removal failures are ignored.
                pass
        else:
            processes.append(process)
    return processes
def on_process_playing(self, process):
    """
    Acquire the file lock stored in ``self._filelocks`` for the process.

    If the lock cannot be acquired, a warning naming the running path of
    the process is logged and the lock entry for its pid is dropped from
    ``self._filelocks``.

    :param process: the process that started playing; its ``pid`` is the
        key used to look up the lock
    """
    pid = process.pid
    lock = self._filelocks[pid]
    try:
        lock.acquire()
    except portalocker.LockException:
        running_path = self.get_running_path(pid)
        message = "Couldn't acquire file lock for '{}', not persisting.".format(
            running_path)
        LOGGER.warning(message)
        # Forget the lock we failed to take.
        del self._filelocks[pid]
def load_all_checkpoints(self):
    """
    Load every ``*.pickle`` file found in the running directory.

    Each file is passed to ``self.load_checkpoint_from_file``. A file that
    fails to load is skipped after logging a warning with the exception
    text; the remaining files are still processed.

    :return: a list with the loaded checkpoint for every file that could
        be loaded
    """
    checkpoints = []
    for f in glob.glob(path.join(self._running_directory, "*.pickle")):
        try:
            checkpoints.append(self.load_checkpoint_from_file(f))
        except Exception as e:
            # Format the exception itself rather than ``e.message``: that
            # attribute is deprecated since Python 2.6 and does not exist
            # in Python 3, where reading it would raise AttributeError
            # from inside this handler and abort the whole scan.
            # ``Exception`` (not ``BaseException``) lets KeyboardInterrupt
            # and SystemExit propagate.
            LOGGER.warning(
                "Failed to load checkpoint {} because of exception\n"
                "{}".format(f, e))
    return checkpoints
def load_all_checkpoints(self):
    """
    Load every ``*.pickle`` file found in the running directory.

    Each file is passed to ``self.load_checkpoint_from_file``. Files that
    are locked or raise an IOError are skipped silently. A file that fails
    to load for any other ordinary reason is logged and then deleted.

    :return: a list with the loaded checkpoint for every file that could
        be loaded
    """
    checkpoints = []
    for f in glob.glob(path.join(self._running_directory, "*.pickle")):
        try:
            checkpoints.append(self.load_checkpoint_from_file(f))
        except (portalocker.LockException, IOError):
            # Don't load locked checkpoints or those with IOErrors;
            # these often come if the pickle was deleted since the glob.
            pass
        except Exception:
            # Deliberately not BaseException: KeyboardInterrupt/SystemExit
            # raised while loading must propagate instead of being treated
            # as a corrupt checkpoint, which would delete a valid pickle.
            LOGGER.warning(
                "Failed to load checkpoint '{}' (deleting)\n"
                "{}".format(f, traceback.format_exc()))
            try:
                os.remove(f)
            except OSError:
                # Best effort: removal failures are ignored.
                pass
    return checkpoints
def load_all_processes(self):
    """
    Will detect all pickles in the running directory and will try to load
    them up into Processes. As soon as a pickle is considered for loading,
    a lock is placed on it, which is not released until the process is
    destroyed. This is necessary to prevent another thread from loading
    up the same process.

    A pickle that is locked or raises an IOError is skipped silently. For
    a pickle that fails to load for any other ordinary reason, the failure
    is logged, the node whose pk is the pickle's base name is marked as
    failed and sealed (best effort), and the pickle is moved to
    ``self.failed_directory`` so it can be inspected for debugging.

    :return: a list of Process instances
    """
    processes = []
    for f in glob.glob(path.join(self._running_directory, "*.pickle")):
        try:
            process = self.create_from_file_and_persist(f)
        except (portalocker.LockException, IOError):
            # Locked or unreadable: leave the file alone and move on.
            continue
        except Exception:
            # Deliberately not BaseException: KeyboardInterrupt/SystemExit
            # raised while loading must propagate instead of failing the
            # node and moving a pickle that may be perfectly valid.
            LOGGER.warning("Failed to load checkpoint '{}'\n{}".format(
                f, traceback.format_exc()))

            filename = os.path.basename(f)
            # Try to load the node corresponding to the corrupt pickle,
            # set it to FAILED and seal it. Whatever happens, the pickle
            # is moved to the failed directory afterwards.
            try:
                from aiida.orm import load_node
                pk = os.path.splitext(filename)[0]
                node = load_node(int(pk))
                node._set_attr(node.FAILED_KEY, True)
                node.seal()
            except Exception:
                LOGGER.warning(
                    "failed to clean up the node of the corrupt pickle "
                    "'{}'\n{}".format(f, traceback.format_exc()))
            finally:
                LOGGER.warning("moving '{}' to failed directory".format(f))
                try:
                    os.rename(
                        f, os.path.join(self.failed_directory, filename))
                except OSError:
                    # Report the failure: the pickle stays in the running
                    # directory and will be picked up again next time.
                    LOGGER.warning(
                        "could not move '{}' to failed directory\n{}".format(
                            f, traceback.format_exc()))
        else:
            processes.append(process)
    return processes