def should_taxis_be_running(self, taxi_list):
    """Determines whether tasks are available for each taxi to run.

    Taxis should be run if there are trunks available for them. If there are
    no active trunks in the forest, but there are tasks ready to run, tells
    Pool to run enough taxis to work on all ready tasks.

    Args:
        taxi_list: List of taxi objects; are there tasks available for these
            taxis to run?
    Returns:
        Dictionary like {(taxi object) : (should taxi be running?)}
    """
    task_blob = self.get_all_tasks(None, include_complete=False) # dict(id:task)

    # There's nothing we can do with errored (E) or held (H) taxis
    taxi_list = [t for t in taxi_list if t.status in ['Q', 'R', 'I']] # Only want queued, running, or idle taxis

    # We only care about taxis running on this dispatch
    taxi_list = [t for t in taxi_list
                 if taxi.expand_path(t.dispatch_path) == taxi.expand_path(self.db_path)]

    # Convenient dictionary like {(name of taxi) : (taxi object)}
    taxi_dict = {}
    for my_taxi in taxi_list:
        taxi_dict[str(my_taxi)] = my_taxi

    # Initial desired state is the present state -- idle taxis idle, active taxis active.
    # Desired state is a dict like { str(taxi_name) : (should taxi be active?) }
    desired_state = {}
    for my_taxi in taxi_list:
        desired_state[str(my_taxi)] = my_taxi.status in ['Q', 'R'] # Active means queued or running

    # If a taxi has a trunk only it can run, or some tasks are ready that only
    # this taxi can run, it must be running
    for my_taxi in taxi_list:
        if self._trunk_number(task_blob, for_taxi=my_taxi) > 0:
            desired_state[str(my_taxi)] = True
        if self._N_ready_tasks(task_blob, for_taxi=my_taxi) > 0:
            desired_state[str(my_taxi)] = True

    # With taxi-specific requirements imposed, now just make sure we have enough taxis running
    active_taxis = [taxi_dict[k] for (k, v) in desired_state.items() if v]
    idle_taxis = [taxi_dict[k] for (k, v) in desired_state.items() if not v]

    N_active_taxis = len(active_taxis)
    N_active_trunks = self._trunk_number(task_blob)

    # Even without trunks, if we have tasks that are ready, we need at least one taxi
    N_ready_tasks = self._N_ready_tasks(task_blob)
    if N_active_trunks == 0 and N_ready_tasks > 0:
        N_active_trunks = N_ready_tasks # Correct behavior for trunkless task forests

    # Activate idle taxis until we have enough
    for my_taxi in idle_taxis:
        if N_active_taxis >= N_active_trunks:
            break # We have enough taxis running
        desired_state[str(my_taxi)] = True
        N_active_taxis += 1

    return desired_state
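# A minimal, self-contained sketch (not part of the taxi package) of the
# activation rule implemented at the end of should_taxis_be_running: wake
# idle taxis until the number of active taxis matches the number of active
# trunks, falling back to the number of ready tasks for a trunkless forest.
# The helper name and plain-dict inputs are illustrative assumptions.
def _sketch_activate_idle_taxis(desired_state, n_active_trunks, n_ready_tasks):
    """desired_state: {taxi_name: bool}. Returns an updated copy."""
    state = dict(desired_state)
    target = n_active_trunks if n_active_trunks > 0 else n_ready_tasks
    n_active = sum(1 for active in state.values() if active)
    for name in sorted(state):
        if n_active >= target:
            break # We have enough taxis running
        if not state[name]:
            state[name] = True
            n_active += 1
    return state

# Example: two idle taxis, one active trunk -> exactly one taxi is woken.
# _sketch_activate_idle_taxis({'taxi0': False, 'taxi1': False}, 1, 0)
# -> {'taxi0': True, 'taxi1': False}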
def test_read_log_dirs(self):
    dupe_pool_obj = SQLitePool(self.test_filename, 'test_pool')
    with self.test_pool:
        pass
    with dupe_pool_obj:
        self.assertEqual(dupe_pool_obj.work_dir, taxi.expand_path('./tests/work/'))
        self.assertEqual(dupe_pool_obj.log_dir, taxi.expand_path('./tests/log/'))
def __init__(self, work_dir, log_dir, thrash_delay=300, allocation=None, queue=None):
    self.work_dir = taxi.expand_path(work_dir)
    self.log_dir = taxi.expand_path(log_dir)

    ## thrash_delay sets the minimum time between taxi resubmissions, in seconds.
    ## Default is 5 minutes.
    self.thrash_delay = thrash_delay

    # Allocation to run on
    self.allocation = allocation
    # Queue to submit to
    self.queue = queue
def __enter__(self):
    """Context interface: connect to SQLite Pool DB.

    If performing multiple operations, it is faster to leave a "connection"
    open than to open and close it repeatedly; it is dangerous to leave a
    connection open constantly."""

    # Don't allow layered entry
    if self._in_context:
        return
    self._in_context = True

    self.conn = sqlite3.connect(self.db_path, timeout=30.0)
    self.conn.row_factory = sqlite3.Row # Row factory for return-as-dict

    self._get_or_create_pool() # Also retrieves info about pool from DB, including working dir, so must occur here

    taxi.ensure_path_exists(taxi.expand_path(self.work_dir)) # Dig out working directory if it doesn't exist
    taxi.ensure_path_exists(taxi.expand_path(self.log_dir))  # Dig out log directory if it doesn't exist
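# Usage sketch (illustrative, not from the original source): batching several
# operations inside one `with` block reuses a single SQLite connection rather
# than reconnecting per call, and the _in_context guard above makes nested
# `with` blocks on the same object harmless. The method names below are
# hypothetical stand-ins for whatever pool operations are being batched.
#
# pool = SQLitePool('./pools/test.sqlite', 'test_pool')
# with pool:
#     pool.some_pool_operation()    # hypothetical
#     pool.another_pool_operation() # hypothetical; same connection reused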
def __init__(self, db_path):
    self.db_path = taxi.expand_path(db_path)

    self._setup_complete = False

    self._in_context = False
    with self:
        pass # Semi-kludgey creation/retrieval of dispatch DB
def _rollback(self, rollback_dir=None, delete_files=False):
    """Called by Dispatcher.rollback() to roll back this Runner.

    Removes all output files generated in executing the task (which are stored
    in self.output_files) by either deleting them (if delete_files) or by
    moving them to rollback_dir (if specified).
    """
    super(Runner, self)._rollback()

    if self.output_files is not None and len(self.output_files) > 0:
        assert not (rollback_dir is None and delete_files == False),\
            "Must either provide a rollback_dir to copy files to or give permission to delete_files"

        if rollback_dir is not None:
            rollback_dir = expand_path(rollback_dir)
            if not os.path.exists(rollback_dir):
                os.makedirs(rollback_dir) # Dig out the rollback directory

        self.output_files = [fn for fn in self.output_files if fn is not None] # Happens when e.g. MCMC passes saveg up, but saveg was None
        for fn in [str(ss) for ss in self.output_files]:
            if not os.path.exists(fn):
                print "Rollback unable to find file: '{0}'".format(fn)
                continue

            if rollback_dir is not None:
                to_path = os.path.join(rollback_dir, os.path.basename(fn))

                # Don't clobber any files in the rollback directory -- rename duplicate files like hmc_output(1)
                counter = 0
                while os.path.exists(to_path):
                    counter += 1
                    new_fn = os.path.basename(fn) + '({0})'.format(counter)
                    to_path = os.path.join(rollback_dir, new_fn)

                print "Rollback: '{0}' -> '{1}'".format(fn, to_path)
                shutil.move(fn, to_path)
            elif delete_files: # Safety: don't delete files, even if granted permission, when a rollback_dir is provided
                print "Rollback: deleting '{0}'".format(fn)
                os.remove(fn)

        # Output files are cleared; no need to keep track of them anymore
        self.output_files = []
    else:
        print "No output files tracked for task {0} ({1})".format(getattr(self, 'id', None), self)
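# Self-contained sketch (simplified from the loop above; not part of the
# original source) of the no-clobber rename rule used when moving files into
# rollback_dir: duplicate basenames get a "(N)" suffix, e.g. hmc_output(1).
import os

def _sketch_unclobbered_path(rollback_dir, fn):
    to_path = os.path.join(rollback_dir, os.path.basename(fn))
    counter = 0
    while os.path.exists(to_path):
        counter += 1
        new_fn = os.path.basename(fn) + '({0})'.format(counter)
        to_path = os.path.join(rollback_dir, new_fn)
    return to_path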
def __init__(self, db_path, pool_name=None, work_dir=None, log_dir=None,
             allocation=None, queue=None, thrash_delay=300):
    """
    Argument options: either [db_path(, pool_name, thrash_delay)] are
    specified, which gives the location of an existing pool; or [work_dir,
    log_dir(, pool_name, thrash_delay)] are specified, which gives where to
    create a new pool. If all are specified, then [db_path, pool_name] takes
    priority (i.e., access-existing-pool behavior) and the remaining inputs
    are ignored.

    If no pool_name is provided when accessing an existing DB and there is
    only one pool in the pool DB, then that pool is accessed. If more than one
    pool is present, pool_name must be specified.

    If creating a new pool DB and pool_name is not specified, the pool is
    named 'default'.

    queue specifies which queue/machine to submit to, if relevant (e.g., bc or
    ds on the USQCD machines).
    """
    super(SQLitePool, self).__init__(work_dir=work_dir, log_dir=log_dir,
                                     thrash_delay=thrash_delay,
                                     allocation=allocation, queue=queue)

    self.db_path = taxi.expand_path(db_path)

    if not os.path.exists(self.db_path) and pool_name is None:
        # Case: Making a new pool, but pool_name not provided. Use the default name.
        self.pool_name = 'default'
    else:
        # Case: Accessing an existing pool. If pool_name is not provided and there is
        # only one pool in the DB, use that one. If pool_name is provided, find that one.
        self.pool_name = pool_name

    self.conn = None
    self._in_context = False

    with self:
        pass # Semi-kludgey creation/retrieval of pool DB
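# Usage sketch (illustrative paths; not from the original source) of the two
# construction modes described in the docstring above.
#
# Access an existing pool DB (pool_name optional if the DB holds one pool):
# pool = SQLitePool(db_path='./pools/my_pool.sqlite', pool_name='default')
#
# Create a new pool DB, specifying where taxis work and log:
# pool = SQLitePool(db_path='./pools/new_pool.sqlite',
#                   work_dir='./work/', log_dir='./log/')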
def initialize_new_task_pool(self, task_pool, priority_method='canvas', imports=None):
    """Loads the tasks from task_pool into an empty dispatcher by compiling
    the specified tasks (i.e., assigning IDs and priorities, rendering them
    into a storable format (e.g., JSON)) and storing them in the dispatch
    (usually a DB).

    See Dispatcher._assign_priorities for priority_method options.
    """
    ## imports: Dispatcher needs to be able to import relevant runners.
    ## Convenient default behavior: import the calling script (presumably, the run-spec script)
    if imports is None:
        self.imports = [taxi.expand_path(__main__.__file__)] # Import the file that called this pool (presumably, the run-spec script)
    else:
        self.imports = imports

    ## Store imports in the dispatch metadata
    self._store_imports()

    ## Build dispatch
    self.trees = self.find_branches(task_pool)
    self._assign_priorities(task_pool, priority_method=priority_method)
    self._assign_task_ids(task_pool)
    self._populate_task_table(task_pool)
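# Usage sketch (hypothetical helper and dispatcher class name; not from the
# original source): compiling a freshly-specified task pool into an empty
# dispatch DB. `make_task_pool` stands in for whatever run-spec code builds
# the list of task objects.
#
# task_pool = make_task_pool()  # hypothetical; returns a list of task objects
# dispatcher = SQLiteDispatcher('./dispatches/run1.sqlite')
# dispatcher.initialize_new_task_pool(task_pool, priority_method='canvas')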
def execute(self, cores=None):
    """Calls the binary specified in self.binary, using mpirun (if
    self.use_mpi == True) as specified in local_taxi.mpirun_str, and feeding
    the binary the input string generated by build_input_string.

    Smart behavior regarding output files:
    - Will not overwrite an existing file unless self.allow_output_clobbering;
      this is also useful to avoid race conditions where multiple taxis start
      working on the same task.
    - Stores the location of all output files written in self.output_files,
      which can then be used by rollback to remove outputs.
    """
    ## Core logic -- reconcile task cores and taxi cores
    if cores is None or self.cores is None:
        if self.cores is not None:
            cores = self.cores
        elif cores is None:
            cores = 1
    elif cores < self.cores:
        print "WARNING: Running with {n0} cores for taxi < {n1} cores for task.".format(n0=cores, n1=self.cores)
    elif cores > self.cores:
        print "WARNING: Running with {n1} cores for task < {n0} cores for taxi.".format(n0=cores, n1=self.cores)
        cores = self.cores

    ## Prepare to use MPI, if desired
    if self.use_mpi is not None:
        use_mpi = self.use_mpi
    else:
        use_mpi = cores > 1

    if not use_mpi and cores > 1:
        print "WARNING: use_mpi=False, ignoring cores=%d" % cores

    if use_mpi:
        exec_str = local_taxi.mpirun_str.format(cores) + " "
    else:
        exec_str = ""

    ## Non-clobbering behavior
    if not self.allow_output_clobbering:
        # Find files that the task intends to save, check if they already exist
        to_clobber = []
        for ofa in output_file_attributes_for_task(self):
            ofn = getattr(self, ofa, None)
            if should_save_file(ofn) and os.path.exists(str(ofn)):
                print "WARNING: File {0}={1} already exists, attempting to verify output.".format(ofa, ofn)
                to_clobber.append(ofn)

        if len(to_clobber) > 0:
            self.verify_output() # verify_output throws an error, blocking the rest of this function, if output isn't correct
            print "WARNING: Pre-existing well-formatted output (according to verify_output()) detected; skipping running"
            return # Never clobber

    ## Keep track of absolute paths of output files created, for rollbacking.
    ## For user-friendliness, the user only has to provide a list of attributes that may contain output filenames.
    ## Track these before execution: if execution fails, we still want a list of output files that may have been created.
    for ofa in output_file_attributes_for_task(self):
        ofn = getattr(self, ofa, None)
        if should_save_file(ofn):
            self.output_files.append(expand_path(str(ofn)))

    ## Construct binary call and execute
    exec_str += self.binary + " "
    exec_str += self.build_input_string().strip() # Remove leading and trailing whitespace from input string

    #print "exec:", exec_str
    os.system(exec_str)

    # Only keep track of files that were actually created
    self.output_files = [ofn for ofn in self.output_files if os.path.exists(str(ofn))]

    ## Verify output
    self.verify_output()
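# Self-contained sketch (mirrors the core-reconciliation logic at the top of
# execute, with the warnings omitted; not part of the original source): when
# both the taxi and the task specify a core count, the smaller of the two
# wins; a lone None defers to the other value, and two Nones default to a
# single core.
def _sketch_reconcile_cores(taxi_cores, task_cores):
    if taxi_cores is None and task_cores is None:
        return 1
    if taxi_cores is None:
        return task_cores
    if task_cores is None:
        return taxi_cores
    return min(taxi_cores, task_cores)

# _sketch_reconcile_cores(8, 4)       -> 4 (task needs fewer cores than the taxi offers)
# _sketch_reconcile_cores(None, None) -> 1 (default: single core)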