Exemplo n.º 1
0
 def should_taxis_be_running(self, taxi_list):
     """Determines whether tasks are available for each taxi to run.

     Taxis should be run if there are trunks available for them.  If there are
     no active trunks in the forest, but there are tasks ready to run, tells
     Pool to run enough taxis to work on all ready tasks.

     Args:
         taxi_list: List of taxi objects; are there tasks available for these taxis to run?
     Returns:
         Dictionary like {(taxi object) : (should taxi be running?)}
     """

     task_blob = self.get_all_tasks(None, include_complete=False)  # dict(id:task)

     # Errored ('E') and held ('H') taxis are useless to us; keep only
     # queued, running, or idle ones.
     workable = [t for t in taxi_list if t.status in ['Q', 'R', 'I']]

     # Restrict attention to taxis attached to this dispatch
     this_dispatch = taxi.expand_path(self.db_path)
     workable = [t for t in workable
                 if taxi.expand_path(t.dispatch_path) == this_dispatch]

     # Lookup table {(name of taxi) : (taxi object)}
     by_name = dict((str(t), t) for t in workable)

     # Desired state starts as the present state -- idle stay idle, active
     # (queued or running) stay active.  Dict { str(taxi_name) : bool }
     desired_state = dict((str(t), t.status in ['Q', 'R']) for t in workable)

     # A taxi must run if it has a trunk only it can run, or if tasks are
     # ready that only it can work on.
     for t in workable:
         if (self._trunk_number(task_blob, for_taxi=t) > 0
                 or self._N_ready_tasks(task_blob, for_taxi=t) > 0):
             desired_state[str(t)] = True

     # Taxi-specific requirements imposed; now ensure enough taxis overall
     active = [by_name[name] for (name, on) in desired_state.items() if on]
     idle = [by_name[name] for (name, on) in desired_state.items() if not on]

     n_active = len(active)
     n_needed = self._trunk_number(task_blob)

     # Even without trunks, ready tasks demand at least one taxi apiece
     n_ready = self._N_ready_tasks(task_blob)
     if n_needed == 0 and n_ready:
         n_needed = n_ready  # Correct behavior for trunkless task forests

     # Wake idle taxis until the demand is met
     for t in idle:
         if n_active >= n_needed:
             break  # Enough taxis are running already
         desired_state[str(t)] = True
         n_active += 1

     return desired_state
Exemplo n.º 2
0
    def test_read_log_dirs(self):
        """A second pool object on the same DB reads back work_dir/log_dir."""
        duplicate_pool = SQLitePool(self.test_filename, 'test_pool')

        # Enter/exit the original pool so its directories are written to the DB
        with self.test_pool:
            pass

        with duplicate_pool:
            expected_work = taxi.expand_path('./tests/work/')
            expected_log = taxi.expand_path('./tests/log/')
            self.assertEqual(duplicate_pool.work_dir, expected_work)
            self.assertEqual(duplicate_pool.log_dir, expected_log)
Exemplo n.º 3
0
    def __init__(self, work_dir, log_dir, thrash_delay=300, allocation=None, queue=None):
        """Store common pool configuration.

        Args:
            work_dir: Directory taxis work in; expanded via taxi.expand_path.
            log_dir: Directory taxi logs go to; expanded via taxi.expand_path.
            thrash_delay: Minimum time between taxi resubmissions, in seconds
                (default 300, i.e., 5 minutes).
            allocation: Allocation to run on, if relevant.
            queue: Queue to submit to, if relevant.
        """
        self.work_dir = taxi.expand_path(work_dir)
        self.log_dir = taxi.expand_path(log_dir)
        self.thrash_delay = thrash_delay
        self.allocation = allocation
        self.queue = queue
Exemplo n.º 4
0
 def __enter__(self):
     """Context interface: connect to SQLite Pool DB.

     If performing multiple operations, it is faster to leave a "connection"
     open than to open and close it repeatedly; dangerous to leave a
     connection open constantly.

     Returns:
         self, so that `with pool as p:` binds the pool object.
         (Previously returned None, which broke `as`-binding.)
     """
     # Don't allow layered entry; still return self so `as` bindings work
     if self._in_context:
         return self
     self._in_context = True

     self.conn = sqlite3.connect(self.db_path, timeout=30.0)
     self.conn.row_factory = sqlite3.Row # Row factory for return-as-dict

     self._get_or_create_pool() # Also retrieves info about pool from DB, including working dir, so must occur here

     taxi.ensure_path_exists(taxi.expand_path(self.work_dir)) # Dig out working directory if it doesn't exist
     taxi.ensure_path_exists(taxi.expand_path(self.log_dir)) # Dig out log directory if it doesn't exist

     return self
Exemplo n.º 5
0
 def __init__(self, db_path):
     """Open (or create) the dispatch DB located at db_path."""
     self.db_path = taxi.expand_path(db_path)
     self._setup_complete = False
     self._in_context = False

     # Entering and immediately leaving the context creates/retrieves the
     # dispatch DB (semi-kludgey, but keeps setup in one place)
     with self:
         pass
Exemplo n.º 6
0
    def _rollback(self, rollback_dir=None, delete_files=False):
        """Called by Dispatcher.rollback() to roll back this Runner.

        Removes all output files generated in executing the task (which are
        stored in self.output_files) by either deleting them (if delete_files) or
        by moving them to rollback_dir (if specified).

        Args:
            rollback_dir: Directory to move output files into; created if it
                does not exist. Takes precedence over delete_files.
            delete_files: If True and no rollback_dir is given, delete the
                output files instead of moving them.
        """
        super(Runner, self)._rollback()

        if self.output_files is not None and len(self.output_files) > 0:
            # Caller must pick a disposition for the files.
            # NOTE(review): assert is stripped under `python -O`; an explicit
            # raise would be safer -- left as-is to preserve behavior.
            assert not (rollback_dir is None and delete_files == False),\
                "Must either provide a rollback_dir to copy files to or give permission to delete_files"

            if rollback_dir is not None:
                rollback_dir = expand_path(rollback_dir)
                if not os.path.exists(rollback_dir):
                    os.makedirs(rollback_dir)  # Dig out the rollback directory

            # Drop placeholder entries before touching the filesystem
            self.output_files = [
                fn for fn in self.output_files if fn is not None
            ]  # Happens when e.g. MCMC passes saveg up, but saveg was None

            for fn in [str(ss) for ss in self.output_files]:

                # File may never have been written (e.g., task died early)
                if not os.path.exists(fn):
                    print "Rollback unable to find file: '{0}'".format(fn)
                    continue

                if rollback_dir is not None:
                    to_path = os.path.join(rollback_dir, os.path.basename(fn))

                    # Don't clobber any files in the rollback directory -- rename duplicate files like hmc_output(1)
                    counter = 0
                    while os.path.exists(to_path):
                        counter += 1
                        new_fn = os.path.basename(fn) + '({0})'.format(counter)
                        to_path = os.path.join(rollback_dir, new_fn)

                    print "Rollback: '{0}' -> '{1}'".format(fn, to_path)
                    shutil.move(fn, to_path)

                elif delete_files:
                    # Safety: Don't delete files even if granted permission if a rollback_dir is provided
                    print "Rollback: deleting '{0}'".format(fn)
                    os.remove(fn)

            # Output files are cleared, don't need to keep track of them anymore
            self.output_files = []
        else:
            print "No output files tracked for task {0} ({1})".format(
                getattr(self, 'id', None), self)
Exemplo n.º 7
0
    def __init__(self, db_path, pool_name=None,
                 work_dir=None, log_dir=None,
                 allocation=None, queue=None,
                 thrash_delay=300):
        """
        Argument options: either [db_path(, pool_name, thrash_delay)] are specified,
        which specifies the location of an existing pool; or,
        [work_dir, log_dir(, pool_name, thrash_delay)] are specified, which specifies
        where to create a new pool.  If all are specified,
        then [db_path, pool_name] takes priority (e.g., access existing pool
        behavior) and the remaining inputs are ignored.

        If no pool_name is provided when accessing an existing DB, and there is only
        one pool in the pool DB, then it accesses that one.  If more than one pool
        is present, must specify pool_name.

        If creating a new pool DB, and pool_name is not specified, names the pool
        'default'.

        queue specifies which queue/machine to submit to, if relevant
        (e.g., bc or ds on the USQCD machines).
        """
        super(SQLitePool, self).__init__(work_dir=work_dir, log_dir=log_dir,
             thrash_delay=thrash_delay, allocation=allocation, queue=queue)

        self.db_path = taxi.expand_path(db_path)

        creating_new_db = not os.path.exists(self.db_path)
        if creating_new_db and pool_name is None:
            # New pool, no name provided: fall back to the default name
            self.pool_name = 'default'
        else:
            # Existing pool: may be None, in which case a lone pool in the DB
            # is used; otherwise the named pool is looked up.
            self.pool_name = pool_name

        self.conn = None
        self._in_context = False

        # Entering/leaving the context creates or retrieves the pool DB
        # (semi-kludgey, but keeps setup in one place)
        with self:
            pass
Exemplo n.º 8
0
 def initialize_new_task_pool(self, task_pool, priority_method='canvas', imports=None):
     """Loads the tasks from task_pool in to an empty dispatcher by compiling
     the specified tasks (i.e., assigning IDs and priorities, rendering them in
     to storable format (e.g., JSON)) and storing them in the dispatch (usually a DB).

     See Dispatcher._assign_priorities for priority_method options.
     """
     # Dispatcher needs to be able to import relevant runners; convenient
     # default is to import whichever script invoked us (presumably the
     # run-spec script).
     if imports is None:
         self.imports = [taxi.expand_path(__main__.__file__)]
     else:
         self.imports = imports
     self._store_imports()  # Record imports in the dispatch metadata

     # Compile the dispatch: find trees, prioritize, assign IDs, store
     self.trees = self.find_branches(task_pool)
     self._assign_priorities(task_pool, priority_method=priority_method)
     self._assign_task_ids(task_pool)
     self._populate_task_table(task_pool)
Exemplo n.º 9
0
    def execute(self, cores=None):
        """Calls the binary specified in self.binary, using mpirun (if self.use_mpi==True)
        as specified in local_taxi.mpirun_str and feeding the binary the input string
        generated by build_input_string.

        Smart behavior regarding output files:
            - Will not overwrite an existing file unless self.allow_output_clobbering;
            this is also useful to avoid race conditions where multiple taxis start
            working on the same task.
            - Stores the location of all output files written in self.output_files,
            which can then be used by rollback to remove outputs.

        Args:
            cores: Number of cores available from the taxi; reconciled against
                self.cores (the task's request) below. If both are None, runs
                on 1 core.
        """
        ## Core logic -- reconcile task cores and taxi cores
        # If either side is unspecified, use whichever is given (or 1 if neither);
        # otherwise run with min(taxi cores, task cores) and warn on mismatch.
        if cores is None or self.cores is None:
            if self.cores is not None:
                cores = self.cores
            elif cores is None:
                cores = 1
        elif cores < self.cores:
            print "WARNING: Running with {n0} cores for taxi < {n1} cores for task.".format(
                n0=cores, n1=self.cores)
        elif cores > self.cores:
            print "WARNING: Running with {n1} cores for task < {n0} cores for taxi.".format(
                n0=cores, n1=self.cores)
            cores = self.cores

        ## Prepare to use MPI, if desired
        # Explicit self.use_mpi wins; otherwise infer from core count
        if self.use_mpi is not None:
            use_mpi = self.use_mpi
        else:
            use_mpi = cores > 1

        if not use_mpi and cores > 1:
            print "WARNING: use_mpi=False, ignoring cores=%d" % cores

        if use_mpi:
            exec_str = local_taxi.mpirun_str.format(cores) + " "
        else:
            exec_str = ""

        ## Non-clobbering behavior
        if not self.allow_output_clobbering:
            # Find files that the task intends to save, check if they already exist
            to_clobber = []
            for ofa in output_file_attributes_for_task(self):
                ofn = getattr(self, ofa, None)
                if should_save_file(ofn) and os.path.exists(str(ofn)):
                    print "WARNING: File {0}={1} already exists, attempting to verify output.".format(
                        ofa, ofn)
                    to_clobber.append(ofn)

            if len(to_clobber) > 0:
                self.verify_output()
                # Verify output throws an error and blocks rest of function if output isn't correct
                print "WARNING: Pre-existing well-formatted output (according to verify_output()) detected; skipping running"
                return  # Never clobber

        ## Keep track of absolute paths of output files created, for rollbacking
        # For user-friendliness, only have to provide a list of attributes that may contain output filenames
        # Track these before execution. If output fails, want to have a list of output files that may have been created.
        for ofa in output_file_attributes_for_task(self):
            ofn = getattr(self, ofa, None)
            if should_save_file(ofn):
                self.output_files.append(expand_path(str(ofn)))

        ## Construct binary call and execute
        exec_str += self.binary + " "
        exec_str += self.build_input_string().strip(
        )  # Remove leading and trailing whitespace from input string

        # NOTE(review): os.system runs through the shell; safe only if
        # self.binary and the input string are trusted -- confirm upstream.
        #print "exec:", exec_str
        os.system(exec_str)

        # Only keep track of files that were actually created
        self.output_files = [
            ofn for ofn in self.output_files if os.path.exists(str(ofn))
        ]

        ## Verify output
        self.verify_output()