示例#1
0
def submit_driver(
        mvid, project, dirs, extra_arguments=None, driver_arguments=None):
    """Launch the cascade driver job on the cluster.

    The submitted ``driver.py`` script is the job that runs jobmon, which
    manages all subsequent cascade jobs.

    Args:
        mvid (str): model version ID
        project (str): The name of the project, e.g. ``proj_dismod``
        dirs (dict): Dictionary of directory locations. Only
            ``dirs['logdir']`` is read here.
        extra_arguments (List[str]): command-line arguments to add to
            every Jobmon job. They are placed last on the driver's
            command line. ``None`` or empty means none.
        driver_arguments (List[str]): command-line arguments just for
            the driver.py job that gets launched here. They are placed
            right after ``mvid``. ``None`` or empty means none.
    """
    log_directory = dirs['logdir']
    driver_script = os.path.join(settings['code_dir'], "driver.py")
    driver_jobname = 'dm_%s_driver' % mvid
    # The third value returned by cluster_limits is not needed here.
    n_slots, mem_limit, _unused = sge.cluster_limits('driver', mvm=None)

    # Treat None (or any empty value) as "no additional arguments".
    extra_args = extra_arguments or []
    driver_args = driver_arguments or []
    job_parameters = [mvid] + driver_args + extra_args

    conda_bin = os.path.join(settings['conda_root'], 'bin')
    error_log = '%s/%s.error' % (log_directory, driver_jobname)

    sge.qsub_w_retry(
        driver_script,
        driver_jobname,
        jobtype='python',
        project=project,
        slots=n_slots,
        memory=mem_limit,
        parameters=job_parameters,
        conda_env=settings['conda_env'],
        environment_variables=settings['env_variables'],
        prepend_to_path=conda_bin,
        stderr=error_log)
示例#2
0
 def submit_varnish(self, hold_jids):
     """Queue the job that 'varnishes' this run.

     The submitted script (not this method) is described as doing the
     following:
         1. Uploads fits
         2. Uploads adjusted data
         3. Computes fit statistics
         4. Uploads fit statistics
         5. Attempts to generate diagnostic plots
         6. Computes finals
         7. Uploads finals
         8. Updates the status of the model to finished

     Args:
         hold_jids: job ids handed to the submission as ``holds``.

     Returns:
         The value returned by ``sge.qsub_w_retry`` for the varnish job.
     """
     jobname = 'dm_%s_varnish' % (self.mvid)
     conda_bin = os.path.join(settings['conda_root'], 'bin')
     error_log = '%s/%s.error' % (self.logdir, jobname)

     # Slot and memory requests for this job are fixed values.
     return sge.qsub_w_retry(
         finfile,
         jobname,
         project=self.project,
         slots=35,
         memory=180,
         parameters=[self.mvid],
         holds=hold_jids,
         conda_env=settings['conda_env'],
         prepend_to_path=conda_bin,
         environment_variables=settings['env_variables'],
         stderr=error_log)
示例#3
0
 def submit_jobtree(self, cv_iter):
     """Queue a jobtree job for one full/cross-validation run.

     The jobtree manages the given run from global on down through the
     cascade. It is launched with ``--submit_stage jt`` and the supplied
     ``--cv_iter`` value.

     Args:
         cv_iter: cross-validation iteration identifier; used in the job
             name and passed on the command line.

     Returns:
         The value returned by ``sge.qsub_w_retry`` for the submitted job.
     """
     tree_jobname = 'dm_{}_G{}'.format(self.mvid, cv_iter)
     cli_parameters = [
         self.mvid, '--submit_stage', 'jt', '--cv_iter', cv_iter]
     conda_bin = os.path.join(settings['conda_root'], 'bin')
     # Both output streams go to files in the log directory.
     error_log = '{}/{}.error'.format(self.logdir, tree_jobname)
     output_log = '{}/{}.stdout'.format(self.logdir, tree_jobname)

     return sge.qsub_w_retry(
         gfile,
         tree_jobname,
         project=self.project,
         slots=20,
         memory=40,
         parameters=cli_parameters,
         conda_env=settings['conda_env'],
         prepend_to_path=conda_bin,
         environment_variables=settings['env_variables'],
         stderr=error_log,
         stdout=output_log)
示例#4
0
 def resubmit_self_check(self, hold_jids):
     """Queue a held job that re-checks this run's child jobs.

     The submitted job checks that all child location-year-sex groups
     have run successfully. If any have failed, it resubmits the
     below-global levels of the cascade (i.e. the submit_cascade
     function). It is launched with ``--submit_stage jt`` and this
     object's ``cv_iter_id``.

     Args:
         hold_jids: job ids handed to the submission as ``holds``.

     Returns:
         The value returned by ``sge.qsub_w_retry`` for the submitted job.
     """
     check_jobname = 'dm_{}_G{}'.format(self.mvid, self.cv_iter_id)
     cli_parameters = [
         self.mvid, '--submit_stage', 'jt', '--cv_iter', self.cv_iter_id]
     conda_bin = os.path.join(settings['conda_root'], 'bin')
     error_log = '{}/{}.error'.format(self.logdir, check_jobname)

     return sge.qsub_w_retry(
         gfile,
         check_jobname,
         project=self.project,
         slots=20,
         memory=40,
         holds=hold_jids,
         parameters=cli_parameters,
         conda_env=settings['conda_env'],
         prepend_to_path=conda_bin,
         environment_variables=settings['env_variables'],
         stderr=error_log)
示例#5
0
def submit_global(mvid, project, dirs):
    """Submit the global dismod_ode job.

    The job runs ``run_global.py`` and will attempt to run the entire
    cascade.

    Args:
        mvid: model version ID; used in the job name and passed as the
            job's only parameter.
        project: project name forwarded to the submission.
        dirs (dict): directory locations. Only ``dirs['logdir']`` is
            read here.

    Returns:
        The value returned by ``sge.qsub_w_retry`` for the submitted job.
    """
    log_directory = dirs['logdir']
    global_script = os.path.join(settings['code_dir'], "run_global.py")
    boot_jobname = 'dm_%s_boot' % mvid
    conda_bin = os.path.join(settings['conda_root'], 'bin')
    error_log = '%s/%s.error' % (log_directory, boot_jobname)

    return sge.qsub_w_retry(
        global_script,
        boot_jobname,
        jobtype='python',
        project=project,
        slots=15,
        memory=30,
        parameters=[mvid],
        conda_env=settings['conda_env'],
        environment_variables=settings['env_variables'],
        prepend_to_path=conda_bin,
        stderr=error_log)
示例#6
0
 def dependent_submit(location_id, hold_ids):
     """Recursively submit the incomplete jobs for a location subtree.

     One job is submitted per year in ``demo.year_ids`` whose
     (location, sex, year, cv_iter) key is in ``incomplete_jobs``. Each
     job holds on ``hold_ids``. Every child location is then processed
     with holds on the jobs submitted here, which may be an empty list.
     Submitted job ids are also appended to the enclosing ``all_jids``.

     Returns 0 for a location without children, otherwise None.
     """
     node = self.cascade.loctree.get_node_by_id(location_id)
     child_count = len(node.children)
     # A location without children has nothing to submit.
     if child_count == 0:
         return 0

     # The slot request is the same for every year: location_id 1 always
     # asks for 20, any other location asks for two per child up to 20.
     slot_count = 20 if location_id == 1 else min(20, child_count * 2)

     submitted = []
     for year in demo.year_ids:
         job_name = "dm_%s_%s_%s_%s_%s" % (
             self.mvid, location_id, sex[0], str(year)[2:],
             self.cv_iter_id)
         job_key = (location_id, sex, year, self.cv_iter_id)
         if job_key not in incomplete_jobs:
             continue

         memory_request = int(math.ceil(slot_count * 2.5))
         new_jid = sge.qsub_w_retry(
             cfile,
             job_name,
             project=self.project,
             holds=hold_ids,
             slots=slot_count,
             memory=memory_request,
             parameters=[self.mvid, location_id, sex, year,
                         self.cv_iter_id],
             conda_env=settings['conda_env'],
             prepend_to_path=os.path.join(settings['conda_root'], 'bin'),
             environment_variables=settings['env_variables'],
             stderr='%s/%s.error' % (self.logdir, job_name))
         submitted.append(new_jid)
         all_jids.append(new_jid)

     for child in node.children:
         dependent_submit(child.id, submitted)
示例#7
0
 def dependent_submit(location_id, hold_ids):
     """Recursively submit cascade jobs for a location subtree.

     A job is submitted for this location only when
     ``(location_id, sex, y)`` is in ``run_set``; it holds on
     ``hold_ids``. Every child location is then processed with a hold on
     that job, or with no holds when nothing was submitted here.

     Returns 0 for a location without children, otherwise None.
     """
     node = loctree.get_node_by_id(location_id)
     child_count = len(node.children)
     # A location without children has nothing to submit.
     if child_count == 0:
         return 0

     # Job ids the children must wait on; stays empty if this location
     # is not part of the run set.
     child_holds = []
     if (location_id, sex, y) in run_set:
         job_name = "casc_%s_%s_%s" % (location_id, sex[0], str(y)[2:])
         # One slot per child, capped at 8; memory is twice the slots.
         slot_count = min(8, child_count)
         submitted_jid = sge.qsub_w_retry(
             runfile,
             job_name,
             holds=hold_ids,
             slots=slot_count,
             memory=slot_count * 2,
             parameters=[mvid, location_id, sex, y])
         child_holds.append(submitted_jid)

     for child in node.children:
         dependent_submit(child.id, child_holds)