Exemplo n.º 1
0
class RunManager(object):
    def __init__(self, event_list, event, config, filemanager):
        """
        Store the run configuration and work out the concurrent job limit.

        Parameters:
            event_list: handed to print_line when a status message is shown
            event: stored as self.kill_event
            config (dict): run configuration; its 'global' section is read
                for 'account' (optional, defaults to ''), 'dryrun', 'debug',
                'resource_path' and 'max_jobs'
            filemanager: stored as self.filemanager
        """
        global_config = config['global']

        self.config = config
        self.account = global_config.get('account', '')
        self.event_list = event_list
        self.filemanager = filemanager
        # Only a value that compares equal to True switches these flags on.
        self.dryrun = global_config['dryrun'] == True
        self.debug = global_config['debug'] == True
        self._resource_path = global_config['resource_path']

        # A list of cases, dictionaries structured as:
        #     case (str): the full case name
        #     jobs (list): a list of job.Jobs
        #     short_name (str): the short name of the case
        self.cases = []

        self.running_jobs = []
        self.kill_event = event
        self._job_total = 0
        self._job_complete = 0

        self.slurm = Slurm()
        jobs_per_node = 6
        max_jobs = global_config['max_jobs']
        if max_jobs:
            self.max_running_jobs = max_jobs
        else:
            # No explicit limit configured: derive one from the value
            # reported by slurm.get_node_number().
            self.max_running_jobs = self.slurm.get_node_number() * jobs_per_node

        # A derived limit of zero is treated as a failed query: keep asking
        # once a second until a non-zero value comes back.
        retry_msg = 'Unable to communication with scontrol, checking again'
        while self.max_running_jobs == 0:
            sleep(1)
            print_line(retry_msg, event_list)
            logging.error(retry_msg)
            self.max_running_jobs = self.slurm.get_node_number() * jobs_per_node

    def check_max_running_jobs(self):
        """
        Checks if the maximum number of jobs are running

        Returns True if the max or more are running, false otherwise
        """
        try:
            job_info = self.slurm.queue()
        except:
            return True
        else:
            running_jobs = 0
            for job in job_info:
                if job['STATE'] in ['R', 'PD']:
                    running_jobs += 1
                if running_jobs >= self.max_running_jobs:
                    return True
            return False

    def add_pp_type_to_cases(self,
                             freqs,
                             job_type,
                             start,
                             end,
                             case,
                             run_type=None):
Exemplo n.º 2
0
    def test_batch(self):
        """
        Submit a batch script and check that Slurm reports the new job.

        The job id returned by Slurm.batch must be a truthy int, showjob must
        report one of the allowed job states, and the job must appear in the
        queue listing in state 'PD' or 'R'. The job is cancelled in a
        finally clause so a failing assertion does not leave it queued.
        """
        # Parenthesised form prints the same under Python 2 and also parses
        # under Python 3.
        print('\n')
        print_message(
            '---- Starting Test: {} ----'.format(inspect.stack()[0][3]), 'ok')
        slurm = Slurm()
        command = os.path.join('tests', 'test_slurm_batch.sh')
        job_id = slurm.batch(command, '-n 1 -N 1')
        self.assertTrue(job_id)
        self.assertIsInstance(job_id, int)

        try:
            info = slurm.showjob(job_id)
            # NOTE(review): scontrol normally spells the finished state
            # 'COMPLETED'; confirm whether 'COMPLETE' is what showjob returns.
            allowed_states = ['PENDING', 'RUNNING', 'COMPLETE', 'COMPLETING']
            self.assertIn(info['JobState'], allowed_states)

            info = slurm.queue()
            in_queue = False
            for item in info:
                if int(item['JOBID']) == job_id:
                    in_queue = True
                    self.assertIn(item['STATE'], ['PD', 'R'])
                    break
            self.assertTrue(in_queue)
        finally:
            # Always clean up the submitted job, pass or fail.
            slurm.cancel(job_id)