Exemplo n.º 1
0
    def distribute(self):
        """
        Queue the step and submit its job(s), parallelizing over the
        iterable inputs.

        If the step has iterable inputs, one job is submitted for each
        element of the first iterable; otherwise a single job is submitted
        with a deep copy of the whole configuration.

        Returns the number of entries in ``self.jobs``.
        """
        self.status = JOB_STATUS.QUEUED

        # Select the scheduler: the local one for local steps, the
        # module-level one otherwise
        self.scheduler = (get_scheduler("SCHED_LOCAL")
                          if self.local_step else scheduler)

        if Step.__cfg_is_changed(self.cfg):
            Step.__write_cfg_file(self.cfg)
            Step.__remove_pickle(self.cfg)
        elif self.is_pickled():
            # The status is re-checked after loading, so load_pickle()
            # presumably restores it -- confirm against load_pickle()
            self.load_pickle()
            if self.status == JOB_STATUS.SUCCEEDED:
                self.log.info(
                    'Skipping step %s: configuration has not been changed' %
                    self.name)
                return len(self.jobs)

        input_keys = self.get_iterables()
        if not input_keys:
            # Nothing to parallelize: a single job gets the full config
            self.submit_job(copy.deepcopy(self.cfg))
            return len(self.jobs)

        # An input that is not itself iterable and names an existing path
        # is replaced by the lines of that file
        for input_key in input_keys:
            candidate = self.cfg.get(input_key, [])
            if hasattr(candidate, '__iter__'):
                continue
            if os.path.exists(candidate):
                with open(candidate) as handle:
                    self.cfg[input_key] = handle.read().splitlines()

        # The first iterable decides how many jobs are created
        job_total = len(self.cfg[input_keys[0]])
        for position in range(job_total):
            job_cfg = copy.deepcopy(self.cfg)

            # Each job only receives its own element of every iterable;
            # missing or empty iterables are left as copied
            for input_key in input_keys:
                if input_key in self.cfg and self.cfg[input_key]:
                    job_cfg[input_key] = self.cfg[input_key][position]

            # Pipeline and step metadata are taken from the step config
            # itself rather than from the deep copy
            step_meta = self.cfg['meta']
            job_meta = job_cfg['meta']
            job_meta['pipeline'] = step_meta['pipeline']
            job_meta['step'] = step_meta['step']
            # Job metadata values are indexed per job
            for meta_key, meta_values in step_meta['job'].iteritems():
                job_meta['job'][meta_key] = meta_values[position]

            self.submit_job(job_cfg)

        return len(self.jobs)
Exemplo n.º 2
0
    def distribute(self):
        """
        Submit the step to the scheduler, creating one job per element of
        the iterable inputs (or a single job when there are none).

        Returns the number of entries in ``self.jobs``.
        """
        self.status = JOB_STATUS.QUEUED

        # Initialize the scheduler
        use_local = self.local_step
        self.scheduler = get_scheduler("SCHED_LOCAL") if use_local else scheduler

        if Step.__cfg_is_changed(self.cfg):
            Step.__write_cfg_file(self.cfg)
            Step.__remove_pickle(self.cfg)
        elif self.is_pickled():
            self.load_pickle()
            # Unchanged configuration and a succeeded status: nothing to submit
            if self.status == JOB_STATUS.SUCCEEDED:
                skip_msg = 'Skipping step %s: configuration has not been changed' % self.name
                self.log.info(skip_msg)
                return len(self.jobs)

        iterable_keys = self.get_iterables()
        if iterable_keys:
            # Step needs to be distributed.
            # First pass: a non-iterable value that is an existing path is
            # expanded into the list of lines of that file
            for key in iterable_keys:
                raw_value = self.cfg.get(key, [])
                is_list_file = (not hasattr(raw_value, '__iter__')
                                and os.path.exists(raw_value))
                if is_list_file:
                    with open(raw_value) as list_file:
                        self.cfg[key] = list_file.read().splitlines()

            # Second pass: one job per element of the first iterable
            first_key = iterable_keys[0]
            for job_index in range(len(self.cfg[first_key])):
                # Start from a full copy of the step configuration
                job_cfg = copy.deepcopy(self.cfg)

                # Narrow each iterable down to this job's element
                for key in iterable_keys:
                    if key in self.cfg and self.cfg[key]:  # permit a null file
                        job_cfg[key] = self.cfg[key][job_index]

                source_meta = self.cfg['meta']
                job_cfg['meta']['pipeline'] = source_meta['pipeline']
                job_cfg['meta']['step'] = source_meta['step']
                for name, per_job_values in source_meta['job'].iteritems():
                    job_cfg['meta']['job'][name] = per_job_values[job_index]

                self.submit_job(job_cfg)
        else:
            # No iterable inputs: a single job with a copy of the config
            single_cfg = copy.deepcopy(self.cfg)
            self.submit_job(single_cfg)

        return len(self.jobs)
Exemplo n.º 3
0
    def __init__(self):
        """
        Initialize the step's default state from its ``spec``.

        Sets the run-state defaults, derives the step name from the module
        name, selects the scheduler, exposes every named spec argument and
        every requirement as an instance attribute, and computes
        ``jvm_memory`` from the memory requirement.
        """
        # Run-state defaults
        self.bootstrap = STARTUP_CYCLE
        self.status = JOB_STATUS.QUEUED
        self.meta = {'pipeline': {}, 'step': {}, 'job': {}}
        self.requirements = {'memory': '1', 'cpus': '1'}
        self.output_dir = '.'
        self.jobs = OrderedDict()
        self.cmd_count = 0

        logger.set_stdout_level(logger.DEBUG)
        self.log = logger.get_log()

        # The step name is the last component of the module path, with
        # the 'nespipe.steps.' prefix removed
        module_path = self.__module__.replace('nespipe.steps.', '')
        self.spec["name"] = module_path.split('.')[-1]
        self.name = self.spec["name"]
        self.__version__ = self.spec['version']

        # Local steps get the local scheduler, all others share the
        # module-level one
        self.local_step = self.spec.get('local', False)
        global scheduler
        self.scheduler = (get_scheduler("SCHED_LOCAL")
                          if self.local_step else scheduler)

        # Every named parameter in the spec becomes an attribute holding
        # its 'value' (an empty list when none is given)
        for _group, params in self.spec["args"].iteritems():
            for param in params:
                param_name = param.get('name', None)
                if param_name:
                    setattr(self, param_name, param.get('value', []))

        # Merge the requirements from the spec over the defaults, then
        # expose each one as an integer attribute
        default_requirements = {'memory': '1', 'cpus': '1'}
        ut.dict_update(
            self.requirements,
            self.spec.get('requirements', default_requirements))
        for req_name, req_value in self.requirements.iteritems():
            setattr(self, req_name, int(req_value))

        # JVM memory is 90% of the memory requirement truncated to an
        # integer; a result of zero is replaced by 1
        if 'memory' in self.requirements:
            self.jvm_memory = (
                int(int(self.requirements['memory']) * 0.9) or 1)
Exemplo n.º 4
0
    def __init__(self):
        """
        Build the step with its default state and the settings found in
        ``self.spec``.

        Named spec arguments and requirements are copied onto the instance
        as attributes; requirements are converted to integers.
        """
        self.bootstrap = STARTUP_CYCLE
        self.status = JOB_STATUS.QUEUED
        self.meta = {'pipeline': {}, 'step': {}, 'job': {}}
        self.requirements = {'memory': '1', 'cpus': '1'}
        self.output_dir = '.'
        self.jobs = OrderedDict()
        self.cmd_count = 0

        logger.set_stdout_level(logger.DEBUG)
        self.log = logger.get_log()

        # Parse specs and create keys: the name is the final dotted
        # component of the module name
        short_module = self.__module__.replace('nespipe.steps.', '')
        name_parts = short_module.split('.')
        self.spec["name"] = name_parts[-1]
        self.name = self.spec["name"]
        self.__version__ = self.spec['version']

        self.local_step = self.spec.get('local', False)
        global scheduler
        if not self.local_step:
            # Non-local steps share the module-level scheduler
            self.scheduler = scheduler
        else:
            self.scheduler = get_scheduler("SCHED_LOCAL")

        # Copy each named argument's value onto the instance; parameters
        # without a (truthy) name are ignored
        for _section, section_params in self.spec["args"].iteritems():
            for entry in section_params:
                if not entry.get('name', None):
                    continue
                setattr(self, entry['name'], entry.get('value', []))

        # Requirements from the spec are merged over the defaults
        spec_requirements = self.spec.get(
            'requirements', {'memory': '1', 'cpus': '1'})
        ut.dict_update(self.requirements, spec_requirements)
        for key, raw in self.requirements.iteritems():
            setattr(self, key, int(raw))

        # Set the JVM memory: 90% of the memory requirement, truncated,
        # with zero bumped up to 1
        if 'memory' in self.requirements:
            jvm_memory = int(int(self.requirements['memory']) * 0.9)
            if not jvm_memory:
                jvm_memory = 1
            self.jvm_memory = jvm_memory
Exemplo n.º 5
0
def set_scheduler(schedname):
    """
    Rebind the module-level ``scheduler`` to the object returned by
    ``get_scheduler(schedname)``.
    """
    global scheduler
    selected = get_scheduler(schedname)
    scheduler = selected
Exemplo n.º 6
0
from nespipe.core.constants import *
from nespipe.core.schedulers import get_scheduler
from nespipe.core.constants import *

# Attribute names of a step; judging by the constant's name these are
# presumably the ones left out when a step is stored in a database --
# TODO confirm against the code that reads this list.
# NOTE: every entry needs its own comma. Adjacent string literals are
# silently concatenated by Python ('cmd_count' 'name' -> 'cmd_countname'),
# which previously dropped both 'cmd_count' and 'name' from the list.
NOT_DB_ATTR = [
    'cfg', 'reqs', 'jvm_memory', 'memory', 'cpus', 'jobs', 'sys_path',
    'step_class', 'cmd_count',
    'name', 'local_step', '__version__', 'scheduler', 'log',
]

# Presumably the file name used for a step's pickled status -- confirm
STEP_PICKLE = '.status.pickle'
# Type tag; presumably marks spec arguments that are iterable inputs -- confirm
ITERABLE_TYPE = 'input_key_iterable'

STARTUP_CYCLE = 50

# Module-level scheduler, created at import time by calling get_scheduler()
# with no arguments; set_scheduler() rebinds it.
scheduler = get_scheduler()


def set_scheduler(schedname):
    """
    Replace the module-level ``scheduler`` with the one that
    ``get_scheduler`` returns for ``schedname``.
    """
    global scheduler
    replacement = get_scheduler(schedname)
    scheduler = replacement


class Step(object):
    """
    Base class for any step

    Members:
    - status: current status of the step
    - parameters: dictionary containing the definition of the parameters
                  N.B. The actual values are stored as members
Exemplo n.º 7
0
def set_scheduler(schedname):
    """
    Look up a scheduler with ``get_scheduler(schedname)`` and store it in
    the module-level ``scheduler`` variable.
    """
    global scheduler
    new_scheduler = get_scheduler(schedname)
    scheduler = new_scheduler
Exemplo n.º 8
0
from nespipe.core.constants import *



# Attribute names of a step; judging by the constant's name these are
# presumably the ones left out when a step is stored in a database --
# TODO confirm against the code that reads this list.
# NOTE: a comma was missing after 'cmd_count', so Python concatenated it with
# the next literal into the single entry 'cmd_countname'; each name must be
# a separate, comma-terminated element.
NOT_DB_ATTR = ['cfg', 'reqs', 'jvm_memory', 'memory', 'cpus',
               'jobs', 'sys_path', 'step_class', 'cmd_count',
               'name', 'local_step', '__version__', 'scheduler', 'log']


# Presumably the file name used for a step's pickled status -- confirm
STEP_PICKLE = '.status.pickle'
# Type tag; presumably marks spec arguments that are iterable inputs -- confirm
ITERABLE_TYPE = 'input_key_iterable'

STARTUP_CYCLE = 50


# Module-level scheduler, created at import time by calling get_scheduler()
# with no arguments; set_scheduler() rebinds it.
scheduler = get_scheduler()

def set_scheduler(schedname):
    """
    Point the module-level ``scheduler`` at the scheduler obtained from
    ``get_scheduler(schedname)``.
    """
    global scheduler
    chosen = get_scheduler(schedname)
    scheduler = chosen


class Step(object):
    """
    Base class for any step

    Members:
    - status: current status of the step
    - parameters: dictionary containing the definition of the parameters
                  N.B. The actual values are stored as members
    - meta: dictionary containing the metadata information