Пример #1
0
    def get_unbound_inputs(cls, cfg):
        """
        Collect the required inputs of each step that are not covered by a
        binding on one of the step's incoming edges.

        The configuration is passed through cls.load_cfg and the graph is
        built with cls.create_dag. The result maps a step name to its
        remaining required input keys; steps with nothing left unbound
        (or with no required inputs at all) are not present in the result.
        """

        cfg = cls.load_cfg(cfg)
        dag = cls.create_dag(cfg)

        unbound = defaultdict(dict)
        for node_name, class_name in cfg['dag']['nodes'].iteritems():
            pending = Step.create(class_name).keys('inputs', req_only=True)
            if not pending:
                # No required inputs: nothing to report for this step
                continue

            for upstream in dag.predecessors(node_name):
                edge_data = dag[upstream][node_name]
                for binding in edge_data.get('bindings', []):
                    # The input key is the second dot-separated field
                    bound_key = binding.split('.')[1]
                    # It may already have been dropped by an earlier binding
                    if bound_key in pending:
                        pending.remove(bound_key)

            if pending:
                unbound[node_name] = pending

        return unbound
Пример #2
0
    def get_unbound_inputs(cls, cfg):
        """
        Return, per step, the required input keys that no incoming edge
        of the DAG binds.

        cfg is first loaded with cls.load_cfg, and the DAG comes from
        cls.create_dag. Only steps that still have at least one required
        input left after discarding the bound ones appear in the result.
        """

        cfg = cls.load_cfg(cfg)
        dag = cls.create_dag(cfg)

        result = defaultdict(dict)
        for name, klass in cfg['dag']['nodes'].iteritems():
            required = Step.create(klass).keys('inputs', req_only=True)
            if required:
                for parent in dag.predecessors(name):
                    bindings = dag[parent][name].get('bindings', [])
                    # Each binding names the bound input key after the
                    # first dot
                    for bound in (item.split('.')[1] for item in bindings):
                        # A key can be bound more than once: only remove
                        # it while it is still listed
                        if bound in required:
                            required.remove(bound)

                if required:
                    result[name] = required

        return result
Пример #3
0
    def create_steps(cfg):
        """
        Create one step object for every node of the pipeline DAG.

        cfg['dag']['nodes'] maps each step name to a step class name. When
        cfg has a 'sys_path' entry, that path is placed at the front of
        sys.path while the steps are created and taken out again afterwards,
        even if the creation of a step fails.

        Returns a dictionary mapping each step name to the object returned
        by Step.create for its class name.
        """
        stepobjs = {}
        has_extra_path = 'sys_path' in cfg
        if has_extra_path:
            sys.path.insert(0, cfg['sys_path'])
        try:
            # items() iterates the same way on Python 2 and Python 3
            for stepname, classname in cfg['dag']['nodes'].items():
                stepobjs[stepname] = Step.create(classname)
        finally:
            if has_extra_path:
                # Remove the entry by value rather than by position:
                # loading a step may itself have changed sys.path, so
                # index 0 is not guaranteed to still be our entry
                try:
                    sys.path.remove(cfg['sys_path'])
                except ValueError:
                    # Someone already removed it: nothing left to undo
                    pass

        return stepobjs
Пример #4
0
    def create_steps(cfg):
        """
        Build the step objects of a pipeline.

        For every (step name, class name) pair in cfg['dag']['nodes'] a step
        is created with Step.create. If cfg contains 'sys_path', that
        directory is temporarily put first in sys.path so that it is
        searched first during the creation; sys.path is restored before
        returning, also when Step.create raises.

        Returns a dictionary: step name -> step object.
        """
        extra_path = cfg['sys_path'] if 'sys_path' in cfg else None
        if 'sys_path' in cfg:
            sys.path.insert(0, extra_path)

        stepobjs = {}
        try:
            # items() works on both Python 2 and Python 3 dictionaries
            for stepname, classname in cfg['dag']['nodes'].items():
                stepobjs[stepname] = Step.create(classname)
        finally:
            # Undo the insertion whatever happened above. The entry is
            # looked up by value because sys.path may have been modified
            # while the steps were loaded
            if 'sys_path' in cfg and extra_path in sys.path:
                sys.path.remove(extra_path)

        return stepobjs
Пример #5
0
 def get_metainfo(self, step_name):
     """
     Build a dictionary of general information about the pipeline, the
     user and the step called step_name.

     The result has three entries:
         'pipeline': name and version of this pipeline
         'user':     login name and the GECOS field of the matching
                     password database entry
         'step':     step name, its class name (read from the DAG node)
                     and the version of the step created from that class
     """
     pipeline_info = {'name': self.name, 'version': self.__version__}
     user_info = {
         'login': self.user,
         'fullname': pwd.getpwnam(self.user).pw_gecos,
     }
     class_name = self.dag.node[step_name]['class_name']
     step_info = {
         'name': step_name,
         'class': class_name,
         'version': Step.create(class_name).__version__,
     }
     return {
         'pipeline': pipeline_info,
         'user': user_info,
         'step': step_info,
     }
Пример #6
0
    def get_refgenomes(cls, cfg, unbound=None):
        """
        Return the paths of the reference genomes usable by the steps of the
        pipeline, grouped by label.

        A label is "<species> <version>", followed by " (<variation>)" when
        the reference genome has a variation. The result is a nested
        dictionary: label -> step name -> input key -> path
            {
                "label1": {
                    "stepname1" : { "input_key1" : "/path1"},
                    "stepname2" : { "input_key1" : "/path2"}
                },
                "label2": {
                    "stepname1" : { "input_key1" : "/path3"},
                    "stepname2" : { "input_key1" : "/path4"}
                }
            }
        An input key is only listed when the reference genome has a path for
        the tool required by that key.

        The "unbound" dictionary contains the steps that have unbound inputs:
        if set, only those steps will be considered
        """

        # step name -> input key -> tool that needs a ref. genome
        tools_by_step = defaultdict(dict)
        tools = set()

        # Collect all tools that require a ref. genome
        # (items() iterates the same way on Python 2 and Python 3)
        for stepname, classname in cfg['dag']['nodes'].items():
            if unbound is None or stepname in unbound:
                step = Step.create(classname)
                for ref in step.get_refgenome_tools():
                    tools.add(ref['tool'])
                    tools_by_step[stepname][ref['name']] = ref['tool']

        # Get corresponding ref genomes
        refs_by_label = {}
        for ref in mongo.get_refgenomes(tools):
            ref_id = ref['_id']
            label = "%s %s" % (ref_id['species'], ref_id['version'])
            if 'variation' in ref_id:
                label += " (%s)" % ref_id['variation']

            for stepname, step_tools in tools_by_step.items():
                # Keep only the inputs whose tool has a path in this genome
                step_paths = {}
                for param_key, tool in step_tools.items():
                    if tool in ref['paths']:
                        step_paths[param_key] = ref['paths'][tool]
                # A later genome with the same label replaces the entry of
                # the step, exactly as a plain assignment would
                refs_by_label.setdefault(label, {})[stepname] = step_paths

        return refs_by_label
Пример #7
0
    def get_refgenomes(cls, cfg, unbound=None):
        """
        Return a nested dictionary containing the paths of the reference
        genomes grouped by labels (label -> step name -> input key -> path).
        A label is a combination of species, version and, when the reference
        genome has one, variation
            {
                "label1": {
                    "stepname1" : { "input_key1" : "/path1"},
                    "stepname2" : { "input_key1" : "/path2"}
                },
                "label2": {
                    "stepname1" : { "input_key1" : "/path3"},
                    "stepname2" : { "input_key1" : "/path4"}
                }
            }
        Only the steps that declare reference genome tools are listed, and
        an input key appears only if the genome provides a path for its tool.

        The "unbound" dictionary contains the steps that have unbound inputs:
        if set, only those steps will be considered
        """

        refs = defaultdict(dict)
        tools = set()

        # Collect all tools that require a ref. genome; items() is used
        # because it exists on both Python 2 and Python 3 dictionaries
        for stepname, classname in cfg['dag']['nodes'].items():
            if unbound is not None and stepname not in unbound:
                continue
            step = Step.create(classname)
            for ref in step.get_refgenome_tools():
                tools.add(ref['tool'])
                refs[stepname][ref['name']] = ref['tool']

        # Get corresponding ref genomes
        refs_by_label = {}
        for ref in mongo.get_refgenomes(tools):
            label = "%s %s" % (ref['_id']['species'], ref['_id']['version'])
            if 'variation' in ref['_id']:
                label += " (%s)" % ref['_id']['variation']

            for stepname in refs:
                if label not in refs_by_label:
                    refs_by_label[label] = {}
                # Start from an empty entry for this step and fill in the
                # inputs whose tool has a path in the current genome
                paths_by_key = refs_by_label[label][stepname] = {}
                for param_key, tool in refs[stepname].items():
                    if tool in ref['paths']:
                        paths_by_key[param_key] = ref['paths'][tool]

        return refs_by_label
Пример #8
0
 def get_metainfo(self, step_name):
     """
     Return generic information about the pipeline, the user running it
     and the given step, as a dictionary with the keys 'pipeline', 'user'
     and 'step'.

     The full name of the user is the GECOS field of the password
     database entry for self.user; the step version is read from a step
     object created from the class name stored on the DAG node.
     """
     info = {
         'pipeline': {'name': self.name, 'version': self.__version__},
         'user': {
             'login': self.user,
             'fullname': pwd.getpwnam(self.user).pw_gecos,
         },
     }
     class_name = self.dag.node[step_name]['class_name']
     step_version = Step.create(class_name).__version__
     info['step'] = {
         'name': step_name,
         'class': class_name,
         'version': step_version,
     }
     return info
Пример #9
0
    def validate_config(cls, cfg, user):
        """
        Check if all the config params are ok

        The configuration is loaded with cls.load_cfg, then two sections are
        checked:
          - steps: for every step returned by cls.get_params (the 'inputs'
            entry is skipped), each unbound required input and each required
            parameter must have a value in cfg['config']['steps'] for which
            the step's validate_value reports no error
          - pipeline: 'project_name', 'description' and 'output_dir' must be
            set, and 'output_dir' must be a string holding an absolute path

        Returns a dictionary that is empty when no problem was found;
        otherwise it holds the error messages:
            { 'steps':    { stepname: { key: message } },
              'pipeline': { key: message } }

        The "user" argument is not used at the moment
        """

        retval = defaultdict(dict)
        s_errors = defaultdict(dict)

        cfg = cls.load_cfg(cfg)
        params = cls.get_params(cfg)
        unb_inputs = cls.get_unbound_inputs(cfg)

        # basestring only exists on Python 2; fall back to str elsewhere
        try:
            string_types = basestring
        except NameError:
            string_types = str

        #validate step section
        for stepname in params['steps']:
            # Compare by value: an identity test ("is not") against a
            # string literal only works when both strings happen to be the
            # same object
            if stepname == 'inputs':
                continue

            classname = cfg['dag']['nodes'][stepname]
            stepobj = Step.create(classname)

            required_keys = []
            required_keys.extend(unb_inputs.get(stepname, []))
            required_keys.extend(stepobj.keys(['params'], req_only=True))

            # A step without a config section is missing every required key
            stepcfg = cfg['config']['steps'].get(stepname, {})
            for key in required_keys:
                if key in stepcfg:
                    param_spec = stepobj.key_spec(key)
                    error_msg = stepobj.validate_value(stepcfg[key],
                                                       param_spec['type'],
                                                       param_spec['name'])
                    if error_msg:
                        s_errors[stepname][key] = error_msg
                else:
                    s_errors[stepname][key] = 'missing value'

        #validate pipeline section
        p_errors = {}
        pipeline_cfg = cfg['config']['pipeline']
        for key in ('project_name', 'description'):
            # An absent key is reported the same way as an empty value
            if not pipeline_cfg.get(key):
                p_errors[key] = 'missing value'

        output_dir = pipeline_cfg.get('output_dir')
        if not output_dir:
            p_errors['output_dir'] = 'missing value'
        elif not isinstance(output_dir, string_types):
            # The type is checked first: startswith() below is only
            # available on strings
            p_errors['output_dir'] = '%s : invalid type, found %s, expected %s' % (output_dir, type(output_dir), 'str')
        elif not output_dir.startswith('/'):
            p_errors['output_dir'] = '%s : not an absolute path' % output_dir
        # NOTE: write access to output_dir is not verified here

        if s_errors:
            retval['steps'] = s_errors

        if p_errors:
            retval['pipeline'] = p_errors

        return retval
Пример #10
0
    def validate_config(cls, cfg, user):
        """
        Check if all the config params are ok

        cfg is loaded with cls.load_cfg. For each step listed by
        cls.get_params (except the 'inputs' entry) the required parameters
        and the unbound required inputs are looked up in
        cfg['config']['steps']: a key without a value is reported as
        'missing value', a value rejected by the step's validate_value is
        reported with the message that validate_value returned.
        In the pipeline section 'project_name', 'description' and
        'output_dir' must be set; 'output_dir' must also be a string and an
        absolute path.

        Returns a dictionary with the errors found, under the keys 'steps'
        (step name -> key -> message) and 'pipeline' (key -> message); a
        section without errors is left out, so the result is empty for a
        valid configuration.

        The "user" argument is currently unused
        """

        retval = defaultdict(dict)
        s_errors = defaultdict(dict)

        cfg = cls.load_cfg(cfg)
        params = cls.get_params(cfg)
        unb_inputs = cls.get_unbound_inputs(cfg)

        #validate step section
        for stepname in params['steps']:
            # "!=" compares the text; "is not" would compare object
            # identity, which is not reliable for strings
            if stepname != 'inputs':
                classname = cfg['dag']['nodes'][stepname]
                stepobj = Step.create(classname)
                if stepname in cfg['config']['steps']:
                    required_keys = []
                    required_keys.extend(unb_inputs.get(stepname, []))
                    required_keys.extend(
                        stepobj.keys(['params'], req_only=True))
                    stepcfg = cfg['config']['steps'][stepname]
                    for key in required_keys:
                        if key in stepcfg:
                            param_spec = stepobj.key_spec(key)
                            error_msg = stepobj.validate_value(
                                stepcfg[key], param_spec['type'],
                                param_spec['name'])
                            if error_msg:
                                s_errors[stepname][key] = error_msg
                        else:
                            s_errors[stepname][key] = 'missing value'
                else:
                    # No section at all for this step: every required key
                    # is missing
                    for key in stepobj.keys(['params'], req_only=True):
                        s_errors[stepname][key] = 'missing value'
                    for key in unb_inputs.get(stepname, []):
                        s_errors[stepname][key] = 'missing value'

        #validate pipeline section
        p_errors = {}
        pipeline_cfg = cfg['config']['pipeline']

        # .get() lets an absent key be reported like an empty value
        if not pipeline_cfg.get('project_name'):
            p_errors['project_name'] = 'missing value'

        if not pipeline_cfg.get('description'):
            p_errors['description'] = 'missing value'

        # basestring only exists on Python 2; use str where it is undefined
        try:
            text_types = basestring
        except NameError:
            text_types = str

        output_dir = pipeline_cfg.get('output_dir')
        if not output_dir:
            p_errors['output_dir'] = 'missing value'
        elif not isinstance(output_dir, text_types):
            # Must come before the path test, which needs a string
            p_errors[
                'output_dir'] = '%s : invalid type, found %s, expected %s' % (
                    output_dir, type(output_dir), 'str')
        elif not output_dir.startswith('/'):
            p_errors[
                'output_dir'] = '%s : not an absolute path' % output_dir
        # NOTE: whether output_dir is writable is not checked here

        if s_errors:
            retval['steps'] = s_errors

        if p_errors:
            retval['pipeline'] = p_errors

        return retval