Exemplo n.º 1
0
    def setUp(self):
        """Initialise the globals and load the test readset.

        The readset's backing table must already exist; this fixture does not
        create it.

        Raises ProgrammerGoof if the readset's table is missing.
        """
        RnaseqGlobals.initialize("")

        root_dir = RnaseqGlobals.conf_value('rnaseq', 'root_dir')
        test_db = RnaseqGlobals.conf_value('testing', 'test_db')
        self.db_file = os.path.join(root_dir, test_db)

        loaded = Readset(name='readset', db_file=self.db_file).load()
        self.readset = loaded

        if loaded.table_exists():
            return
        raise ProgrammerGoof("table %s doesn't exist" % loaded.tablename())
Exemplo n.º 2
0
    def setUp(self):
        """Initialise the globals, load the test readset and (re)create its table.

        The original body executed an undefined name `sql`, so it raised a
        NameError on every run; the statements are now spelled out.
        """
        usage = "testing: " + __file__
        RnaseqGlobals.initialize(usage)

        self.db_file = os.path.join(RnaseqGlobals.conf_value('rnaseq', 'root_dir'),
                                    RnaseqGlobals.conf_value('testing', 'test_db'))
        readset = Readset(name='readset', db_file=self.db_file).load()
        self.readset = readset

        # NOTE(review): the schema below (id / name / description) is an
        # assumption about what the missing `sql` was meant to hold - confirm.
        tablename = readset.tablename()
        readset.execute("DROP TABLE IF EXISTS %s" % tablename)
        readset.execute("CREATE TABLE %s (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR[255], description TEXT)" % tablename)
        print("table %s created" % tablename)
Exemplo n.º 3
0
    def make_run_objects(self, session):
        """Create and persist the run-tracking objects for this pipeline.

        One PipelineRun is created for the pipeline, plus one StepRun for every
        step that is not a provenance step.  Steps flagged with `skip` get a
        StepRun whose status is 'skipped' and whose success is True.

        session: database session used to merge this pipeline and to commit
                 the new objects.

        Returns a tuple (pipeline_run, step_runs) where step_runs maps
        step name -> StepRun.

        Raises UserError if no label is available (neither the 'label' config
        value nor readset.label), and ProgrammerGoof if the pipeline run still
        has no id after the commit.
        """
        # Debug output is enabled through the DEBUG environment variable.
        # (The value used to be bound to `verbose`, which left `debug`
        # undefined - a NameError further down - whenever DEBUG was set.)
        debug = os.environ.get('DEBUG', False)

        # create the pipeline_run object:
        try:
            label = RnaseqGlobals.conf_value('label') or self.readset.label
        except AttributeError:
            raise UserError("No label defined.  Please specify a label for the pipeline run, either in the readset or using the '--label' command line option")

        pipeline_run = PipelineRun(status='standby',
                                   input_file=', '.join(self.readset.reads_files),
                                   user=RnaseqGlobals.conf_value('user'),
                                   label=label,
                                   working_dir=self.readset.working_dir)

        self.pipeline_runs.append(pipeline_run)
        self = session.merge(self)
        session.commit()
        if pipeline_run.id is None:
            raise ProgrammerGoof("no id in %s" % pipeline_run)

        self.context.pipeline_run_id = pipeline_run.id
        RnaseqGlobals.set_conf_value('pipeline_run_id', pipeline_run.id)

        # create step_run objects:
        step_runs = {}
        for step in self.steps:
            if step.is_prov_step:
                continue

            step_run = StepRun(step_name=step.name, status='standby')
            for output in step.output_list():
                path = evoque_template(output, step, self.readset)
                step_run.file_outputs.append(FileOutput(path=path))

            if step.skip:               # as set by self.set_steps_current()
                if debug:
                    print("step %s is current, skipping" % step.name)
                step_run.status = 'skipped'
                step_run.success = True

            # The step_run used to be appended twice (before and after the
            # commit); a single append is enough to associate it with the run.
            pipeline_run.step_runs.append(step_run)
            session.commit()
            step_runs[step.name] = step_run
            self.context.step_runs[step.name] = step_run

        session.commit()
        return (pipeline_run, step_runs)
Exemplo n.º 4
0
    def setUp(self):
        """Initialise the globals, load the test readset and rebuild its table.

        Any existing table for the readset is dropped and a fresh, empty one
        is created in its place.
        """
        RnaseqGlobals.initialize("")

        root_dir = RnaseqGlobals.conf_value('rnaseq', 'root_dir')
        test_db = RnaseqGlobals.conf_value('testing', 'test_db')
        self.db_file = os.path.join(root_dir, test_db)

        loaded = Readset(name='readset', db_file=self.db_file).load()
        self.readset = loaded

        # Drop first, then create; the table name is looked up per statement.
        statements = (
            "DROP TABLE IF EXISTS %s",
            "CREATE TABLE %s (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR[255], description TEXT)",
        )
        for statement in statements:
            loaded.execute(statement % loaded.tablename())
Exemplo n.º 5
0
    def verify_exes(self):
        """Check that every step's executable can be found.

        Each step is asked to verify its own executable via step.verify_exe().

        Returns a list of error strings: one per step whose executable is
        missing, followed by a single hint on how to fix it.  The list is
        empty when every step verifies.
        """
        # The original also built a local search-path list here that was never
        # used; the per-step verify_exe() call is what does the checking.
        errors = ["Missing executable in step %s: %s" % (step.name, step.exe)
                  for step in self.steps
                  if not step.verify_exe()]

        if errors:
            errors.append("Please link these executables from the %s/programs directory, or make sure they are on the path defined in the config file." \
                          % RnaseqGlobals.conf_value('rnaseq', 'root_dir'))

        return errors
Exemplo n.º 6
0
    def __init__(self,**args):
        """Set up the object, resolve its database file and open a cursor.

        args: forwarded to dict_like.__init__.  If 'db_file' is given it is
              used as the database file; otherwise the path is built from the
              'rnaseq'/'root_dir' and 'db'/'db_name' config values.

        Raises ProgrammerGoof if either config value is None (globals not
        initialised).  Asserts that self.columns is truthy.
        """
        dict_like.__init__(self, **args)

        if 'db_file' in args:
            self.db_file = args['db_file']
        else:
            root_dir = RnaseqGlobals.conf_value('rnaseq', 'root_dir')
            db_name = RnaseqGlobals.conf_value('db', 'db_name')
            if None in (root_dir, db_name):
                raise ProgrammerGoof("RnaseqGlobals not initialized")
            self.db_file = os.path.join(root_dir, db_name)

        # The connection is opened at construction time; the cursor comes
        # from the handle that connect() leaves in self.dbh.
        self.connect()
        self.cursor = self.dbh.cursor()
        assert self.columns
Exemplo n.º 7
0
    def sh_cmdline(self):
        """Return this step's command line.

        The step's 'usage' entry is treated as a %-format template and is
        expanded with the step itself as the mapping.  A missing or None
        'usage' yields an empty string.

        Side effect: when self['exe'] exists and is not already an absolute
        path, it is rewritten in place to <root_dir>/programs/<exe>.

        Raises ConfigError if the template cannot be expanded.
        """
        try:
            usage = self['usage']
        except KeyError:
            usage = ''
        if usage is None:
            usage = ''

        # look for exe in the programs directory, unless exe is an absolute path
        try:
            if os.path.abspath(self['exe']) != self['exe']:
                self['exe'] = os.path.join(RnaseqGlobals.conf_value('rnaseq', 'root_dir'), 'programs', self['exe'])
        except KeyError:                # not all steps have self['exe']; eg header, footer
            pass

        try:
            return usage % self
        except (KeyError, AttributeError) as e:
            raise ConfigError("Missing value %s in\n%s" % (e.args, self.name))
        except ValueError as e:
            # Malformed format string.  This branch used to `raise` a plain
            # string, which is itself an error; report it as a ConfigError
            # like the other branches.
            warn(e)
            warn("%s.usage: %s" % (self.name, usage))
            raise ConfigError("step %s: usage='%s': %s (%s.keys(): %s)"
                              % (self.name, usage, e, self.name, ", ".join(self.__dict__.keys())))
        except TypeError as te:
            raise ConfigError("step %s: usage='%s': %s" % (self.name, usage, te))
Exemplo n.º 8
0
    def get_pipeline(self,**kwargs):
        """Return the pipeline called kwargs['name'], bound to kwargs['readset'].

        A pipeline stored in the database is reused unless the 'use_template'
        config value is set.  Otherwise the pipeline is built from its
        template and stored, replacing any existing database row.

        Both 'name' and 'readset' are required and must be truthy.
        """
        session = RnaseqGlobals.get_session()
        use_template = RnaseqGlobals.conf_value('use_template')

        name = kwargs['name']
        assert name
        readset = kwargs['readset']
        assert readset

        stored = session.query(Pipeline).filter_by(name=name).first()

        if stored is not None and not use_template:
            # Reuse the stored pipeline; only the readset is refreshed.
            pipeline = stored
            pipeline.readset = readset
        else:
            # Build from the template.
            pipeline = Pipeline(name=name, readset=readset).load()
            if stored is not None:
                # Replace the stored pipeline with the newly generated one.
                assert stored.id != None
                session.delete(stored)
                session.commit()
            else:
                pipeline.template_file()    # sets pipeline.path
            session.add(pipeline)
            session.commit()

        assert hasattr(pipeline, 'readset')
        pipeline.set_defaults()
        return pipeline
Exemplo n.º 9
0
    def usage(self, context):
        if self.aligner=='bowtie':
            bowtie_index=RnaseqGlobals.conf_value('rnaseq','bowtie_indexes')
            if self.paired_end():

                script='''
export BOWTIE_INDEXES=%(bowtie_index)s
bowtie ${ewbt} -1 ${inputs[0]} -2 ${inputs[1]} ${args} | perl -lane 'print unless($$F[1] == 4)' > $${ID}.${name}_BAD.$${format}
''' % {'bowtie_index': bowtie_index}

            else:
                script='''
export BOWTIE_INDEXES=%(bowtie_index)s
bowtie ${ewbt} ${args} ${inputs[0]} | perl -lane 'print unless($$F[1] == 4)' > $${ID}.${name}_BAD.$${format}
''' % {'bowtie_index': bowtie_index}
                restore_indent=True

                

        elif self.aligner=='blat':
            # fixme: need to implement this (NYI)
            raise ProgrammerGoof("step %s doesn't work for aligner==blat yet (NYI)" % self.name)
        else:
            raise ConfigError("Unknown alignment program '%s'" % self.aligner)


        return script
Exemplo n.º 10
0
    def sh_script(self, **kwargs):
        """Render this step's shell template, or return None if it has none.

        The template named by self['sh_template'] is looked up under
        <root_dir>/templates/sh_template and rendered with a variable set
        built from the step, its dict, the pipeline and its readset, the
        config and the generated command line.  kwargs are applied last and
        therefore override everything else.

        Raises ConfigError if the template refers to an undefined name.
        """
        if 'sh_template' not in self.dict:
            return None

        template_dir = os.path.join(RnaseqGlobals.conf_value('rnaseq', 'root_dir'),
                                    "templates", "sh_template")
        domain = Domain(template_dir, errors=4)
        template = domain.get_template(self['sh_template'])

        # Later entries override earlier ones, so the order here matters.
        tmpl_vars = {}
        for source in (self, self.dict):
            tmpl_vars.update(source)
        tmpl_vars['readset'] = self.pipeline.readset # fixme: really?
        tmpl_vars['sh_cmd'] = self.sh_cmdline()
        tmpl_vars['config'] = RnaseqGlobals.config
        tmpl_vars['pipeline'] = self.pipeline
        tmpl_vars['ID'] = self.pipeline.ID()
        tmpl_vars.update(kwargs)

        try:
            return template.evoque(tmpl_vars)
        except NameError as ne:
            raise ConfigError("%s while processing step '%s'" % (ne, self.name))
Exemplo n.º 11
0
    def is_current(self):
        """Tell whether this step's outputs are newer than everything they depend on.

        Returns False when self.force is set, or when any input or output file
        cannot be stat'ed.  Otherwise returns True exactly when the newest
        modification time among the inputs (and the step's executable under
        <root_dir>/programs) is older than the oldest output.

        Raises ConfigError if the executable cannot be stat'ed.
        """
        if self.force:
            return False

        newest_input = 0
        oldest_output = time.time()

        for in_path in self.inputs():
            try:
                in_mtime = os.stat(in_path).st_mtime
            except OSError:
                # missing/unaccessible inputs constitute not being current
                return False
            newest_input = max(newest_input, in_mtime)

            # The executable counts as an input too; it is checked once per
            # input file, so a step without inputs never looks at it.
            exe_path = os.path.join(RnaseqGlobals.conf_value('rnaseq', 'root_dir'),
                                    'programs', self['exe'])
            try:
                exe_mtime = os.stat(exe_path).st_mtime
            except OSError as err:
                raise ConfigError("%s: %s" % (exe_path, err))
            newest_input = max(newest_input, exe_mtime)

        for out_path in self.outputs():
            try:
                out_mtime = os.stat(out_path).st_mtime
            except OSError:
                # missing/unaccessible outputs definitely constitute not being current
                return False
            oldest_output = min(oldest_output, out_mtime)

        return newest_input < oldest_output
Exemplo n.º 12
0
 def check_label_unique(self, session, label):
     """Make sure no other pipeline run already uses `label`.

     session: database session used to look up (and possibly delete) runs.
     label:   the label about to be used for a new pipeline run.

     If a run with this label exists and the 'force' config value is set,
     the existing run is deleted and the deletion committed.

     Raises UserError if the label is taken and 'force' is not set.
     """
     # The query method is filter_by(); the previous spelling `filterBy`
     # fails with AttributeError.
     other_pr = session.query(PipelineRun).filter_by(label=label).first()
     if not other_pr:
         return

     if RnaseqGlobals.conf_value('force'):
         session.delete(other_pr) # delete existing run, will get over written
         session.commit()
     else:
         raise UserError("The label '%s' is already in use.\n  Please provide a new label (either in the readset or by use of the '--label' command line option), or use the '--force' option to fully override the old pipeline run.  \n  This will cause all steps to be run, also." % label)
Exemplo n.º 13
0
    def set_ID(self, *ID):
        """Set self.ID (a path prefix) and self.id / self['id'] (its basename).

        ID: optional; when given, ID[0] becomes the starting value of self.ID.

        Resolution rules:
          * an existing absolute self.ID is kept as is;
          * an existing relative self.ID is joined onto self.working_dir;
          * with no self.ID and exactly one reads file, the ID is working_dir
            plus the basename of self.reads_file with everything from its
            first '.' removed;
          * with no self.ID and two paired-end reads files, the ID is
            working_dir plus the part of the first file's basename that
            precedes '_1.<ext>' / '_2.<ext>';
          * otherwise nothing is set (a message goes to stderr in verbose or
            debug mode).

        Returns self in every case.

        Raises ConfigError if a paired-end file name is not of the form
        '<stem>_[12].<ext>'.
        """
        # try to assign self.ID from ID[0], which might not be there:
        if ID:
            self.ID = ID[0]

        try:
            # self.ID exists: keep it if absolute, else anchor it in working_dir
            if not os.path.isabs(self.ID):
                self.ID = os.path.join(self.working_dir, self.ID)

        except AttributeError:
            # self.ID didn't exist; derive it from the reads file name(s)
            n_files = len(self.reads_files)
            if n_files == 1:
                # Strip the extension from the basename only.  Stripping it
                # from the joined path (as before) truncated the ID at the
                # first '.' found anywhere in working_dir.
                # NOTE(review): this reads self.reads_file (singular) while the
                # length test uses self.reads_files - confirm both exist.
                stem = re.sub(r'\..*$', '', os.path.basename(self.reads_file))
                self.ID = os.path.join(self.working_dir, stem)
            elif n_files == 2 and self.paired_end:
                # check that file names are of proper form (only the first is examined):
                first_file = self.reads_files[0]
                mg = re.search(r'^(.*)_[12]\.[\w_]+$', os.path.basename(first_file))
                if mg is None:
                    raise ConfigError("'%s' isn't a well-formed filename for paired_end data: must match '_[12].<ext>'" % first_file)
                self.ID = os.path.join(self.working_dir, mg.group(1))
            else:
                if RnaseqGlobals.conf_value('verbose') or RnaseqGlobals.conf_value('debug'):
                    sys.stderr.write("Cannot set ID: too many files (%d), paired_end=%s\n" % (len(self.reads_files), self.paired_end))
                return self

        self.id = os.path.basename(self.ID)
        self['id'] = self.id

        return self
Exemplo n.º 14
0
    def write_sh_script(self, **kwargs):
        """Generate this pipeline's shell script and write it to the working directory.

        kwargs are forwarded to self.sh_script().  The readset's working
        directory is created if necessary.

        Returns the path of the written script.
        """
        contents = self.sh_script(**kwargs)

        working_dir = self.readset.working_dir
        script_filename = os.path.join(working_dir, self.scriptname())

        # Best-effort directory creation: any OSError is ignored here
        # (typically "already exists"; fixme: could be a permissions error).
        try:
            os.makedirs(self.readset.working_dir)
        except OSError:
            pass

        with open(script_filename, "w") as out:
            out.write(contents)
            if RnaseqGlobals.conf_value('verbose'):
                print("%s written" % script_filename)

        return script_filename
Exemplo n.º 15
0
    def evoque_fields(self):
        """Expand '${...}' placeholders in this object's string attributes.

        Every attribute whose name does not start with '__', whose value is a
        plain str and contains '${' is run through evoque_template() and
        replaced with the result.  Attributes whose expansion raises NameError
        are left untouched.

        Returns self.
        """
        # NOTE(review): this is self.__dict__ itself, not a copy, so the
        # 'rnaseq' config values are merged into the instance's attributes.
        # Confirm that is intended before changing it.
        namespace = self.__dict__
        namespace.update(RnaseqGlobals.conf_value('rnaseq'))

        for attr_name in dir(self):
            if attr_name.startswith('__'):
                continue

            value = getattr(self, attr_name)
            needs_expansion = type(value) is str and re.search(r'\$\{', value)
            if not needs_expansion:
                continue

            try:
                setattr(self, attr_name, evoque_template(value, namespace))
            except NameError:
                pass

        return self
Exemplo n.º 16
0
    def verify_exe(self):
        """Tell whether this step's executable can be found.

        A step without an 'exe' attribute trivially verifies.  Otherwise the
        executable is searched for in the directories of the 'rnaseq'/'path'
        config value plus <root_dir>/programs.  If it is not found there as an
        executable file but the step names an interpreter, the step still
        verifies when the interpreter is executable and the exe is readable.
        """
        if not hasattr(self, 'exe'):
            return True

        search_dirs = RnaseqGlobals.conf_value('rnaseq', 'path').split(":")
        search_dirs.append(os.path.join(RnaseqGlobals.root_dir(), 'programs'))

        if exists_on_path(self.exe, search_dirs, os.X_OK):
            return True

        # couldn't find self.exe, and there is no interpreter to fall back on:
        if not hasattr(self, 'interpreter'):
            return False

        # not directly executable; accept an executable interpreter plus a readable script
        return exists_on_path(self.interpreter, search_dirs, os.X_OK) and \
               exists_on_path(self.exe, search_dirs, os.R_OK)
Exemplo n.º 17
0
    def qsub_script(self, script_filename, out_filename=None, err_filename=None):
        """Write a qsub wrapper script for `script_filename` and return its path.

        script_filename: the script the qsub job should run (template variable 'cmd').
        out_filename:    job stdout file; defaults to self.out_filename().
        err_filename:    job stderr file; defaults to self.err_filename().

        The wrapper is rendered from the 'qsub' sh_template and written to
        <working_dir>/<name>.<label>.qsub, with the path passed through
        path_helpers.sanitize().
        """
        if out_filename is None:
            out_filename = self.out_filename()
        if err_filename is None:
            err_filename = self.err_filename()

        qsub = templated(name='qsub', type='sh_template', suffix='tmpl')

        # Start from this object's attributes, then override the job-specific entries.
        tmpl_vars = {}
        tmpl_vars.update(self.__dict__)
        tmpl_vars['name'] = path_helpers.sanitize(self.name)
        tmpl_vars['cmd'] = script_filename
        tmpl_vars['out_filename'] = out_filename
        tmpl_vars['err_filename'] = err_filename
        qsub_script = qsub.eval_tmpl(vars=tmpl_vars)

        qsub_script_file = path_helpers.sanitize(os.path.join(self.readset.working_dir, "%s.%s.qsub" % (self.name, self.readset.label)))
        # `with` closes the file even if the write fails.
        with open(qsub_script_file, "w") as f:
            f.write(qsub_script)

        if RnaseqGlobals.conf_value('verbose'):
            print("%s written" % qsub_script_file)
        return qsub_script_file
Exemplo n.º 18
0
    def __init__(self,**kwargs):
        """Initialise the step and select its aligner.

        kwargs: forwarded to Step.__init__.  An 'aligner' entry chooses the
                aligner; without one the 'rnaseq'/'aligner' config value is used.
        """
        Step.__init__(self, **kwargs)

        if 'aligner' in kwargs:
            chosen = kwargs['aligner']
        else:
            chosen = RnaseqGlobals.conf_value('rnaseq', 'aligner')
        self.set_aligner(chosen)
Exemplo n.º 19
0
    def sh_script(self, **kwargs):
        """Assemble the complete shell script for this pipeline.

        The script is the concatenation of each non-skipped step's own script.
        When both 'pipeline_run' and 'step_runs' are passed as keyword
        arguments, provenance steps are woven in: a pipeline_start step first,
        a mid_step after every step that does not set 'skip_success_check',
        and a pipeline_end step last.  Each step's generated script is also
        stored in its StepRun's `cmd`.

        Returns the script text.

        Raises ConfigError listing every step whose script could not be
        generated, plus anything reported by self.verify_continuity().
        """
        script = "#!/bin/sh\n\n"
        session = RnaseqGlobals.get_session()
        verbose = RnaseqGlobals.conf_value('verbose')

        # determine if provenance is desired:
        try:
            pipeline_run = kwargs['pipeline_run']
            step_runs = kwargs['step_runs']
            include_provenance = True
        except KeyError:
            include_provenance = False

        # create auxiliary steps:
        if include_provenance:
            step_factory = StepFactory()

            pipeline_start = step_factory.new_step(self,
                                                   'pipeline_start',
                                                   pipeline_run_id=pipeline_run.id,
                                                   step_run_id=None,
                                                   next_step_run_id=self.context.step_runs[self.steps[0].name].id)
            mid_step = step_factory.new_step(self, 'mid_step', pipeline_run_id=pipeline_run.id)
            # NOTE(review): 'pipelinerun_id' is spelled differently from the
            # 'pipeline_run_id' used for the two steps above - confirm against
            # the pipeline_end template before renaming.
            pipeline_end = step_factory.new_step(self, 'pipeline_end', pipelinerun_id=pipeline_run.id,
                                                 step_run_id=None, next_step_run_id=None)
            script += pipeline_start.sh_script(self.context)

        # iterate through steps;
        errors = []
        for step in self.steps:
            # a step that has no 'skip' setting at all is treated as not skipped
            try:
                skip = step.skip
            except (AttributeError, KeyError):
                skip = False
            if skip:
                if verbose:
                    print("skipping step %s (already current)" % step.name)
                continue

            # Generate the step's script, collecting failures rather than
            # aborting.  (There used to be an extra, unguarded call right
            # before this one, which let the first failure escape and
            # generated every script twice.)
            try:
                step_script = step.sh_script(self.context, echo_name=True)
            except Exception as e:
                errors.append("%s: %s" % (step.name, str(e)))
                errors.append("Exception in pipeline.sh_script(step %s): %s (%s)" % (step.name, e, type(e)))
                continue

            script += step_script
            script += "\n"

            # insert check success step:
            if include_provenance:
                try:
                    skip_check = step['skip_success_check']
                except Exception:
                    skip_check = False
                if not skip_check:
                    step_run = step_runs[step.name]
                    step_run.cmd = step_script
                    mid_step.stepname = step.name
                    mid_step.step_run_id = step_run.id
                    next_step = self.step_after(step.name)
                    if next_step:
                        mid_step.next_step_run_id = self.context.step_runs[next_step.name].id
                    else:
                        mid_step.next_step_run_id = 0

                    script += mid_step.sh_script(self.context)

            if verbose:
                print("step %s added" % step.name)

        # record finish:
        if include_provenance:
            pipeline_end.last_step_id = step_runs[self.steps[-1].name].id
            script += pipeline_end.sh_script(self.context)

        # check for continuity and raise exception on errors:
        errors.extend(self.verify_continuity(self.context))
        if errors:
            raise ConfigError("\n".join(errors))

        session.commit()

        return script