Exemplo n.º 1
0
 def setUp(self):
     """Set up the fixture template dir, globals, a readset and a 'test_newstep' pipeline."""
     # The fixture templates are located relative to this test file.
     fixture_templates = os.path.abspath(__file__) + "/../../fixtures/templates"
     templated.template_dir = os.path.normpath(fixture_templates)
     RnaseqGlobals.initialize(__file__, testing=True)

     # Only the first entry of what Readset.load() returns is used.
     paired_path = RnaseqGlobals.root_dir() + '/t/fixtures/readsets/paired1.syml'
     loaded = Readset.load(filename=paired_path)
     self.readset = loaded[0]

     self.pipeline = Pipeline(name='test_newstep', readset=self.readset)
Exemplo n.º 2
0
    def get_pipeline(self,**kwargs):
        """Return a Pipeline for kwargs['name'] bound to kwargs['readset'].

        A pipeline already stored under that name is reused unless the
        'use_template' config value is set.  Otherwise a pipeline is built
        with Pipeline(...).load() and stored through the session, replacing
        the stored one if there was any.  set_defaults() is called on the
        result before it is returned.
        """
        session = RnaseqGlobals.get_session()
        use_template = RnaseqGlobals.conf_value('use_template')

        assert(kwargs['name'])
        assert(kwargs['readset'])
        name = kwargs['name']
        readset = kwargs['readset']

        existing = session.query(Pipeline).filter_by(name=name).first()
        found = existing != None

        if found and not use_template:
            # Reuse the stored pipeline, pointing it at the requested readset.
            pipeline = existing
            pipeline.readset = readset
        else:
            # Build the pipeline from its template.
            pipeline = Pipeline(name=name, readset=readset).load()
            if found:
                # Replace the stored pipeline with the newly generated one.
                assert(existing.id != None)
                session.delete(existing)
                session.commit()
            else:
                pipeline.template_file()    # sets pipeline.path
            session.add(pipeline)
            session.commit()

        assert(hasattr(pipeline, 'readset'))
        pipeline.set_defaults()
        return pipeline
Exemplo n.º 3
0
    def setUp(self):
        """Initialize globals, load the 'readset' object from the test db and require its table."""
        RnaseqGlobals.initialize("")

        root_dir = RnaseqGlobals.conf_value('rnaseq','root_dir')
        test_db = RnaseqGlobals.conf_value('testing','test_db')
        self.db_file = os.path.join(root_dir, test_db)

        self.readset = Readset(name='readset', db_file=self.db_file).load()
        if self.readset.table_exists():
            return
        # Without the table there is nothing for the tests to work on.
        raise ProgrammerGoof("table %s doesn't exist" % self.readset.tablename())
Exemplo n.º 4
0
    def setUp(self):
        """Initialize globals, load the 'readset' object from the test db and run ``sql`` on it."""
        RnaseqGlobals.initialize("testing: "+__file__)

        root_dir = RnaseqGlobals.conf_value('rnaseq','root_dir')
        test_db = RnaseqGlobals.conf_value('testing','test_db')
        self.db_file = os.path.join(root_dir, test_db)
        self.readset = Readset(name='readset', db_file=self.db_file).load()

        # NOTE(review): `sql` is not assigned anywhere in this method; it has
        # to come from an enclosing/module scope -- confirm it is defined there.
        self.readset.execute(sql)
        print("table %s created" % self.readset.tablename())
Exemplo n.º 5
0
    def setUp(self):
        """Set up fixture templates, globals, the 'juan' pipeline, and an empty Pipeline table."""
        here = os.path.abspath(__file__)
        templated.template_dir = os.path.normpath(here + "/../../fixtures/templates")
        RnaseqGlobals.initialize(__file__, testing=True)

        fixture_parts = ('t', 'fixtures', 'readsets', 'readset1.syml')
        readset_file = os.path.join(RnaseqGlobals.root_dir(), *fixture_parts)
        self.readset = Readset.load(readset_file)[0]
        self.pipeline = Pipeline(name='juan', readset=self.readset).load_steps()

        # Delete every Pipeline object currently stored in the db.
        session = RnaseqGlobals.get_session()
        for stale in session.query(Pipeline).all():
            session.delete(stale)
        session.commit()
Exemplo n.º 6
0
    def make_run_objects(self, session):
        """Create and persist the PipelineRun and StepRun records for this run.

        Args:
            session: database session used to merge this pipeline and to
                commit the newly created run objects.

        Returns:
            A ``(pipeline_run, step_runs)`` tuple; ``step_runs`` maps each
            step name to the StepRun created for it.

        Raises:
            UserError: if resolving the label raises AttributeError (no
                'label' config value and no label on the readset).
            ProgrammerGoof: if the pipeline run has no id after the commit.
        """
        # Debug output is switched on through the DEBUG environment variable.
        # (The original assigned `verbose` here, leaving `debug` undefined
        # whenever DEBUG was set.)
        debug=os.environ.get('DEBUG', False)

        # create the pipeline_run object:
        try:
            label=RnaseqGlobals.conf_value('label') or self.readset.label
        except AttributeError:
            raise UserError("No label defined.  Please specify a label for the pipeline run, either in the readset or using the '--label' command line option")

        pipeline_run=PipelineRun(status='standby',
                                 input_file=', '.join(self.readset.reads_files),
                                 user=RnaseqGlobals.conf_value('user'),
                                 label=label,
                                 working_dir=self.readset.working_dir)

        self.pipeline_runs.append(pipeline_run)
        self=session.merge(self)
        session.commit()
        if pipeline_run.id is None:
            raise ProgrammerGoof("no id in %s" % pipeline_run)

        self.context.pipeline_run_id=pipeline_run.id
        RnaseqGlobals.set_conf_value('pipeline_run_id',pipeline_run.id)

        # create step_run objects:
        step_runs={}
        for step in self.steps:
            if step.is_prov_step: continue
            step_run=StepRun(step_name=step.name, status='standby')
            for output in step.output_list():
                path=evoque_template(output, step, self.readset)
                step_run.file_outputs.append(FileOutput(path=path))

            if step.skip:               # as set by self.set_steps_current()
                if debug: print("step %s is current, skipping" % step.name)
                step_run.status='skipped'
                step_run.success=True

            # Attach the step run exactly once and commit it; a second append
            # of the same object would only duplicate the list entry.
            pipeline_run.step_runs.append(step_run)
            session.commit()
            step_runs[step.name]=step_run
            self.context.step_runs[step.name]=step_run

        session.commit()
        return (pipeline_run, step_runs)
Exemplo n.º 7
0
    def verify_exes(self):
        """Check that every step's executable can be found.

        Returns:
            A list of error strings: one per step whose ``verify_exe()``
            returns false, followed by a single hint on how to fix them.
            The list is empty when every step verifies.
        """
        # The search path is built inside each step's verify_exe(); the copy
        # that used to be computed here was never used.
        errors=["Missing executable in step %s: %s" % (step.name, step.exe)
                for step in self.steps
                if not step.verify_exe()]

        if errors:
            errors.append("Please link these executables from the %s/programs directory, or make sure they are on the path defined in the config file." \
                          % RnaseqGlobals.conf_value('rnaseq', 'root_dir'))

        return errors
Exemplo n.º 8
0
    def setUp(self):
        """Initialize globals, load the 'readset' object from the test db and recreate its table."""
        RnaseqGlobals.initialize("")

        root_dir = RnaseqGlobals.conf_value('rnaseq','root_dir')
        test_db = RnaseqGlobals.conf_value('testing','test_db')
        self.db_file = os.path.join(root_dir, test_db)

        readset = Readset(name='readset', db_file=self.db_file).load()
        self.readset = readset

        # Drop the table if present, then create it again from scratch.
        statements = (
            "DROP TABLE IF EXISTS %s",
            "CREATE TABLE %s (id INTEGER PRIMARY KEY AUTOINCREMENT, name VARCHAR[255], description TEXT)",
        )
        for statement in statements:
            readset.execute(statement % readset.tablename())
Exemplo n.º 9
0
    def __init__(self,**args):
        """Store the keyword args, resolve the db file, then open a connection and cursor.

        The db file is args['db_file'] when given; otherwise it is the
        configured rnaseq 'root_dir' joined with the db 'db_name'.

        Raises:
            ProgrammerGoof: if either config value needed for the default
                path is None.
        """
        dict_like.__init__(self,**args)

        if 'db_file' in args:
            self.db_file=args['db_file']
        else:
            root_dir=RnaseqGlobals.conf_value('rnaseq', 'root_dir')
            db_name=RnaseqGlobals.conf_value('db','db_name')
            if (root_dir==None or db_name==None):
                raise ProgrammerGoof("RnaseqGlobals not initialized")
            self.db_file=os.path.join(root_dir,db_name)

        self.connect()                  # should this really be called in the constructor?
        self.cursor=self.dbh.cursor()
        assert(self.columns)
Exemplo n.º 10
0
    def setUp(self):
        """Initialize globals and fixture templates, build the 'juan' pipeline, empty the Pipeline table."""
        RnaseqGlobals.initialize(__file__, testing=True)
        template_dir = RnaseqGlobals.root_dir() + "/t/fixtures/templates"
        templated.template_dir = template_dir

        readset_file = RnaseqGlobals.root_dir() + "/t/fixtures/readsets/readset1.syml"
        loaded = Readset.load(readset_file)
        self.readset = loaded[0]
        self.pipeline = Pipeline(name='juan', readset=self.readset).load_steps()

        # delete all pre-existing pipeline objects from the db:
        session = RnaseqGlobals.get_session()
        for stale in session.query(Pipeline):
            session.delete(stale)
        session.commit()
Exemplo n.º 11
0
    def verify_exe(self):
        """Report whether this step's program can be found.

        A step without an 'exe' attribute trivially passes.  Otherwise the
        exe must be executable somewhere in the configured rnaseq path or in
        <root_dir>/programs; failing that, the step passes only if it has an
        'interpreter' that is executable there and the exe is readable there.
        """
        if not hasattr(self,'exe'):
            return True

        search_dirs=RnaseqGlobals.conf_value('rnaseq', 'path').split(":")
        search_dirs.append(os.path.join(RnaseqGlobals.root_dir(),'programs'))

        if exists_on_path(self.exe, search_dirs, os.X_OK):
            return True

        # couldn't find self.exe directly, and there is no interpreter to fall back on:
        if not hasattr(self,'interpreter'):
            return False

        # the interpreter must be executable, the script itself only readable:
        interpreter_found=exists_on_path(self.interpreter, search_dirs, os.X_OK)
        return interpreter_found and exists_on_path(self.exe, search_dirs, os.R_OK)
Exemplo n.º 12
0
    def usage(self, context):
        if self.aligner=='bowtie':
            bowtie_index=RnaseqGlobals.conf_value('rnaseq','bowtie_indexes')
            if self.paired_end():

                script='''
export BOWTIE_INDEXES=%(bowtie_index)s
bowtie ${ewbt} -1 ${inputs[0]} -2 ${inputs[1]} ${args} | perl -lane 'print unless($$F[1] == 4)' > $${ID}.${name}_BAD.$${format}
''' % {'bowtie_index': bowtie_index}

            else:
                script='''
export BOWTIE_INDEXES=%(bowtie_index)s
bowtie ${ewbt} ${args} ${inputs[0]} | perl -lane 'print unless($$F[1] == 4)' > $${ID}.${name}_BAD.$${format}
''' % {'bowtie_index': bowtie_index}
                restore_indent=True

                

        elif self.aligner=='blat':
            # fixme: need to implement this (NYI)
            raise ProgrammerGoof("step %s doesn't work for aligner==blat yet (NYI)" % self.name)
        else:
            raise ConfigError("Unknown alignment program '%s'" % self.aligner)


        return script
Exemplo n.º 13
0
    def sh_cmdline(self):
        """Return this step's usage string with its %(key)s fields filled from the step.

        A missing or None 'usage' entry yields ''.  As a side effect, a
        relative self['exe'] is rewritten to <root_dir>/programs/<exe>.

        Raises:
            ConfigError: if the usage string cannot be interpolated (missing
                key, malformed format, or wrong value type).
        """
        try:
            usage=self['usage']
        except KeyError:
            usage=None
        if usage is None:
            usage=''

        # look for exe in path, unless exe is an absolute path
        try:
            exe=self['exe']
        except KeyError:          # not all steps have self['exe']; eg header, footer
            exe=None
        if exe is not None and os.path.abspath(exe)!=exe:
            self['exe']=os.path.join(RnaseqGlobals.conf_value('rnaseq','root_dir'), 'programs', exe)

        try:
            return usage % self
        except (KeyError, AttributeError) as e:
            raise ConfigError("Missing value %s in\n%s" % (e.args, self.name))
        except ValueError as e:
            warn(e)
            warn("%s.usage: %s" % (self.name,usage))
            # A plain string cannot be raised (that itself fails with
            # TypeError), so the message is carried by ConfigError instead.
            raise ConfigError("%s.keys(): %s" % (self.name, ", ".join(self.__dict__.keys())))
        except TypeError as te:
            raise ConfigError("step %s: usage='%s': %s" % (self.name, usage, te))
Exemplo n.º 14
0
    def sh_script(self, **kwargs):
        """Render this step's 'sh_template' template, or return None if it has none.

        The template is looked up under <root_dir>/templates/sh_template and
        evaluated with the step's own items, self.dict, a few pipeline-related
        entries, and finally ``kwargs`` (which override the rest).

        Raises:
            ConfigError: if evaluating the template raises NameError.
        """
        if 'sh_template' not in self.dict:
            return None

        template_dir=os.path.join(RnaseqGlobals.conf_value('rnaseq','root_dir'),"templates","sh_template")
        template=Domain(template_dir, errors=4).get_template(self['sh_template'])

        # Later entries override earlier ones; caller-supplied kwargs win.
        fields={}
        fields.update(self)
        fields.update(self.dict)
        fields['readset']=self.pipeline.readset # fixme: really?
        fields['sh_cmd']=self.sh_cmdline()
        fields['config']=RnaseqGlobals.config
        fields['pipeline']=self.pipeline
        fields['ID']=self.pipeline.ID()
        fields.update(kwargs)

        try:
            return template.evoque(fields)
        except NameError as ne:
            raise ConfigError("%s while processing step '%s'" %(ne,self.name))
Exemplo n.º 15
0
    def is_current(self):
        """Report whether every output is newer than all inputs and the step's program.

        Returns False when self.force is set, or when any input or output
        cannot be stat'ed.  For each input, the mtime of
        <root_dir>/programs/<self['exe']> is also counted as an input time.

        Raises:
            ConfigError: if that program file cannot be stat'ed.
        """
        if self.force:
            return False

        newest_input=0
        oldest_output=time.time()

        for input_path in self.inputs():
            try:
                input_mtime=os.stat(input_path).st_mtime
            except OSError:
                return False            # missing/unaccessible inputs constitute not being current
            newest_input=max(newest_input, input_mtime)

            # the step's program counts as an input as well:
            try:
                exe_file=os.path.join(RnaseqGlobals.conf_value('rnaseq','root_dir'), 'programs', self['exe'])
                newest_input=max(newest_input, os.stat(exe_file).st_mtime)
            except OSError as oe:
                raise ConfigError("%s: %s" %(exe_file, oe))

        for output_path in self.outputs():
            try:
                oldest_output=min(oldest_output, os.stat(output_path).st_mtime)
            except OSError:
                return False            # missing/unaccessible outputs definitely constitute not being current

        return newest_input<oldest_output
Exemplo n.º 16
0
    def sh_script(self, context, **args):
        """Build the shell script fragment for this step.

        Args:
            context: run context; its ``inputs`` and ``outputs`` (keyed by
                step name) and its ``pipeline_run_id`` are exposed to the
                template.
            **args: a truthy 'echo_name' prepends a comment line and an echo
                of the step name.

        Returns:
            The echo part and the rendered usage template, joined by a newline.

        Raises:
            ConfigError: if usage() raises KeyError, if required attributes
                are missing, or if rendering the template fails.
        """
        if args.get('echo_name'):
            echo_part="\n# step %s:\n" % self.name
            echo_part+="echo step %s 1>&2" % self.name
        else:
            echo_part=''

        try:
            usage=self.usage(context)
        except KeyError as ke:
            raise ConfigError("missing config item '%s'" % ke)

        # check for missing attrs after calling step.usage()
        missing_attrs=self.missing_required_attrs()
        if missing_attrs:
            raise ConfigError("step %s: missing attributes: %s" % (self.name, ', '.join(missing_attrs)))

        # Template namespace; later entries override earlier ones.
        fields={}
        fields.update(self.__dict__)
        fields.update(self.pipeline.readset)
        if not self.is_prov_step:
            fields.update(self.pipeline[self.name])
            fields['inputs']=context.inputs[self.name]
            fields['outputs']=context.outputs[self.name]

        fields['pipeline']=self.pipeline
        fields['pipeline_run_id']=context.pipeline_run_id
        fields['config']=RnaseqGlobals.config
        fields['readset']=self.pipeline.readset

        # need to add shell variables for 'set': (in cufflinks.s_?.sh scripts)
        # currently root_dir, programs, reads_file, ID, format, readlen
        # but really, the pipeline should specify these?
        # or only things that are truly universal
        fields['root_dir']=RnaseqGlobals.root_dir()

        # add readset exports; attributes the readset lacks become '':
        readset=self.pipeline.readset
        for attr in readset.exports:
            fields[attr]=getattr(readset, attr, '')

        # add self.exports; only a missing attribute means "nothing to
        # export" (the former bare except hid every other error too):
        for attr in getattr(self, 'exports', []):
            fields[attr]=getattr(self,attr)

        try: script_part=evoque_template(usage, fields)
        except Exception as e: raise ConfigError("step %s: %s" % (self.name, e))

        # joined rather than concatenated: per the original author, using '+'
        # here led to HTML-escaped characters in the output
        script="\n".join([echo_part,script_part])

        return script
Exemplo n.º 17
0
 def check_label_unique(self, session, label):
     """Make sure no other PipelineRun already uses ``label``.

     Args:
         session: database session used for the lookup (and the delete).
         label: the label about to be used for a new pipeline run.

     Raises:
         UserError: if a run with that label exists and the 'force' config
             value is not set.  With 'force' set, the existing run is
             deleted and the deletion committed.
     """
     # Query exposes filter_by(); there is no filterBy() method.
     other_pr=session.query(PipelineRun).filter_by(label=label).first()
     if not other_pr:
         return

     if not RnaseqGlobals.conf_value('force'):
         raise UserError("The label '%s' is already in use.\n  Please provide a new label (either in the readset or by use of the '--label' command line option), or use the '--force' option to fully override the old pipeline run.  \n  This will cause all steps to be run, also." % label)

     session.delete(other_pr) # delete existing run, will get over written
     session.commit()
Exemplo n.º 18
0
 def setUp(self):
     """Set up fixture templates and globals (force and silent on), and build the 'filter' pipeline."""
     template_root = os.path.abspath(__file__) + "/../../fixtures/templates"
     templated.template_dir = os.path.normpath(template_root)
     RnaseqGlobals.initialize(__file__, testing=True)
     for option in ('force', 'silent'):
         RnaseqGlobals.set_conf_value(option, True)

     # Only the first entry of what Readset.load() returns is used.
     readset_file = RnaseqGlobals.root_dir() + '/t/fixtures/readsets/readset1.syml'
     first_readset = Readset.load(readset_file)[0]
     self.pipeline = Pipeline(name='filter', readset=first_readset)
Exemplo n.º 19
0
    def set_ID(self, *ID):
        """Set self.ID (a path prefix) and self.id (its basename), and return self.

        Args:
            *ID: optional; when given, ID[0] becomes the candidate ID.

        Resolution order:
          * an existing (or just passed) ID is kept if absolute, otherwise it
            is joined onto self.working_dir;
          * with exactly one reads file, the ID is working_dir plus the
            basename of self.reads_file up to its first dot;
          * with two reads files and paired_end set, the ID is working_dir
            plus the part of the first file's basename before '_1.'/'_2.';
          * otherwise nothing is set and self is returned unchanged.

        Raises:
            ConfigError: if a paired-end file name does not match '_[12].<ext>'.
        """
        if ID:
            self.ID=ID[0]

        current=getattr(self, 'ID', None)
        if current is not None:
            # keep absolute IDs as they are; anchor relative ones in working_dir
            if not os.path.isabs(current):
                self.ID=os.path.join(self.working_dir, current)

        elif len(self.reads_files)==1:
            # Strip the extension from the basename only; applying the regex
            # to the joined path would cut at a dot inside working_dir.
            stem=re.sub(r'\..*$', '', os.path.basename(self.reads_file))
            self.ID=os.path.join(self.working_dir, stem)

        elif len(self.reads_files)==2 and self.paired_end:
            # check that the file name is of proper form (only the first file is examined):
            first_file=self.reads_files[0]
            mg=re.search(r'^(.*)_[12]\.[\w_]+$', os.path.basename(first_file))
            if mg is None:
                raise ConfigError("'%s' isn't a well-formed filename for paired_end data: must match '_[12].<ext>'" % first_file)
            self.ID=os.path.join(self.working_dir, mg.group(1))

        else:
            if RnaseqGlobals.conf_value('verbose') or RnaseqGlobals.conf_value('debug'):
                sys.stderr.write("Cannot set ID: too many files (%d), paired_end=%s\n" % (len(self.reads_files), self.paired_end))
            return self

        # self.id mirrors the basename of ID, both as attribute and as item
        self.id=os.path.basename(self.ID)
        self['id']=self.id

        return self
Exemplo n.º 20
0
    def write_sh_script(self, **kwargs):
        """Render the shell script and write it into the readset's working directory.

        Args:
            **kwargs: passed through to self.sh_script().

        Returns:
            The path of the written script file.

        Raises:
            OSError: if the working directory does not exist and cannot be
                created.
        """
        script=self.sh_script(**kwargs)

        working_dir=self.readset.working_dir
        script_filename=os.path.join(working_dir, self.scriptname())
        try:
            os.makedirs(working_dir)
        except OSError:
            # An already existing directory is fine; anything else (e.g. a
            # permissions problem) is a real error and must not be hidden.
            if not os.path.isdir(working_dir):
                raise

        with open(script_filename, "w") as f:
            f.write(script)
        if RnaseqGlobals.conf_value('verbose'): print("%s written" % script_filename)
        return script_filename
Exemplo n.º 21
0
    def test_list(self):
        """Loading readset12.syml yields a list of six consistent mouse readsets."""
        readset_file=RnaseqGlobals.root_dir()+'/t/fixtures/readsets/readset12.syml'
        rlist=Readset.load(readset_file)
        self.assertEqual(type(rlist),type([]))
        self.assertEqual(len(rlist),6)

        for rs in rlist:
            self.assertEqual(rs.org, 'mouse')
            self.assertEqual(rs.readlen, 75)

            # directory and file name of the reads file are checked separately
            reads_dir, reads_name=os.path.split(rs.reads_file)
            self.assertEqual(reads_dir, self.readset_dir)
            self.assertTrue(re.match('s_\d\d?_\d_sequence.txt',reads_name))
            self.assertTrue(re.search(rs.label,reads_name))
            self.assertEqual(rs.label,rs.description)
Exemplo n.º 22
0
    def test_query(self):
        """A stored pipeline is found by name, with matching name/description and id 1."""
        session=RnaseqGlobals.get_session()
        readset=self.readset
        pipeline=self.pipeline
        session.add(pipeline)
        session.commit()

        matches=session.query(Pipeline).filter_by(name=pipeline.name).all()
        self.assertEqual(len(matches), 1)

        stored=matches[0]
        for field in ('name', 'description'):
            self.assertEqual(getattr(stored,field), getattr(pipeline,field))
        self.assertEqual(stored.id,1)
Exemplo n.º 23
0
    def evoque_fields(self):
        """Expand '${' templates held in this object's string attributes, in place; return self.

        NOTE: the template namespace is self.__dict__ itself (not a copy), so
        the 'rnaseq' config values are merged into this object's attributes.
        Attributes whose expansion raises NameError are left unchanged.
        """
        namespace=self.__dict__
        namespace.update(RnaseqGlobals.conf_value('rnaseq'))

        for attr_name in dir(self):
            if attr_name.startswith('__'):
                continue
            value=getattr(self,attr_name)
            # only plain strings that contain a '${' are templates
            if type(value) != type('') or not re.search('\$\{', value):
                continue

            try:
                setattr(self, attr_name, evoque_template(value, namespace))
            except NameError:
                pass

        return self
Exemplo n.º 24
0
    def run(self, *argv, **args):
        """Call create_table(metadata, engine) on every class in self.classes.

        A "<tablename> created" line is printed for each class.

        Raises:
            UserError: if a class has no 'create_table' attribute.
        """
        session = RnaseqGlobals.get_session()
        for klass in self.classes:
            # Tables are not dropped here first; the original author notes
            # that Table.drop() is a pain when the table does not exist yet.
            try:
                create_table = klass.create_table
            except AttributeError:
                raise UserError("%s doesn't define 'create_table'" % klass.__name__)

            create_table(RnaseqGlobals.metadata, RnaseqGlobals.engine)
            print("%s created" % klass.__tablename__)
Exemplo n.º 25
0
 def test_glob_rel(self):
     """A relative glob in readset_rel_glob.syml expands to one readset with three reads files."""
     readset_file=RnaseqGlobals.root_dir()+'/t/fixtures/readsets/readset_rel_glob.syml'
     rlist=Readset.load(readset_file)
     self.assertEqual(len(rlist),1)

     filelist=rlist[0].reads_files
     self.assertEqual(len(filelist), 3)

     # NOTE(review): only s_1 and s_2 are looked for although three files are
     # expected -- confirm whether a third name should be checked as well.
     for i in (1, 2):
         pattern="s_%d_export.txt" % i
         self.assertTrue(any(re.search(pattern, f) for f in filelist))
Exemplo n.º 26
0
    def test_pipeline_id(self):
        """A stored pipeline gets id 1, and merging it back keeps that id."""
        pipeline=self.pipeline

        # Before storing, the id may not exist at all; that is acceptable.
        try:
            pipeline.id
        except AttributeError:
            self.assertTrue(True)

        session=RnaseqGlobals.get_session()
        session.add(pipeline)
        session.commit()
        session.flush()
        self.assertEqual(pipeline.id, 1)

        merged=session.merge(pipeline)
        self.assertEqual(merged.id, 1)
Exemplo n.º 27
0
    def store_db(self):
        """Store this pipeline in the db unless one with the same name exists.

        Returns:
            self after adding and committing it, or -- if a pipeline with
            this name is already stored -- that stored object (self.id is
            set to its id first).
        """
        session=RnaseqGlobals.get_session()
        stored=session.query(Pipeline).filter_by(name=self.name).first()

        if stored==None:
            session.add(self)
            session.commit()
            return self

        # a pipeline of that name already exists: adopt its id and hand it back
        self.id=stored.id
        return stored
Exemplo n.º 28
0
    def test_dict(self):
        """A loaded readset supports item access and update()."""
        fixture_dir=RnaseqGlobals.root_dir()+'/t/fixtures/readsets'
        os.chdir(fixture_dir)
        readset=Readset.load(os.path.join(fixture_dir,'readset_rel_glob.syml'))[0]

        self.assertRegexpMatches(readset['reads_file'], fixture_dir+'/s_\d_export.txt')

        expected=(
            ('description', 'this is a sample readset (fixture)'),
            ('org', 'mouse'),
            ('readlen', 75),
            ('working_dir', os.path.join(fixture_dir,'rnaseq_wf')),
        )
        for key, value in expected:
            self.assertEqual(readset[key],value)

        # update() must make new keys available through item access
        readset.update({'this': 'that'})
        self.assertEqual(readset['this'],'that')
Exemplo n.º 29
0
    def run(self, *argv, **args):
        """Create an object of the class named on the command line and store it in the db.

        Args:
            *argv: argv[0] is the argument list; [0] is the script name, [1]
                the command, [2] the class name.  Every entry starting with
                ``word=word`` is taken as a key=value pair for the object.

        Raises:
            UserError: if no class name is given, or the name is not a
                module-level global.
        """
        try: classname=argv[0][2]                # [0] is script name, [1] is command
        except IndexError: raise UserError(self.usage())

        try: klass=globals()[classname]
        except KeyError: raise UserError("%s: unknown class" % classname)

        obj_hash={}
        for pair in argv[0]:
            if not re.match(r"\w+=\w+", pair): continue
            # Split on the first '=' only, so that a value which itself
            # contains '=' does not break the unpacking.
            k,v=pair.split("=", 1)
            obj_hash[k]=v

        # NOTE(review): the dict is passed as a single positional argument;
        # confirm the target classes accept that (rather than **obj_hash).
        o=klass(obj_hash)

        session=RnaseqGlobals.get_session()
        session.add(o)
        session.commit()
Exemplo n.º 30
0
    def qsub_script(self, script_filename, out_filename=None, err_filename=None):
        """Write a qsub wrapper script for ``script_filename`` and return its path.

        Args:
            script_filename: the command the qsub template runs ('cmd').
            out_filename: defaults to self.out_filename().
            err_filename: defaults to self.err_filename().

        Returns:
            The sanitized path "<working_dir>/<name>.<label>.qsub" that the
            rendered 'qsub' template was written to.
        """
        if out_filename is None: out_filename=self.out_filename()
        if err_filename is None: err_filename=self.err_filename()

        qsub=templated(name='qsub', type='sh_template', suffix='tmpl')
        # template variables: the object's attributes plus the entries below
        fields={}
        fields.update(self.__dict__)
        fields['name']=path_helpers.sanitize(self.name)
        fields['cmd']=script_filename
        fields['out_filename']=out_filename
        fields['err_filename']=err_filename
        qsub_script=qsub.eval_tmpl(vars=fields)

        qsub_script_file=path_helpers.sanitize(os.path.join(self.readset.working_dir, "%s.%s.qsub" % (self.name, self.readset.label)))
        # 'with' closes the file even if the write fails
        with open(qsub_script_file,"w") as f:
            f.write(qsub_script)
        if RnaseqGlobals.conf_value('verbose'): print("%s written" % qsub_script_file)
        return qsub_script_file