def run(self):
        """Run cycle step 4: the gene deactivation stage for this cycle.

        Writes ``xml/cycle.step4.start.xml`` and ``xml/cycle.step4.end.xml``
        timestamps under ``self.thisDir``.  In between, unless
        ``logs/gene_deactivation.log`` already exists (its presence is what
        causes the work to be skipped), one of two things happens:

        * gene deactivation enabled (the default): the command returned by
          ``lsc.evolverGeneDeactivationStep`` is run and its combined
          stdout/stderr is stored in the log file;
        * ``self.options.noGeneDeactivation`` set: ``intra/evannots.gff`` is
          copied to ``annots.gff`` and the log file is touched.
        """
        logger.info('CycleStep4 object running, %s' % self.thisDir)
        lsc.verifyDirExists(self.thisDir)
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.start.xml'))

        outname = os.path.join(self.thisDir, 'logs', 'gene_deactivation.log')
        if not os.path.exists(outname):
            if not self.options.noGeneDeactivation:
                # by default gene deactivation is turned on.
                cmd = lsc.evolverGeneDeactivationStep(self.thisDir, self.thisParentDir)
                p = subprocess.Popen(cmd, cwd=self.getLocalTempDir(),
                                     stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                # NOTE(review): p.returncode is not checked, so a failing
                # command still produces the log and the step is skipped on
                # the next run -- confirm this is intended.
                out = p.communicate()[0]
                # Write to a temporary name and rename afterwards so the log
                # never exists under its final name while partially written.
                # The pipe yields raw bytes, hence binary mode (a no-op
                # difference for Python 2 str on POSIX).
                with open(outname + '.tmp', 'wb') as f:
                    f.write(out)
                os.rename(outname + '.tmp', outname)
            else:
                # this could cause a proliferation of gene creation.
                cmd = [lsc.which('cp'),
                       os.path.join(self.thisDir, 'intra', 'evannots.gff'),
                       os.path.join(self.thisDir, 'annots.gff')]
                cmds = [cmd, [lsc.which('touch'), outname]]
                lsc.runCommands(cmds, self.getLocalTempDir())
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step4.end.xml'))
def main():
    """Extract per-cycle sequences to FASTA and add name-prefixed copies.

    Parses the command line, lists the directories directly under
    ``options.simDir`` and, for each one selected (all of them when
    ``options.allCycles`` is set, otherwise only those whose basename is a
    key of the ``leafs`` mapping handed to ``extractLeafs``), runs four
    commands through ``lsc.runCommands``:

    1. ``evolver_cvt`` converts ``seq.rev`` into ``seq.fa.tmp``;
    2. ``mv`` renames it to ``seq.fa``;
    3. ``sed`` reads ``seq.fa`` and writes ``seq.name.fa.tmp`` with every
       leading ``>`` replaced by ``>`` + cleaned directory name + ``.``;
    4. ``mv`` renames that to ``seq.name.fa``.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycleDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for cycleDir in cycleDirs:
        dirName = os.path.basename(cycleDir)
        if not (options.allCycles or dirName in leafs):
            continue

        # Strip brackets and single quotes from the directory name before
        # using it as the sequence-name prefix.
        cleanName = dirName
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        # Each entry is (command, stdin path or None, stdout path or None).
        steps = [
            ([lsc.which('evolver_cvt'),
              '-fromrev', os.path.join(cycleDir, 'seq.rev'),
              '-tofasta', os.path.join(cycleDir, 'seq.fa.tmp')],
             None, None),
            ([lsc.which('mv'),
              os.path.join(cycleDir, 'seq.fa.tmp'),
              os.path.join(cycleDir, 'seq.fa')],
             None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
             os.path.join(cycleDir, 'seq.fa'),
             os.path.join(cycleDir, 'seq.name.fa.tmp')),
            ([lsc.which('mv'),
              os.path.join(cycleDir, 'seq.name.fa.tmp'),
              os.path.join(cycleDir, 'seq.name.fa')],
             None, None),
        ]

        # mode 's' is presumably "serial"; each step consumes the previous
        # step's output file -- confirm against lsc.runCommands.
        lsc.runCommands([step[0] for step in steps],
                        os.curdir,
                        outPipes=[step[2] for step in steps],
                        inPipes=[step[1] for step in steps],
                        mode='s')
 def run(self):
     """Convert this directory's ``aln.rev`` alignment to MAF, if not done yet.

     The output is ``<thisDir>/<alignName>.maf`` when ``self.isLeaf`` is
     true and ``<thisDir>/<alignName>.tmp.maf`` otherwise.  Nothing is run
     when that file already exists.  ``evolver_cvt`` writes to a ``.tmp``
     sibling which ``mv`` then renames to the final name.
     """
     logger.info('Extract object running, thisDir: %s' % (self.thisDir))
     suffix = '.maf' if self.isLeaf else '.tmp.maf'
     target = os.path.join(self.thisDir, self.alignName + suffix)
     if os.path.exists(target):
         # Output already present: nothing to do.
         return
     convert = [lsc.which('evolver_cvt'),
                '-fromrev', os.path.join(self.thisDir, 'aln.rev'),
                '-tomaf', target + '.tmp']
     rename = [lsc.which('mv'), target + '.tmp', target]
     lsc.runCommands([convert, rename], self.getLocalTempDir())
    def run(self):
        """Run cycle step 3: convert trf output to GFF, then merge annotations.

        Writes ``xml/cycle.step3.start.xml`` / ``cycle.step3.end.xml``
        timestamps under ``self.thisDir``.  Every ``intra/*.dat`` file whose
        ``<chr>.trfannots.gff`` counterpart does not exist yet is converted
        with ``evolver_trf2gff.py`` (output captured into a ``.tmp`` file and
        then moved into place).  Afterwards the commands returned by
        ``lsc.evolverIntraMergeCmds`` are run and ``CycleStep4`` is set as the
        follow-on target.

        Raises:
            RuntimeError: if a ``.dat`` file name does not match the expected
                ``chr*.outseq.fa*.dat`` pattern.
        """
        logger.info('CycleStep3 object running, %s' % self.thisDir)
        lsc.verifyDirExists(self.thisDir)
        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.start.xml'))

        # trf
        regex = r'^(chr\S+)\.outseq\.fa.*\.dat'
        pat = re.compile(regex)
        files = glob.glob(os.path.join(self.thisDir, 'intra', '*.dat'))
        cmds = []
        outPipes = []
        mvCmds = []
        mvPipes = []
        for f in files:
            # each file is the trf output for one chromosome
            baseName = os.path.basename(f)
            m = pat.match(baseName)
            if m is None:
                raise RuntimeError('Regex "%s" failed on filename %s' % (regex, baseName))
            outname = os.path.join(self.thisDir, 'intra', m.group(1) + '.trfannots.gff')
            if not os.path.exists(outname):
                # convert the .dat to .gff
                cmds.append([lsc.which('evolver_trf2gff.py'), f])
                outPipes.append(outname + '.tmp')
                # atomic files
                mvCmds.append([lsc.which('mv'), outname + '.tmp', outname])
                mvPipes.append(None)
        # All conversions are issued first; the renames only run afterwards.
        lsc.runCommands(cmds, self.getLocalTempDir(), outPipes=outPipes, mode='p')
        lsc.runCommands(mvCmds, self.getLocalTempDir(), outPipes=mvPipes, mode='p')

        catCmd, evoCmd, cvtCmd, mergeFollowCmds = lsc.evolverIntraMergeCmds(self.thisDir, self.theChild)

        # Only the first command's stdout is redirected (to evannots.gff.tmp).
        lsc.runCommands([catCmd, evoCmd, cvtCmd], self.getLocalTempDir(),
                        outPipes=[os.path.join(self.thisDir, 'intra', 'evannots.gff.tmp'), None, None],
                        mode='p')
        lsc.runCommands(mergeFollowCmds, self.getLocalTempDir())

        lsc.createTimestamp(os.path.join(self.thisDir, 'xml', 'cycle.step3.end.xml'))
        self.setFollowOnTarget(CycleStep4(self.thisNewickStr, self.thisParentDir,
                                          self.thisStepLength, self.options))
# Example no. 5
# 0
def main():
    """Write ``seq.fa`` and ``seq.name.fa`` for the selected cycle directories.

    After option parsing, every directory directly under ``options.simDir``
    is considered.  A directory is processed when ``options.allCycles`` is
    set or when its basename is a key of the ``leafs`` mapping passed to
    ``extractLeafs``.  For each processed directory ``lsc.runCommands`` is
    given, in order: ``evolver_cvt`` (``seq.rev`` to ``seq.fa.tmp``), ``mv``
    to ``seq.fa``, ``sed`` (reads ``seq.fa``, writes ``seq.name.fa.tmp`` with
    the cleaned directory name and a dot inserted after each leading ``>``),
    and ``mv`` to ``seq.name.fa``.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    candidates = glob.glob(os.path.join(options.simDir, '*'))
    candidates = directoriesOnly(candidates)
    leafs = {}
    tree = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(tree, leafs)

    for stepDir in candidates:
        if not options.allCycles:
            if os.path.basename(stepDir) not in leafs:
                continue

        cmds, inPipes, outPipes = [], [], []

        def enqueue(command, stdinPath=None, stdoutPath=None):
            # Keep the three parallel lists aligned, one entry per command.
            cmds.append(command)
            inPipes.append(stdinPath)
            outPipes.append(stdoutPath)

        # Directory name without brackets or single quotes.
        cleanName = (os.path.basename(stepDir)
                     .replace('[', '')
                     .replace(']', '')
                     .replace('\'', ''))

        enqueue([lsc.which('evolver_cvt'),
                 '-fromrev', os.path.join(stepDir, 'seq.rev'),
                 '-tofasta', os.path.join(stepDir, 'seq.fa.tmp')])
        enqueue([lsc.which('mv'),
                 os.path.join(stepDir, 'seq.fa.tmp'),
                 os.path.join(stepDir, 'seq.fa')])
        enqueue([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
                stdinPath=os.path.join(stepDir, 'seq.fa'),
                stdoutPath=os.path.join(stepDir, 'seq.name.fa.tmp'))
        enqueue([lsc.which('mv'),
                 os.path.join(stepDir, 'seq.name.fa.tmp'),
                 os.path.join(stepDir, 'seq.name.fa')])

        lsc.runCommands(cmds, os.curdir,
                        outPipes=outPipes, inPipes=inPipes, mode='s')