Exemplo n.º 1
0
    workingDir=os.getcwd()
    chkPointName = None

    outFile = file(os.path.join(workingDir,outFastaName),'a')

    qsubOptions=''

    # complete restart
    timer.mark('fastaSplit')
    if not RESUME:
        startName = originalFasta
        inFileName = startName
        seqSize = os.stat(originalFasta).st_size
        partCt = seqSize/seqSliceSize +1
        seqSliceNames = fasta.splitFasta(originalFasta,partCt,tmpDir=workingDir)
        tmpf=file(faNameFile,'w')
        tmpf.write('\n'.join(seqSliceNames))
        tmpf.close()
        
    # reuse split files    
    if RESUME:
        seqSliceNames = []
        tmpf = file(faNameFile)
        for l in tmpf:
            try:
                print 
                seqSliceNames.append(
                    os.path.join(os.getcwd(),
                                 os.path.split(l.rstrip())[1]))
            except:
Exemplo n.º 2
0
    if argProblem:
        print __doc__
        return(-1)

    if maxRec != None:
        fn,=args
        recCt=fasta.fastaCount(fn)
        count = recCt/maxRec + int(recCt%maxRec!=0)
    else:
        count,fn= args
        count = int(count)

    nG = niceName(fn,count)

    fasta.splitFasta(fn,splitCt=count,nameGenerator=nG)
        
    return(0)  # we did it!

def niceName(rootName,maxN):
    """Endlessly generate numbered output files derived from rootName.

    Each step yields a ``(fileHandle, fileName)`` pair.  ``fileName`` is
    rootName with ``_<index>`` inserted before the extension, where the
    index starts at 0 and is zero-padded so that indices 0..maxN-1 all
    have the same width (e.g. ``seq.fasta`` with maxN=100 gives
    ``seq_00.fasta``, ``seq_01.fasta``, ...).  ``fileHandle`` is that file
    freshly opened in 'w' mode; the generator never closes it, so the
    consumer is responsible for doing so.

    rootName -- path used as the template for the generated names.
    maxN     -- expected number of files; only used to pick the padding
                width.  The generator does not stop after maxN names.
    """
    # log10() is undefined for values < 1 (maxN == 0 happens when the input
    # has no records), so fall back to a single digit.  For maxN == 1 the
    # original computed a width of 0, which formats identically to width 1.
    if maxN > 1:
        width = int(ceil(log10(maxN)))
    else:
        width = 1
    fmt = '_%%0%dd' % width
    base,ext = os.path.splitext(rootName)

    n = 0
    while True:
        name = base + fmt % n + ext
        # open() rather than the deprecated file() alias.
        yield open(name,'w'),name
        n += 1

    
Exemplo n.º 3
0
    def submitBlast( self, thingToBlast, dbPath, N=12, parameters="-p blastn", formatParameters="-m 8", prefix="GrBl"):
        """Initiates a BLAST job with the given parameters.

        thingToBlast can be:
        -   a path to an existing FASTA file
        -   a list or tuple of things that each evaluate to a valid FASTA
            string when str() is applied (this includes fasta.Record objects)
        -   any other single object that evaluates to a valid FASTA string
            when str() is applied

        dbPath is passed to BLAST as the '-d' argument.

        N is the maximum number of parallel BLASTs to execute.
        (the sequences will be split up into at most N files.)

        parameters can be either an explicit parameter string to append to the blast call (ie. '-p blastn -b 3')
        or a dictionary of { parameter:value } pairs, which will be translated to '-parameter1 value1 -parameter2 value2'

        formatParameters is inserted verbatim right after the BLAST executable.

        prefix is forwarded unchanged to self.submitThread.

        Side effects: stores parameters and dbPath on self, records every
        temporary query/result file in self.tempFiles, and appends a
        (thread, outfile) pair to self.activeQueries for each job started.

        submitBlast returns the list of values returned by self.submitThread,
        one per query slice (described by the original author as the names
        of the threads which are running this Blast).
        """

        # Store for database use
        self.parameters = parameters
        self.dbPath = dbPath

        ############################
        # Build the query

        queryPath = None
        if isinstance( thingToBlast, str ) and os.path.lexists( thingToBlast ): # path to a FASTA file
            queryPath = thingToBlast
            # NOTE(review): self.queryFile is only set on this branch; the
            # temp-file branches below leave it untouched -- confirm intended.
            self.queryFile = queryPath

        elif isinstance( thingToBlast, (tuple, list) ):
            ( of, queryPath ) = fasta.mystemp( suffix='.fasta', dir=self.tmpDir )
            # Register before writing so the file is still tracked for
            # cleanup if a write fails part-way through.
            self.tempFiles.append( queryPath )
            try:
                for elt in thingToBlast:
                    of.write(str(elt) + "\n")
            finally:
                of.close()
        else:
            ( of, queryPath ) = fasta.mystemp( suffix='.fasta', dir=self.tmpDir )
            self.tempFiles.append( queryPath )
            try:
                of.write(str(thingToBlast))
            finally:
                of.close()

        ############################
        # Split the query
        queryFiles = fasta.splitFasta( queryPath, N, tmpDir=self.tmpDir )
        self.tempFiles = self.tempFiles + queryFiles

        ############################
        # Build the BLAST line
        blastLine = "%s %s -d %s" % ( self.blastPath, formatParameters, dbPath )
        if isinstance( parameters, str ):
            blastLine = blastLine + " " + parameters
        else:
            parameterStrings = ( "-%s %s" % ( key, parameters[key] ) for key in parameters.keys() )
            blastLine = blastLine + " " + " ".join( parameterStrings )

        threads = []
        for qf in queryFiles:
            # Derive the result name from the file name only: a plain
            # qf.replace() would also rewrite ".fasta" inside directory
            # components, and would leave the name unchanged (making BLAST
            # overwrite its own input) when the slice has no ".fasta" in it.
            sliceDir, sliceName = os.path.split( qf )
            resultName = sliceName.replace(".fasta",".br")
            if resultName == sliceName:
                resultName = sliceName + ".br"
            outfile = os.path.join( sliceDir, resultName )

            self.tempFiles.append( outfile )
            command = "%s -i %s -o %s" % ( blastLine, qf, outfile )
            thread = self.submitThread( command, qrshArgs="-l arch='fbsd-amd64'", prefix=prefix )
            #thread = self.submitThread( command, qrshArgs="-l arch='fbsd-amd64' -l mf=1.0G" )
            self.activeQueries.append( ( thread, outfile ) )
            threads.append( thread )

        return threads