Пример #1
0
def runGeneFasta(argNamespace):
    """Write one FASTA file per gene of a target set.

    The target set is read from ``argNamespace.infile``.  For every gene
    name in the target set's ``paftolGeneDict`` the matching sequence
    records are selected and written to a file whose name is produced by
    applying the ``%`` format string ``argNamespace.outFastaFormat`` to the
    gene name.  The names of all files written are finally printed to
    stdout on a single, space-separated line.
    """
    targetSet = paftol.PaftolTargetSet()
    targetSet.readFasta(argNamespace.infile)
    writtenFnames = []
    for geneName in targetSet.paftolGeneDict:
        # restrict by gene only; organisms are left unrestricted (None)
        geneRecords = targetSet.getSeqRecordSelection(
            organismNameList=None, geneNameList=[geneName])
        outFname = argNamespace.outFastaFormat % geneName
        Bio.SeqIO.write(geneRecords, outFname, 'fasta')
        writtenFnames.append(outFname)
    sys.stdout.write(' '.join(writtenFnames) + '\n')
Пример #2
0
def runRetrieveTargets(argNamespace):
    """Retrieve target sequences for a genome and write them as FASTA.

    The target set is read from ``argNamespace.targetsfile``, or from stdin
    when that attribute is ``None``.  A blastn runner is built from the
    namespace with ``argToBlastnRunner`` and handed, together with
    ``argNamespace.genomeName``, ``argNamespace.fastaFname`` and the target
    set, to ``PaftolTargetSeqRetriever.retrievePaftolTargetList``.  The
    resulting list is written in FASTA format to ``argNamespace.outfile``,
    or to stdout when that attribute is ``None``.
    """
    targetSet = paftol.PaftolTargetSet()
    targetsSource = (sys.stdin if argNamespace.targetsfile is None
                     else argNamespace.targetsfile)
    targetSet.readFasta(targetsSource)
    runner = argToBlastnRunner(argNamespace)
    retriever = paftol.PaftolTargetSeqRetriever()
    retrievedTargets = retriever.retrievePaftolTargetList(
        argNamespace.genomeName,
        argNamespace.fastaFname,
        targetSet,
        runner)
    if argNamespace.outfile is None:
        Bio.SeqIO.write(retrievedTargets, sys.stdout, 'fasta')
        return
    with open(argNamespace.outfile, 'w') as outHandle:
        Bio.SeqIO.write(retrievedTargets, outHandle, 'fasta')
Пример #3
0
def runGeneSetStats(argNamespace):
    """Compute gene set statistics for a sequence file and write them as CSV.

    The target set is read from ``argNamespace.targetsfile`` (required).
    Sequences are taken from ``argNamespace.seqfile``, or from stdin when
    that attribute is ``None``, and passed together with the sample ID and
    the target set to ``paftol.makeGeneSetStatsDataFrame``.  The sample ID
    is ``argNamespace.sampleId``, falling back to ``'unknown'`` when it is
    ``None``.  The resulting data frame is written with ``writeCsv`` to
    ``argNamespace.outfile``, or to stdout when that attribute is ``None``.

    Raises:
        ValueError: if ``argNamespace.targetsfile`` is ``None``.
    """
    # Fail fast, before any other work is done.  ValueError is a subclass of
    # StandardError on Python 2, so handlers written for StandardError still
    # catch it, and the call-style raise is valid syntax on Python 2 and 3.
    if argNamespace.targetsfile is None:
        raise ValueError('no targets file specified')
    paftolTargetSet = paftol.PaftolTargetSet()
    sampleId = 'unknown'
    if argNamespace.sampleId is not None:
        sampleId = argNamespace.sampleId
    paftolTargetSet.readFasta(argNamespace.targetsfile)
    if argNamespace.seqfile is None:
        geneSetStatsDataFrame = paftol.makeGeneSetStatsDataFrame(
            sys.stdin, sampleId, paftolTargetSet)
    else:
        with open(argNamespace.seqfile, 'r') as f:
            geneSetStatsDataFrame = paftol.makeGeneSetStatsDataFrame(
                f, sampleId, paftolTargetSet)
    if argNamespace.outfile is None:
        geneSetStatsDataFrame.writeCsv(sys.stdout)
    else:
        with open(argNamespace.outfile, 'w') as f:
            geneSetStatsDataFrame.writeCsv(f)
Пример #4
0
def runTargetGeneScan(argNamespace):
    """Scan a reference genome for genes and blast a target set against it.

    The target set is read from ``argNamespace.targetsfile``, or from stdin
    when that attribute is ``None``.  A ``paftol.ReferenceGenome`` is built
    from ``argNamespace.scanMethod``, ``argNamespace.refFasta`` and
    ``argNamespace.refGenbank`` and its genes are scanned using
    ``argNamespace.scanMethod``.  Progress messages go to stderr.  The
    table returned by ``blastTargetSet`` is written as CSV to
    ``argNamespace.outfile`` (stdout when ``None``); when
    ``argNamespace.cdsFasta`` is not ``None`` the returned CDS list is
    additionally written there in FASTA format.
    """
    targetSet = paftol.PaftolTargetSet()
    targetsSource = (sys.stdin if argNamespace.targetsfile is None
                     else argNamespace.targetsfile)
    targetSet.readFasta(targetsSource)
    numGenes = len(targetSet.paftolGeneDict)
    numOrganisms = len(targetSet.organismDict)
    sys.stderr.write('read target set with %d genes and %d organisms\n'
                     % (numGenes, numOrganisms))
    # FIXME: hack to use scanMethod as the genome name as well
    genome = paftol.ReferenceGenome(
        argNamespace.scanMethod,
        argNamespace.refFasta,
        argNamespace.refGenbank)
    genome.scanGenes(argNamespace.scanMethod)
    numScanned = len(genome.geneList)
    sys.stderr.write('read reference genome and scanned %d genes\n'
                     % numScanned)
    geneTable, cdsRecords = genome.blastTargetSet(targetSet)
    if argNamespace.outfile is not None:
        with open(argNamespace.outfile, 'w') as csvHandle:
            geneTable.writeCsv(csvHandle)
    else:
        geneTable.writeCsv(sys.stdout)
    if argNamespace.cdsFasta is not None:
        Bio.SeqIO.write(cdsRecords, argNamespace.cdsFasta, 'fasta')
Пример #5
0
def runSelectgenes(argNamespace):
    """Select sequences from a target set by organism and/or gene name.

    Organism names come from ``argNamespace.organism``.  Gene names are the
    union of ``argNamespace.gene`` and the names listed one per line in the
    file ``argNamespace.genefile``; either source may be ``None``.  A
    selection that ends up empty is passed on as ``None``, as is a missing
    organism list.  The target set is read from ``argNamespace.targetsfile``
    (stdin when ``None``) and the selected records are written in FASTA
    format to ``argNamespace.outfile`` (stdout when ``None``).
    """
    organismNameSet = None
    if argNamespace.organism is not None:
        organismNameSet = set(argNamespace.organism)
    # Accumulate into a set from the start so that names given directly and
    # names read from the gene file can be combined; mixing a list and a set
    # here made ``.append`` fail when both sources were supplied.
    geneNameSet = set()
    if argNamespace.gene is not None:
        geneNameSet.update(argNamespace.gene)
    if argNamespace.genefile is not None:
        with open(argNamespace.genefile) as f:
            for line in f:
                geneName = line.strip()
                # skip blank lines so they do not add an empty gene name
                if geneName:
                    geneNameSet.add(geneName)
    if not geneNameSet:
        geneNameSet = None
    paftolTargetSet = paftol.PaftolTargetSet()
    if argNamespace.targetsfile is None:
        paftolTargetSet.readFasta(sys.stdin)
    else:
        paftolTargetSet.readFasta(argNamespace.targetsfile)
    srList = paftolTargetSet.getSeqRecordSelection(organismNameSet,
                                                   geneNameSet)
    if argNamespace.outfile is None:
        Bio.SeqIO.write(srList, sys.stdout, 'fasta')
    else:
        Bio.SeqIO.write(srList, argNamespace.outfile, 'fasta')