Python Fasta.saveSequences примеры использования

Язык программирования: Python

Пространство имен/Пакет: AGBio.io

Класс/Тип: Fasta

Метод/Функция: saveSequences

Примеров на hotexamples.com: 2

Python Fasta.saveSequences — найдено 2 примера. Это лучшие примеры кода на Python для AGBio.io.Fasta.saveSequences, полученные из open source проектов. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

loadSequences(6)

saveSequences(2)

Основные методы

loadSequences (6)

saveSequences (2)

Пример #1

Показать файл

Файл: searchSelenoproteins.py Проект: agrimaldi/crg

def main():
    """Extract selenoproteins from one or more FASTA files.

    Command-line options (read from sys.argv by optparse):
      -i/--inputfile   comma-separated list of FASTA files to scan (required).
      -o/--outputfile  file receiving the selenoproteins; when omitted they
                       are written to standard output.
      -v/--verbose     verbosity level, 0 to 3 (default 1).

    Each input file is loaded with FastaLib.loadSequences, filtered with
    findSelenoproteins and the result is written with
    FastaLib.saveSequences.  Progress messages go to standard output.

    Exits through sys.exit with a message when no input file is given.
    Raises ValueError when the verbosity value is not an integer.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='fasta file in which selenoproteins should be looked for.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='fasta file containing the selenoproteins',
                       metavar='FILE' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = '1' )

    (options, args) = parser.parse_args()

    verbosity = int( options.verbosity )

    if not options.inputfilename:
        sys.exit( 'You must provide an input filename.')

    # True when the results go to a real file rather than to stdout.
    # The verbosity-3 dump below is only produced in that case.
    # NOTE(review): presumably so the sequences are not emitted twice on
    # stdout -- confirm.
    writestofile = bool( options.outputfilename )

    infiles = []
    outfile = None
    try:
        # Opened inside the try so that handles already opened are released
        # if a later file in the list cannot be opened.
        for name in options.inputfilename.split(','):
            infiles.append( open( name, 'r' ) )

        if writestofile:
            outfile = open( options.outputfilename, 'w' )
        else:
            outfile = sys.stdout

        # Single-argument parenthesised prints produce exactly the same
        # output as the bare Python 2 print statement; print('') emits the
        # blank line that a lone ``print`` did.
        for f in infiles:
            if verbosity >= 1:
                print('')
                print('>>> Searching for selenoproteins in file ' + f.name)
                print('')

            if verbosity >= 2:
                print('>>> Loading sequences ...')

            sequences = FastaLib.loadSequences( f )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')
                print('>>> Searching for U containing sequences ...')

            selenoproteins = findSelenoproteins( sequences )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')

            FastaLib.saveSequences(selenoproteins, outfile)

            # The condition does not depend on the sequence, so it is tested
            # once instead of once per sequence.
            if verbosity >= 3 and writestofile:
                for selP in selenoproteins:
                    print(selP.header.strip())
                    print(selP.sequence.strip())

            if verbosity >= 1:
                print('')
                print('Found ' + str( len( selenoproteins ) ) + ' selenoproteins')
                print('')
    finally:
        for f in infiles:
            f.close()
        # Never close sys.stdout: it is not ours, and closing it breaks any
        # later output from the interpreter or the caller.
        if outfile is not None and outfile is not sys.stdout:
            outfile.close()

Пример #2

Показать файл

Файл: alignHugeFamilies.py Проект: agrimaldi/crg

def _runStep(wrapper, infile, message, dryrun, verbosity):
    """Point *wrapper* at *infile*, then run it or, in a dry run, print its command.

    wrapper   -- object with a writable ``infile`` attribute, a ``cline``
                 attribute and a ``run()`` method (the UtilityWrappers
                 objects built in main()).
    infile    -- path assigned to ``wrapper.infile``.
    message   -- progress text written to stderr before running, when
                 verbosity >= 1.
    dryrun    -- when true, ``wrapper.cline`` is printed and nothing is run.
    verbosity -- integer verbosity level.
    """
    # NOTE(review): every step was preceded by this pause in the original
    # code; the reason is not visible here, so it is preserved as is.
    time.sleep(0.5)
    wrapper.infile = infile
    if dryrun:
        print(wrapper.cline)
        return
    if verbosity >= 1:
        sys.stderr.write(message)
    wrapper.run()


def main():
    """Drive the alignment pipeline selected on the command line.

    The input FASTA file (-i) is first rewritten, with gaps removed, to a
    temporary file created through genTempfilename in the folder given by
    -t (default '/tmp/').  This happens even in a dry run.  The enabled
    steps then run in this fixed order, each one consuming the output file
    of the previous enabled step:

      1. filter   (-F)  UtilityWrappers.FilterWrapper
      2. mafft    (-M)  UtilityWrappers.MafftWrapper
      3. trimal   (-T)  UtilityWrappers.TrimalWrapper, only when the mafft
                        output produced by this invocation holds more than
                        200 sequences
      4. gap removal, when both -T and -C are set and this is not a dry run
      5. t_coffee (-C)  UtilityWrappers.TcoffeeWrapper
      6. headers  (-B)  UtilityWrappers.AddFullHeadersWrapper2
      7. prepare  (-P)  UtilityWrappers.SelenoprofilesPreWrapper

    -A enables steps 1, 2, 3, 5 and 6.  -Y prints each command instead of
    running it.  Unless -D is given, the temporary ungapped file is removed
    on exit, including after an error or Ctrl-C.

    Exits with a usage error when -i or -o is missing, and through
    sys.exit('manual exit.') on KeyboardInterrupt.
    """
    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='file containing the alignments that will be used to build the PSSM using prepare_alignment_selenoprofiles.py.',
                       metavar='FILE' )

    parser.add_option( '-r', '--datadir',
                       dest='datadir',
                       help='directory containing, for each familly FAM, a directory FAM.blast and a directory FAM.selenoprofiles.prep',
                       metavar='DIR' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base name used for outputs',
                       metavar='NAME' )

    parser.add_option( '-a', '--n_core',
                       dest='ncore',
                       type='int',
                       help='number of cores to use during the various operations.',
                       metavar='INTEGER' )

    parser.add_option( '-M', '--mafft',
                       action='store_true', dest='domafft', default=False,
                       help='do the mafft step.')

    parser.add_option( '-T', '--trimal',
                       action='store_true', dest='dotrimal', default=False,
                       help='do the trimal step.')

    parser.add_option( '-C', '--tcoffee',
                       action='store_true', dest='dotcoffee', default=False,
                       help='do the t_coffee step.')

    parser.add_option( '-B', '--headers',
                       action='store_true', dest='doheaders', default=False,
                       help='do the addheaders step.')

    parser.add_option( '-p', '--patternfile',
                       dest='patternfile',
                       help='pattern file to use if the -D option is used.',
                       metavar='FILE' )

    parser.add_option( '-F', '--filter',
                       action='store_true', dest='dofilter', default=False,
                       help='do the filter step.')

    parser.add_option( '-P', '--prepare',
                       action='store_true', dest='doprepal', default=False,
                       help='do the prepare_alignment_selenoprofiles step.')

    parser.add_option( '-g', '--tag_threshold',
                       dest='tagthreshold',
                       type='float',
                       help='tag threshold to use if the -P or --prepare is used.',
                       metavar='FLOAT' )

    parser.add_option( '-A', '--all',
                       action='store_true', dest='doall', default=False,
                       help='do all steps.')

    parser.add_option( '-Y', '--dry',
                       action='store_true', dest='dryrun', default=False,
                       help="Prints the commands without executing them.")

    parser.add_option( '-D', '--debug',
                       action='store_true', dest='debug', default=False,
                       help="Debug mode. Nothing is cleaned.")

    parser.add_option( '-t', '--temp',
                       dest='temp',
                       help='set the temp folder to use.',
                       metavar='FOLDER' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    # NOTE(review): the patternfile default is the string 'None', not the
    # None object -- presumably what AddFullHeadersWrapper2 expects; confirm.
    parser.set_defaults( verbosity = 1,
                         ncore = 1,
                         tagthreshold = 0.5,
                         temp = '/tmp/',
                         patternfile = 'None' )

    (options, args) = parser.parse_args()

    # Both values are used unconditionally below; without them the script
    # used to die with a TypeError traceback.
    if not options.inputfilename:
        parser.error('an input file is required (-i/--inputfile).')
    if not options.outputfilename:
        parser.error('an output base name is required (-o/--outputfile).')

    # NOTE(review): --all does not switch on the prepare step (-P) --
    # confirm that this is intended.
    if options.doall:
        options.doheaders = True
        options.dofilter = True
        options.domafft = True
        options.dotrimal = True
        options.dotcoffee = True

    # Work on an ungapped copy of the input, stored in a temporary file.
    infile = options.inputfilename
    tmpinitfilename = genTempfilename(options.temp, 'ungapped_')
    with open(infile, 'r') as iff:
        tmpseqs = Fasta.loadSequences(iff)
    with open(tmpinitfilename, 'w') as ugf:
        for seq in tmpseqs:
            removeGaps(seq).prints(ugf)
    # tmpinfile always names the file the next step has to read.
    tmpinfile = tmpinitfilename

    # Output file of every step, derived from the -o base name.
    mafftoutfile = ''.join((options.outputfilename, '_mafft.fasta'))
    trimaloutfile1 = ''.join((options.outputfilename, '_trimmed_native.fasta'))
    trimaloutfile2 = ''.join((options.outputfilename, '_trimmed_spadded.fasta'))
    tcoffeeoutfile = ''.join((options.outputfilename, '_tcoffee.fasta'))
    fullheadoutfile = ''.join((options.outputfilename, '.det.fasta'))
    patternfile = options.patternfile
    filteroutfile = ''.join((options.outputfilename, '.filt.fasta'))

    ncore = options.ncore
    verbosity = options.verbosity
    temp = options.temp

    # The wrappers are built up front; their input file is re-assigned just
    # before each one runs, once the actual chain of steps is known.
    addheaders = UtilityWrappers.AddFullHeadersWrapper2(tmpinfile,
                                                        fullheadoutfile,
                                                        patternfile)

    filterseqs = UtilityWrappers.FilterWrapper(tmpinfile,
                                               filteroutfile,
                                               inverse=True,
                                               titlematch=('PREDICTED', 'predicted', 'hypothetical'))

    mafft = UtilityWrappers.MafftWrapper(tmpinfile,
                                         mafftoutfile,
                                         auto=True)

    trimal = UtilityWrappers.TrimalWrapper(tmpinfile,
                                           trimaloutfile1,
                                           clusters=100)

    tcoffee = UtilityWrappers.TcoffeeWrapper(trimaloutfile2,
                                             tcoffeeoutfile,
                                             ncore=ncore)

    prepsp = UtilityWrappers.SelenoprofilesPreWrapper(tmpinfile,
                                                      options.outputfilename,
                                                      all=True,
                                                      tagthreshold=options.tagthreshold,
                                                      temp=temp)

    try:

        if options.dryrun:
            print('\nThis is a dry run. Relaunch the command without the option -Y to do the actual stuff.\n')

        ## Filter out the 'fake' proteins
        if options.dofilter:
            _runStep(filterseqs, tmpinfile,
                     '\n    >>> Filtering out\n\n',
                     options.dryrun, verbosity)
            tmpinfile = filteroutfile

        ## run mafft
        # Stays 0 unless mafft really ran in this invocation, which keeps
        # the trimal step and the "omitted selenoproteins" step disabled.
        numseqinmafftoutput = 0
        if options.domafft:
            _runStep(mafft, tmpinfile,
                     '\n    >>> Running Mafft\n\n',
                     options.dryrun, verbosity)
            tmpinfile = mafftoutfile
            if not options.dryrun:
                with open(mafftoutfile, 'r') as mfo:
                    numseqinmafftoutput = len(Fasta.loadSequences(mfo))

        ## run trimal
        if options.dotrimal and numseqinmafftoutput > 200:
            _runStep(trimal, tmpinfile,
                     '\n    >>> Running Trimal\n\n',
                     options.dryrun, verbosity)
            tmpinfile = trimaloutfile1

        ## remove the gaps before handing the sequences to t_coffee
        if not options.dryrun and options.dotcoffee and options.dotrimal:
            if verbosity >= 1:
                sys.stderr.write('\n    >>> Removing gaps\n\n')

            # Read the input completely before creating the output, so a
            # failed read does not leave an empty output file behind.
            with open(tmpinfile, 'r') as ti:
                si = Fasta.loadSequences(ti)
            tmpinfile = trimaloutfile2

            ## the sequences with no gaps
            refs = Fasta.SequenceList()
            for s in si:
                refs.append(removeGaps(s))

            # 'with' guarantees the output is closed even when spDiff or
            # saveSequences raises.
            with open(tmpinfile, 'w') as to:
                Fasta.saveSequences(refs, to)

                if numseqinmafftoutput > 200:
                    if verbosity >= 1:
                        sys.stderr.write('\n    >>> Adding ommited selenoproteins\n')
                    ## Gather the non intersecting proteins from the 2 files
                    diffSelenoproteins = spDiff( mafftoutfile,
                                                 trimaloutfile1 )

                    ## remove gaps from selenoproteins
                    spDiffr = Fasta.SequenceList()
                    for s in diffSelenoproteins:
                        spDiffr.append(removeGaps(s))
                    ## append to the file the selenoproteins that were not present
                    Fasta.saveSequences(spDiffr, to)

        ## run t_coffee
        if options.dotcoffee:
            _runStep(tcoffee, tmpinfile,
                     '\n    >>> Running T_coffee\n\n',
                     options.dryrun, verbosity)
            tmpinfile = tcoffeeoutfile

        ## Add full headers
        if options.doheaders:
            _runStep(addheaders, tmpinfile,
                     '\n    >>> Adding headers\n\n',
                     options.dryrun, verbosity)
            tmpinfile = fullheadoutfile

        ## prepare alignments for selenoprofiles
        if options.doprepal:
            _runStep(prepsp, tmpinfile,
                     '\n    >>> preparing for selenoprofiles\n\n',
                     options.dryrun, verbosity)

    except KeyboardInterrupt:
        sys.exit('manual exit.')
    finally:
        # Debug mode keeps the temporary ungapped file for inspection.
        if not options.debug:
            if verbosity >= 2:
                sys.stderr.write('\n    >>> Removing temporary file ' + tmpinitfilename +'\n\n')
            os.remove(tmpinitfilename)