Python Fasta примеры использования

Язык программирования: Python

Пространство имен/Пакет: AGBio.io

Класс/Тип: Fasta

Примеров на hotexamples.com: 6

Python Fasta — найдено 6 примеров. Это лучшие примеры кода на Python для AGBio.io.Fasta, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам повысить качество подборки.

Основные методы

Показать Скрыть

loadSequences(6)

saveSequences(2)

Пример #1

Показать файл

Файл: alignHugeFamilies.py Проект: agrimaldi/crg

def spDiff( fileA, fileB ):
    """Compare the 'U'-pattern sequences of two FASTA files.

    Both files are loaded with ``Fasta.loadSequences``; each resulting
    collection is narrowed with ``findPattern('U', mode='full')`` and the
    two narrowed collections are combined with ``symetric_difference``
    (sic -- that is the spelling of the project API) using ``method='raw'``.

    Args:
        fileA: path of the first FASTA file.
        fileB: path of the second FASTA file.

    Returns:
        Whatever ``symetric_difference`` returns; presumably the sequences
        found in only one of the two files -- confirm against the Fasta
        module.
    """
    # Context managers guarantee both handles are closed even when loading
    # fails.  The nesting keeps the original "open both, then load" order.
    with open( fileA, 'r' ) as sfileA:
        with open( fileB, 'r' ) as sfileB:
            seqsA = Fasta.loadSequences( sfileA )
            seqsB = Fasta.loadSequences( sfileB )

    spA = seqsA.findPattern('U', mode='full')
    spB = seqsB.findPattern('U', mode='full')

    return spA.symetric_difference(spB, method='raw')

Пример #2

Показать файл

Файл: processBlastoutput2.py Проект: agrimaldi/crg

def main():
    """Post-process a BLAST XML report into an index file and FASTA files.

    Steps, in order (most are switched on by command-line options):

    1. ``-P``: parse the XML report and write ``<output>.index.0``
       (with extra include/exclude patterns when ``-f`` is given).
    2. ``-U``: run ``uniq`` on the index file.
    3. Read the second ``|``-separated field of every index line and give
       that list to the sequence fetcher, which writes ``<output>.fasta.0``.
    4. ``-M``: add full headers to the fetched sequences, reload them and
       save the ones selected by ``getTopSeqs`` to
       ``<output>.<exponent>.<count>.fasta``.

    Progress messages are written to stderr according to ``-v``.
    """

    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='blast output file, in xml format.',
                       metavar='FILE.xml' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base output filename',
                       metavar='FILE' )

    parser.add_option( '-d', '--db',
                       dest='database',
                       help='database from which the sequences should be fetched.',
                       metavar='FILE' )

    parser.add_option( '-e', '--evalue',
                       dest='evalue',
                       type='float',
                       help='e-value threshold.',
                       metavar='FLOAT' )

    parser.add_option( '-E', '--start_expo_evalue',
                       dest='startexpoeval',
                       type='int',
                       help='exponent of the evalue threshold used when refiltering.',
                       metavar='INT' )

    parser.add_option( '-b', '--blast_version',
                       dest='blastversion',
                       help='set the blast version to use, either `legacy` or `plus`.',
                       metavar='VERSION' )

    parser.add_option( '-f', '--filter',
                       action='store_true', dest='dofilter', default=False,
                       help='do the filter step.')

    parser.add_option( '-p', '--keep_patterns_iff',
                       dest='keeppatiff',
                       help='Keep only if patterns match exactly. The patterns should be coma seperated.',
                       metavar='keyword1:pat1,pat2,pat3,,keyword2:pat1,pat2' )

    parser.add_option( '-q', '--keep_patterns',
                       dest='keeppat',
                       help='Keep patterns that match exactly, no matter what. The patterns should be coma seperated.',
                       metavar='keyword1:pat1,pat2,pat3,,keyword2:pat1,pat2' )

    parser.add_option( '-g', '--gis',
                       dest='gis',
                       help='pickle file containing the gis that should match',
                       metavar='FILE')

    parser.add_option( '-F', '--format',
                       dest='formatop',
                       help='format of the output. default is `header,evalue`',
                       metavar='INTEGER' )

    # The pieces of the two multi-line help texts below used to be glued
    # together without separating spaces ("to beprocessed", "?.Use").
    parser.add_option( '-M', '--max_num_start_seq',
                       dest='maxnumstartseq',
                       type='int',
                       help='maximum number of sequences in the first alignement to be '
                            'processed. If set, a new input file with the top sequences ordered '
                            'by evalue is created and used.',
                       metavar='INTEGER' )

    parser.add_option( '-k', '--keep_U',
                       action='store_true', dest='keepu', default=False,
                       help='Should U containing sequences be kept regardless of their evalues ?. '
                            'Use in conjunction of -M')

    parser.add_option( '-T', '--temp',
                       dest='temp',
                       help='set the temp folder to use.',
                       metavar='FOLDER' )

    parser.add_option( '-P', '--parse',
                       dest='parse', action='store_true', default=False,
                       help='do not do extra fancy steps. Just parse the file and return the disired output in a file.' )

    parser.add_option( '-U', '--uniq',
                       dest='uniq', action='store_true', default=False,
                       help='remove duplicates.' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = 1,
                         database = 'nr',
                         evalue = 10,
                         startexpoeval = -10,
                         keeppat = None,
                         blastversion = 'legacy',
                         temp = '/tmp/',
                         maxnumstartseq = None,
                         formatop = 'header,evalue')

    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a TypeError further down.
    if not options.outputfilename:
        parser.error('You must provide a base output filename (-o).')
    if options.parse and not options.inputfilename:
        parser.error('You must provide an input file (-i) when using -P.')

    verbosity = options.verbosity
    database = options.database
    evalue = options.evalue
    maxnumstartseq = options.maxnumstartseq

    blastindexfile = ''.join(( options.outputfilename, '.index.0' ))
    blastfastafile = ''.join(( options.outputfilename, '.fasta.0' ))

    # Equivalent of `touch`, done without a shell so that paths containing
    # spaces or shell metacharacters are handled correctly.
    for touched in ( blastindexfile, blastfastafile ):
        with open(touched, 'a'):
            os.utime(touched, None)

    if options.blastversion == 'legacy':
        fetcher = FastaCmdWrapper( entry=[],
                                   db=database,
                                   outfile=blastfastafile )
    else:
        fetcher = BlastDbCmdWrapper( entry=[],
                                     db=database,
                                     outfile=blastfastafile )

    ## Parse the blast output file.
    if options.parse:
        if verbosity >= 1:
            sys.stderr.write( '\n' )
            sys.stderr.write( '>>> Parsing blast output : ' +\
                              options.inputfilename + '\n' )
        with open(options.inputfilename, 'r') as infile:
            blastparser = PsiBlastXMLParser(infile)
            blastparser.parse()
            if verbosity >= 2:
                sys.stderr.write('    >>> Extracting required data.\n')
            extractargs = dict( evalue=evalue,
                                fmt=options.formatop,
                                outfile=blastindexfile )
            if options.dofilter:
                extractargs.update(
                    includepatternsiff=fmtOptPat(options.keeppatiff),
                    includepatterns=fmtOptPat(options.keeppat),
                    excludepatterns={'title':['hypothetical', 'predicted', 'PREDICTED']} )
            # The return value is not used here; the later steps read
            # blastindexfile, which is passed as `outfile`.
            blastparser.extractData( **extractargs )

    ## Only keep one copy of a header, the one with the best evalue.
    if options.uniq:
        if verbosity >= 1:
            sys.stderr.write( '\n' )
            sys.stderr.write( '>>> Keeping only best evalues.\n' )
        uniq(blastindexfile)

    ## Gather all GIs in list
    if verbosity >= 2:
        sys.stderr.write( '\n' )
        sys.stderr.write( '>>> Gathering all Gis.\n' )
    with open(blastindexfile, 'r') as bif:
        # The GI is taken as the second '|'-separated field of each line.
        entries = [line.split('|')[1] for line in bif]
    fetcher.entry = entries

    ## Fetch the sequences from the local databases.
    ## TODO : Fetch failed from the web.
    if verbosity >= 1:
        sys.stderr.write( '\n' )
        sys.stderr.write( '>>> Building fasta.0 file by fetching sequences from local database.\n' )
    fetcher.run()

    ## Apply final filters : keep only top evalues and U containing until a threshold is reached
    if maxnumstartseq:
        if verbosity >= 1:
            sys.stderr.write( '\n' )
            sys.stderr.write( '>>> Applying final filters on ' + \
                              blastfastafile + '.\n' )
        if verbosity >= 2:
            sys.stderr.write( '    >>> Adding evalue to headers.\n' )
        ### TODO : use .fasta.fh in tmp dir.
        tmpfullheadfasta = blastfastafile + '.fh'
        addheaders = AddFullHeadersWrapper2(blastfastafile,
                                            tmpfullheadfasta,
                                            blastindexfile)
        addheaders.run()
        if verbosity >= 3:
            sys.stderr.write( '        >>> Loading sequences.\n' )
        with open(tmpfullheadfasta, 'r') as ff:
            allseqs = Fasta.loadSequences(ff)
        if verbosity >= 2:
            sys.stderr.write( '    >>> Keeping valid sequences.\n' )
        tmppat = None
        if options.keepu:
            tmppat = 'U'
        # validseqs[0] is the kept sequence collection and validseqs[1] the
        # evalue exponent, as shown by how both are used below.
        validseqs = getTopSeqs(seqs=allseqs,
                               maxnumseqs=maxnumstartseq,
                               startevalue=options.startexpoeval,
                               pattern=tmppat,
                               verbose=verbosity>=4 )
        keptseqs = '.'.join(( options.outputfilename,
                              str(validseqs[1]),
                              str(len(validseqs[0])),
                              'fasta' ))
        if verbosity >= 2:
            sys.stderr.write( '    >>> Found ' + str(len(validseqs[0])) + \
                              ' sequences with evalue <= 1e' + \
                              str(validseqs[1]) + '\n' )
        with open(keptseqs, 'w') as ff:
            validseqs[0].save(ff)

    sys.stderr.write( '\n' )

Пример #3

Показать файл

Файл: searchSelenoproteins.py Проект: agrimaldi/crg

def main():
    """Extract selenoproteins from one or more FASTA files.

    ``-i`` takes a comma-separated list of FASTA files.  Each file is loaded
    with ``FastaLib.loadSequences``, passed through ``findSelenoproteins``
    and the result is written with ``FastaLib.saveSequences`` to the ``-o``
    file, or to stdout when ``-o`` is not given.

    Exits with an error message when no input file is given.
    """

    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='fasta file in which selenoproteins should be looked for.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='fasta file containing the selenoproteins',
                       metavar='FILE' )

    # type='int' lets optparse reject a non-numeric level with a usage
    # error instead of a ValueError traceback from int().
    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = 1 )

    (options, args) = parser.parse_args()

    verbosity = options.verbosity

    if not options.inputfilename:
        sys.exit( 'You must provide an input filename.')

    infiles = []
    outfile = None
    try:
        # All inputs are opened before the output, so that a missing input
        # is reported before the output file gets truncated.
        for name in options.inputfilename.split(','):
            infiles.append( open( name, 'r' ) )

        if options.outputfilename:
            outfile = open( options.outputfilename, 'w' )
            # Sequences are echoed on stdout only when they are not already
            # being written there.
            echo_sequences = True
        else:
            outfile = sys.stdout
            echo_sequences = False

        # NOTE(review): the progress messages below go to stdout, so they get
        # mixed with the FASTA output when no -o is given -- confirm whether
        # they should go to stderr instead.
        for f in infiles:
            if verbosity >= 1:
                print('')
                print('>>> Searching for selenoproteins in file ' + f.name)
                print('')

            if verbosity >= 2:
                print('>>> Loading sequences ...')

            sequences = FastaLib.loadSequences( f )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')
                print('>>> Searching for U containing sequences ...')

            selenoproteins = findSelenoproteins( sequences )

            if verbosity >= 2:
                print('>>> ... Done.')
                print('')

            FastaLib.saveSequences(selenoproteins, outfile)

            if verbosity >= 3 and echo_sequences:
                for selP in selenoproteins:
                    print(selP.header.strip())
                    print(selP.sequence.strip())

            if verbosity >= 1:
                print('')
                print('Found ' + str( len( selenoproteins ) ) + ' selenoproteins')
                print('')
    finally:
        for f in infiles:
            f.close()
        # Never close sys.stdout: it does not belong to this function.
        if outfile is not None and outfile is not sys.stdout:
            outfile.close()

Пример #4

Показать файл

Файл: alignHugeFamilies.py Проект: agrimaldi/crg

def main():
    """Drive the alignment pipeline for one (large) protein family.

    The input FASTA file is first copied, with gaps removed, to a temporary
    file.  The enabled steps then run in this fixed order, each one reading
    the output of the previous enabled step (tracked in ``tmpinfile``):

        filter -> mafft -> trimal -> gap removal -> t_coffee
        -> add headers -> prepare for selenoprofiles

    * trimal only runs when the mafft output holds more than 200 sequences.
    * gap removal runs when both the trimal and t_coffee steps are
      requested; when trimal actually ran, the 'U'-pattern sequences
      reported by ``spDiff`` for the mafft and trimal outputs are appended
      to the ungapped file.
    * ``-Y`` (dry run) prints each command line instead of running it.
    * ``-A`` turns on every step except the prepare step.

    The temporary file is removed at the end unless ``-D`` is given.
    """

    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='file containing the alignments that will be used to build the PSSM using prepare_alignment_selenoprofiles.py.',
                       metavar='FILE' )

    parser.add_option( '-r', '--datadir',
                       dest='datadir',
                       help='directory containing, for each familly FAM, a directory FAM.blast and a directory FAM.selenoprofiles.prep',
                       metavar='DIR' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base name used for outputs',
                       metavar='NAME' )

    parser.add_option( '-a', '--n_core',
                       dest='ncore',
                       type='int',
                       help='number of cores to use during the various operations.',
                       metavar='INTEGER' )

    parser.add_option( '-M', '--mafft',
                       action='store_true', dest='domafft', default=False,
                       help='do the mafft step.')

    parser.add_option( '-T', '--trimal',
                       action='store_true', dest='dotrimal', default=False,
                       help='do the trimal step.')

    parser.add_option( '-C', '--tcoffee',
                       action='store_true', dest='dotcoffee', default=False,
                       help='do the t_coffee step.')

    parser.add_option( '-B', '--headers',
                       action='store_true', dest='doheaders', default=False,
                       help='do the addheaders step.')

    parser.add_option( '-p', '--patternfile',
                       dest='patternfile',
                       help='pattern file to use if the -D option is used.',
                       metavar='FILE' )

    parser.add_option( '-F', '--filter',
                       action='store_true', dest='dofilter', default=False,
                       help='do the filter step.')

    parser.add_option( '-P', '--prepare',
                       action='store_true', dest='doprepal', default=False,
                       help='do the prepare_alignment_selenoprofiles step.')

    parser.add_option( '-g', '--tag_threshold',
                       dest='tagthreshold',
                       type='float',
                       help='tag threshold to use if the -P or --prepare is used.',
                       metavar='FLOAT' )

    parser.add_option( '-A', '--all',
                       action='store_true', dest='doall', default=False,
                       help='do all steps.')

    parser.add_option( '-Y', '--dry',
                       action='store_true', dest='dryrun', default=False,
                       help="Prints the commands without executing them.")

    parser.add_option( '-D', '--debug',
                       action='store_true', dest='debug', default=False,
                       help="Debug mode. Nothing is cleaned.")

    parser.add_option( '-t', '--temp',
                       dest='temp',
                       help='set the temp folder to use.',
                       metavar='FOLDER' )

    parser.add_option( '-v', '--verbose',
                       dest='verbosity',
                       type='int',
                       help='verbosity level : 0=none ; 1=standard ; 2=detailed ; 3=full',
                       metavar='INTEGER' )

    parser.set_defaults( verbosity = 1,
                         ncore = 1,
                         tagthreshold = 0.5,
                         temp = '/tmp/',
                         patternfile = 'None' )

    (options, args) = parser.parse_args()

    # Fail with a usage message instead of a TypeError further down.
    if not (options.inputfilename and options.outputfilename):
        parser.error('You must provide an input file (-i) and an output base name (-o).')

    if options.doall:
        options.doheaders = True
        options.dofilter = True
        options.domafft = True
        options.dotrimal = True
        options.dotcoffee = True

    ## Work on an ungapped copy of the input.
    infile = options.inputfilename
    tmpinitfilename = genTempfilename(options.temp, 'ungapped_')
    with open(infile, 'r') as iff:
        tmpseqs = Fasta.loadSequences(iff)
    with open(tmpinitfilename, 'w') as ugf:
        for seq in tmpseqs:
            removeGaps(seq).prints(ugf)
    # tmpinfile always names the file the next step has to read.
    tmpinfile = tmpinitfilename

    mafftoutfile = ''.join((options.outputfilename, '_mafft.fasta'))
    trimaloutfile1 = ''.join((options.outputfilename, '_trimmed_native.fasta'))
    trimaloutfile2 = ''.join((options.outputfilename, '_trimmed_spadded.fasta'))
    tcoffeeoutfile = ''.join((options.outputfilename, '_tcoffee.fasta'))
    fullheadoutfile = ''.join((options.outputfilename, '.det.fasta'))
    patternfile = options.patternfile
    filteroutfile = ''.join((options.outputfilename, '.filt.fasta'))

    ncore = options.ncore
    verbosity = options.verbosity
    temp = options.temp

    # The wrappers are built up front; their `infile` is re-pointed to the
    # current tmpinfile right before each step is run or printed.
    addheaders = UtilityWrappers.AddFullHeadersWrapper2(tmpinfile,
                                                        fullheadoutfile,
                                                        patternfile)

    filterseqs = UtilityWrappers.FilterWrapper(tmpinfile,
                                               filteroutfile,
                                               inverse=True,
                                               titlematch=('PREDICTED', 'predicted', 'hypothetical'))

    mafft = UtilityWrappers.MafftWrapper(tmpinfile,
                                         mafftoutfile,
                                         auto=True)

    trimal = UtilityWrappers.TrimalWrapper(tmpinfile,
                                           trimaloutfile1,
                                           clusters=100)

    tcoffee = UtilityWrappers.TcoffeeWrapper(trimaloutfile2,
                                             tcoffeeoutfile,
                                             ncore=ncore)

    prepsp = UtilityWrappers.SelenoprofilesPreWrapper(tmpinfile,
                                                      options.outputfilename,
                                                      all=True,
                                                      tagthreshold=options.tagthreshold,
                                                      temp=temp)

    try:

        if options.dryrun:
            print('\nThis is a dry run. Relaunch the command without the option -Y to do the actual stuff.\n')

        ## Filter out the 'fake' proteins
        if options.dofilter:
            time.sleep(0.5)
            filterseqs.infile = tmpinfile
            tmpinfile = filteroutfile
            if options.dryrun:
                print(filterseqs.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> Filtering out\n\n')
                filterseqs.run()

        ## run mafft
        # Stays 0 when mafft is skipped or on a dry run, which also
        # disables the trimal step below.
        numseqinmafftoutput = 0
        if options.domafft:
            time.sleep(0.5)
            mafft.infile = tmpinfile
            tmpinfile = mafftoutfile
            if options.dryrun:
                print(mafft.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> Running Mafft\n\n')
                mafft.run()
                with open(mafftoutfile, 'r') as mfo:
                    seqs = Fasta.loadSequences(mfo)
                    numseqinmafftoutput = len(seqs)

        ## run trimal
        if options.dotrimal and numseqinmafftoutput > 200:
            time.sleep(0.5)
            trimal.infile = tmpinfile
            tmpinfile = trimaloutfile1
            if options.dryrun:
                print(trimal.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> Running Trimal\n\n')
                trimal.run()

        if not options.dryrun and options.dotcoffee and options.dotrimal:
            if verbosity >= 1:
                sys.stderr.write('\n    >>> Removing gaps\n\n')

            # Read the input completely before the output file is created,
            # and let `with` close both handles even if a step fails.
            with open(tmpinfile, 'r') as ti:
                si = Fasta.loadSequences(ti)
            tmpinfile = trimaloutfile2

            ## saves the sequences with no gaps
            refs = Fasta.SequenceList()
            for s in si:
                refs.append(removeGaps(s))

            with open(tmpinfile, 'w') as to:
                Fasta.saveSequences(refs, to)

                # Same condition as the one that made trimal run above.
                if numseqinmafftoutput > 200:
                    if verbosity >= 1:
                        sys.stderr.write('\n    >>> Adding ommited selenoproteins\n')
                    ## Gather the non intersecting proteins from the 2 files
                    diffSelenoproteins = spDiff( mafftoutfile,
                                                 trimaloutfile1 )

                    spDiffr = Fasta.SequenceList()
                    ## remove gaps from selenoproteins
                    for s in diffSelenoproteins:
                        spDiffr.append(removeGaps(s))
                    ## append to the file the selenoproteins that were not present
                    Fasta.saveSequences(spDiffr, to)

        ## run t_coffee
        if options.dotcoffee:
            time.sleep(0.5)
            tcoffee.infile = tmpinfile
            tmpinfile = tcoffeeoutfile
            if options.dryrun:
                print(tcoffee.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> Running T_coffee\n\n')
                tcoffee.run()

        ## Add full headers
        if options.doheaders:
            time.sleep(0.5)
            addheaders.infile = tmpinfile
            tmpinfile = fullheadoutfile
            if options.dryrun:
                print(addheaders.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> Adding headers\n\n')
                addheaders.run()

        ## prepare alignments for selenoprofiles
        if options.doprepal:
            time.sleep(0.5)
            prepsp.infile = tmpinfile
            if options.dryrun:
                print(prepsp.cline)
            else:
                if verbosity >= 1:
                    sys.stderr.write('\n    >>> preparing for selenoprofiles\n\n')
                prepsp.run()

    except KeyboardInterrupt:
        sys.exit('manual exit.')
    finally:
        if not options.debug:
            if verbosity >= 2:
                sys.stderr.write('\n    >>> Removing temporary file ' + tmpinitfilename +'\n\n')
            os.remove(tmpinitfilename)

Пример #5

Показать файл

Файл: add_detail_to_titles3.py Проект: agrimaldi/crg

def main():
    """Replace the headers of a FASTA file with the headers of a pattern file.

    Two methods are available (``-m``):

    * ``gi`` (default): for every input sequence, the first pattern header
      carrying the same ``gi|<digits>|`` identifier replaces the sequence
      header.  Input headers without a match are reported on stderr.
    * ``inplace``: the n-th pattern header replaces the header of the n-th
      sequence; both files must hold the same number of entries.

    The result goes to the ``-o`` file, or to stdout when ``-o`` is absent.

    Raises:
        Exception: with ``inplace``, when the two files hold a different
            number of entries.
    """

    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='file with incomplete headers.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='outputfile.',
                       metavar='FILE' )

    parser.add_option( '-p', '--pattern',
                       dest='patternfilename',
                       help='pattern file containing the complete headers.',
                       metavar='FILE' )

    parser.add_option( '-m', '--method',
                       dest='method',
                       help='Method to use when filling the headers.' \
                       'gi means match will be done by gi. inplace means that' \
                       'header substitution is made by following the order.',
                       metavar='{gi}|inplace' )

    parser.set_defaults( outputfilename = None,
                         method = 'gi')

    (options, args) = parser.parse_args()

    if not (options.inputfilename and options.patternfilename):
        parser.error('You have to provide two files, check help.')

    # Reject an unknown method before the output file is opened (and
    # therefore truncated).
    if options.method not in ('gi', 'inplace'):
        parser.error('Wrong method')

    with open(options.inputfilename, 'r') as iff:
        inlines = Fasta.loadSequences(iff)
    with open(options.patternfilename, 'r') as pff:
        # Only the header lines of the pattern file are of interest.
        patlines = [line for line in pff if line.startswith('>')]

    if not options.outputfilename:
        outfile = sys.stdout
    else:
        outfile = open(options.outputfilename, 'w')

    try:
        if options.method == 'gi':
            GI_REGEX = re.compile(r'gi\|(\d+)\|')

            for iseq in inlines:
                nofound = True
                for phead in patlines:
                    try:
                        giq = GI_REGEX.search(iseq.header).group(1)
                        gis = GI_REGEX.search(phead).group(1)
                        if giq == gis:
                            tmpseq = Fasta.Sequence(phead, iseq.sequence)
                            tmpseq.prints(outfile)
                            nofound = False
                            break
                    except AttributeError:
                        # Raised e.g. when a header holds no gi identifier
                        # (the regex search then returns None).
                        sys.stderr.write(iseq.header + ' ' + phead)
                        sys.exit(-1)
                    except IndexError:
                        # This handler used to reference the undefined names
                        # `pline` and `line`, which raised a NameError.
                        sys.stderr.write( '\nError while processing the files:\n' )
                        sys.stderr.write( phead + '\n' )
                        sys.stderr.write( iseq.header + '\n' )
                        break
                if nofound:
                    sys.stderr.write('\n' + iseq.header + '\n')
        else:
            if len(inlines) != len(patlines):
                raise Exception('Different number of sequences')
            for seq, pat in zip(inlines, patlines):
                Fasta.Sequence(pat, seq.sequence).prints(outfile, 60)
    finally:
        # Never close sys.stdout: it does not belong to this function.
        if outfile is not sys.stdout:
            outfile.close()

Пример #6

Показать файл

Файл: manualRemoveSequences.py Проект: agrimaldi/crg

def main():
    """Interactively decide which sequences of a FASTA file are kept.

    Sequences whose header contains one of the ``-f`` patterns are kept and
    those containing one of the ``-F`` patterns are thrown without asking.
    Every other sequence is presented to the user, who answers with one key:

        y / Enter  keep the sequence
        n          throw the sequence
        b          go back to the previous sequence and undo its decision
        s          show the sequence with the external ``fetch_seq.g`` tool
        d          show alignment details (only when ``-a`` is given)
        q          quit, optionally saving what has been kept so far

    The kept sequences are written to the ``-o`` file.
    """
    # Local import: only this function needs it.
    import subprocess

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    parser = optparse.OptionParser()

    parser.add_option( '-i', '--inputfile',
                       dest='inputfilename',
                       help='fasta file in which selenoproteins should be looked for.',
                       metavar='FILE' )

    parser.add_option( '-a', '--alignmentfile',
                       dest='alfilename',
                       help='alignment file used when details are requested.',
                       metavar='FILE' )

    parser.add_option( '-o', '--outputfile',
                       dest='outputfilename',
                       help='base output filename',
                       metavar='FILE' )

    parser.add_option( '-f', '--keep_prefilter',
                       dest='keepprefilter',
                       help='prefilters all sequences that have the given pattern in their name and keep them.',
                       metavar='PATTERN' )

    parser.add_option( '-F', '--throw_prefilter',
                       dest='throwprefilter',
                       help='prefilters all sequences that have the given pattern in their name and throw them.',
                       metavar='PATTERN' )

    # NOTE(review): `atabscent` is parsed but never read in this function.
    parser.add_option( '-b', '--autothrow_abscents',
                       action='store_true', dest='atabscent', default=False,
                       help='Throw all sequences not present in the alignment provided.')

    parser.set_defaults( keepprefilter = False,
                         throwprefilter = False,
                         alfilename = False )

    (options, args) = parser.parse_args()

    # Checked up front: a missing output name used to be detected only after
    # the whole interactive session, losing all the decisions made.
    if not (options.inputfilename and options.outputfilename):
        parser.error('You must provide an input file (-i) and an output file (-o).')

    with open(options.inputfilename, 'r') as inf:
        sequences = Fasta.loadSequences(inf)

    if options.alfilename:
        with open(options.alfilename, 'r') as alf:
            alignment = Fasta.Alignment(Fasta.loadSequences(alf))
        # NOTE(review): nrdetail is never read; the call is kept because it
        # cannot be verified from here that findPositions has no side effect.
        nrdetail = alignment.findPositions(('U','C','-'), False)
        rdetail = alignment.findPositions(('U','C','-'), True)

    kpatterns = options.keepprefilter.split(',') if options.keepprefilter else []
    tpatterns = options.throwprefilter.split(',') if options.throwprefilter else []

    kept_seq = Fasta.SequenceList()
    thrown_seq = Fasta.SequenceList()
    man_check_list = Fasta.SequenceList()

    for seq in sequences:
        # any() appends a sequence once, even when several patterns match
        # (it used to be appended once per matching pattern).
        kept = any(pattern in seq.header for pattern in kpatterns)
        thrown = any(pattern in seq.header for pattern in tpatterns)
        if kept:
            kept_seq.append(seq)
        if thrown:
            thrown_seq.append(seq)
        if not kept and not thrown:
            man_check_list.append(seq)

    idx = 0
    save_and_quit = False
    while idx < len(man_check_list) and not save_and_quit:
        seq = man_check_list[idx]
        gi = seq.header.split('|')[1]
        decided = False
        print(seq.header)
        while not decided:
            print('%d %d' % (len(kept_seq), len(thrown_seq)))
            choice = getch('# '+str(idx+1)+' / '+str(len(man_check_list))+' -- Keep ? [Y/n]')
            if choice == 'b':
                if idx > 0:
                    idx -= 1
                    seq = man_check_list[idx]
                    gi = seq.header.split('|')[1]
                    print(seq.header)
                    # Undo the previous decision, whichever list it went to.
                    # Best effort: the sequence is in only one of the lists.
                    try:
                        thrown_seq.remove(seq)
                    except Exception:
                        pass
                    try:
                        kept_seq.remove(seq)
                    except Exception:
                        pass
            elif choice in ('y', '\n'):
                kept_seq.append(seq)
                decided = True
                idx += 1
            elif choice == 'n':
                thrown_seq.append(seq)
                decided = True
                idx += 1
            elif choice == 's':
                # Argument list instead of a shell string: header text and
                # file names are passed verbatim, whatever they contain.
                try:
                    subprocess.call(['fetch_seq.g',
                                     '-v', 'TITLE=' + gi,
                                     '-v', 'ALL=1',
                                     options.inputfilename])
                except OSError as err:
                    # Keep the session alive when the tool cannot be started.
                    sys.stderr.write('Could not run fetch_seq.g: ' + str(err) + '\n')
            elif choice == 'd' and options.alfilename:
                print('')
                print('General Detail :')
                for pos in rdetail['U']:
                    sys.stdout.write('    '+str(pos) + ' ')
                    for xpos in rdetail:
                        try:
                            sys.stdout.write(str(xpos)+': ')
                            sys.stdout.write(str(len(rdetail[xpos][pos])) + ' ; ')
                        except KeyError:
                            sys.stdout.write('0 ; ')
                    sys.stdout.write('\n')
                print('')
                # Look the current sequence up in the alignment by header
                # (the last match wins, as before).
                tmpseq = None
                for seqal in alignment:
                    if seqal.header == seq.header:
                        tmpseq = seqal
                if tmpseq:
                    tmppos = [i for i, x in enumerate(tmpseq.sequence) if x == 'U']
                    print('In the sequence provided :')
                    print('    U : %s' % (tmppos,))
                    print('    U in those positions : %s'
                          % ([len(rdetail['U'][(l,)]) for l in tmppos],))
                    print('    C in those positions: %s'
                          % ([len(rdetail['C'][(l,)]) for l in tmppos],))
                    print('    - in those positions: %s'
                          % ([len(rdetail['-'][(l,)]) for l in tmppos],))
                    print('')
                    print('    Symbols present at the positions of each U :')
                    for pos in [p for p in rdetail['U'] if p != ()]:
                        spos = str(pos[0])
                        print('        Position : %s --- %s'
                              % (spos, tmpseq.sequence[int(spos)]))
                else:
                    print('Not present in the alignment provided')
                print('')
            elif choice == 'q':
                answer = None
                # An empty answer means the default, "no".
                while answer not in ('y', 'n', ''):
                    answer = read_line('Manual quit. Would you like to save your changes ? [y/N]').strip().lower()
                if answer != 'y':
                    sys.exit('Quiting without saving.')
                # 'y' used to fall back to the prompt without quitting;
                # leave both loops so that the kept sequences get saved.
                save_and_quit = True
                break
            else:
                print('Wrong command')

    with open(options.outputfilename, 'w') as of:
        kept_seq.prints(of, 80)