示例#1
0
def main():
    (opts, args) = getoptions()

    # Load PWMs
    pssms = load_motifs(opts.pwm_dir, opts.pseudocount)

    if opts.testseq is not None:
        if opts.seqtype == 'RNA':
            seq = Seq(opts.testseq,
                      IUPAC.IUPACUnambiguousRNA()).back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()
        else:
            seq = Seq(opts.testseq, IUPAC.IUPACUnambiguousDNA())
        final = scan_all(pssms, seq, opts)
        print final.to_csv(sep="\t", index=False)
    else:
        # Scan in sequence
        print >> sys.stderr, "Scanning sequences ",
        tic = time.time()
        for seqrecord in SeqIO.parse(open(args[0]), "fasta"):

            seq = seqrecord.seq
            if opts.seqtype == "RNA":
                seq = seq.back_transcribe()
            seq.alphabet = IUPAC.IUPACUnambiguousDNA()

            final = scan_all(pssms, seq, opts)
            print final.to_csv(sep="\t", index=False)

        toc = time.time()
        print >> sys.stderr, "done in %0.2f seconds!" % (float(toc - tic))
示例#2
0
    def _guess_consensus_alphabet(self, ambiguous):
        """Pick an (ungapped) alphabet for an alignment consensus sequence (PRIVATE).

        Starts from the un-gapped base alphabet of the alignment, checks that
        every record's base alphabet is an instance of the same class, and
        then, if ``ambiguous`` is not one of that alphabet's letters, widens
        the alphabet to one that can hold it.

        Arguments:
         - ambiguous - the character that will be used in the consensus for
           positions without a clear winner.

        Returns the alphabet object to use for the consensus sequence.

        Raises ValueError if a record's alphabet is incompatible with the
        alignment's alphabet.
        """
        # Start with the (un-gapped version of) the alignment alphabet
        a = Alphabet._get_base_alphabet(self.alignment._alphabet)

        # Now check it is compatible with all the rest of the sequences
        for record in self.alignment:
            # Get the (un-gapped version of) the sequence's alphabet
            alt = Alphabet._get_base_alphabet(record.seq.alphabet)
            if not isinstance(alt, a.__class__):
                raise ValueError(
                    "Alignment contains a sequence with an incompatible alphabet."
                )

        # Check the ambiguous character we are going to use in the consensus
        # is in the alphabet's list of valid letters (if defined).
        if (
            hasattr(a, "letters")
            and a.letters is not None
            and ambiguous not in a.letters
        ):
            # We'll need to pick a more generic alphabet.  For DNA/RNA the
            # next step up from the unambiguous alphabet is the IUPAC
            # ambiguous one; re-testing against the unambiguous letters here
            # could never succeed, because we only get here when the
            # character is not in a.letters.
            if isinstance(a, IUPAC.IUPACUnambiguousDNA):
                if ambiguous in IUPAC.IUPACAmbiguousDNA().letters:
                    a = IUPAC.IUPACAmbiguousDNA()
                else:
                    a = Alphabet.generic_dna
            elif isinstance(a, IUPAC.IUPACUnambiguousRNA):
                if ambiguous in IUPAC.IUPACAmbiguousRNA().letters:
                    a = IUPAC.IUPACAmbiguousRNA()
                else:
                    a = Alphabet.generic_rna
            elif isinstance(a, IUPAC.IUPACProtein):
                if ambiguous in IUPAC.ExtendedIUPACProtein().letters:
                    a = IUPAC.ExtendedIUPACProtein()
                else:
                    a = Alphabet.generic_protein
            else:
                a = Alphabet.single_letter_alphabet
        return a
    def test_compute_background_1(self):
        """compute_background() on the fixture FASTAs yields the expected RNA frequencies."""
        expected_freqs = {
            'A': 0.1944,
            'C': 0.1388,
            'U': 0.5277,
            'G': 0.1388,
        }
        observed = ms.compute_background(
            self.fastas, IUPAC.IUPACUnambiguousRNA(), verbose=False
        )

        # Compare every expected letter to three decimal places.
        for letter in expected_freqs:
            self.assertAlmostEqual(
                observed[letter], expected_freqs[letter], places=3
            )
示例#4
0
def main():
    """Scan sequence and/or structure input against PFMs and print the hits.

    The kind of input (``'RNA'``, ``'SS'`` or ``'RNASS'``) is decided by
    _guess_seq_type().  For each applicable part (sequence, structure) the
    input is taken either from ``args.testseq`` (comma-separated, consumed in
    order) or from ``args.fastafiles``; a background is loaded only when
    scanning files.  With ``args.bgonly`` the background is printed and the
    program exits without scanning.

    Scan results (combined through combine() for ``'RNASS'``) get their index
    reset, are passed to _add_match_id(), and are written to stdout as
    tab-separated text.  The elapsed time is reported through eprint().
    """
    tic = time.time()
    args = getoptions()
    seq_type = _guess_seq_type(args)
    bg = None

    if args.testseq:
        # Reversed so that pop() returns the comma-separated items in their
        # original order (first item for the sequence part, next for the
        # structure part).
        testseq_stack = args.testseq.split(',')[::-1]

    ## Sequence
    if seq_type in ['RNA', 'RNASS']:
        if args.testseq:
            seq_file = SeqRecord(Seq(testseq_stack.pop()))
        else:
            seq_file = args.fastafiles[0]

        if not args.testseq:
            bg = load_background(args.bg_seq,
                                 args.uniform_background, seq_file,
                                 IUPAC.IUPACUnambiguousRNA(), not args.bgonly)

        if not args.bgonly:
            pssm = load_motif(args.pfm_seq, args.pseudocount,
                              IUPAC.IUPACUnambiguousRNA(), bg)
            seq_results = scan_main(seq_file, pssm,
                                    IUPAC.IUPACUnambiguousRNA(), bg, args)
        else:
            # NOTE(review): bg is still None here when --testseq is combined
            # with --bgonly, so dict(bg) would raise TypeError -- confirm
            # whether that combination should be rejected earlier.
            print(dict(bg))
            sys.exit()

    ## Structure
    if seq_type in ['SS', 'RNASS']:
        if args.testseq:
            struct_file = SeqRecord(Seq(testseq_stack.pop()))
        elif seq_type == 'SS':
            struct_file = args.fastafiles[0]
        else:
            # For 'RNASS' the structure file is the second FASTA argument.
            struct_file = args.fastafiles[1]

        if not args.testseq:
            bg = load_background(args.bg_struct, args.uniform_background,
                                 struct_file, ContextualSecondaryStructure(),
                                 not args.bgonly)

        if not args.bgonly:
            pssm = load_motif(args.pfm_struct, args.pseudocount,
                              ContextualSecondaryStructure(), bg)
            struct_results = scan_main(struct_file, pssm,
                                       ContextualSecondaryStructure(), bg,
                                       args)
        else:
            # NOTE(review): same None-background caveat as the sequence part.
            print(dict(bg))
            sys.exit()

    # Pick the table to report; the three cases share the same output steps.
    if seq_type == 'RNASS':
        results = combine(seq_results, struct_results)
    elif seq_type == 'RNA':
        results = seq_results
    else:
        results = struct_results

    # reset_index() returns a new object rather than modifying in place, so
    # the result has to be kept for the reset to take effect.
    results = results.reset_index(drop=True)
    _add_match_id(results)
    results.to_csv(sys.stdout, sep="\t", index=False)

    toc = time.time()

    runtime = float(toc - tic)
    if runtime > 60:
        eprint("Done in %0.4f minutes!" % (runtime / 60))
    else:
        eprint("Done in %0.4f seconds!" % (runtime))
示例#5
0
 def test_preprocessSeq_6(self):
     """preprocess_seq() with the RNA alphabet leaves a structure-alphabet 'KHIL' record unchanged."""
     structure_record = SeqRecord(
         Seq('KHIL', ContextualSecondaryStructure())
     )
     rna_alphabet = IUPAC.IUPACUnambiguousRNA()
     processed = ms.preprocess_seq(structure_record, rna_alphabet)
     self.assertEqual(str(processed), 'KHIL')
示例#6
0
 def test_preprocessSeq_5(self):
     """preprocess_seq() with the RNA alphabet returns 'GAUUACA' unchanged for a single-letter-alphabet record."""
     generic_record = SeqRecord(
         Seq('GAUUACA', SingleLetterAlphabet())
     )
     rna_alphabet = IUPAC.IUPACUnambiguousRNA()
     processed = ms.preprocess_seq(generic_record, rna_alphabet)
     self.assertEqual(str(processed), 'GAUUACA')
示例#7
0
# Demo script: prints the letters of several IUPAC alphabets, then walks
# through basic Seq operations (transcribe, translate, back-transcribe,
# slicing, str conversion, conversion to a mutable sequence).
print IUPAC.unambiguous_dna.letters    # letters of the unambiguous DNA alphabet
print IUPAC.unambiguous_rna.letters    # letters of the unambiguous RNA alphabet
print IUPAC.ambiguous_dna.letters    # IUPAC letters of the ambiguous DNA alphabet
print IUPAC.ExtendedIUPACProtein.letters    # letters of the extended IUPAC protein alphabet
print IUPAC.ExtendedIUPACDNA.letters    # letters of the extended IUPAC DNA alphabet

from Bio.Seq import Seq
seq = Seq('CCGGTT',IUPAC.unambiguous_dna)
print seq
seq=seq.transcribe()	# DNA -> RNA; the sequence must be DNA to be transcribed
print seq
seq=seq.translate()		# seq is the transcribed RNA at this point; translate it to protein
print seq

# the sequence data type
seq=Seq('CCGGUU',IUPAC.IUPACUnambiguousRNA())	# alphabet built by calling the IUPACUnambiguousRNA class
print seq
print seq.back_transcribe()	# RNA -> DNA; the sequence must be RNA to be back-transcribed

seq=Seq('ATGGTCTTTCCAGACGCG',IUPAC.unambiguous_dna)
print Seq.transcribe(seq)	# same operation called through the class; above it is called as a method

print seq[:5]	# Seq supports string-like operations such as slicing
print len(seq)
# seq[0]='C'	# not allowed: Seq objects are immutable
st=str(seq)		# convert to a plain string
print st

# the editable (mutable) sequence data type
from Bio.Seq import MutableSeq
mut_seq=seq.tomutable()	# convert it to a mutable sequence