示例#1
0
def matchtrans(dnaseqs, pepseqs, gapin, verbose, mtx, allinternal,
               readthroughstop):
    """Pair every peptide sequence with a DNA sequence that translates to it.

    Each DNA sequence is degapped and translated; each peptide sequence is
    degapped and upper-cased. A trailing "*" (stop codon) is stripped from
    both before they are compared for exact equality.

    Args:
        dnaseqs: mapping of DNA name -> (sequence, note).
        pepseqs: mapping of peptide name -> (sequence, note).
        gapin: gap characters handed to degap() for both inputs.
        verbose: verbosity level; above 2 every translated / degapped
            sequence is reported through warn().
        mtx: translation matrix forwarded to mod_translate.translate().
        allinternal: negated and forwarded as the third argument of
            mod_translate.translate().
        readthroughstop: forwarded unchanged as the fourth argument of
            mod_translate.translate().

    Returns:
        dict mapping peptide name -> DNA name. Peptides for which no unused
        DNA sequence with an identical translation remains are left out.
    """
    # NOTICE:     We need to handle the situation where more than one DNA
    #             sequence translates to the same peptide sequence.
    #
    # ASSUMPTION: Identical peptide sequences will align exactly the same
    #             way.
    #
    # EXAMPLE:
    #
    #             dnaSeq17 -> pepSeq17
    #             dnaSeq32 -> pepSeq32
    #
    #             *) dnaSeq17 and dnaSeq32 differ by a few nucleotides
    #
    #             *) pepSeq17 and pepSeq32 are exactly the same
    #
    #             Given the assumption mentioned above, it does NOT
    #             matter if dnaSeq17 gets paired with pepSeq32.

    # Translated peptide -> names of the DNA sequences that produce it, in
    # the order they were encountered. Each name is handed out at most once.
    candidates = {}

    for key in dnaseqs.keys():
        dna, _note = dnaseqs[key]
        dna = degap(dna, gapin)
        newpep = mod_translate.translate(dna, mtx, not allinternal,
                                         readthroughstop)

        # Strip terminal stop-codon
        if newpep.endswith("*"):
            newpep = newpep[:-1]

        if verbose > 2:
            warn("DNA sequence " + key + " translated to:\n" + newpep)

        candidates.setdefault(newpep, []).append(key)

    result = {}
    for key in pepseqs.keys():
        pep, _note = pepseqs[key]
        pep = degap(pep, gapin).upper()

        # Strip terminal stop-codon
        if pep.endswith("*"):
            pep = pep[:-1]

        if verbose > 2:
            warn("Pep sequence " + key + " degapped: \n" + pep)

        # Consume the oldest still-unused DNA name for this peptide, if any.
        pending = candidates.get(pep)
        if pending:
            result[key] = pending.pop(0)

    return result
示例#2
0
def revtrans(dnaseqs, pepseqs, crossref, gapin, gapout, verbose):
    """Thread each DNA sequence onto its aligned peptide, codon by codon.

    For every peptide in pepseqs that has an entry in crossref, the
    referenced DNA sequence is degapped and re-emitted as one codon per
    peptide residue, with ``gapout * 3`` written for every peptide character
    found in gapin. The case of each codon follows the case of its residue.

    Args:
        dnaseqs: mapping of DNA name -> (sequence, note).
        pepseqs: mapping of peptide name -> (aligned sequence, note).
        crossref: mapping of peptide name -> DNA name.
        gapin: characters treated as gaps in the input sequences.
        gapout: gap string written to the output, three times per gap column.
        verbose: when truthy, the gap settings and details of every failed
            sequence are reported through warn().

    Returns:
        Tuple ``(newdnaseqs, error)``: newdnaseqs maps DNA name ->
        (new DNA sequence, DNA note); error counts the peptides whose
        processing raised an exception.
    """
    if verbose:
        warn("gapin: '" + gapin + "'")
        warn("gapout: '" + gapout + "'")

    newdnaseqs = {}
    error = 0
    for key in pepseqs.keys():
        # Pre-set so the failure report below can always take len() of them.
        dna, pep, newdna = "", "", ""
        try:
            # Find the corresponding sequences
            if key not in crossref:
                warn("No cross-reference, skipping peptide sequence " + key)
                continue

            dnaName = crossref[key]
            dna, noted = dnaseqs[dnaName]
            dna = degap(dna, gapin)
            pep, notep = pepseqs[key]

            degapped = degap(pep, gapin)
            # Correct start if pep starts with gap characters
            dnap = -3 * initialgaps(pep, gapin)

            newpep = mod_translate.translate(dna, None, True, False)
            # Strip terminal stop-codon
            if newpep.endswith("*"):
                newpep = newpep[:-1]

            # Offset (in residues) at which the reverse translation starts
            offset = newpep.find(degapped)
            if offset < 0:
                warn("Could not match pep:" + key)
            # NOTE(review): when the peptide is not found, offset is -1 and
            # the start position is still shifted by -3 below, so codons are
            # read from a displaced position - confirm whether such a
            # sequence should be skipped or counted as an error instead.
            dnap = dnap + 3 * offset

            # Do the reverse translation for this seq
            l_dna = []
            for c in pep:
                if c in gapin:
                    l_dna.append(gapout * 3)
                else:
                    # Extract codon - keep case from the amino acid
                    codon = dna[dnap:dnap + 3]
                    if c.isupper():
                        codon = codon.upper()
                    else:
                        codon = codon.lower()

                    l_dna.append(codon)
                    dnap = dnap + 3

            # Everything's cool - add the new seq to the result
            newdna = "".join(l_dna)
            newdnaseqs[dnaName] = (newdna, noted)
        except Exception:
            # Deliberately best-effort: one bad sequence must not abort the
            # run. KeyboardInterrupt / SystemExit are no longer swallowed.
            if verbose:
                warn("Error rev-translating seq:" + key)
                warn("\nLen dna:" + str(len(dna)) + " pep:" + str(len(pep)) +
                     " newdna:" + str(len(newdna)) + "\n")
            error = error + 1
    return (newdnaseqs, error)
def matchtrans(dnaseqs,pepseqs,gapin,verbose,mtx,allinternal,readthroughstop):
	"""Pair every peptide sequence with a DNA sequence that translates to it.

	Each DNA sequence is degapped and translated; each peptide sequence is
	degapped and upper-cased. A trailing "*" (stop codon) is stripped from
	both before they are compared for exact equality.

	Args:
		dnaseqs: mapping of DNA name -> (sequence, note).
		pepseqs: mapping of peptide name -> (sequence, note).
		gapin: gap characters handed to degap() for both inputs.
		verbose: verbosity level; above 2 every translated / degapped
			sequence is reported through warn().
		mtx: translation matrix forwarded to mod_translate.translate().
		allinternal: negated and forwarded as the third argument of
			mod_translate.translate().
		readthroughstop: forwarded unchanged as the fourth argument of
			mod_translate.translate().

	Returns:
		dict mapping peptide name -> DNA name. Peptides for which no unused
		DNA sequence with an identical translation remains are left out.
	"""
	# NOTICE:     We need to handle the situation where more than one DNA
	#             sequence translates to the same peptide sequence.
	#
	# ASSUMPTION: Identical peptide sequences will align exactly the same
	#             way.
	#
	# EXAMPLE:
	#
	#             dnaSeq17 -> pepSeq17
	#             dnaSeq32 -> pepSeq32
	#
	#             *) dnaSeq17 and dnaSeq32 differ by a few nucleotides
	#
	#             *) pepSeq17 and pepSeq32 are exactly the same
	#
	#             Given the assumption mentioned above, it does NOT
	#             matter if dnaSeq17 gets paired with pepSeq32.

	# Translated peptide -> names of the DNA sequences that produce it, in
	# the order they were encountered. Each name is handed out at most once.
	candidates = {}

	for key in dnaseqs.keys():
		dna, _note = dnaseqs[key]
		dna = degap(dna, gapin)
		newpep = mod_translate.translate(dna, mtx, not allinternal, readthroughstop)

		# Strip terminal stop-codon
		if newpep.endswith("*"):
			newpep = newpep[:-1]

		if verbose > 2:
			warn("DNA sequence " + key + " translated to:\n" + newpep)

		candidates.setdefault(newpep, []).append(key)

	result = {}
	for key in pepseqs.keys():
		pep, _note = pepseqs[key]
		pep = degap(pep, gapin).upper()

		# Strip terminal stop-codon
		if pep.endswith("*"):
			pep = pep[:-1]

		if verbose > 2:
			warn("Pep sequence " + key + " degapped: \n" + pep)

		# Consume the oldest still-unused DNA name for this peptide, if any.
		pending = candidates.get(pep)
		if pending:
			result[key] = pending.pop(0)

	return result
示例#4
0
					qseq = dna
				elif rf == "2":
					qseq = dna[1:]
				elif rf == "3":
					qseq = dna[2:]
				elif rf == "-1":
					qseq = revCom(dna)
				elif rf == "-2":
					qseq = revCom(dna)[1:]
				elif rf == "-3":
					qseq = revCom(dna)[2:]
				else:
					qseq = dna

				# Do the actual translation
				pep = mod_translate.translate(qseq,mtx,not opt.allinternal,opt.readthroughstop)
				pa  = mod_translate.annotate(qseq,mtx)
				
				# The annotation string may be longer that the peptide, if the -x more is not used
				pa = pa[:len(pep)]				

				# Store translated sequence
				if echo_rf:
					cname = name+"_rframe"+rf
				else:
					cname = name
				
				data = ( cname,pep,pa,qseq )
				d_collect[rf] = data
			
			# Do ORF finding?
示例#5
0
					qseq = dna
				elif rf == "2":
					qseq = dna[1:]
				elif rf == "3":
					qseq = dna[2:]
				elif rf == "-1":
					qseq = revCom(dna)
				elif rf == "-2":
					qseq = revCom(dna)[1:]
				elif rf == "-3":
					qseq = revCom(dna)[2:]
				else:
					qseq = dna

				# Do the actual translation
				pep = mod_translate.translate(qseq,mtx,not opt.allinternal,opt.readthroughstop)
				pa  = mod_translate.annotate(qseq,mtx)

				# The annotation string may be longer that the peptide, if the -x more is not used
				pa = pa[:len(pep)]

				# Store translated sequence
				if echo_rf:
					cname = name+"_rframe"+rf
				else:
					cname = name

				data = ( cname,pep,pa,qseq )
				d_collect[rf] = data

			# Do ORF finding?
示例#6
0
            continue

        if arg == "--allinternal":
            firstIsStart = False
            continue

        if arg == "-mtx":
            mtxfn = argv[0]
            argv = argv[1:]
            continue

        fn = arg

    mtx = mod_translate.parseMatrixFile(mtxfn)
    try:
        seqs = mod_seqfiles.readfileauto(fn)

        newseqs = {}

        if not maxseqs: maxseqs = len(seqs.keys())
        for key in seqs.keys()[0:maxseqs]:
            seq, note = seqs[key]
            newseqs[key] = mod_translate.translate(seq, mtx, firstIsStart,
                                                   readThroughStop), note

        mod_seqfiles.writestream(sys.stdout, newseqs, "fasta", "P")

    except Exception, e:
        sys.stderr.write("Translation error: %s\n" % str(e))
        sys.exit(1)
			readThroughStop = True
			continue

		if arg == "--allinternal":
			firstIsStart = False
			continue

		if arg == "-mtx":
			mtxfn = argv[0]
			argv = argv[1:]
			continue

		fn = arg
			
	mtx = mod_translate.parseMatrixFile(mtxfn)
	try:
		seqs = mod_seqfiles.readfileauto(fn)
	
		newseqs = {}
	
		if not maxseqs: maxseqs = len(seqs.keys())
		for key in seqs.keys()[0:maxseqs]:
			seq, note = seqs[key]
			newseqs[key] = mod_translate.translate(seq,mtx,firstIsStart,readThroughStop) , note
		
		mod_seqfiles.writestream(sys.stdout,newseqs,"fasta","P")
		
	except Exception, e:
		sys.stderr.write("Translation error: %s\n" % str(e))
		sys.exit(1)