import BioLib dna = BioLib.readFASTA(BioLib.getFile("grph")) for x in dna: for y in dna: if (x.dna != y.dna and x.dna.endswith(y.dna[:3])): print x.name, y.name
##Given: A collection of n (n≤10) DNA strings s1,…,sn of equal length (at most 1 kbp). ## Strings are given in FASTA format. ## ##Return: The matrix D corresponding to the p-distance dp on the given strings. ## As always, note that your answer is allowed an absolute error of 0.001. import BioLib dna = BioLib.readFASTA(BioLib.getFile("pdst")) dmatrix = list() length = len(dna[0].dna) for x in dna: dmatrix.append(map(lambda y: float(BioLib.hammingDistance(x,y))/length, dna)) for x in dmatrix: for y in x: print y, print
import BioLib file = open("C:\\Users\\Aaron\\Downloads\\rosalind_cons (1).txt","r") NameDNA = BioLib.readFASTA(file) pMatrix = {"A": list(), "C": list(), "G": list(), "T": list()} consensus = str() for i in range(len(NameDNA[0][1])): a = c = g = t = 0 for x in NameDNA: if(x[1][i]=="A"): a+=1 elif(x[1][i]=="C"): c+=1 elif(x[1][i]=="G"): g+=1 elif(x[1][i]=="T"): t+=1 pMatrix["A"].append(a) pMatrix["C"].append(c) pMatrix["G"].append(g) pMatrix["T"].append(t) if (a >= c and a >= g and a >=t): consensus+="A" elif(c >= a and c >= g and c >=t): consensus+="C" elif(g >= a and g >= c and g >=t): consensus+="G" else: consensus+="T" print consensus for y in "ACGT": print y+": ", print " ".join(map(str,pMatrix[y]))
regex = str() i = 0 while i < len(motif): if(motif[i]=="{"): regex+="[^"+motif[i+1]+"]" i+=3 elif(motif[i]=="["): regex+="("+motif[i+1]+"|"+motif[i+2]+")" i+=4 else: regex+=motif[i] i+=1 return re.compile(regex) s = motifToRegex("N{P}[ST]{P}") proteins = BioLib.getFile("mprt").read().split() for x in proteins: urllib.urlretrieve("http://www.uniprot.org/uniprot/"+x+".fasta",x) file = open(x,"r") file.readline() protein = file.read().replace("\n","") match = s.search(protein) matches = list() if match: print x while match: i = match.start()+1 matches.append(str(i)) match = s.search(protein, i) print " ".join(matches)
import BioLib dna = BioLib.readFASTA(BioLib.getFile("grph")) for x in dna: for y in dna: if(x.dna != y.dna and x.dna.endswith(y.dna[:3])): print x.name, y.name
import BioLib def getCodonFrequencies(): strings = open("RNACodonTable.txt").read().split() frequencies = dict() for i in range(1, len(strings), 2): if strings[i] not in frequencies: frequencies[strings[i]] = 1 else: frequencies[strings[i]] += 1 return frequencies file = BioLib.getFile("mrna") freq = getCodonFrequencies() nums = 1 for x in file.readline(): for y in x: if y != "\n": nums *= freq[x] nums *= freq["Stop"] nums = nums % 1000000 print nums
import BioLib def readSet(string): newset = set() for x in string: if (x != "{" or x != "}" or x != " " or x != ","): newset.add(x) return newset file = BioLib.getFile("seto") n = int(file.readline()) a = readSet(file.readline()) b = readSet(file.readline()) print a print b
import BioLib def getMonoMassTable(): values = open("MonoIsoMassTable.txt", "r").read().split("\n") monotable = list() for x in values: x = x.split() monotable.append((float(x[1]), x[0])) return monotable def findFragment(monotable, weight): for x in monotable: if (x[0] - 0.001 < weight < x[0] + 0.001): return x[1] numbers = map(lambda x: float(x), BioLib.getFile("spec").read().split()) mono = getMonoMassTable() protein = str() for i in range(1, len(numbers)): diff = numbers[i] - numbers[i - 1] protein += findFragment(mono, diff) print protein
import BioLib basepairs = {"A":"T","C":"G","G":"C","T":"A"} dna = BioLib.readFASTA(BioLib.getFile("revp"))[0].dna results = list() for i in range(len(dna)): it = 1 while i - it >= 0 and i + it < len(dna) and dna[i+1-it] == basepairs[dna[i+it]]: print dna[i+1-it], dna[i+it] print it it+=1 if 4 <= it*2 <= 12: results.append((i-it+1, it*2)) #for x in results: # print x[0], x[1], dna[x[0]:x[0]+x[1]]
import BioLib import math import itertools file = BioLib.getFile("prob") dna = file.readline() cglist = map(lambda x: float(x), file.readline().split()) problist = list() for x in cglist: CG = math.log10(x / 2) AT = math.log10((1 - x) / 2) prob = 0 for y in dna: if (y == "C" or y == "G"): prob += CG elif (y == "A" or y == "T"): prob += AT problist.append(prob) s = reduce(lambda x, y: str(x) + " " + str(y), problist) print s
i = 0 while i < len(motif): if (motif[i] == "{"): regex += "[^" + motif[i + 1] + "]" i += 3 elif (motif[i] == "["): regex += "(" + motif[i + 1] + "|" + motif[i + 2] + ")" i += 4 else: regex += motif[i] i += 1 return re.compile(regex) s = motifToRegex("N{P}[ST]{P}") proteins = BioLib.getFile("mprt").read().split() for x in proteins: urllib.urlretrieve("http://www.uniprot.org/uniprot/" + x + ".fasta", x) file = open(x, "r") file.readline() protein = file.read().replace("\n", "") match = s.search(protein) matches = list() if match: print x while match: i = match.start() + 1 matches.append(str(i)) match = s.search(protein, i) print " ".join(matches)
import BioLib def getCodonFrequencies(): strings = open("RNACodonTable.txt").read().split() frequencies = dict() for i in range(1,len(strings),2): if strings[i] not in frequencies: frequencies[strings[i]] = 1 else: frequencies[strings[i]]+=1 return frequencies file = BioLib.getFile("mrna") freq = getCodonFrequencies() nums = 1 for x in file.readline(): for y in x: if y !="\n": nums*=freq[x] nums*=freq["Stop"] nums = nums%1000000 print nums
import BioLib file = BioLib.getFile("splc") introns = map(lambda x: x.DNA, BioLib.readFASTA(file)) codons = BioLib.codonTable(True) exon = introns.pop(0) for x in introns: if x in exon: exon = exon.replace(x,"") blocks = BioLib.DNAtoCodon(exon) protein = str() for x in blocks: if(codons[x]=="Stop"): break protein+=codons[x] print protein
import BioLib def getMonoMassTable(): values = open("MonoIsoMassTable.txt","r").read().split("\n") monotable = list() for x in values: x = x.split() monotable.append((float(x[1]),x[0])) return monotable def findFragment(monotable, weight): for x in monotable: if(x[0]-0.001 < weight < x[0]+0.001): return x[1] numbers = map(lambda x: float(x), BioLib.getFile("spec").read().split()) mono = getMonoMassTable() protein = str() for i in range(1, len(numbers)): diff = numbers[i]-numbers[i-1] protein+=findFragment(mono,diff) print protein
import BioLib file = BioLib.getFile("orf") fasta = BioLib.readFASTA(file)[0] dnastrings = (fasta.reverseComplement(), fasta.DNA) codons = BioLib.codonTable(True) proteins = list() it = 0 for x in dnastrings: while("ATG" in x): it = x.find("ATG", it) x = x[it:] protein = str() for i in range(0, len(x),3): if i+3 >= len(x): break elif codons[x[i:i+3]]=="Stop": proteins.append(protein) break; else: protein+=codons[x[i:i+3]] it=+3 proteins = set(proteins) for x in proteins: print x
import BioLib import itertools file = BioLib.getFile("lexf") alphabet = file.readline().split() #n = int(file.readline()) lists = list() n = 4 for x in itertools.product("TACG",repeat=n): kmer = str() for y in x: kmer+=y lists.append(kmer) #print kmer print len(lists)
import BioLib file = BioLib.getFile("tree") n = int(file.readline()) edges = 0 for x in file.readlines(): edges+=len(x.split())-1 print n-edges-1
import BioLib def readSet(string): newset = set() for x in string: if(x != "{" or x != "}" or x != " " or x != ","): newset.add(x) return newset file = BioLib.getFile("seto") n = int(file.readline()) a = readSet(file.readline()) b = readSet(file.readline()) print a print b
import BioLib file = BioLib.getFile("tree") n = int(file.readline()) edges = 0 for x in file.readlines(): edges += len(x.split()) - 1 print n - edges - 1
import BioLib s = BioLib.readFASTA(BioLib.getFile("kmp (1)")).pop().DNA f = open("answer", "w") j = 0 i = 1 failure = [0] string = "0" while i < len(s): if s[i] == s[j]: j += 1 failure.append(j) string += " " + str(j) i += 1 elif j > 0: j = failure[j - 1] else: j = 0 i += 1 failure.append(j) string += " 0" f.write(string) f.close() print "end"
##Given: A collection of n (n≤10) DNA strings s1,…,sn of equal length (at most 1 kbp). ## Strings are given in FASTA format. ## ##Return: The matrix D corresponding to the p-distance dp on the given strings. ## As always, note that your answer is allowed an absolute error of 0.001. import BioLib dna = BioLib.readFASTA(BioLib.getFile("pdst")) dmatrix = list() length = len(dna[0].dna) for x in dna: dmatrix.append( map(lambda y: float(BioLib.hammingDistance(x, y)) / length, dna)) for x in dmatrix: for y in x: print y, print
import BioLib import collections dna = BioLib.readFASTA(BioLib.getFile("kmer"))[0].dna kmers1 = BioLib.getKmers("ACGT",4) kmers = dict(zip(kmers1,range(len(kmers1)))) kmercount = dict() i = 0 while i+4 <= len(dna): s = dna[i:i+4] kmercount[kmers[s]] = kmercount.get(kmers[s],0)+1 i+=1 for i in range(len(kmers)): print kmercount.get(i,0),
import BioLib file = BioLib.getFile("iev") couples = map(lambda x: int(x), file.readline().split()) probs = [1,1,1,0.75, 0.5,0] ex = 0 for i in range(len(couples)): ex+=couples[i]*probs[i]*2 print ex
import BioLib dna = map(lambda x: x.dna, BioLib.readFASTA(BioLib.getFile("lcsm"))) dna1 = dna.pop(0) substrings =
import BioLib import math import itertools file = BioLib.getFile("prob") dna = file.readline() cglist = map(lambda x: float(x), file.readline().split()) problist = list() for x in cglist: CG = math.log10(x/2) AT = math.log10((1-x)/2) prob = 0 for y in dna: if(y=="C" or y=="G"): prob+=CG elif(y=="A" or y=="T"): prob+=AT problist.append(prob) s = reduce(lambda x,y: str(x)+" "+str(y), problist) print s
import BioLib basepairs = {"A": "T", "C": "G", "G": "C", "T": "A"} dna = BioLib.readFASTA(BioLib.getFile("revp"))[0].dna results = list() for i in range(len(dna)): it = 1 while i - it >= 0 and i + it < len(dna) and dna[i + 1 - it] == basepairs[dna[i + it]]: print dna[i + 1 - it], dna[i + it] print it it += 1 if 4 <= it * 2 <= 12: results.append((i - it + 1, it * 2)) #for x in results: # print x[0], x[1], dna[x[0]:x[0]+x[1]]
import BioLib s = BioLib.readFASTA(BioLib.getFile("kmp (1)")).pop().DNA f = open("answer","w") j = 0 i = 1 failure = [0] string = "0" while i < len(s): if s[i]==s[j]: j+=1 failure.append(j) string+=" "+str(j) i+=1 elif j > 0: j = failure[j-1] else: j = 0 i+=1 failure.append(j) string+=" 0" f.write(string) f.close() print "end"
import BioLib file = BioLib.getFile("orf") fasta = BioLib.readFASTA(file)[0] dnastrings = (fasta.reverseComplement(), fasta.DNA) codons = BioLib.codonTable(True) proteins = list() it = 0 for x in dnastrings: while ("ATG" in x): it = x.find("ATG", it) x = x[it:] protein = str() for i in range(0, len(x), 3): if i + 3 >= len(x): break elif codons[x[i:i + 3]] == "Stop": proteins.append(protein) break else: protein += codons[x[i:i + 3]] it = +3 proteins = set(proteins) for x in proteins: print x