示例#1
0
import BioLib

# Overlap-graph listing (input key "grph"): for every ordered pair of records
# whose sequences differ, print both names when the first sequence ends with
# the leading three characters of the second.
dna = BioLib.readFASTA(BioLib.getFile("grph"))
for tail in dna:
    for head in dna:
        if tail.dna == head.dna:
            # Identical sequences are never paired.
            continue
        prefix = head.dna[:3]
        if tail.dna.endswith(prefix):
            print("%s %s" % (tail.name, head.name))
示例#2
0
##Given: A collection of n (n≤10) DNA strings s1,…,sn of equal length (at most 1 kbp).
##        Strings are given in FASTA format.
##
##Return: The matrix D corresponding to the p-distance dp on the given strings.
##        As always, note that your answer is allowed an absolute error of 0.001.


import BioLib


# Build the distance matrix: each entry is BioLib.hammingDistance of two
# records divided by the length of the first record's sequence. Rows are then
# printed one per line with entries separated by single spaces.
dna = BioLib.readFASTA(BioLib.getFile("pdst"))
length = len(dna[0].dna)
dmatrix = list()
for x in dna:
    row = [float(BioLib.hammingDistance(x, y)) / length for y in dna]
    dmatrix.append(row)
for row in dmatrix:
    print(" ".join(str(value) for value in row))
        
示例#3
0
import BioLib
# Consensus string and profile matrix for a set of equal-length sequences.
#
# Each record returned by BioLib.readFASTA is indexed here as
# (name, sequence). For every column the occurrences of A, C, G and T are
# counted and stored in pMatrix; the consensus takes the most frequent base,
# with ties resolved in the order A, C, G, T.
#
# NOTE(review): the input path is machine-specific and kept unchanged.
with open("C:\\Users\\Aaron\\Downloads\\rosalind_cons (1).txt", "r") as handle:
    # The handle is closed as soon as the records have been read.
    NameDNA = BioLib.readFASTA(handle)
pMatrix = {"A": list(), "C": list(), "G": list(), "T": list()}
consensus = str()
for i in range(len(NameDNA[0][1])):
    column = [x[1][i] for x in NameDNA]
    counts = dict((base, column.count(base)) for base in "ACGT")
    for base in "ACGT":
        pMatrix[base].append(counts[base])
    # max() returns the first maximal item, which gives the A, C, G, T
    # tie-breaking order.
    consensus += max("ACGT", key=counts.get)

print(consensus)
for y in "ACGT":
    # Build the whole row as one string: a trailing-comma print followed by a
    # second print would insert an extra space after the colon.
    print(y + ": " + " ".join(map(str, pMatrix[y])))
    

    
    
示例#4
0
    regex = str()
    i = 0
    while i < len(motif):
        if(motif[i]=="{"):
            regex+="[^"+motif[i+1]+"]"
            i+=3
        elif(motif[i]=="["):
            regex+="("+motif[i+1]+"|"+motif[i+2]+")"
            i+=4
        else:
            regex+=motif[i]
            i+=1
    return re.compile(regex)

# For every protein ID in the "mprt" input, download its FASTA record from
# UniProt, and report the 1-based start positions of all (possibly
# overlapping) occurrences of the N-glycosylation motif N{P}[ST]{P}.
s = motifToRegex("N{P}[ST]{P}")
proteins = BioLib.getFile("mprt").read().split()
for x in proteins:
    # The record is saved to a local file named after the ID, then read back.
    urllib.urlretrieve("http://www.uniprot.org/uniprot/"+x+".fasta", x)
    with open(x, "r") as fasta:
        fasta.readline()  # skip the FASTA header line
        protein = fasta.read().replace("\n", "")
    matches = list()
    match = s.search(protein)
    while match:
        # match.start() is 0-based, so start + 1 is both the 1-based position
        # to report and the 0-based index at which to resume searching; this
        # is what lets overlapping occurrences be found.
        i = match.start()+1
        matches.append(str(i))
        match = s.search(protein, i)
    # Proteins without any occurrence produce no output at all.
    if matches:
        print(x)
        print(" ".join(matches))
示例#5
0
import BioLib

# Print "name1 name2" for each ordered pair of records with different
# sequences where the first sequence ends with the first three characters of
# the second one (input key "grph").
dna = BioLib.readFASTA(BioLib.getFile("grph"))
for source in dna:
    for target in dna:
        overlaps = (source.dna != target.dna
                    and source.dna.endswith(target.dna[:3]))
        if overlaps:
            print("%s %s" % (source.name, target.name))

示例#6
0
import BioLib


def getCodonFrequencies():
    """Count how often each translation appears in the codon table file.

    Reads ``RNACodonTable.txt`` from the current working directory. The file
    is treated as whitespace-separated tokens in which every second token
    (odd indices) is the translation belonging to the token before it.

    Returns:
        dict mapping each translation token (e.g. an amino-acid letter or
        ``"Stop"``) to the number of times it occurs.
    """
    # Close the table file deterministically instead of leaking the handle.
    with open("RNACodonTable.txt") as table:
        strings = table.read().split()
    frequencies = dict()
    for amino in strings[1::2]:
        frequencies[amino] = frequencies.get(amino, 0) + 1
    return frequencies


# Multiply, for every residue on the first line of the "mrna" input, the
# number of codons that translate to it, then multiply by the number of stop
# codons. The result is printed modulo 1,000,000.
file = BioLib.getFile("mrna")
freq = getCodonFrequencies()
nums = 1
# strip() drops the line terminator (including a stray "\r"), so only residue
# letters are looked up in the frequency table.
for x in file.readline().strip():
    # Reducing at every step gives the same final residue while keeping the
    # running product small.
    nums = (nums * freq[x]) % 1000000
nums = (nums * freq["Stop"]) % 1000000
print(nums)
示例#7
0
import BioLib


def readSet(string):
    """Parse a set literal such as ``"{1, 2, 10}"`` into a set of strings.

    Args:
        string: text of the form ``{a, b, c}``; surrounding whitespace and a
            trailing newline are ignored.

    Returns:
        set of the comma-separated element tokens (as strings), with braces,
        commas and whitespace removed. ``"{}"`` yields an empty set.
    """
    newset = set()
    # Split on commas rather than walking characters so that multi-character
    # elements (e.g. "10") stay intact.
    for token in string.strip().strip("{}").split(","):
        token = token.strip()
        if token:
            newset.add(token)
    return newset


# Read the "seto" input: an integer on the first line, then one set literal
# on each of the next two lines. Both parsed sets are printed.
file = BioLib.getFile("seto")
n = int(file.readline())
a, b = readSet(file.readline()), readSet(file.readline())
print(a)
print(b)
示例#8
0
import BioLib


def getMonoMassTable():
    """Load the monoisotopic mass table from ``MonoIsoMassTable.txt``.

    Each non-blank line of the file is expected to hold a symbol followed by
    its mass, separated by whitespace.

    Returns:
        list of ``(mass, symbol)`` tuples in file order, with ``mass`` as a
        float.
    """
    monotable = list()
    # The context manager closes the file; iterating line by line also copes
    # with a trailing newline at the end of the file.
    with open("MonoIsoMassTable.txt", "r") as table:
        for line in table:
            fields = line.split()
            if len(fields) < 2:
                # Blank or incomplete line: nothing to record.
                continue
            monotable.append((float(fields[1]), fields[0]))
    return monotable


def findFragment(monotable, weight):
    """Return the symbol of the first table entry whose mass matches *weight*.

    Args:
        monotable: sequence of entries whose item 0 is a mass and item 1 the
            corresponding symbol.
        weight: mass to look up.

    Returns:
        The symbol of the first entry with ``mass - 0.001 < weight <
        mass + 0.001``, or ``None`` when no entry qualifies.
    """
    candidates = (
        entry[1]
        for entry in monotable
        if entry[0] - 0.001 < weight < entry[0] + 0.001
    )
    return next(candidates, None)


# Read the whitespace-separated masses of the "spec" input, then spell out a
# protein by looking up the difference of every pair of consecutive masses in
# the monoisotopic mass table.
numbers = [float(token) for token in BioLib.getFile("spec").read().split()]
mono = getMonoMassTable()
protein = str()
for previous, current in zip(numbers, numbers[1:]):
    protein += findFragment(mono, current - previous)
print(protein)
示例#9
0
import BioLib

# Report every reverse palindrome of length 4 to 12 in the first record of the
# "revp" input as "<1-based start> <length>", one per line.
#
# A reverse palindrome has even length, so each one is centred on the gap
# between two neighbouring positions i and i + 1. Starting from that gap the
# window is grown one base pair at a time for as long as the base on the left
# is the complement of the base on the right.
basepairs = {"A":"T","C":"G","G":"C","T":"A"}
dna = BioLib.readFASTA(BioLib.getFile("revp"))[0].dna
results = list()
for i in range(len(dna)):
    it = 0  # number of complementary pairs matched around the gap so far
    # it < 6 caps the window at 12 bases; the index checks keep both ends
    # inside the sequence (index 0 included).
    while (it < 6 and i - it >= 0 and i + 1 + it < len(dna)
           and dna[i - it] == basepairs[dna[i + 1 + it]]):
        it += 1
        if it * 2 >= 4:
            # The window now spans dna[i - it + 1 : i + it + 1]; every
            # intermediate window is itself a palindrome and is recorded too.
            results.append((i - it + 2, it * 2))
for x in results:
    print("%d %d" % (x[0], x[1]))
                                           
    
        
示例#10
0
import BioLib
import math
import itertools

# For each GC-content value on the second line of the "prob" input, compute
# the base-10 logarithm of the probability that a random string with that
# GC-content equals the DNA string on the first line. The values are printed
# on one line separated by spaces.
file = BioLib.getFile("prob")
dna = file.readline().strip()  # drop the line terminator
cglist = [float(value) for value in file.readline().split()]
problist = list()
for x in cglist:
    # With GC-content x, C and G each have probability x / 2 and A and T each
    # have probability (1 - x) / 2.
    CG = math.log10(x / 2)
    AT = math.log10((1 - x) / 2)
    prob = 0
    for y in dna:
        if y in "CG":
            prob += CG
        elif y in "AT":
            prob += AT
    problist.append(prob)
# str.join also copes with an empty list of GC-content values.
s = " ".join(str(value) for value in problist)
print(s)
示例#11
0
    i = 0
    while i < len(motif):
        if (motif[i] == "{"):
            regex += "[^" + motif[i + 1] + "]"
            i += 3
        elif (motif[i] == "["):
            regex += "(" + motif[i + 1] + "|" + motif[i + 2] + ")"
            i += 4
        else:
            regex += motif[i]
            i += 1
    return re.compile(regex)


# Report, for each protein ID listed in the "mprt" input, the 1-based start
# positions of every (possibly overlapping) occurrence of the motif
# N{P}[ST]{P} in the FASTA record downloaded from UniProt.
s = motifToRegex("N{P}[ST]{P}")
proteins = BioLib.getFile("mprt").read().split()
for x in proteins:
    # The download is stored in a local file named after the ID.
    urllib.urlretrieve("http://www.uniprot.org/uniprot/" + x + ".fasta", x)
    with open(x, "r") as fasta:
        fasta.readline()  # discard the FASTA header line
        protein = fasta.read().replace("\n", "")
    matches = list()
    match = s.search(protein)
    while match:
        # 0-based start + 1 is the 1-based position to report and also the
        # index from which to continue, so overlapping hits are not skipped.
        i = match.start() + 1
        matches.append(str(i))
        match = s.search(protein, i)
    # Only proteins that contain the motif are printed.
    if matches:
        print(x)
        print(" ".join(matches))
示例#12
0
import BioLib

def getCodonFrequencies():
    """Return how many codons map to each translation in the codon table.

    ``RNACodonTable.txt`` (in the current working directory) is read as a
    flat list of whitespace-separated tokens; the tokens at odd indices are
    taken to be the translations.

    Returns:
        dict from translation token (an amino-acid letter or ``"Stop"``) to
        its number of occurrences in the table.
    """
    # Use a context manager so the file handle is always released.
    with open("RNACodonTable.txt") as table:
        strings = table.read().split()
    frequencies = dict()
    for amino in strings[1::2]:
        frequencies[amino] = frequencies.get(amino, 0) + 1
    return frequencies

# Number of RNA strings that translate to the protein on the first line of the
# "mrna" input, modulo 1,000,000: the product of the codon counts of every
# residue, times the number of stop codons.
file = BioLib.getFile("mrna")
freq = getCodonFrequencies()
nums = 1
# strip() removes the newline (and any "\r"), leaving residue letters only.
for x in file.readline().strip():
    # Taking the remainder after each factor keeps the product small and does
    # not change the final result.
    nums = (nums * freq[x]) % 1000000
nums = (nums * freq["Stop"]) % 1000000
print(nums)
    
示例#13
0
import BioLib
    
# The first record of the "splc" input is the full sequence; every remaining
# record is removed from it wherever it occurs. What is left is split into
# codons by BioLib.DNAtoCodon and translated until the first "Stop".
file = BioLib.getFile("splc")
records = BioLib.readFASTA(file)
introns = [record.DNA for record in records]
codons = BioLib.codonTable(True)
exon = introns.pop(0)
for intron in introns:
    # replace() leaves the string unchanged when the intron is absent.
    exon = exon.replace(intron, "")
blocks = BioLib.DNAtoCodon(exon)
residues = []
for codon in blocks:
    amino = codons[codon]
    if amino == "Stop":
        break
    residues.append(amino)
protein = "".join(residues)
print(protein)


示例#14
0
import BioLib

def getMonoMassTable():
    """Read ``MonoIsoMassTable.txt`` into a list of ``(mass, symbol)`` pairs.

    Every non-blank line is expected to contain a symbol and its mass
    separated by whitespace.

    Returns:
        list of ``(float mass, str symbol)`` tuples in the order of the file.
    """
    monotable = list()
    # Reading inside ``with`` closes the file; blank lines (such as the one
    # produced by a trailing newline) are skipped instead of raising.
    with open("MonoIsoMassTable.txt", "r") as table:
        for line in table:
            fields = line.split()
            if len(fields) < 2:
                continue
            monotable.append((float(fields[1]), fields[0]))
    return monotable

def findFragment(monotable, weight):
    """Look up the symbol whose mass lies within 0.001 of *weight*.

    Args:
        monotable: sequence of entries with the mass at index 0 and the
            symbol at index 1.
        weight: mass to match.

    Returns:
        Symbol of the first matching entry, or ``None`` if there is none.
    """
    for entry in monotable:
        mass = entry[0]
        if not (mass - 0.001 < weight < mass + 0.001):
            continue
        return entry[1]
    return None
    

# Convert the "spec" input into a list of masses and translate the gap between
# each mass and its predecessor into a symbol via the monoisotopic mass table.
raw_values = BioLib.getFile("spec").read().split()
numbers = [float(value) for value in raw_values]
mono = getMonoMassTable()
protein = str()
for i, mass in enumerate(numbers[1:], 1):
    protein += findFragment(mono, mass - numbers[i - 1])
print(protein)
    

    
示例#15
0
import BioLib

# Print every distinct protein that can be translated from an open reading
# frame of the first "orf" record or of its reverse complement. An open
# reading frame starts at an "ATG" and runs, codon by codon, to the first
# codon that the table maps to "Stop".
file = BioLib.getFile("orf")
fasta = BioLib.readFASTA(file)[0]
dnastrings = (fasta.reverseComplement(), fasta.DNA)
codons = BioLib.codonTable(True)
proteins = set()
for x in dnastrings:
    # The search position is local to each strand, so a start codon at the
    # very beginning of the second strand is not skipped.
    start = x.find("ATG")
    while start != -1:
        protein = []
        # range stops at len(x) - 2 so that only complete codons are read,
        # including one that ends exactly at the end of the strand.
        for i in range(start, len(x) - 2, 3):
            amino = codons[x[i:i + 3]]
            if amino == "Stop":
                proteins.add("".join(protein))
                break
            protein.append(amino)
        # Frames without a stop codon fall through and contribute nothing.
        start = x.find("ATG", start + 1)
for x in proteins:
    print(x)

    
    
    

        
示例#16
0
import BioLib
import itertools

# Enumerate all strings of length n over the letters "TACG" and print how many
# there are.
# NOTE(review): the alphabet read from the "lexf" input is not used, and n is
# fixed at 4 instead of being read from the file.
file = BioLib.getFile("lexf")
alphabet = file.readline().split()
n = 4
lists = ["".join(letters) for letters in itertools.product("TACG", repeat=n)]
print(len(lists))
示例#17
0
import BioLib

# The first line of the "tree" input is the node count n; every following
# line describes an edge. The script prints n - edges - 1, i.e. how many more
# edges are needed before n nodes carry the n - 1 edges of a tree.
file = BioLib.getFile("tree")
n = int(file.readline())
edges = 0
for x in file.readlines():
    tokens = x.split()
    if not tokens:
        # A blank (for example trailing) line holds no edge; counting it
        # would subtract one from the total.
        continue
    edges += len(tokens) - 1
print(n - edges - 1)
    
    
示例#18
0
import BioLib
def readSet(string):
    """Turn text like ``"{1, 2, 10}"`` into the set of its element tokens.

    Args:
        string: a brace-delimited, comma-separated list; leading and trailing
            whitespace (including a newline) is ignored.

    Returns:
        set of element strings without braces, commas or padding. An empty
        pair of braces gives an empty set.
    """
    inner = string.strip().strip("{}")
    # Tokenise on commas so elements longer than one character are kept
    # whole, and drop the empty token produced by "{}".
    tokens = (piece.strip() for piece in inner.split(","))
    newset = set(token for token in tokens if token)
    return newset

# Parse the "seto" input (an integer line followed by two set-literal lines)
# and print the two resulting sets.
file = BioLib.getFile("seto")
n = int(file.readline())
first_line = file.readline()
a = readSet(first_line)
second_line = file.readline()
b = readSet(second_line)
print(a)
print(b)
        
示例#19
0
import BioLib

# Reads the node count n from the first line of the "tree" input, counts the
# edges listed on the remaining lines and prints n - edges - 1 (the number of
# edges still missing from a tree on n nodes).
file = BioLib.getFile("tree")
n = int(file.readline())
edges = 0
for x in file.readlines():
    tokens = x.split()
    # Ignore blank lines: an empty line would otherwise count as -1 edges.
    if tokens:
        edges += len(tokens) - 1
print(n - edges - 1)
示例#20
0
import BioLib

# Compute the KMP failure array of the last record in the "kmp (1)" input and
# write it, space-separated, to the file "answer".
#
# failure[k] is the length of the longest proper prefix of s[:k + 1] that is
# also a suffix of it.
s = BioLib.readFASTA(BioLib.getFile("kmp (1)")).pop().DNA
j = 0  # length of the prefix currently matched
i = 1  # position in s being processed
failure = [0]
while i < len(s):
    if s[i] == s[j]:
        # The matched prefix grows by one character.
        j += 1
        failure.append(j)
        i += 1
    elif j > 0:
        # Fall back to the next shorter border and retry the same position.
        j = failure[j - 1]
    else:
        # No border at all ends at position i.
        failure.append(0)
        i += 1
# Join once at the end instead of growing a string inside the loop, and open
# the output only when the result is ready so the handle is always closed.
string = " ".join(map(str, failure))
with open("answer", "w") as f:
    f.write(string)
print("end")
示例#21
0
##Given: A collection of n (n≤10) DNA strings s1,…,sn of equal length (at most 1 kbp).
##        Strings are given in FASTA format.
##
##Return: The matrix D corresponding to the p-distance dp on the given strings.
##        As always, note that your answer is allowed an absolute error of 0.001.

import BioLib

# Fill dmatrix with one row per record: BioLib.hammingDistance against every
# record, divided by the length of the first record's sequence. Each row is
# printed on its own line, entries separated by single spaces.
dna = BioLib.readFASTA(BioLib.getFile("pdst"))
length = len(dna[0].dna)
dmatrix = [
    [float(BioLib.hammingDistance(x, y)) / length for y in dna]
    for x in dna
]
for row in dmatrix:
    print(" ".join(map(str, row)))
示例#22
0
import BioLib
import collections

# Count how often each 4-mer returned by BioLib.getKmers("ACGT", 4) occurs in
# the first "kmer" record, sliding a window one base at a time. The counts are
# printed on one line in the order of the getKmers result.
dna = BioLib.readFASTA(BioLib.getFile("kmer"))[0].dna
kmers1 = BioLib.getKmers("ACGT", 4)
# Map every 4-mer to its position in kmers1.
kmers = dict(zip(kmers1, range(len(kmers1))))
kmercount = dict()
for start in range(len(dna) - 3):
    index = kmers[dna[start:start + 4]]
    kmercount[index] = kmercount.get(index, 0) + 1

print(" ".join(str(kmercount.get(i, 0)) for i in range(len(kmers))))

示例#23
0
import BioLib

# Weighted sum over the integers on the first line of the "iev" input: each
# count is multiplied by the factor at the same position in probs and by 2,
# and the total is printed.
file = BioLib.getFile("iev")
couples = [int(token) for token in file.readline().split()]
probs = [1,1,1,0.75, 0.5,0]
ex = sum(count * probs[i] * 2 for i, count in enumerate(couples))
print(ex)
示例#24
0
import BioLib

# Collect the sequence of every FASTA record in the "lcsm" input.
dna = map(lambda x: x.dna, BioLib.readFASTA(BioLib.getFile("lcsm")))
# Remove the first sequence from the list; the remaining ones stay in `dna`.
dna1 = dna.pop(0)
# NOTE(review): the statement below is truncated in the source (no right-hand
# side), so this snippet does not parse as it stands.
substrings = 
示例#25
0
import BioLib
import math
import itertools

# Line 1 of the "prob" input is a DNA string, line 2 a list of GC-content
# values. For each value the script prints log10 of the probability that a
# random string with that GC-content matches the DNA string exactly.
file = BioLib.getFile("prob")
dna = file.readline().strip()  # remove the trailing newline
cglist = [float(value) for value in file.readline().split()]
problist = list()
for x in cglist:
    # Per-base log-probabilities: x / 2 for C or G, (1 - x) / 2 for A or T.
    CG = math.log10(x/2)
    AT = math.log10((1-x)/2)
    prob = 0
    for y in dna:
        if y in "CG":
            prob += CG
        elif y in "AT":
            prob += AT
    problist.append(prob)
# Join with str.join rather than reduce, which fails on an empty list.
s = " ".join(map(str, problist))
print(s)
示例#26
0
import BioLib

# Find all reverse palindromes of length 4 to 12 in the first "revp" record
# and print each as "<1-based start> <length>".
#
# Every reverse palindrome has even length and is therefore centred between
# two adjacent positions i and i + 1. The window around that centre is widened
# pair by pair while the left base equals the complement of the right base.
basepairs = {"A": "T", "C": "G", "G": "C", "T": "A"}
dna = BioLib.readFASTA(BioLib.getFile("revp"))[0].dna
results = list()
for i in range(len(dna)):
    it = 0  # complementary pairs matched so far around this centre
    while it < 6 and i - it >= 0 and i + 1 + it < len(dna):
        if dna[i - it] != basepairs[dna[i + 1 + it]]:
            break
        it += 1
        # After it pairs the window is dna[i - it + 1 : i + it + 1]. Windows
        # of 4 to 12 bases are all reported, including those nested inside a
        # longer palindrome.
        if it * 2 >= 4:
            results.append((i - it + 2, it * 2))
for x in results:
    print("%d %d" % (x[0], x[1]))
示例#27
0
文件: KMP.py 项目: ackellyb/Bioinfo
import BioLib

# Build the KMP failure array for the last record of the "kmp (1)" input and
# save it as a space-separated line in the file "answer".
#
# failure[k] holds the length of the longest proper prefix of s[:k + 1] that
# is also one of its suffixes.
s = BioLib.readFASTA(BioLib.getFile("kmp (1)")).pop().DNA
j = 0  # current matched prefix length
i = 1  # index in s under consideration
failure = [0]
while i < len(s):
    if s[i] == s[j]:
        j += 1
        failure.append(j)
        i += 1
    elif j > 0:
        # Mismatch after a partial match: retry with the next shorter border.
        j = failure[j-1]
    else:
        # Mismatch with nothing matched: the border length here is zero.
        failure.append(0)
        i += 1
# Format the output in one pass and let the context manager close the file,
# even if writing fails.
string = " ".join(str(value) for value in failure)
with open("answer", "w") as f:
    f.write(string)
print("end")
        
示例#28
0
import BioLib

# Translate every open reading frame of the first "orf" record and of its
# reverse complement, and print each distinct protein once. A frame begins at
# an "ATG" and ends at the first codon the table maps to "Stop".
file = BioLib.getFile("orf")
fasta = BioLib.readFASTA(file)[0]
dnastrings = (fasta.reverseComplement(), fasta.DNA)
codons = BioLib.codonTable(True)
proteins = set()
for x in dnastrings:
    # Start the search afresh for each strand so no leading start codon is
    # missed on the second one.
    start = x.find("ATG")
    while start != -1:
        residues = []
        # Only whole codons are read; the last one may end exactly at the end
        # of the strand.
        for i in range(start, len(x) - 2, 3):
            amino = codons[x[i:i + 3]]
            if amino == "Stop":
                proteins.add("".join(residues))
                break
            residues.append(amino)
        # A frame that runs off the strand without a stop codon is discarded.
        start = x.find("ATG", start + 1)
for x in proteins:
    print(x)