示例#1
0
def lcsq():
    """Print LCS information for the first two records of ``rosalind_lcsq.txt``.

    Two lines are written to stdout: the value of ``rosalind_utils.lcsq_len``
    for the table built by ``rosalind_utils.lcsq`` (presumably the length of a
    longest common subsequence), followed by the result of backtracking
    through that table from the end of both sequences.
    """
    records = rosalind_utils.read_fasta("rosalind_lcsq.txt")
    first = records[0][1]
    second = records[1][1]
    # The table is built once and shared by the length query and the backtrack.
    table = rosalind_utils.lcsq(first, second)
    print(rosalind_utils.lcsq_len(table))
    print(rosalind_utils.lcsq_backtrack(
        table, first, second, len(first), len(second)))
示例#2
0
def kmer():
    """Print the 4-mer composition of the first record in ``rosalind_kmer.txt``.

    For every length-4 string over ``ACGT``, in the order produced by
    ``itertools.product`` (lexicographic for this alphabet), the value of
    ``overlapping_count(seq, kmer, 0)`` is computed; the counts are written
    to stdout on a single space-separated line.
    """
    seq = rosalind_utils.read_fasta("rosalind_kmer.txt")[0][1]
    # Build the whole row first and emit it once; the third argument of
    # overlapping_count is presumably the start offset of the search.
    counts = [str(overlapping_count(seq, ''.join(mer), 0))
              for mer in itertools.product("ACGT", repeat=4)]
    print(' '.join(counts))
示例#3
0
def lcsq():
    """Report a longest-common-subsequence result for ``rosalind_lcsq.txt``.

    The sequences of the first two FASTA records are compared. The function
    prints ``rosalind_utils.lcsq_len`` of the computed table (presumably the
    LCS length) and then the value returned by
    ``rosalind_utils.lcsq_backtrack`` started at the full lengths of both
    sequences.
    """
    fasta = rosalind_utils.read_fasta("rosalind_lcsq.txt")
    left, right = fasta[0][1], fasta[1][1]
    dp_table = rosalind_utils.lcsq(left, right)
    length_line = rosalind_utils.lcsq_len(dp_table)
    print(length_line)
    subsequence_line = rosalind_utils.lcsq_backtrack(
        dp_table, left, right, len(left), len(right))
    print(subsequence_line)
示例#4
0
def gc():
    """Print the record of ``rosalind_gc.txt`` with the highest GC content.

    The description of the winning record is printed first, then its
    ``rosalind_utils.gc_content`` value multiplied by 100. When several
    records tie, the first one in file order wins. ``max`` raises
    ``ValueError`` if the file yields no records.
    """
    scored = []
    for desc, seq in rosalind_utils.read_fasta("rosalind_gc.txt"):
        scored.append((desc, rosalind_utils.gc_content(seq)))
    best_desc, best_ratio = max(scored, key=lambda pair: pair[1])
    print(best_desc)
    print(best_ratio * 100)
示例#5
0
def kmer():
    """Print the 4-mer composition of the first record in ``rosalind_kmer.txt``.

    Each of the 256 length-4 strings over ``ACGT`` is generated with
    ``itertools.product`` (lexicographic order for this alphabet) and counted
    in the sequence via ``overlapping_count(seq, kmer, 0)``. The counts are
    printed as one space-separated line.
    """
    seq = rosalind_utils.read_fasta("rosalind_kmer.txt")[0][1]
    row = []
    for letters in itertools.product("ACGT", repeat=4):
        # The trailing 0 is presumably the offset at which counting starts.
        row.append(str(overlapping_count(seq, ''.join(letters), 0)))
    print(' '.join(row))
示例#6
0
def pmch():
    """Return the matching count for the first record of ``rosalind_pmch.txt``.

    The result is ``num_possible_matchings`` of the number of ``'A'``
    symbols multiplied by ``num_possible_matchings`` of the number of ``'G'``
    symbols. Presumably the input is an RNA string in which A/U and G/C
    occur in equal numbers, so only one base of each pair is counted.
    """
    rna = rosalind_utils.read_fasta("rosalind_pmch.txt")[0][1]
    a_matchings = num_possible_matchings(rna.count('A'))
    g_matchings = num_possible_matchings(rna.count('G'))
    return a_matchings * g_matchings
示例#7
0
def grph():
    """Print the overlap-graph edges (k = 3) for ``rosalind_grph.txt``.

    For every ordered pair of records that do not compare equal, one line
    ``"<desc_a> <desc_b>"`` is printed when the last three symbols of the
    first sequence equal the first three symbols of the second.
    """
    overlap_size = 3
    records = rosalind_utils.read_fasta("rosalind_grph.txt")
    for tail_rec in records:
        for head_rec in records:
            is_same = tail_rec == head_rec
            # Equal records never form an edge with each other.
            if not is_same and (tail_rec[1][-overlap_size:]
                                == head_rec[1][:overlap_size]):
                print("%s %s" % (tail_rec[0], head_rec[0]))
示例#8
0
def pmch():
    """Compute the matching count for the sequence in ``rosalind_pmch.txt``.

    Counts the ``'A'`` and ``'G'`` symbols of the first record and returns
    the product of ``num_possible_matchings`` applied to each count.
    Presumably the remaining two bases are assumed to occur equally often
    as their partners, which is why they are not counted separately.
    """
    sequence = rosalind_utils.read_fasta("rosalind_pmch.txt")[0][1]
    base_counts = [sequence.count(base) for base in ('A', 'G')]
    result = 1
    for amount in base_counts:
        result = result * num_possible_matchings(amount)
    return result
示例#9
0
def grph():
    """Emit the adjacency list of the 3-overlap graph of ``rosalind_grph.txt``.

    Every ordered pair of records is examined; pairs whose records compare
    equal are skipped. A line with both descriptions is printed whenever the
    3-symbol suffix of the first sequence matches the 3-symbol prefix of the
    second.
    """
    width = 3
    entries = rosalind_utils.read_fasta("rosalind_grph.txt")
    for source in entries:
        for target in entries:
            if source == target:
                # A record is not linked to an identical record.
                continue
            suffix = source[1][-width:]
            prefix = target[1][:width]
            if suffix == prefix:
                print("%s %s" % (source[0], target[0]))
示例#10
0
def revp():
    """Print all reverse palindromes of length 4..12 in ``rosalind_revp.txt``.

    Only the first record of the file is used. A window counts as a reverse
    palindrome when it equals ``rosalind_utils.reverse_complement`` of
    itself. Each hit is printed as ``"<1-based position> <length>"``, grouped
    by increasing length and, within one length, by increasing position.
    """
    dna = rosalind_utils.read_fasta("rosalind_revp.txt")[0][1]
    total = len(dna)
    for size in range(4, 13):
        # Slide a window of the current size over every valid start offset.
        for start in range(total - size + 1):
            window = dna[start:start + size]
            if window == rosalind_utils.reverse_complement(window):
                print("%d %d" % (start + 1, size))
示例#11
0
def sseq():
    """Print 1-based positions at which ``t`` occurs in ``s`` as a subsequence.

    ``s`` and ``t`` are the sequences of the first and second record of
    ``rosalind_sseq.txt``. For each symbol of ``t`` the leftmost occurrence
    in ``s`` after the previously chosen position is taken, and the positions
    are printed on one space-separated line.

    Raises:
        ValueError: if ``t`` is not a subsequence of ``s`` (a symbol of ``t``
            cannot be found in the remainder of ``s``).
    """
    recs = rosalind_utils.read_fasta("rosalind_sseq.txt")
    s = recs[0][1]
    t = recs[1][1]
    positions = []
    start = 0
    for symbol in t:
        # Search in place from `start` instead of slicing a copy of the tail;
        # str.index raises ValueError when the symbol is missing, so a
        # violated subsequence assumption cannot produce bogus positions.
        found = s.index(symbol, start)
        positions.append(str(found + 1))
        start = found + 1
    print(' '.join(positions))
示例#12
0
def splc():
    """Return the protein encoded by the spliced DNA of ``rosalind_splc.txt``.

    The first record is the DNA string and every further record is an
    intron. One occurrence of each intron is removed from the DNA, the rest
    is passed through ``rosalind_utils.transcribe`` and
    ``rosalind_utils.translate``, and the result is returned without its
    last element (presumably the stop-codon marker emitted by ``translate``).
    """
    recs = rosalind_utils.read_fasta("rosalind_splc.txt")
    seqs = [rec[1] for rec in recs]

    dna = seqs[0]
    # Longest introns are cut first so that a shorter intron contained in a
    # longer one cannot break the longer one apart before it is removed.
    for intron in sorted(seqs[1:], key=len, reverse=True):
        dna = dna.replace(intron, "", 1)
    prot = rosalind_utils.translate(rosalind_utils.transcribe(dna))
    return prot[:-1]
示例#13
0
def splc():
    """Splice the DNA in ``rosalind_splc.txt`` and return its translation.

    Record 0 holds the DNA string; records 1.. hold the introns. Each intron
    is deleted once (first occurrence), the remaining string is transcribed
    and translated with the ``rosalind_utils`` helpers, and everything but
    the final element of the translation is returned (the final element is
    presumably the stop marker).
    """
    recs = rosalind_utils.read_fasta("rosalind_splc.txt")
    seqs = [rec[1] for rec in recs]

    spliced = seqs[0]
    # Process introns from longest to shortest: removing a short intron that
    # is embedded in a longer one first would make the longer one unmatchable.
    introns = sorted(seqs[1:], key=len, reverse=True)
    for intron in introns:
        spliced = spliced.replace(intron, "", 1)
    rna = rosalind_utils.transcribe(spliced)
    prot = rosalind_utils.translate(rna)
    return prot[:-1]
示例#14
0
def long():
    """Assemble the reads of ``rosalind_long.txt`` into one string and print it.

    A read ``b`` is recorded as the successor of read ``a`` when
    ``overlap(seq(a), seq(b))`` is greater than half the length of the
    shorter of the two sequences. The chain is then walked from the read that
    is nobody's successor, appending the non-overlapping tail of each
    following read.

    Records are used as dictionary keys and set members, so they must be
    hashable (they are expected to be ``(desc, sequence)`` tuples).
    """
    # recs contain the list of tuples (desc, sequence)
    recs = rosalind_utils.read_fasta("rosalind_long.txt")
    successor = {}
    for reca in recs:
        for recb in recs:
            if reca == recb:
                continue
            min_len = min(len(seq(reca)), len(seq(recb)))
            # "more than half" written without division so the test does not
            # depend on integer vs. true division semantics.
            if 2 * overlap(seq(reca), seq(recb)) > min_len:
                successor[reca] = recb
    # The starting read is the one that never appears as a successor; the
    # successors are collected once so each membership test is O(1).
    followers = set(successor.values())
    sub = head([rec for rec in recs if rec not in followers])
    parts = [seq(sub)]
    while sub in successor:
        following = successor[sub]
        overlap_len = overlap(seq(sub), seq(following))
        parts.append(seq(following)[overlap_len:])
        sub = following
    print(''.join(parts))
示例#15
0
def cons():
    """Print the consensus string and profile matrix of ``rosalind_cons.txt``.

    For each position of the first sequence, the occurrences of A, C, G and T
    across all sequences are tallied. The consensus (the symbol with the
    highest tally per position) is printed first, followed by one
    ``"<base>: <counts...>"`` row per base in the order A, C, G, T.

    A symbol outside ``ACGT`` raises ``KeyError``; a sequence shorter than
    the first one raises ``IndexError``.
    """
    records = rosalind_utils.read_fasta("rosalind_cons.txt")
    strands = [record[1] for record in records]

    # One tally dictionary per column of the alignment.
    profile = []
    for pos in range(len(strands[0])):
        tally = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        for strand in strands:
            tally[strand[pos]] += 1
        profile.append(tally)

    by_count = operator.itemgetter(1)
    letters = []
    for tally in profile:
        # Ties are resolved by the dictionary's iteration order.
        letter, _ = max(tally.items(), key=by_count)
        letters.append(letter)
    print(''.join(letters))

    for base in 'ACGT':
        row = ' '.join(str(tally[base]) for tally in profile)
        print("%s: %s" % (base, row))
示例#16
0
def cons():
    """Compute and print a consensus and profile for ``rosalind_cons.txt``.

    Columns are indexed by the positions of the first sequence; every
    sequence contributes one count per column to the A/C/G/T tallies. Output
    is the consensus string (most frequent symbol per column) followed by
    four lines ``A: ...``, ``C: ...``, ``G: ...``, ``T: ...`` holding the
    per-column counts.

    ``KeyError`` is raised for symbols other than A, C, G, T and
    ``IndexError`` when a sequence is shorter than the first one.
    """
    fasta = rosalind_utils.read_fasta("rosalind_cons.txt")
    sequences = [entry[1] for entry in fasta]
    columns = []
    column_index = 0
    width = len(sequences[0])
    while column_index < width:
        counts = {'A': 0, 'C': 0, 'G': 0, 'T': 0}
        for sequence in sequences:
            counts[sequence[column_index]] += 1
        columns.append(counts)
        column_index += 1

    # The winner of a column is the (symbol, count) pair with the largest
    # count; equal counts fall back to the dictionary's iteration order.
    winners = [max(counts.items(), key=operator.itemgetter(1))
               for counts in columns]
    print(''.join(symbol for symbol, _ in winners))

    for symbol in ('A', 'C', 'G', 'T'):
        cells = [str(counts[symbol]) for counts in columns]
        print(symbol + ': ' + ' '.join(cells))
示例#17
0
def tran():
    """Return the transition/transversion ratio for ``rosalind_tran.txt``.

    The first two sequences are compared position by position (up to the
    length of the shorter one). A mismatch is a transition when both symbols
    are in ``"AG"`` or both are in ``"CT"``; every other mismatch is a
    transversion.

    Raises:
        ZeroDivisionError: if no transversion is found.
    """
    records = rosalind_utils.read_fasta("rosalind_tran.txt")
    strands = [record[1] for record in records]

    # Purines and pyrimidines; a substitution inside one group is a transition.
    base_groups = ("AG", "CT")

    transitions = 0
    transversions = 0
    for x, y in zip(strands[0], strands[1]):
        if x == y:
            continue
        if any(x in group and y in group for group in base_groups):
            transitions += 1
        else:
            transversions += 1

    return float(transitions) / transversions
示例#18
0
def tran():
    """Compute transitions divided by transversions for ``rosalind_tran.txt``.

    Only the first two sequences are used and they are paired with ``zip``,
    so surplus symbols of the longer one are ignored. Mismatching pairs are
    classified as transitions (both symbols in ``"AG"`` or both in ``"CT"``)
    or transversions (anything else).

    Raises:
        ZeroDivisionError: if the sequences contain no transversion.
    """
    fasta = rosalind_utils.read_fasta("rosalind_tran.txt")
    sequences = [entry[1] for entry in fasta]

    purines = "AG"
    pyrimidines = "CT"

    mismatches = [(x, y)
                  for x, y in zip(sequences[0], sequences[1])
                  if not x == y]
    transition = 0
    for x, y in mismatches:
        both_purines = x in purines and y in purines
        both_pyrimidines = x in pyrimidines and y in pyrimidines
        if both_purines or both_pyrimidines:
            transition += 1
    # Every mismatch that is not a transition is a transversion.
    transversion = len(mismatches) - transition

    return float(transition) / transversion
示例#19
0
def gc():
    """Report the record with maximal GC content in ``rosalind_gc.txt``.

    Prints the record description on one line and
    ``rosalind_utils.gc_content(seq) * 100`` on the next. The earliest record
    wins a tie; an input without records makes ``max`` raise ``ValueError``.
    """
    fasta = rosalind_utils.read_fasta("rosalind_gc.txt")
    ratios = [(name, rosalind_utils.gc_content(dna)) for name, dna in fasta]
    winner = max(ratios, key=lambda entry: entry[1])
    name, ratio = winner
    print(name)
    print(ratio * 100)
示例#20
0
def pdst():
    """Print the pairwise ``p_distance`` matrix for ``rosalind_pdst.txt``.

    One output line is produced per record; it holds ``p_distance`` between
    that record's sequence and every record's sequence (itself included),
    separated by single spaces.
    """
    records = rosalind_utils.read_fasta("rosalind_pdst.txt")
    for row_rec in records:
        cells = [str(p_distance(row_rec[1], col_rec[1]))
                 for col_rec in records]
        # Each row ends with a single space before the newline; this keeps
        # the output byte-for-byte as produced by comma-terminated prints.
        print(' '.join(cells) + ' ')
示例#21
0
def pdst():
    """Write the distance matrix of ``rosalind_pdst.txt`` to stdout.

    Every record's sequence is compared with every record's sequence through
    ``p_distance``; the values for one record form one space-separated line.
    """
    entries = rosalind_utils.read_fasta("rosalind_pdst.txt")
    for left in entries:
        line = ''
        for right in entries:
            # Every value is followed by one space, so each line carries a
            # trailing space, matching comma-terminated print output.
            line += str(p_distance(left[1], right[1])) + ' '
        print(line)