def question1():
    """Run and print the eyeless-protein experiments for Questions 1 and 2.

    Question 1: reads the proteins at HUMAN_EYELESS_URL and
    FRUITFLY_EYELESS_URL and the scoring matrix at PAM50_URL, prints both
    protein lengths, computes their local alignment, and prints the score
    followed by the two aligned sequences.

    Question 2: removes the '-' characters from each aligned sequence,
    builds an alignment matrix of each against the sequence read from
    CONSENSUS_PAX_URL, and prints the value ``agreement`` returns for each.
    """
    def without_dashes(aligned):
        # Keep every symbol of the aligned sequence except the gap marker.
        return ''.join([symbol for symbol in aligned if symbol != '-'])

    human_seq = read_protein(HUMAN_EYELESS_URL)
    fly_seq = read_protein(FRUITFLY_EYELESS_URL)
    print(len(human_seq), len(fly_seq))

    pam50 = read_scoring_matrix(PAM50_URL)
    # The final flag is False here and True for the consensus comparisons
    # below; judging by the variable names it selects local vs. global
    # matrices -- TODO confirm against compute_alignment_matrix.
    local_matrix = compute_alignment_matrix(human_seq, fly_seq, pam50, False)
    best_score, aligned_human, aligned_fly = compute_local_alignment(
        human_seq, fly_seq, pam50, local_matrix)

    print('Question 1')
    for item in (best_score, aligned_human, aligned_fly):
        print(item)
    print()

    print('Question 2')
    pax_consensus = read_protein(CONSENSUS_PAX_URL)
    stripped = [without_dashes(aligned_human), without_dashes(aligned_fly)]
    # Build both matrices before computing either agreement, keeping the
    # same call order as the straight-line version of this routine.
    matrices = [
        compute_alignment_matrix(seq, pax_consensus, pam50, True)
        for seq in stripped
    ]
    agreements = [
        agreement(seq, pax_consensus, pam50, matrix)
        for seq, matrix in zip(stripped, matrices)
    ]
    templates = (
        'Human vs Consensus agree = %s%%',
        'Fly vs Consensus agree = %s%%',
    )
    for template, value in zip(templates, agreements):
        print(template % value)
def _tally_scores(scores):
    """Return a dict mapping each score in *scores* to its occurrence count."""
    counts = {}
    for score in scores:
        counts[score] = counts.get(score, 0) + 1
    return counts


def generate_null_distribution(seq_x, seq_y, scoring_matrix, num_trials):
    """Build the distribution of local-alignment scores of seq_x vs. shuffled seq_y.

    If 'distr.json' exists in the current directory and can be parsed, the
    cached scores are returned and no trials are run.
    NOTE(review): the cache is not keyed on the arguments, so a stale file is
    reused even when called with different sequences or trial counts.

    Otherwise, for each of ``num_trials`` trials, ``seq_y`` is shuffled, the
    local alignment against ``seq_x`` is computed, and the score is recorded.
    The results are then written to 'distr.json'.

    Args:
        seq_x: First sequence (kept fixed).
        seq_y: Second sequence; a shuffled copy is aligned in each trial.
        scoring_matrix: Scoring matrix forwarded to the alignment helpers.
        num_trials: Number of random trials to run when no cache is usable.

    Returns:
        A ``(distr, raw)`` pair: ``distr`` maps each score to the number of
        times it occurred, ``raw`` is the list of scores in trial order.
    """
    try:
        with open('distr.json') as f:
            raw = loads(f.read())['raw']
        # JSON object keys are always strings, so the stored 'distr' mapping
        # would come back keyed by str instead of by the numeric score.
        # Rebuilding the histogram from the raw scores keeps the cached result
        # identical in shape to a freshly computed one.
        return _tally_scores(raw), raw
    except (OSError, ValueError, KeyError, TypeError) as e:
        # Missing, unreadable or malformed cache: fall back to recomputing.
        print('cant open file', str(e))

    raw = []
    for _ in range(num_trials):
        letters = list(seq_y)
        shuffle(letters)
        rand_y = ''.join(letters)
        align = compute_alignment_matrix(seq_x, rand_y, scoring_matrix, False)
        score, _aligned_x, _aligned_y = compute_local_alignment(
            seq_x, rand_y, scoring_matrix, align)
        raw.append(score)

    distr = _tally_scores(raw)
    with open('distr.json', 'w') as f:
        f.write(dumps({'distr': distr, 'raw': raw}))
    return distr, raw
def compare(n, nh, nf, alpha, cons, scoring, align):
    """Print the mean consensus agreement of locally aligned random sequences.

    For each of ``n`` trials, two sequences are generated with
    ``rprot(nh, alpha)`` and ``rprot(nf, alpha)`` (presumably random proteins
    of those lengths over alphabet ``alpha`` -- confirm against ``rprot``),
    locally aligned against each other, stripped of '-' characters, and
    scored against ``cons`` with ``agreement``. The two averages are printed.

    Args:
        n: Number of random trials.
        nh: First argument passed to ``rprot`` for the "human" sequence.
        nf: First argument passed to ``rprot`` for the "fly" sequence.
        alpha: Second argument passed to ``rprot``.
        cons: Consensus sequence the aligned fragments are compared to.
        scoring: Scoring matrix forwarded to the alignment helpers.
        align: Unused; retained so existing callers keep working. A single
            precomputed matrix cannot match the sequences generated here,
            so the matrices are now built per sequence pair.

    Raises:
        ZeroDivisionError: If ``n`` is 0, since the averages divide by ``n``.
    """
    def strip_dashes(aligned):
        return ''.join([symbol for symbol in aligned if symbol != '-'])

    human_agreements, fly_agreements = [], []
    for _ in range(n):
        rand_human, rand_fly = rprot(nh, alpha), rprot(nf, alpha)
        # The alignment matrix must be built from the very sequences being
        # traced back (flag False, as in question1's local alignment).
        local_matrix = compute_alignment_matrix(
            rand_human, rand_fly, scoring, False)
        _, aligned_human, aligned_fly = compute_local_alignment(
            rand_human, rand_fly, scoring, local_matrix)

        for aligned, bucket in ((aligned_human, human_agreements),
                                (aligned_fly, fly_agreements)):
            fragment = strip_dashes(aligned)
            # Agreement is computed with a matrix of the fragment against the
            # consensus (flag True), matching how question1 calls agreement.
            global_matrix = compute_alignment_matrix(
                fragment, cons, scoring, True)
            bucket.append(agreement(fragment, cons, scoring, global_matrix))

    hc_agree = sum(human_agreements) / float(n)
    fc_agree = sum(fly_agreements) / float(n)
    print('Random Human vs Consensus agree = %s%%' % hc_agree)
    print('Random Fly vs Consensus agree = %s%%' % fc_agree)