def cor(ent1, ent2):
    """Correlate the values two mappings hold for their shared elements.

    The candidate elements come from ``utils_distance.get_elements(ent1,
    ent2)``.  Only elements that are keys of both ``ent1`` and ``ent2``
    contribute; their values are collected into two parallel lists that
    are handed to ``utils_stats.pcc``, whose result is returned unchanged.
    """
    # Elements found in only one mapping are skipped.  (An earlier variant
    # that padded the missing side with 0.0 was disabled in the original.)
    shared = [
        elm
        for elm in utils_distance.get_elements(ent1, ent2)
        if elm in ent2 and elm in ent1
    ]
    ls1 = [ent1[elm] for elm in shared]
    ls2 = [ent2[elm] for elm in shared]
    return utils_stats.pcc(ls1, ls2)
# Compare, for selected keys, the human/chicken ratio from the get_freqs
# tables with the human/chicken ratio from the host tables, then report
# utils_stats.pcc over the two ratio vectors and dump the pairs to a file.

# Minimum absolute difference between the human and chicken frequency for a
# key to be considered.
MIN_FREQ_DIFF = 5

human_host_file = "working/Jun29/elmdict_H_sapiens.init"
chicken_host_file = "working/Jun29/elmdict_Gallus_gallus.init"

human_host_freqs = get_host_freqs(human_host_file)
chicken_host_freqs = get_host_freqs(chicken_host_file)

human_freqs = get_freqs(freq_file_human)
chicken_freqs = get_freqs(freq_file_chicken)

v1 = []  # human/chicken ratios taken from human_freqs / chicken_freqs
v2 = []  # human/chicken ratios taken from the host frequency tables
for key in human_freqs:
    # Only keys listed in `uniq` and present in both tables are compared.
    if key not in uniq or key not in chicken_freqs:
        continue

    hf = human_freqs[key]
    cf = chicken_freqs[key]
    if abs(hf - cf) <= MIN_FREQ_DIFF:
        continue

    # The host tables are keyed by the 2nd and 3rd ':'-separated fields
    # of the key.
    sp = key.split(":")
    elmseq = sp[1] + ":" + sp[2]

    # A missing host entry counts as 0 and disqualifies the key below.
    hhf = human_host_freqs.get(elmseq, 0)
    chf = chicken_host_freqs.get(elmseq, 0)
    if chf == 0 or hhf == 0:
        continue

    # float() forces true division on Python 2 as well, matching how the
    # host ratio is computed; it is a no-op if the values are already floats.
    # NOTE(review): cf == 0 would raise ZeroDivisionError here -- presumably
    # get_freqs never stores a zero frequency; confirm.
    v1.append(float(hf) / cf)
    v2.append(float(hhf) / float(chf))
    # NOTE(review): the original indentation was lost; this print is assumed
    # to report each key that contributed a data point -- confirm.
    print(key)

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(utils_stats.pcc(v1, v2))

# One tab-separated "ratio<TAB>host ratio" pair per line.
with open("working/vals", "w") as f:
    for a, b in zip(v1, v2):
        f.write(str(a) + "\t" + str(b) + "\n")