# mouse)
# print utils_distance.distance_species(monkey,
#                                       mouse)

# (dictionary key, input file) for each influenza table, in load order.
flu_sources = (
    ('swineFlu', 'results/flu_elmdict_swine'),
    ('chickenFlu', 'results/flu_elmdict_chicken'),
    ('humanFlu', 'results/flu_elmdict_human'),
)

virus2dict = {}
for flu_name, flu_path in flu_sources:
    # The second argument (0.4) is passed through unchanged; its meaning is
    # defined by utils.get_seq2count_dict, which is not visible here.
    virus2dict[flu_name] = utils.get_seq2count_dict(flu_path, 0.4)

species2dict = {}
for genome in ('H_sapiens', 'Gallus_gallus', 'Sus_scrofa'):
    genome_path = 'results/elmdict_' + genome + '.txt'
    species2dict[genome] = utils.get_seq2count_dict_for_seqs(genome_path,
                                                             0.0,
                                                             virus2dict)

# The virus tables take part in the distance computation next to the hosts.
species2dict.update(virus2dict)

d = utils_distance.distance_matrix(species2dict)
elm_d = utils_distance.elm_distance_matrix(species2dict)

#for elm in elm_d:
#    for species_pair in elm_d[elm]:
#        print elm + '\t' + species_pair + '\t' + str(elm_d[elm][species_pair])

# One tab-separated line per unordered pair of keys of the distance matrix.
for s1, s2 in itertools.combinations(d.keys(), 2):
    print('\t'.join((s1, s2, str(d[s1][s2]))))

utils_plot.distance_heatmap(elm_d, 'test.png')
# NOTE(review): this physical line starts with the tail of a loader whose
# `def` lies above the visible chunk: each row is stripped, split on tabs and
# unpacked as (protein, elm, cons), cons/100 is stored in d[protein][elm],
# and d is returned.
#
# The script that follows works on genomes 'H1N1' and 'H3N2' with species
# 'swine' (the 'H5N1'/'H9N2' and 'chicken' settings are commented out):
#   * conserved[g] is loaded by get_conserved() from
#     species + '.' + g + '.elms.90', and all_conserved[g] by
#     get_all_conserved() from species + '.' + g + '.elms.conservation'.
#   * For every protein in conserved['H1N1'], the elm keys found in
#     conserved[g][protein] for any genome are collected; d[elm][g] is then
#     set to 1.0 when the elm is in conserved[g][protein], otherwise to
#     all_conserved[g][protein][elm] when present, otherwise to 0.0.
#   * d is passed to utils_plot.distance_heatmap() together with
#     species + '.' + protein + '.png' (presumably the output image path --
#     TODO confirm against utils_plot).
# NOTE(review): proteins are taken from conserved['H1N1'] only; whether
# conserved['H3N2'][protein] can raise KeyError depends on what
# get_conserved() returns -- verify.
# NOTE(review): the statements below appear to have lost their line breaks
# and indentation; restore them before running.
for line in f: [protein, elm, cons] = line.strip().split('\t') d[protein][elm] = float(cons)/float(100) return d #genomes = ('H5N1', 'H9N2') #species = 'chicken' genomes = ('H1N1', 'H3N2') species = 'swine' conserved = {} all_conserved = {} for g in genomes: conserved[g] = get_conserved(species + '.' + g + '.elms.90') all_conserved[g] = get_all_conserved(species + '.' + g + '.elms.conservation') for protein in conserved['H1N1']: d = defaultdict(dict) elms = {} for g in genomes: for elm in conserved[g][protein]: elms[elm] = True for g in genomes: for elm in elms: if elm in conserved[g][protein]: d[elm][g] = float(1) elif elm in all_conserved[g][protein]: d[elm][g] = all_conserved[g][protein][elm] else: d[elm][g] = float(0) utils_plot.distance_heatmap(d, species + '.' + protein + '.png')
def get_all_conserved(afile):
    """Load per-protein conservation fractions from a tab-separated file.

    Every non-blank line must hold exactly three tab-separated fields:
    protein, elm and a conservation value.  The value is divided by 100
    before it is stored.

    Args:
        afile: path of the file to read.

    Returns:
        A defaultdict(dict) mapping protein -> {elm: conservation / 100}.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            fields, or its third field cannot be converted to float.
    """
    d = defaultdict(dict)
    with open(afile) as f:
        for line in f:
            row = line.strip()
            if not row:
                # A blank line (typically a trailing one at end of file)
                # splits into a single empty field and used to abort the
                # whole load with an unpacking ValueError.
                continue
            protein, elm, cons = row.split('\t')
            d[protein][elm] = float(cons) / float(100)
    return d


genomes = ('human', 'swine', 'chicken', 'equine')
conserved = {}
all_conserved = {}
for g in genomes:
    conserved[g] = get_conserved('results/' + g + '.elms.90')
    all_conserved[g] = get_all_conserved('results/' + g + '.elms.conservation')

# One heatmap per protein listed for 'human'.
# NOTE(review): conserved[g][protein] is indexed for every genome; whether a
# protein missing from another genome raises KeyError depends on what
# get_conserved() returns -- verify.
for protein in conserved['human']:
    d = defaultdict(dict)

    # Union of the elms that appear in conserved[g][protein] for any genome.
    elms = {}
    for g in genomes:
        for elm in conserved[g][protein]:
            elms[elm] = True

    for g in genomes:
        for elm in elms:
            if elm in conserved[g][protein]:
                # Present in the '.elms.90' table for this genome.
                d[elm][g] = float(1)
            else:
                # Fall back to the value from the '.elms.conservation'
                # table, or 0 when the elm is absent from it too.
                d[elm][g] = all_conserved[g][protein].get(elm, float(0))

    utils_plot.distance_heatmap(d, protein + '.all.png')