def main(args):
    """Plot a host/virus bar chart for each qualifying ELM.

    ``args`` is laid out like ``sys.argv``:

        args[0]      ignored (program name slot)
        args[1:-2]   alternating ``file species`` pairs
        args[-2]     cutoff, converted with ``float``
        args[-1]     directory the ``.virus_hosts.png`` plots are written to

    A pair whose file name contains ``'flu'`` is loaded as a virus with
    ``utils.get_seq2count_dict``; every other pair is treated as a host and
    loaded with ``utils.get_seq2count_dict_for_seqs``.  The plotting step
    indexes the combined mapping by the keys ``'Sus_scrofa'``,
    ``'H_sapiens'`` and ``'Gallus_gallus'``, so those species must be among
    the pairs whenever an ELM passes ``utils.check_ones``.
    """
    # Read the trailing options from ``args`` (not ``sys.argv``) so the
    # function behaves the same no matter who calls it.
    cutoff = float(args[-2])
    plot_dir = args[-1]

    file_species_pairs = [
        [args[i], args[i + 1]] for i in range(1, len(args) - 2, 2)
    ]

    # First grab virus ELMs; for hosts just remember which file belongs to
    # which species so it can be loaded once the virus data is complete.
    virus2elms = {}
    host2file = {}
    for path, species in file_species_pairs:
        if 'flu' in path:
            virus2elms[species] = utils.get_seq2count_dict(path, cutoff)
        else:
            host2file[species] = path

    # Every ELM seen in at least one virus.
    elms = {}
    for species in virus2elms:
        for elm in virus2elms[species]:
            elms[elm] = True

    # Load each host from its OWN file.  The earlier code reused the loop
    # variable left over from the pair loop, i.e. always the last file given.
    species2elms = {}
    for species in host2file:
        species2elms[species] = utils.get_seq2count_dict_for_seqs(
            host2file[species], cutoff, virus2elms)
    for virus in virus2elms:
        species2elms[virus] = virus2elms[virus]

    for elm in elms:
        if not utils.check_ones(species2elms, elm):
            continue
        swine = species2elms['Sus_scrofa'][elm]
        # NOTE(review): the -1 threshold presumably makes the first test
        # always true if distances are non-negative -- confirm it is intended.
        if (utils_distance.distance_elms(
                swine, species2elms['H_sapiens'][elm]) > float(-1)
                or utils_distance.distance_elms(
                    swine, species2elms['Gallus_gallus'][elm]) > float(0)):
            utils_plot.elm_host_barplot(
                species2elms, elm,
                os.path.join(plot_dir, elm + '.virus_hosts.png'))
virus2dict['human'] = utils.get_seq2count_dict('results/flu_elmdict_human', float(0.05)) genomes = ('H_sapiens', 'Gallus_gallus', 'Sus_scrofa') species2dict = {} for g in genomes: species2dict[g] = utils.get_seq2count_dict('results/elmdict_' + g + '.txt', float(0.01)) use_elms = {} for elm in virus2dict['human']: if elm in virus2dict['chicken'] and elm in virus2dict['swine']: distance_is_0 = False for v1, v2 in itertools.combinations(virus2dict.keys(), 2): distance = utils_distance.distance_elms(virus2dict[v1][elm], virus2dict[v2][elm]) if distance == float(0): distance_is_0 = True if not distance_is_0: use_elms[elm] = True host_elms = {} for elm in species2dict['H_sapiens']: if elm in species2dict['Gallus_gallus'] and elm in species2dict['Sus_scrofa']: distance_is_0 = False for h1, h2 in itertools.combinations(genomes, 2): distance = utils_distance.distance_elms(species2dict[h1][elm], species2dict[h2][elm]) if distance == float(0): distance_is_0 = True if not distance_is_0: