Пример #1
0
def main(args):
    """Draw per-ELM host/virus bar plots for ELMs that pass the filters.

    Args:
        args: argv-style list laid out as
            ``[prog, file_1, species_1, ..., file_n, species_n, cutoff,
            plot_dir]``.  ``cutoff`` must parse as a float and ``plot_dir``
            is the directory that receives the ``<elm>.virus_hosts.png``
            files.

    Files whose path contains ``'flu'`` are loaded with
    ``utils.get_seq2count_dict`` and treated as virus data; every other
    file is loaded with ``utils.get_seq2count_dict_for_seqs`` and treated
    as host data.  The species names ``'Sus_scrofa'``, ``'H_sapiens'`` and
    ``'Gallus_gallus'`` must be among the inputs for the distance filter
    below to work (a missing one raises ``KeyError``).

    Raises:
        IndexError: if ``args`` has fewer than two entries.
        ValueError: if the cutoff entry is not a valid float.
    """
    # Entries 1 .. len(args)-3 alternate between a file path and its species.
    file_species_pairs = [(args[i], args[i + 1])
                          for i in range(1, len(args) - 2, 2)]

    # Read the trailing options from ``args`` (not ``sys.argv``) so the
    # function honours the list it was actually given.
    cutoff = float(args[-2])
    plot_dir = args[-1]

    # First grab virus ELMs; remember which file belongs to which host so
    # that each host is later loaded from its own file.
    virus2elms = {}
    host2file = {}
    for path, species in file_species_pairs:
        if 'flu' in path:
            virus2elms[species] = utils.get_seq2count_dict(path, cutoff)
        else:
            host2file[species] = path

    # Every ELM seen in at least one virus dictionary.
    elms = set()
    for virus_counts in virus2elms.values():
        elms.update(virus_counts)

    species2elms = {}
    for species, path in host2file.items():
        # NOTE(review): the third argument is the whole virus2elms mapping,
        # exactly as in the original code; confirm that this (rather than
        # the flat ``elms`` collection) is what the helper expects.
        species2elms[species] = utils.get_seq2count_dict_for_seqs(path,
                                                                  cutoff,
                                                                  virus2elms)
    # Virus entries share the same mapping and win on a name clash.
    species2elms.update(virus2elms)

    for elm in elms:
        if not utils.check_ones(species2elms, elm):
            continue
        pig = species2elms['Sus_scrofa'][elm]
        # NOTE(review): the first threshold is -1 while the second is 0; if
        # distance_elms never returns a negative value the first test is
        # always true and the second is never evaluated.  Kept as written --
        # confirm the intended thresholds.
        if (utils_distance.distance_elms(pig,
                                         species2elms['H_sapiens'][elm])
                > float(-1)
                or utils_distance.distance_elms(
                    pig, species2elms['Gallus_gallus'][elm]) > float(0)):
            utils_plot.elm_host_barplot(species2elms, elm,
                                        os.path.join(plot_dir,
                                                     elm + '.virus_hosts.png'))
Пример #2
0
# Load the ELM count dictionary for the human flu data set.  0.05 is handed
# to get_seq2count_dict as its second argument (presumably a cutoff --
# confirm against utils).
# NOTE(review): virus2dict itself is created above this excerpt; the
# 'chicken' and 'swine' entries read further down are presumably filled in
# there as well.
virus2dict['human'] = utils.get_seq2count_dict('results/flu_elmdict_human',
                                                  float(0.05))

# Host genomes to load; each one is read from 'results/elmdict_<name>.txt'
# with 0.01 as the second argument.
genomes = ('H_sapiens', 'Gallus_gallus', 'Sus_scrofa')
species2dict = {}
for g in genomes:
    species2dict[g] = utils.get_seq2count_dict('results/elmdict_'
                                               + g + '.txt',
                                               float(0.01))

# use_elms collects (as keys mapped to True) the ELMs that are present in
# the human, chicken and swine virus dictionaries and for which no pair of
# virus dictionaries has a distance of exactly 0.
use_elms = {}
for elm in virus2dict['human']:
    if elm in virus2dict['chicken'] and elm in virus2dict['swine']:
        distance_is_0 = False
        # Every unordered pair of keys in virus2dict is compared; there is
        # no early exit, so all pairs are evaluated even after a zero
        # distance has been found.
        # NOTE(review): the pairs come from all keys of virus2dict, not only
        # the three checked above -- an extra key whose dictionary lacks
        # this ELM would presumably fail on the lookup; verify.
        for v1, v2 in itertools.combinations(virus2dict.keys(), 2):
            distance = utils_distance.distance_elms(virus2dict[v1][elm],
                                                    virus2dict[v2][elm])
            if distance == float(0):
                distance_is_0 = True
        if not distance_is_0:
            use_elms[elm] = True

host_elms = {}
for elm in species2dict['H_sapiens']:
    if elm in species2dict['Gallus_gallus'] and elm in species2dict['Sus_scrofa']:
        distance_is_0 = False
        for h1, h2 in itertools.combinations(genomes, 2):
            distance = utils_distance.distance_elms(species2dict[h1][elm],
                                                    species2dict[h2][elm])
            if distance == float(0):
                distance_is_0 = True
        if not distance_is_0: