Exemplo n.º 1
0
        #inverse variance
        vars[elm][seq] = numpy.var(numpy.array(seq_vals[seq]))

# Write one row per (virus, host, ELM) combination to a tab-separated table at
# a fixed path under plots/for_aydin/, then render that table as a heatmap PNG
# by piping a generated ggplot2 script into R.
tmp_input = 'plots/for_aydin/cos_host_virus' + suffix + '.tab'
with open(tmp_input, 'w') as f:
    f.write('Virus_Host\tELM\tDistance\n')
    for virus in viruses:
        # Side effect: the per-virus frequencies are stored in virus2elmFreqs.
        virus2elmFreqs[virus] = utils.get_seq2count_dict(
            os.path.join(local_settings.RESULTSDIR, 'flu_elmdict_' + virus),
            float(0))
        for elm in virus2conservedELMs[virus]:
            # ELM names containing 'FAIL' are excluded from the table.
            if 'FAIL' in elm:
                continue
            for host in hosts:
                if elm in use_seqs:
                    dis = utils.klDistance(virus2elmFreqs[virus][elm],
                                           host2elmFreqs[host][elm],
                                           use_seqs[elm])
                else:
                    # numpy.nan is the canonical spelling; the numpy.NaN alias
                    # was removed in NumPy 2.0.
                    dis = numpy.nan
                # NOTE(review): '%.10f' renders NaN as the lowercase string
                # 'nan' -- confirm R's read.delim still reads the Distance
                # column as numeric when such rows are present.
                f.write('%s\t%s\t%.10f\n'
                        % (viruses[virus] + hosts[host], elm, dis))

out_file = 'plots/for_aydin/cos_dis_heatmap' + suffix + '.png'

# Imported here because the R script is now written to a uniquely named
# temporary file (instead of 'tmp_r<0..100>' in the working directory, which
# could collide between runs and was never removed) and R is started without
# going through a shell.
import subprocess
import tempfile

r_fd, tmp_r = tempfile.mkstemp(prefix='tmp_r', suffix='.R')
try:
    with os.fdopen(r_fd, 'w') as f:
        f.write('library(ggplot2)\n')
        f.write("d<-read.delim('"
                + tmp_input + "', header=T, sep='\\t')\n")
        f.write("png('" + out_file + "')\n")
        # NOTE(review): opts() and theme_blank() were deprecated in ggplot2
        # 0.9.2 in favour of theme()/element_blank() -- confirm against the
        # installed ggplot2 version before changing the R code.
        f.write("ggplot(d,aes(Virus_Host,ELM)) + opts(axis.text.y = theme_blank()) + geom_tile(aes(fill=Distance),colour='white') + scale_fill_gradient(low='red',high='steelblue')\n")
        f.write('dev.off()\n')
    # Equivalent of `R < tmp_r --no-save`: the script is fed to R on stdin.
    # As before, R's exit status is not checked.
    with open(tmp_r) as r_script:
        subprocess.call(['R', '--no-save'], stdin=r_script)
finally:
    os.remove(tmp_r)
Exemplo n.º 2
0
import utils


def get_counts(afile, elm="MOD_CK1_1"):
    """Read the per-sequence frequencies of one ELM from a tab-separated file.

    Each non-blank line of *afile* must hold exactly four tab-separated
    fields: ELM name, sequence, count and frequency.  Only the rows whose
    ELM name equals *elm* are kept.

    Args:
        afile: Path of the file to read.
        elm: ELM name to select; defaults to "MOD_CK1_1", the value that was
            previously hard-coded.

    Returns:
        A dict mapping each sequence to its frequency as a float.  If a
        sequence occurs more than once for the ELM, the last row wins.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields or
            its frequency field is not a valid float.
    """
    seq2freq = {}
    with open(afile) as f:
        for line in f:
            stripped = line.strip()
            # A blank line (e.g. at the end of the file) would otherwise fail
            # the four-way unpack below.
            if not stripped:
                continue
            name, seq, _count, freq = stripped.split("\t")
            if name == elm:
                seq2freq[seq] = float(freq)
    return seq2freq


# Load the MOD_CK1_1 sequence frequencies for the flu and host result files
# and print the distance utils.klDistance computes between them.
flu_counts = get_counts("results/flu_elmdict_human")
host_counts = get_counts("results/elmdict_Sus_scrofa.redo")
# Parenthesised so the line also parses on Python 3; with a single argument
# the Python 2 print statement produces the same output.
print(utils.klDistance(flu_counts, host_counts))