#inverse variance vars[elm][seq] = numpy.var(numpy.array(seq_vals[seq])) #tmp_input = 'tmp_input' + str(random.randint(0,100)) tmp_input = 'plots/for_aydin/cos_host_virus' + suffix + '.tab' with open(tmp_input, 'w') as f: f.write('Virus_Host\tELM\tDistance\n') for virus in viruses: virus2elmFreqs[virus] = utils.get_seq2count_dict(os.path.join(local_settings.RESULTSDIR, 'flu_elmdict_' + virus), float(0)) for elm in virus2conservedELMs[virus]: if 'FAIL' not in elm: for host in hosts: if elm in use_seqs: dis = utils.klDistance(virus2elmFreqs[virus][elm], host2elmFreqs[host][elm], use_seqs[elm]) else: dis = numpy.NaN f.write('%s\t%s\t%.10f\n' % (viruses[virus] + hosts[host], elm, dis)) out_file = 'plots/for_aydin/cos_dis_heatmap' + suffix + '.png' tmp_r = 'tmp_r' + str(random.randint(0,100)) with open(tmp_r, 'w') as f: f.write('library(ggplot2)\n') f.write("d<-read.delim('" + tmp_input + "', header=T, sep='\\t')\n") f.write("png('" + out_file + "')\n") f.write("ggplot(d,aes(Virus_Host,ELM)) + opts(axis.text.y = theme_blank()) + geom_tile(aes(fill=Distance),colour='white') + scale_fill_gradient(low='red',high='steelblue')\n") f.write('dev.off()\n') os.system('R < ' + tmp_r + ' --no-save')
import utils


def get_counts(afile, elm="MOD_CK1_1"):
    """Return a {sequence: frequency} dict for one ELM read from afile.

    Each non-blank line of afile must hold exactly four tab-separated
    fields: ELM name, sequence, count, frequency.  Only rows whose first
    field equals elm are kept, and their frequency is converted to float.
    If a sequence appears more than once for that ELM, the last row wins.

    afile -- path of the tab-separated file to read.
    elm   -- ELM name to select; defaults to "MOD_CK1_1".

    Raises ValueError if a non-blank line does not split into exactly
    four fields, or if a selected row's frequency is not a valid float.
    """
    freqs = {}
    with open(afile) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on the four-way unpack below.
                continue
            # The count column is read only to enforce the four-field
            # layout; it is not used.
            name, seq, _count, freq = line.split("\t")
            if name == elm:
                freqs[seq] = float(freq)
    return freqs


flu_counts = get_counts("results/flu_elmdict_human")
host_counts = get_counts("results/elmdict_Sus_scrofa.redo")
# A parenthesised single-argument print produces the same output on
# Python 2 and Python 3.
# NOTE(review): klDistance is called with two arguments here; confirm this
# matches its signature in utils.
print(utils.klDistance(flu_counts, host_counts))