def test_get_mit_pc1(self):
    """MI terms match the reference values when a pseudocount of 1 is used"""
    # reference values are stated to 5 decimal places, so the observed
    # terms are rounded to the same precision before comparing
    expected = array(
        [
            [0.02377, 0.02377],
            [0, -0.02388],
            [0.09895, -0.01639],
            [0.09895, 0.22805],
        ]
    )
    observed = get_mit(self.ref_data, pseudocount=1, check_valid=True)
    numpy.testing.assert_equal(observed.round(decimals=5), expected)
def test_get_mit_pc0(self):
    """MI terms match the reference values when no pseudocount is used"""
    # reference values are stated to 5 decimal places, so the observed
    # terms are rounded to the same precision before comparing
    expected = array(
        [
            [0.03561, 0.03561],
            [0.05782, 0],
            [0.16781, -0.02109],
            [0.16781, 0.0],
        ]
    )
    observed = get_mit(self.ref_data, pseudocount=0, check_valid=True)
    numpy.testing.assert_equal(observed.round(decimals=5), expected)
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects"""
    # NOTE(review): the docstring above is deliberately left unchanged; if this
    # function is registered as a command-line command it may double as the
    # user-facing help text -- confirm before rewording it.
    #
    # Arguments, as used by the code below:
    #   json_path  path handed to util.load_loglin_stats(); its directory is
    #              also the default output location
    #   plot_cfg   optional path passed to util.get_plot_configs(); logged as
    #              an input file when truthy
    #   no_type3   when truthy, util.exclude_type3_fonts() is called
    #   figpath    output figure path; when falsy, "MI.<format>" is written
    #              next to json_path
    #   format     file extension used only for the default figure name
    #   sample_size, force_overwrite, dry_run
    #              accepted but not referenced in this function (they do end
    #              up in the logged argument snapshot)
    #
    # Side effects: sets LOGGER.log_file_path, writes the figure with
    # fig.savefig() and reports the written path via click.secho().

    # the following is for logging
    json_path = util.abspath(json_path)
    # Copy so the logged snapshot holds exactly the call arguments and cannot
    # pick up locals defined later, whatever locals() returns on this Python.
    args = dict(locals())
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips an extension from the final path component, so
        # a figpath without an extension (or with dots in a directory name)
        # still yields a log file sitting next to the figure.
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    LOGGER.log_file_path = log_file_path

    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data.keys())
    # One more column than there are recorded positions: column `mp` is never
    # written and therefore stays all-zero (presumably the central, mutated
    # position -- TODO confirm).
    num_pos = len(positions) + 1
    mp = num_pos // 2
    # 4 rows: assumes DNA.alphabet holds four bases -- TODO confirm
    counts_array = numpy.zeros((4, num_pos), int)
    for i, pos in enumerate(positions):
        # positions at or beyond the midpoint are shifted right by one column
        column = i + 1 if i >= mp else i
        pos_stats = data[pos]['stats']
        # keep only the rows flagged 'M' in the 'mut' column
        counts = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        for base, count in zip(counts['base'], counts['count']):
            counts_array[DNA.alphabet.index(base), column] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    # per-position total; named so it does not shadow this function
    mi_per_position = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_per_position)

    cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = cfg.get('1-way plot', 'figsize')
    ytick_font = cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = cfg.get('1-way plot', 'xlabel_fontsize')
    fig = logo.draw_multi_position(
        char_hts.T,
        characters=[list(DNA)] * num_pos,
        position_indices=list(range(num_pos)),
        figsize=figsize,
        figwidth=figsize[0],
        xtick_fontsize=xtick_font,
        ytick_fontsize=ytick_font,
        sort_data=True,
    )

    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")