def get_four_position_fig(four_pos_results, positions, figsize,
                          group_label=None, group_ref=None, figwidth=None,
                          xtick_fontsize=14, ytick_fontsize=14):
    """Draw the logo for the single four-position combination of positions.

    Args:
        four_pos_results: mapping keyed by the 4-tuple of positions; the
            entry provides ``'rel_entropy'`` and a ``'stats'`` table with
            columns ``base1`` .. ``base4`` and ``ret``.
        positions: sequence of positions; it must yield exactly one
            combination of four, otherwise the assertion fails.
        figsize: passed to ``pyplot.figure``.
        group_label, group_ref: forwarded to ``get_selected_indices`` to
            choose the rows of the stats table that are drawn.
        figwidth: accepted but not used by this function.
        xtick_fontsize, ytick_fontsize: forwarded to
            ``logo.draw_multi_position``.

    Returns:
        The figure created by ``pyplot.figure``.
    """
    quartets = list(combinations(positions, 4))
    assert len(quartets) == 1
    result = four_pos_results[quartets[0]]

    rel_entropy = result['rel_entropy']
    ylim = logo.est_ylim([rel_entropy])

    fig = pyplot.figure(figsize=figsize)
    ax = fig.gca()

    # One extra column is allocated; it stays empty at the midpoint.
    # NOTE(review): presumably the gap marks the mutated position -- confirm.
    num_pos = len(positions) + 1
    mid_pos = num_pos // 2

    position_re = numpy.zeros((num_pos,), float)
    characters = numpy.zeros((num_pos, 256), str)
    rets = numpy.zeros((256, num_pos), float)

    # Columns at or beyond the midpoint are shifted right by one so that
    # the midpoint column is skipped.
    indices = [i + 1 if i >= mid_pos else i for i in range(4)]
    position_re.put(indices, rel_entropy)

    stats = result['stats']
    selected = get_selected_indices(stats, group_label=group_label,
                                    group_ref=group_ref)
    mut_stats = stats[selected][['base1', 'base2', 'base3', 'base4', 'ret']]
    mut_stats = mut_stats.sort_values(by='ret')

    base_columns = ('base1', 'base2', 'base3', 'base4')
    for column, base_name in zip(indices, base_columns):
        characters[column] = list(mut_stats[base_name])
        rets[:, column] = mut_stats['ret']

    heights = get_re_char_heights(rets, re_positionwise=position_re)
    logo.draw_multi_position(char_heights=heights.T,
                             characters=characters,
                             position_indices=indices,
                             ax=ax,
                             ylim=ylim,
                             xtick_fontsize=xtick_fontsize,
                             ytick_fontsize=ytick_fontsize)
    return fig
def draw_position_grid(directions, sample_size=False, width=8, height=8,
                       title_space=1.1, axis_font_size=20, tick_font_size=10,
                       ylim=None):
    """Draw a 4 x 4 grid of multi-position logos, one per mutation direction.

    Rows are indexed by the base before ``'to'`` in the direction label and
    columns by the base after it, both in the order C, T, A, G. A key such
    as ``'CtoT'`` is therefore drawn at row 0, column 1.

    Args:
        directions: mapping of direction label (``'<base>to<base>'``) to the
            per-position results for that direction. Each result set is
            keyed by position and each entry provides a ``'stats'`` table
            with a ``'count'`` column.
        sample_size: if true, annotate each drawn panel with ``N=<number>``.
        width, height: figure size passed to ``pyplot.subplots``; ``width``
            is also forwarded to the drawing code as ``figwidth``.
        title_space: vertical placement (``y``) of the column titles.
        axis_font_size: font size of the base labels on the top row and the
            left column.
        tick_font_size: font size of the left-column y tick labels and the
            bottom-row x tick labels.
        ylim: shared y limit; when None, the largest value returned by
            ``logo.est_ylim`` over all directions is used.

    Returns:
        The figure created by ``pyplot.subplots``.
    """
    f, axes = pyplot.subplots(4, 4, sharex=True, sharey=True,
                              figsize=(width, height))

    bases = list('CTAG')
    positions = None
    plottables = []
    adaptive_y = 0
    for direction in directions:
        data = directions[direction]
        if positions is None:
            # The position order is taken from the first direction and
            # reused for every other direction.
            positions = sorted(data.keys())

        # NOTE(review): the count total is halved -- presumably each
        # observation appears in two rows of the table; confirm.
        number = data[positions[0]]['stats']["count"].sum() // 2
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottables.append([direction, heights, characters, indices, number])

    if ylim is None:
        ylim = adaptive_y

    for direction, heights, characters, indices, number in plottables:
        fr, to = map(bases.index, direction.split('to'))
        ax = axes[fr, to]
        logo.draw_multi_position(heights, characters=characters,
                                 position_indices=indices, ylim=ylim,
                                 ax=ax, figwidth=width, verbose=False)
        if sample_size:
            y = ax.get_ylim()[1]
            ax.text(0.2, y * 0.85, "N={:,}".format(number), fontsize=10)

    float_format = format_float(1e-3, float_places=2)

    for i in range(4):
        top_ax = axes[0, i]
        # Honour the title_space parameter (previously hard-coded to 1.1,
        # which is also the parameter's default).
        top_ax.set_title(bases[i], fontsize=axis_font_size, weight="bold",
                         y=title_space)

        lft_ax = axes[i, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(tick_font_size)
            yticklabel.set_rotation(0)

        lft_ax.yaxis.set_major_formatter(FuncFormatter(float_format))
        lft_ax.set_ylabel(bases[i], rotation=0, fontsize=axis_font_size,
                          weight="bold")
        lft_ax.yaxis.labelpad = axis_font_size

        btm_ax = axes[-1, i]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(tick_font_size)
            xticklabel.set_rotation(0)

    f.tight_layout()
    return f
def get_single_position_fig(single_results, positions, figsize,
                            group_label=None, group_ref=None, figwidth=None,
                            xlabel_fontsize=14, ylabel_fontsize=14,
                            xtick_fontsize=14, ytick_fontsize=14):
    """Draw a logo with one column per position plus an empty middle column.

    Args:
        single_results: mapping of position to an entry providing
            ``'rel_entropy'`` and a ``'stats'`` table with ``base`` and
            ``ret`` columns.
        positions: ordered positions to draw.
        figsize, figwidth: forwarded to ``logo.draw_multi_position``; when
            ``figwidth`` is truthy it is also applied to the returned
            figure with ``set_figwidth``.
        group_label, group_ref: forwarded to ``get_selected_indices`` to
            choose the rows of each stats table that are drawn.
        xlabel_fontsize, ylabel_fontsize: font sizes of the axis labels.
        xtick_fontsize, ytick_fontsize: tick label sizes; half of each
            (integer division) is used as the tick padding.

    Returns:
        The figure returned by ``logo.draw_multi_position``.
    """
    num_pos = len(positions) + 1
    gap = num_pos // 2

    position_re = numpy.zeros((num_pos,), float)
    rets = numpy.zeros((4, num_pos), float)
    characters = [list('ACGT') for _ in range(num_pos)]

    for order, pos in enumerate(positions):
        # Skip the middle column: entries at or past it move right by one.
        column = order + 1 if order >= gap else order

        result = single_results[pos]
        stats = result['stats']
        position_re[column] = result['rel_entropy']

        selected = get_selected_indices(stats, group_label=group_label,
                                        group_ref=group_ref)
        mut_stats = stats[selected][['base', 'ret']].sort_values(by='ret')
        characters[column] = list(mut_stats['base'])
        rets[:, column] = mut_stats['ret']

    heights = get_re_char_heights(rets, re_positionwise=position_re)
    fig = logo.draw_multi_position(heights.T,
                                   characters=characters,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figwidth,
                                   verbose=False)

    if figwidth:
        fig.set_figwidth(figwidth)

    ax = fig.gca()
    ax.set_xlabel('Position', fontsize=xlabel_fontsize)
    ax.set_ylabel('RE', rotation='vertical', fontsize=ylabel_fontsize)
    ax.tick_params(axis='x', labelsize=xtick_fontsize,
                   pad=xtick_fontsize // 2, length=0)
    ax.tick_params(axis='y', labelsize=ytick_fontsize,
                   pad=ytick_fontsize // 2)
    return fig
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects"""
    # NOTE(review): the docstring is left untouched because it is presumably
    # shown as command-line help text -- confirm before expanding it.
    #
    # json_path: path of the log-linear stats file to load.
    # plot_cfg: optional path of a plot configuration file.
    # no_type3: when true, util.exclude_type3_fonts() is called first.
    # figpath: output figure path; when empty, "MI.<format>" is written next
    #     to json_path.
    # format: extension used for the default figure name.
    # sample_size, force_overwrite, dry_run: not referenced below; they are
    #     only recorded in the logged arguments.

    # the following is for logging; locals() must be captured before any
    # other local is created so that only the arguments are recorded
    json_path = util.abspath(json_path)
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a real file extension; splitting the whole
        # path on "." broke on extension-less names and dotted directories.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    LOGGER.log_file_path = log_file_path

    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data.keys())
    num_pos = len(positions) + 1
    mid_pos = num_pos // 2

    counts_array = numpy.zeros((4, num_pos), int)
    for order, pos in enumerate(positions):
        # leave the middle column empty: later positions shift right by one
        column = order + 1 if order >= mid_pos else order
        pos_stats = data[pos]['stats']
        counts = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        counts = dict(zip(counts['base'], counts['count']))
        for base, count in counts.items():
            counts_array[DNA.alphabet.index(base), column] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    mi_per_position = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_per_position)

    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = plot_cfg.get('1-way plot', 'figsize')
    ytick_font = plot_cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = plot_cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = plot_cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = plot_cfg.get('1-way plot', 'xlabel_fontsize')

    fig = logo.draw_multi_position(char_hts.T,
                                   characters=[list(DNA)] * num_pos,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figsize[0],
                                   xtick_fontsize=xtick_font,
                                   ytick_fontsize=ytick_font,
                                   sort_data=True)
    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
def grid(fig_config, figpath, format, no_type3):
    """draws an arbitrary shaped grid of mutation motifs based on fig_config"""
    # NOTE(review): the docstring is left untouched because it is presumably
    # shown as command-line help text -- confirm before expanding it.
    #
    # fig_config: object with a ``name`` attribute, passed to
    #     read_plot_array_config to obtain the grid layout.
    # figpath: output figure path; when empty, "drawn_array.<format>" is
    #     written next to fig_config.name.
    # format: extension used for the default figure name.
    # no_type3: when true, util.exclude_type3_fonts() is called first.
    #
    # we read in the config file and determine number of rows and columns
    # paths, headings, etc ..
    # then create the figure and axes and call the mutation_motif drawing code
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(fig_config.name)
        figpath = os.path.join(dirname, "drawn_array.%s" % format)
        log_file_path = os.path.join(dirname, "drawn_array.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a real file extension; splitting the whole
        # path on "." broke on extension-less names and dotted directories.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    util.makedirs(os.path.dirname(figpath))
    LOGGER.log_file_path = log_file_path
    LOGGER.log_message(str(args), label='vars')

    ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \
        read_plot_array_config(fig_config)

    # squeeze=False always yields a (nrows, ncols) array, so single-row,
    # single-column and 1 x 1 grids can all be indexed as axes[row, col].
    fig, axes = pyplot.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                                sharex=True, sharey=True, squeeze=False)
    figwidth = fig.get_figwidth()

    adaptive_y = 0
    plottable = {}
    for coord, path in paths.items():
        data = util.load_loglin_stats(path)
        positions = sorted(data)
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottable[coord] = dict(char_heights=heights,
                                characters=characters,
                                position_indices=indices,
                                figwidth=figwidth,
                                verbose=False)

    # a configured ylim wins over the value estimated from the data
    ylim = axis_cfg.get("ylim", adaptive_y)
    for coord, kwargs in plottable.items():
        kwargs["ax"] = axes[coord]
        kwargs["ylim"] = ylim
        # NOTE(review): fig is rebound to the drawing call's return value,
        # as before; presumably it is the figure owning the axes -- confirm.
        fig = logo.draw_multi_position(**kwargs)

    float_format = format_float(1e-3, float_places=2)

    for col in range(ncols):
        top_ax = axes[0, col]
        top_ax.set_title(col_labels[col],
                         fontsize=axis_cfg["xlabel_fontsize"],
                         weight="bold", y=1.1)

        btm_ax = axes[-1, col]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(axis_cfg["xtick_fontsize"])
            xticklabel.set_rotation(0)
        btm_ax.set_xlabel("Position", fontsize=axis_cfg["xlabel_fontsize"],
                          weight="bold")
        btm_ax.xaxis.labelpad = axis_cfg['xlabel_pad']

    for row in range(nrows):
        lft_ax = axes[row, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(axis_cfg["ytick_fontsize"])
            yticklabel.set_rotation(0)

        lft_ax.yaxis.set_major_formatter(FuncFormatter(float_format))
        lft_ax.yaxis.labelpad = axis_cfg['ylabel_pad']
        lft_ax.set_ylabel(row_labels[row], rotation=0,
                          fontsize=axis_cfg['ylabel_fontsize'],
                          weight="bold")

    fig.tight_layout()
    fig.savefig(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
def get_three_position_fig(three_pos_results, positions, figsize,
                           group_label=None, group_ref=None, figwidth=None,
                           xtick_fontsize=14, ytick_fontsize=14):
    """Draw one logo panel per three-position combination of positions.

    Panels are laid out on a grid whose cell for each combination comes
    from ``get_resized_array_coordinates3``; grid cells without a
    combination have their frame and both axes hidden.

    Args:
        three_pos_results: mapping keyed by 3-tuples of positions; each
            entry provides ``'rel_entropy'`` and a ``'stats'`` table with
            columns ``base1`` .. ``base3`` and ``ret``.
        positions: list of positions (``len`` and ``index`` are used).
        figsize: passed to ``pyplot.subplots``.
        group_label, group_ref: forwarded to ``get_selected_indices`` to
            choose the rows of each stats table that are drawn.
        figwidth: accepted but not used by this function.
        xtick_fontsize, ytick_fontsize: forwarded to
            ``logo.draw_multi_position``.

    Returns:
        The figure created by ``pyplot.subplots``.
    """
    position_sets = list(combinations(positions, 3))
    array_coords = get_resized_array_coordinates3(positions, position_sets)
    coords = list(array_coords.values())
    xdim = max(v[0] for v in coords) + 1
    ydim = max(v[1] for v in coords) + 1

    # squeeze=False keeps axarr two-dimensional even for a single row or
    # column, so axarr[row, col] below is always valid.
    fig, axarr = pyplot.subplots(xdim, ydim, figsize=figsize,
                                 sharex=True, sharey=True, squeeze=False)

    # blank out the grid cells that hold no combination
    for i in range(xdim):
        for j in range(ydim):
            if (i, j) in coords:
                continue

            ax = axarr[i, j]
            ax.set_frame_on(False)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    num_pos = len(positions) + 1
    mid_pos = num_pos // 2

    rel_entropies = [three_pos_results[position_set]['rel_entropy']
                     for position_set in position_sets]
    ylim = logo.est_ylim(rel_entropies)

    # NOTE(review): position_re and characters are created once and shared
    # by every motif, so values written for earlier motifs remain in the
    # columns a later motif does not touch. Presumably harmless because the
    # drawing call is restricted to position_indices -- confirm.
    position_re = numpy.zeros((num_pos,), float)
    characters = numpy.zeros((num_pos, 64), str)

    base_columns = ('base1', 'base2', 'base3')
    for motif in position_sets:
        rets = numpy.zeros((64, num_pos), float)
        row, col = array_coords[motif]
        ax = axarr[row, col]

        # adjust indices to reflect position along the sequence: columns at
        # or beyond the midpoint shift right by one
        indices = [idx + 1 if idx >= mid_pos else idx
                   for idx in map(positions.index, motif)]

        position_re.put(indices, three_pos_results[motif]['rel_entropy'])

        stats = three_pos_results[motif]['stats']
        selected = get_selected_indices(stats, group_label=group_label,
                                        group_ref=group_ref)
        mut_stats = stats[selected][['base1', 'base2', 'base3', 'ret']]
        mut_stats = mut_stats.sort_values(by='ret')

        for column, base_name in zip(indices, base_columns):
            characters[column] = list(mut_stats[base_name])

        for column in indices:
            rets[:, column] = mut_stats['ret']

        heights = get_re_char_heights(rets, re_positionwise=position_re)
        logo.draw_multi_position(char_heights=heights.T,
                                 characters=characters,
                                 position_indices=indices,
                                 ax=ax,
                                 ylim=ylim,
                                 xtick_fontsize=xtick_fontsize,
                                 ytick_fontsize=ytick_fontsize)

    return fig