def nbr_matrix(paths_cfg, plot_cfg, figpath, format, no_type3, sample_size,
               force_overwrite, dry_run):
    """draws square matrix of sequence logos from neighbour analysis

    Arguments:
        - paths_cfg: path to a config file with a ``json_paths`` section.
          Each option in that section is a direction and its value is the
          path to a json file, relative to the directory of paths_cfg.
        - plot_cfg, force_overwrite, dry_run: accepted but not used by
          this function.
        - figpath: where the figure is saved. If not given, the figure is
          written as ``nbr_matrix.<format>`` next to paths_cfg.
        - format: file suffix, only used when figpath is not given.
        - no_type3: if true, util.exclude_type3_fonts() is called first.
        - sample_size: passed through to draw_position_grid.

    Exits with status 1 if a listed json file does not exist.
    """
    if no_type3:
        util.exclude_type3_fonts()

    # captured before any other local is bound, so only the arguments
    # end up in the log
    args = locals()
    LOGGER.log_message(str(args), label='vars')

    config_path = util.abspath(paths_cfg)
    indir = os.path.dirname(config_path)
    parser = SafeConfigParser()
    parser.optionxform = str  # stops automatic conversion to lower case
    parser.read(config_path)

    json_paths = {}
    for direction, path in parser.items("json_paths"):
        # assumes paths are relative to indir
        path = os.path.join(indir, path)
        if not os.path.exists(path):
            print("Couldn't find %s" % path)
            print("json file paths should be relative to paths_cfg")
            sys.exit(1)

        json_paths[direction] = path

    if not figpath:
        figpath = os.path.join(indir, "nbr_matrix.%s" % format)
        log_file_path = os.path.join(indir, "nbr_matrix.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a suffix from the final path component, so
        # dots in directory names, or a figpath without any suffix, no
        # longer produce a truncated or empty log file name
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    LOGGER.log_file_path = log_file_path

    plot_data = {}
    for direction, path in json_paths.items():
        LOGGER.input_file(path)
        plot_data[direction] = util.load_loglin_stats(path)

    fig = draw_position_grid(plot_data, sample_size)

    fig.text(0.4, 0.955, "Ending Base", fontsize=20)
    fig.text(0.03, 0.55, "Starting Base", rotation=90, fontsize=20)
    fig.tight_layout(rect=(0.06, 0, 0.95, 0.95))
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
def load_spectra_data(json_path, group_col):
    """returns {starting base: {ending base: proportion}} from json_path

    Arguments:
        - json_path: path handed to util.load_loglin_stats. The loaded
          keys must be a subset of 'CTAG'.
        - group_col: if given, must be 'group' or 'strand'. The column
          actually used is picked from the stats table itself, see below.

    For each starting base, only the rows whose group column (as a string)
    equals '1' (for 'group') or '+' (for 'strand') are used. Each row's
    proportion is rel_entropy * ret / sum(|ret|), and the ending base is
    the last character of the row's 'direction' value.

    Exits with status -1 if no rows match the selected group.
    """
    # for each starting base, we need the total relative entropy
    # we need the ret's for each ending base
    LOGGER.input_file(json_path)
    data = util.load_loglin_stats(json_path)
    bases = sorted(data)
    assert set(data.keys()) <= set('CTAG')

    if group_col:
        # membership in a tuple, not a substring test against the string
        # "strand group" (which also accepted e.g. "and" or "g")
        assert group_col in ("strand", "group"), \
            "group_col must be 'group' or 'strand', got %s" % group_col

    # NOTE(review): the stats table of the first base decides which column
    # is used, whatever group_col was passed in -- confirm this override is
    # intended to apply even when group_col is given
    if 'group' in data[bases[0]]['stats'].columns:
        group_col = 'group'
    else:
        group_col = 'strand'

    selected_group = {'strand': '+', 'group': '1'}.get(group_col, None)
    assert selected_group is not None, selected_group

    result = {}
    for base in bases:
        total_re = data[base]['rel_entropy']
        stats = data[base]['stats']
        subset = stats[stats[group_col].apply(str) == selected_group].copy()
        if subset.empty:
            print("No entries equal to '%s'" % str(selected_group))
            # sys.exit rather than the builtin exit, which is only
            # injected by the site module
            sys.exit(-1)

        total_ret = numpy.fabs(subset["ret"]).sum()
        subset['prop'] = total_re * subset['ret'] / total_ret
        subset['end'] = [d[-1:] for d in subset['direction']]
        # to_records() yields (index, end, prop); the index is not needed
        result[base] = dict(
            (end, prop)
            for _, end, prop in subset[['end', 'prop']].to_records())

    return result
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects

    Arguments:
        - json_path: path handed to util.load_loglin_stats; its keys are
          the positions to draw.
        - plot_cfg: optional path passed to util.get_plot_configs; the
          '1-way plot' section supplies figsize and font sizes.
        - no_type3: if true, util.exclude_type3_fonts() is called first.
        - figpath: where the figure is saved. If not given, the figure is
          written as ``MI.<format>`` next to json_path.
        - format: file suffix, only used when figpath is not given.
        - sample_size, force_overwrite, dry_run: accepted but not used by
          this function.
    """
    # the following is for logging
    json_path = util.abspath(json_path)
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a suffix from the final path component, so
        # dots in directory names, or a figpath without any suffix, no
        # longer produce a truncated or empty log file name
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    LOGGER.log_file_path = log_file_path
    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data.keys())

    # one more column than there are positions: the column at index mp is
    # never filled and so stays all zero
    num_pos = len(positions) + 1
    mp = num_pos // 2
    counts_array = numpy.zeros((4, num_pos), int)
    for i, pos in enumerate(positions):
        column = i + 1 if i >= mp else i  # step over the mp column
        pos_stats = data[pos]['stats']
        counts = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        for base, count in zip(counts['base'], counts['count']):
            counts_array[DNA.alphabet.index(base), column] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    # named so that it does not shadow this function
    mi_per_position = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_per_position)

    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = plot_cfg.get('1-way plot', 'figsize')
    ytick_font = plot_cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = plot_cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = plot_cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = plot_cfg.get('1-way plot', 'xlabel_fontsize')

    fig = logo.draw_multi_position(char_hts.T,
                                   characters=[list(DNA)] * num_pos,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figsize[0],
                                   xtick_fontsize=xtick_font,
                                   ytick_fontsize=ytick_font,
                                   sort_data=True)
    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
def grid(fig_config, figpath, format, no_type3): """draws an arbitrary shaped grid of mutation motifs based on fig_config""" # we read in the config file and determine number of rows and columns # paths, headings, etc .. # then create the figure and axes and call the mutation_motif drawing code args = locals() if no_type3: util.exclude_type3_fonts() if not figpath: dirname = os.path.dirname(fig_config.name) figpath = os.path.join(dirname, "drawn_array.%s" % format) log_file_path = os.path.join(dirname, "drawn_array.log") else: figpath = util.abspath(figpath) log_file_path = "%s.log" % ".".join(figpath.split(".")[:-1]) util.makedirs(os.path.dirname(figpath)) LOGGER.log_file_path = log_file_path LOGGER.log_message(str(args), label='vars') ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \ read_plot_array_config(fig_config) print("ncols:", ncols) print("nrows:", nrows) print("figsize:", figsize) print("col_labels:", col_labels) print("row_labels:", row_labels) print("paths:", paths) print("axis_cfg:", axis_cfg) #TODO: Convert below into Cogent3 Plotly #-Plotly layout = UnionDict(shapes=[]) adaptive_y = 0 plottable = {} for coord in paths: data = util.load_loglin_stats(paths[coord]) positions = list(data) positions.sort() heights, characters, indices = get_plot_data(data, positions) adaptive_y = max(adaptive_y, logo.est_ylim(heights)) plottable[coord] = dict(char_heights=heights, characters=characters, position_indices=indices) ylim = axis_cfg.get("ylim", adaptive_y) for coord in plottable: kwargs = plottable[coord] kwargs["ax"] = coord kwargs["ylim"] = ylim r = logo.draw_multi_position_cogent3(**kwargs) for key in r: if key == "shapes": layout.shapes.extend(r.shapes) else: layout[key] = r[key] for i in range(0, ncols): xaxis = "xaxis" + str(i + 1 if i != 0 else "") layout[xaxis]["domain"] = [ 0.0 + (i * (1 / ncols)), (i * (1 / ncols)) + (1 / ncols) ] print(layout) MARGININCHES = 0 PPI = 100 fig = Drawable(layout=layout, width=(figsize[0] - 
MARGININCHES) * PPI, height=(figsize[1] - MARGININCHES) * PPI) #export fig.write(path=figpath) click.secho("Wrote Cogent3 %s" % figpath, fg="green") """
def grid(fig_config, figpath, format, no_type3):
    """draws an arbitrary shaped grid of mutation motifs based on fig_config

    Arguments:
        - fig_config: config object with a ``name`` attribute, passed to
          read_plot_array_config to obtain the grid shape, figure size,
          labels, data paths and axis settings.
        - figpath: where the figure is saved. If not given, the figure is
          written as ``drawn_array.<format>`` next to fig_config.
        - format: file suffix, only used when figpath is not given.
        - no_type3: if true, util.exclude_type3_fonts() is called first.
    """
    # we read in the config file and determine number of rows and columns
    # paths, headings, etc ..
    # then create the figure and axes and call the mutation_motif drawing code
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(fig_config.name)
        figpath = os.path.join(dirname, "drawn_array.%s" % format)
        log_file_path = os.path.join(dirname, "drawn_array.log")
    else:
        figpath = util.abspath(figpath)
        # splitext only strips a suffix from the final path component, so
        # dots in directory names, or a figpath without any suffix, no
        # longer produce a truncated or empty log file name
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]
        # only a caller supplied figpath can name a directory that is not
        # there yet; the default location is the config file's own directory
        util.makedirs(os.path.dirname(figpath))

    LOGGER.log_file_path = log_file_path
    LOGGER.log_message(str(args), label='vars')

    ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \
        read_plot_array_config(fig_config)

    # squeeze=False always gives a (nrows, ncols) array of axes, so single
    # row, single column and 1x1 grids can all be indexed as axes[row, col]
    fig, axes = pyplot.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                                sharex=True, sharey=True, squeeze=False)
    figwidth = fig.get_figwidth()

    # load everything first so all panels can share one y-limit
    adaptive_y = 0
    plottable = {}
    for coord in paths:
        data = util.load_loglin_stats(paths[coord])
        positions = sorted(data)
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottable[coord] = dict(char_heights=heights,
                                characters=characters,
                                position_indices=indices,
                                figwidth=figwidth,
                                verbose=False)

    # a configured ylim wins over the one estimated from the data
    ylim = axis_cfg.get("ylim", adaptive_y)
    for coord in plottable:
        kwargs = plottable[coord]
        kwargs["ax"] = axes[coord]
        kwargs["ylim"] = ylim
        fig = logo.draw_multi_position(**kwargs)

    xformat = FuncFormatter(format_float(1e-3, float_places=2))

    # column titles go on the top row, x-axis labels on the bottom row
    for col in range(ncols):
        top_ax = axes[0, col]
        top_ax.set_title(col_labels[col],
                         fontsize=axis_cfg["xlabel_fontsize"],
                         weight="bold", y=1.1)
        btm_ax = axes[-1, col]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(axis_cfg["xtick_fontsize"])
            xticklabel.set_rotation(0)

        btm_ax.set_xlabel("Position",
                          fontsize=axis_cfg["xlabel_fontsize"],
                          weight="bold")
        btm_ax.xaxis.labelpad = axis_cfg['xlabel_pad']

    # row labels and y tick formatting go on the left-most column
    for row in range(nrows):
        lft_ax = axes[row, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(axis_cfg["ytick_fontsize"])
            yticklabel.set_rotation(0)

        lft_ax.yaxis.set_major_formatter(FuncFormatter(xformat))
        lft_ax.yaxis.labelpad = axis_cfg['ylabel_pad']
        lft_ax.set_ylabel(row_labels[row], rotation=0,
                          fontsize=axis_cfg['ylabel_fontsize'],
                          weight="bold")

    fig.tight_layout()
    fig.savefig(figpath)
    click.secho("Wrote %s" % figpath, fg="green")