def get_four_position_fig(four_pos_results, positions, figsize,
                          group_label=None, group_ref=None, figwidth=None,
                          xtick_fontsize=14, ytick_fontsize=14):
    """Return a figure showing the logo for the one 4-way position motif.

    Exactly one combination of four positions must exist, i.e.
    ``positions`` must hold four entries; this is enforced by an assert.

    Parameters
    ----------
    four_pos_results
        mapping keyed by the 4-tuple of positions; the entry must supply
        'rel_entropy' and a 'stats' table with the columns 'base1' ..
        'base4' and 'ret' (``sort_values`` is called on it, so presumably
        a pandas DataFrame)
    positions
        sequence of position labels
    figsize
        passed to ``pyplot.figure``
    group_label, group_ref
        forwarded to ``get_selected_indices`` to pick the rows of 'stats'
    figwidth
        accepted but not used by this function
    xtick_fontsize, ytick_fontsize
        forwarded to ``logo.draw_multi_position``

    Returns
    -------
    The figure created by ``pyplot.figure``.
    """
    motifs = list(combinations(positions, 4))
    assert len(motifs) == 1
    motif = motifs[0]

    rel_entropy = four_pos_results[motif]['rel_entropy']
    ylim = logo.est_ylim([rel_entropy])

    fig = pyplot.figure(figsize=figsize)
    ax = fig.gca()

    # one more column than there are positions; the extra one sits at the
    # midpoint and is never filled in here
    num_pos = len(positions) + 1
    mid_pos = num_pos // 2

    position_re = numpy.zeros((num_pos,), float)
    characters = numpy.zeros((num_pos, 256), str)
    rets = numpy.zeros((256, num_pos), float)

    # columns at or beyond the midpoint shift right by one so that they
    # reflect position along the sequence
    indices = [col + 1 if col >= mid_pos else col for col in range(4)]

    position_re.put(indices, rel_entropy)

    stats = four_pos_results[motif]['stats']
    selected = get_selected_indices(stats, group_label=group_label,
                                    group_ref=group_ref)
    mut_stats = stats[selected][['base1', 'base2', 'base3', 'base4', 'ret']]
    mut_stats = mut_stats.sort_values(by='ret')

    base_columns = ('base1', 'base2', 'base3', 'base4')
    for column, base_column in zip(indices, base_columns):
        characters[column] = list(mut_stats[base_column])

    # every motif column carries the same, sorted, 'ret' values
    for column in indices:
        rets[:, column] = mut_stats['ret']

    heights = get_re_char_heights(rets, re_positionwise=position_re)
    logo.draw_multi_position(char_heights=heights.T, characters=characters,
                             position_indices=indices, ax=ax, ylim=ylim,
                             xtick_fontsize=xtick_fontsize,
                             ytick_fontsize=ytick_fontsize)

    return fig
示例#2
0
def draw_position_grid(directions, sample_size=False, width=8, height=8,
                       title_space=1.1, axis_font_size=20, tick_font_size=10,
                       ylim=None):
    """Return a 4x4 figure of logos, one panel per mutation direction.

    A direction label is split on 'to' and the two resulting bases select
    the panel: the first base picks the row, the second the column, both
    in the order C, T, A, G.

    Parameters
    ----------
    directions
        mapping of direction label (e.g. 'CtoT') to per-position results;
        each per-position entry must supply a 'stats' table with a
        "count" column
    sample_size
        when true, an "N=..." annotation is written into each drawn panel
    width, height
        figure size; ``width`` is also forwarded as ``figwidth`` to
        ``logo.draw_multi_position``
    title_space
        vertical offset (``y``) of the column titles
    axis_font_size
        font size of the column titles and row labels, also used as the
        y-label padding
    tick_font_size
        font size of the tick labels on the left column and bottom row
    ylim
        shared y limit; when None the largest value estimated by
        ``logo.est_ylim`` across all directions is used

    Returns
    -------
    The figure created by ``pyplot.subplots``.
    """
    f, axes = pyplot.subplots(4, 4, sharex=True, sharey=True,
                              figsize=(width, height))

    bases = list('CTAG')
    positions = None
    plottables = []
    adaptive_y = 0
    for direction in directions:
        data = directions[direction]
        if positions is None:
            # positions come from the first direction and are reused for
            # every other one
            positions = sorted(data.keys())
        # NOTE(review): the halving presumably reflects counts covering two
        # equally sized groups -- confirm against the stats producer
        number = data[positions[0]]['stats']["count"].sum() // 2
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottables.append((direction, heights, characters, indices, number))

    if ylim is None:
        ylim = adaptive_y

    for direction, heights, characters, indices, number in plottables:
        fr, to = (bases.index(base) for base in direction.split('to'))
        ax = axes[fr, to]
        logo.draw_multi_position(heights, characters=characters,
                                 position_indices=indices, ylim=ylim,
                                 ax=ax, figwidth=width, verbose=False)
        if sample_size:
            y = ax.get_ylim()[1]
            ax.text(0.2, y * 0.85, "N={:,}".format(number), fontsize=10)

    tick_format = format_float(1e-3, float_places=2)

    for i, base in enumerate(bases):
        top_ax = axes[0, i]
        # honour the caller supplied title offset (previously fixed at 1.1)
        top_ax.set_title(base, fontsize=axis_font_size, weight="bold",
                         y=title_space)

        lft_ax = axes[i, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(tick_font_size)
            yticklabel.set_rotation(0)
        # a separate formatter per axis, all sharing one formatting function
        lft_ax.yaxis.set_major_formatter(FuncFormatter(tick_format))
        lft_ax.set_ylabel(base, rotation=0,
                          fontsize=axis_font_size, weight="bold")
        lft_ax.yaxis.labelpad = axis_font_size

        btm_ax = axes[-1, i]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(tick_font_size)
            xticklabel.set_rotation(0)

    f.tight_layout()

    return f
def get_single_position_fig(single_results, positions, figsize,
                            group_label=None, group_ref=None, figwidth=None,
                            xlabel_fontsize=14, ylabel_fontsize=14,
                            xtick_fontsize=14, ytick_fontsize=14):
    """Return a logo figure built from the single-position results.

    Parameters
    ----------
    single_results
        mapping of position to a result supplying 'rel_entropy' and a
        'stats' table with the columns 'base' and 'ret'
    positions
        sequence of position labels, in plotting order
    figsize, figwidth
        forwarded to ``logo.draw_multi_position``; a truthy ``figwidth`` is
        additionally applied to the returned figure
    group_label, group_ref
        forwarded to ``get_selected_indices`` to pick the rows of 'stats'
    xlabel_fontsize, ylabel_fontsize
        font sizes of the 'Position' and 'RE' axis labels
    xtick_fontsize, ytick_fontsize
        tick label sizes; half of each (integer division) is the tick pad

    Returns
    -------
    The figure returned by ``logo.draw_multi_position``.
    """
    # one more column than there are positions; the column at the midpoint
    # keeps its zero heights and default characters
    num_pos = len(positions) + 1
    mid = num_pos // 2

    position_re = numpy.zeros((num_pos,), float)
    rets = numpy.zeros((4, num_pos), float)
    characters = [list('ACGT') for _ in range(num_pos)]

    for order, pos in enumerate(positions):
        # columns at or beyond the midpoint shift right by one
        column = order + 1 if order >= mid else order

        result = single_results[pos]
        stats = result['stats']
        position_re[column] = result['rel_entropy']

        selected = get_selected_indices(stats, group_label=group_label,
                                        group_ref=group_ref)
        mut_stats = stats[selected][['base', 'ret']].sort_values(by='ret')
        characters[column] = list(mut_stats['base'])
        rets[:, column] = mut_stats['ret']

    heights = get_re_char_heights(rets, re_positionwise=position_re)
    fig = logo.draw_multi_position(heights.T, characters=characters,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize, figwidth=figwidth,
                                   verbose=False)

    if figwidth:
        fig.set_figwidth(figwidth)

    ax = fig.gca()
    ax.set_xlabel('Position', fontsize=xlabel_fontsize)
    ax.set_ylabel('RE', rotation='vertical', fontsize=ylabel_fontsize)
    # x ticks are drawn without tick marks (length=0)
    ax.tick_params(axis='x', labelsize=xtick_fontsize,
                   pad=xtick_fontsize // 2, length=0)
    ax.tick_params(axis='y', labelsize=ytick_fontsize,
                   pad=ytick_fontsize // 2)
    return fig
示例#4
0
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects"""
    # NOTE(review): the docstring is left untouched because the click usage
    # below suggests this is a CLI command whose docstring is its help
    # text -- confirm against the (not visible) decorators.
    #
    # json_path: path of the stats file read by util.load_loglin_stats
    # plot_cfg: plot config path handed to util.get_plot_configs; when
    #     truthy it is also recorded as an input file in the log
    # no_type3: when true, util.exclude_type3_fonts() is called
    # figpath: output figure path; when falsy the figure is written as
    #     "MI.<format>" next to json_path
    # format: file suffix, only used for the default figpath
    # sample_size, force_overwrite, dry_run: accepted but not used here

    # the following is for logging; locals() must be captured before any
    # further local name is bound so only the arguments are recorded
    json_path = util.abspath(json_path)
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real suffix of the final path component,
        # so extension-less names and dotted directory names still yield a
        # log file sitting next to the figure
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    LOGGER.log_file_path = log_file_path

    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data.keys())

    # one more column than there are positions; the column at the midpoint
    # keeps zero counts
    num_pos = len(positions) + 1
    mid_pos = num_pos // 2
    counts_array = numpy.zeros((4, num_pos), int)
    for order, pos in enumerate(positions):
        # columns at or beyond the midpoint shift right by one
        column = order + 1 if order >= mid_pos else order
        pos_stats = data[pos]['stats']
        # only rows flagged 'M' in the 'mut' column contribute counts
        mutated = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        for base, count in zip(mutated['base'], mutated['count']):
            counts_array[DNA.alphabet.index(base), column] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    mi_total = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_total)

    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = plot_cfg.get('1-way plot', 'figsize')
    ytick_font = plot_cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = plot_cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = plot_cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = plot_cfg.get('1-way plot', 'xlabel_fontsize')
    fig = logo.draw_multi_position(char_hts.T,
                                   characters=[list(DNA)] * num_pos,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figsize[0],
                                   xtick_fontsize=xtick_font,
                                   ytick_fontsize=ytick_font,
                                   sort_data=True)

    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
示例#5
0
def grid(fig_config, figpath, format, no_type3):
    """draws an arbitrary shaped grid of mutation motifs based on fig_config"""
    # NOTE(review): the docstring is left untouched because the click usage
    # below suggests this is a CLI command whose docstring is its help
    # text -- confirm against the (not visible) decorators.
    #
    # fig_config: config handed to read_plot_array_config; its .name is
    #     used to locate the default output directory
    # figpath: output figure path; when falsy the figure is written as
    #     "drawn_array.<format>" next to fig_config
    # format: file suffix, only used for the default figpath
    # no_type3: when true, util.exclude_type3_fonts() is called

    # captured first so that only the arguments are logged
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(fig_config.name)
        figpath = os.path.join(dirname, "drawn_array.%s" % format)
        log_file_path = os.path.join(dirname, "drawn_array.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real suffix of the final path component,
        # so extension-less names and dotted directory names still yield a
        # log file sitting next to the figure
        log_file_path = "%s.log" % os.path.splitext(figpath)[0]

    util.makedirs(os.path.dirname(figpath))
    LOGGER.log_file_path = log_file_path
    LOGGER.log_message(str(args), label='vars')

    ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \
        read_plot_array_config(fig_config)

    # squeeze=False always yields a (nrows, ncols) array of axes, which is
    # required for indexing of the appropriate axis by coordinate
    fig, axes = pyplot.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                                sharex=True, sharey=True, squeeze=False)
    figwidth = fig.get_figwidth()

    # first pass: load everything so a common y limit can be derived
    adaptive_y = 0
    plottable = {}
    for coord in paths:
        data = util.load_loglin_stats(paths[coord])
        positions = sorted(data)
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottable[coord] = dict(char_heights=heights,
                                characters=characters,
                                position_indices=indices,
                                figwidth=figwidth,
                                verbose=False)

    # a configured ylim wins over the estimate from the data
    ylim = axis_cfg.get("ylim", adaptive_y)
    for coord, kwargs in plottable.items():
        kwargs["ax"] = axes[coord]
        kwargs["ylim"] = ylim
        fig = logo.draw_multi_position(**kwargs)

    tick_format = format_float(1e-3, float_places=2)

    for col in range(ncols):
        top_ax = axes[0, col]
        top_ax.set_title(col_labels[col], fontsize=axis_cfg["xlabel_fontsize"],
                         weight="bold", y=1.1)
        btm_ax = axes[-1, col]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(axis_cfg["xtick_fontsize"])
            xticklabel.set_rotation(0)
        btm_ax.set_xlabel("Position", fontsize=axis_cfg["xlabel_fontsize"],
                          weight="bold")
        btm_ax.xaxis.labelpad = axis_cfg['xlabel_pad']

    for row in range(nrows):
        lft_ax = axes[row, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(axis_cfg["ytick_fontsize"])
            yticklabel.set_rotation(0)

        # a separate formatter per axis, all sharing one formatting function
        lft_ax.yaxis.set_major_formatter(FuncFormatter(tick_format))
        lft_ax.yaxis.labelpad = axis_cfg['ylabel_pad']
        lft_ax.set_ylabel(row_labels[row], rotation=0,
                          fontsize=axis_cfg['ylabel_fontsize'],
                          weight="bold")

    fig.tight_layout()
    fig.savefig(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
def get_three_position_fig(three_pos_results, positions, figsize,
                           group_label=None, group_ref=None, figwidth=None,
                           xtick_fontsize=14, ytick_fontsize=14):
    """Return a figure with one logo panel per 3-way position motif.

    Panels are laid out on a grid whose cell for each motif is given by
    ``get_resized_array_coordinates3``; grid cells without a motif are
    blanked (no frame, no axes).

    Parameters
    ----------
    three_pos_results
        mapping keyed by 3-tuples of positions; each entry must supply
        'rel_entropy' and a 'stats' table with the columns 'base1',
        'base2', 'base3' and 'ret'
    positions
        sequence of position labels
    figsize
        passed to ``pyplot.subplots``
    group_label, group_ref
        forwarded to ``get_selected_indices`` to pick the rows of 'stats'
    figwidth
        accepted but not used by this function
    xtick_fontsize, ytick_fontsize
        forwarded to ``logo.draw_multi_position``

    Returns
    -------
    The figure created by ``pyplot.subplots``.
    """
    position_sets = list(combinations(positions, 3))
    array_coords = get_resized_array_coordinates3(positions, position_sets)

    coords = list(array_coords.values())
    xdim = max(v[0] for v in coords) + 1
    ydim = max(v[1] for v in coords) + 1

    # squeeze=False keeps axarr two dimensional even when the grid has a
    # single row or column, so axarr[row, col] is always valid
    fig, axarr = pyplot.subplots(xdim, ydim, figsize=figsize, sharex=True,
                                 sharey=True, squeeze=False)

    # hide every grid cell that will not receive a motif
    for i in range(xdim):
        for j in range(ydim):
            if (i, j) in coords:
                continue

            ax = axarr[i, j]
            ax.set_frame_on(False)
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)

    # one more column than there are positions; the column at the midpoint
    # is never assigned
    num_pos = len(positions) + 1
    mid_pos = num_pos // 2

    rel_entropies = [three_pos_results[position_set]['rel_entropy']
                     for position_set in position_sets]
    ylim = logo.est_ylim(rel_entropies)

    # NOTE(review): position_re and characters are allocated once and keep
    # the values written for earlier motifs; only the columns listed in
    # position_indices are handed to the drawing call -- confirm the stale
    # columns are ignored downstream.
    position_re = numpy.zeros((num_pos,), float)
    characters = numpy.zeros((num_pos, 64), str)

    for motif in position_sets:
        rets = numpy.zeros((64, num_pos), float)
        row, col = array_coords[motif]
        ax = axarr[row, col]

        # adjust indices to reflect position along sequence: columns at or
        # beyond the midpoint shift right by one
        indices = [index + 1 if index >= mid_pos else index
                   for index in map(positions.index, motif)]

        position_re.put(indices, three_pos_results[motif]['rel_entropy'])

        stats = three_pos_results[motif]['stats']
        selected = get_selected_indices(stats, group_label=group_label,
                                        group_ref=group_ref)
        mut_stats = stats[selected][['base1', 'base2', 'base3', 'ret']]
        mut_stats = mut_stats.sort_values(by='ret')

        for column, base_column in zip(indices, ('base1', 'base2', 'base3')):
            characters[column] = list(mut_stats[base_column])

        # every motif column carries the same, sorted, 'ret' values
        for column in indices:
            rets[:, column] = mut_stats['ret']

        heights = get_re_char_heights(rets, re_positionwise=position_re)
        logo.draw_multi_position(char_heights=heights.T, characters=characters,
                                 position_indices=indices, ax=ax, ylim=ylim,
                                 xtick_fontsize=xtick_fontsize,
                                 ytick_fontsize=ytick_fontsize)

    return fig