Пример #1
0
def spectra_grid(json_path, group_label, plot_cfg, no_type3, figpath, format,
                 sample_size, force_overwrite, dry_run):
    """draws a grid of logos from a mutation spectra analysis"""
    # Parameters (kept in comments; NOTE(review): if this function is
    # registered as a click command its docstring is the --help text):
    #   json_path, group_label -- passed to load_spectra_data
    #   plot_cfg    -- optional config path, handed to util.get_plot_configs
    #   no_type3    -- when true, util.exclude_type3_fonts() is called
    #   figpath     -- output figure path; when falsy the figure is written
    #                  next to json_path as "spectra_grid.<format>"
    #   sample_size -- passed to draw_spectrum_grid
    #   force_overwrite, dry_run -- accepted but not used in this function

    # snapshot the call arguments for logging; taken first so that only the
    # parameters are captured
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "spectra_grid.%s" % format)
        log_file_path = os.path.join(dirname, "spectra_grid.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real file extension. Splitting on every "."
        # gave ".log" for a name without an extension and cut the path short
        # when only a directory component contained a dot.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    LOGGER.log_file_path = log_file_path

    LOGGER.log_message(str(args), label='vars')

    data = load_spectra_data(json_path, group_label)

    if plot_cfg:
        LOGGER.input_file(plot_cfg)
    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    f = draw_spectrum_grid(data, sample_size=sample_size, plot_cfg=plot_cfg)
    f.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
Пример #2
0
def spectra(countsfile, outpath, countsfile2, strand_symmetry, force_overwrite,
            no_type3, dry_run, verbose):
    '''log-linear analysis of mutation spectra between groups'''
    # Thin entry point: all of the work is delegated to spectra_analysis.main.
    if no_type3:
        util.exclude_type3_fonts()

    # positional order in which spectra_analysis.main receives its arguments
    # (note that dry_run comes before verbose, unlike in this signature)
    analysis_args = (
        countsfile,
        outpath,
        countsfile2,
        strand_symmetry,
        force_overwrite,
        dry_run,
        verbose,
    )
    spectra_analysis.main(*analysis_args)
Пример #3
0
def nbr_matrix(paths_cfg, plot_cfg, figpath, format, no_type3, sample_size,
               force_overwrite, dry_run):
    '''draws square matrix of sequence logos from neighbour analysis'''
    # Parameters (kept in comments; NOTE(review): if this function is
    # registered as a click command its docstring is the --help text):
    #   paths_cfg   -- config file with a [json_paths] section; each option
    #                  is a label whose value is a json path relative to the
    #                  directory holding paths_cfg
    #   figpath     -- output figure path; when falsy the figure is written
    #                  next to paths_cfg as "nbr_matrix.<format>"
    #   no_type3    -- when true, util.exclude_type3_fonts() is called
    #   sample_size -- passed to draw_position_grid
    #   plot_cfg, force_overwrite, dry_run -- accepted but not used here
    # Exits with status 1 if any listed json file does not exist.
    if no_type3:
        util.exclude_type3_fonts()

    # snapshot of the call arguments, taken before any other local exists
    args = locals()
    LOGGER.log_message(str(args), label='vars')

    config_path = util.abspath(paths_cfg)
    indir = os.path.dirname(config_path)

    # ConfigParser is the supported class; the SafeConfigParser alias was
    # deprecated in Python 3.2 and removed in 3.12. Imported here (after the
    # locals() snapshot) so the block does not rely on a file-level import.
    from configparser import ConfigParser

    parser = ConfigParser()
    parser.optionxform = str  # stops automatic conversion to lower case
    parser.read(config_path)

    json_paths = {}
    for direction, path in parser.items("json_paths"):
        # assumes paths are relative to indir
        path = os.path.join(indir, path)
        if not os.path.exists(path):
            print("Couldn't find %s" % path)
            print("json file paths should be relative to paths_cfg")
            sys.exit(1)

        json_paths[direction] = path

    if not figpath:
        figpath = os.path.join(indir, "nbr_matrix.%s" % format)
        log_file_path = os.path.join(indir, "nbr_matrix.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real file extension. Splitting on every "."
        # gave ".log" for a name without an extension and cut the path short
        # when only a directory component contained a dot.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    LOGGER.log_file_path = log_file_path

    # load the statistics for every direction, recording each input file
    plot_data = {}
    for direction, path in json_paths.items():
        LOGGER.input_file(path)
        plot_data[direction] = util.load_loglin_stats(path)

    fig = draw_position_grid(plot_data, sample_size)

    # figure-level axis titles, then shrink the layout to leave room for them
    fig.text(0.4, 0.955, "Ending Base", fontsize=20)
    fig.text(0.03, 0.55, "Starting Base", rotation=90, fontsize=20)
    fig.tight_layout(rect=(0.06, 0, 0.95, 0.95))

    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
Пример #4
0
def mi(json_path, plot_cfg, no_type3, figpath, format, sample_size,
       force_overwrite, dry_run):
    """draws conventional sequence logo, using MI, from first order effects"""
    # Parameters (kept in comments; NOTE(review): if this function is
    # registered as a click command its docstring is the --help text):
    #   json_path -- statistics file read with util.load_loglin_stats
    #   plot_cfg  -- optional config path, handed to util.get_plot_configs;
    #                the '1-way plot' section supplies sizes and fonts
    #   no_type3  -- when true, util.exclude_type3_fonts() is called
    #   figpath   -- output figure path; when falsy the figure is written
    #                next to json_path as "MI.<format>"
    #   sample_size, force_overwrite, dry_run -- accepted but not used here

    # resolve the path before the snapshot so the log records the absolute
    # path; the snapshot itself is for logging
    json_path = util.abspath(json_path)
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(json_path)
        figpath = os.path.join(dirname, "MI.%s" % format)
        log_file_path = os.path.join(dirname, "MI.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real file extension. Splitting on every "."
        # gave ".log" for a name without an extension and cut the path short
        # when only a directory component contained a dot.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    LOGGER.log_file_path = log_file_path

    if plot_cfg:
        LOGGER.input_file(plot_cfg)

    LOGGER.log_message(str(args), label='vars')

    data = util.load_loglin_stats(json_path)
    positions = sorted(data)
    # one more column than there are positions: the middle column is never
    # filled and stays at zero
    # NOTE(review): presumably the middle column stands for the mutated
    # position itself -- confirm
    num_pos = len(positions) + 1
    mp = num_pos // 2
    counts_array = numpy.zeros((4, num_pos), int)
    for i, pos in enumerate(positions):
        # positions at or beyond the middle shift right by one column
        col = i + 1 if i >= mp else i
        pos_stats = data[pos]['stats']
        # keep only the rows flagged 'M' in the 'mut' column
        counts = pos_stats[pos_stats['mut'] == 'M'][["base", "count"]]
        counts = dict(zip(counts['base'], counts['count']))
        for base, count in counts.items():
            base_index = DNA.alphabet.index(base)
            counts_array[base_index, col] = count

    freq_matrix = entropy.counts_to_freq_matrix(counts_array)
    mit = entropy.get_mit(freq_matrix, freq_matrix=True)
    # per-column totals; named so as not to shadow this function
    mi_totals = mit.sum(axis=0)
    char_hts = get_mi_char_heights(numpy.fabs(mit), mi_totals)

    plot_cfg = util.get_plot_configs(cfg_path=plot_cfg)
    figsize = plot_cfg.get('1-way plot', 'figsize')
    ytick_font = plot_cfg.get('1-way plot', 'ytick_fontsize')
    xtick_font = plot_cfg.get('1-way plot', 'xtick_fontsize')
    ylabel_font = plot_cfg.get('1-way plot', 'ylabel_fontsize')
    xlabel_font = plot_cfg.get('1-way plot', 'xlabel_fontsize')
    fig = logo.draw_multi_position(char_hts.T,
                                   characters=[list(DNA)] * num_pos,
                                   position_indices=list(range(num_pos)),
                                   figsize=figsize,
                                   figwidth=figsize[0],
                                   xtick_fontsize=xtick_font,
                                   ytick_fontsize=ytick_font,
                                   sort_data=True)

    ax = fig.gca()
    ax.tick_params(axis='y', labelsize=ytick_font)
    ax.tick_params(axis='x', labelsize=xtick_font)
    ax.set_ylabel("MI", fontsize=ylabel_font)
    ax.set_xlabel("Position", fontsize=xlabel_font)
    fig.tight_layout()
    fig.savefig(figpath)
    LOGGER.output_file(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
Пример #5
0
def grid(fig_config, figpath, format, no_type3):
    """draws an arbitrary shaped grid of mutation motifs based on fig_config"""
    # we read in the config file and determine number of rows and columns
    # paths, headings, etc ..
    # then build a layout from the mutation_motif drawing code and write it
    #
    # Parameters:
    #   fig_config -- open config file; its .name locates the default output
    #                 directory and it is parsed by read_plot_array_config
    #   figpath    -- output figure path; when falsy the figure is written
    #                 next to fig_config as "drawn_array.<format>"
    #   no_type3   -- when true, util.exclude_type3_fonts() is called

    # snapshot of the call arguments, taken before any other local exists
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(fig_config.name)
        figpath = os.path.join(dirname, "drawn_array.%s" % format)
        log_file_path = os.path.join(dirname, "drawn_array.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real file extension. Splitting on every "."
        # gave ".log" for a name without an extension and cut the path short
        # when only a directory component contained a dot.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    util.makedirs(os.path.dirname(figpath))
    LOGGER.log_file_path = log_file_path
    LOGGER.log_message(str(args), label='vars')

    ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \
        read_plot_array_config(fig_config)
    print("ncols:", ncols)
    print("nrows:", nrows)
    print("figsize:", figsize)
    print("col_labels:", col_labels)
    print("row_labels:", row_labels)
    print("paths:", paths)
    print("axis_cfg:", axis_cfg)

    #TODO: Convert below into Cogent3 Plotly

    #-Plotly
    layout = UnionDict(shapes=[])
    # first pass: load every panel and track the largest estimated y limit
    adaptive_y = 0
    plottable = {}
    for coord in paths:
        data = util.load_loglin_stats(paths[coord])
        positions = sorted(data)
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottable[coord] = dict(char_heights=heights,
                                characters=characters,
                                position_indices=indices)

    # a configured "ylim" wins over the value estimated from the data
    ylim = axis_cfg.get("ylim", adaptive_y)
    # second pass: draw each panel and merge what it returns into the layout
    for coord in plottable:
        kwargs = plottable[coord]
        kwargs["ax"] = coord
        kwargs["ylim"] = ylim
        r = logo.draw_multi_position_cogent3(**kwargs)
        for key in r:
            if key == "shapes":
                # shapes accumulate across panels; other keys are overwritten
                layout.shapes.extend(r.shapes)
            else:
                layout[key] = r[key]

    # give every column an equal-width slice of the horizontal domain; the
    # first axis key is "xaxis", later ones are "xaxis2", "xaxis3", ...
    col_width = 1 / ncols
    for i in range(ncols):
        xaxis = "xaxis" if i == 0 else "xaxis%d" % (i + 1)
        left = i * col_width
        layout[xaxis]["domain"] = [left, left + col_width]

    print(layout)
    MARGININCHES = 0
    PPI = 100
    # figsize and MARGININCHES are multiplied by PPI to get width and height
    fig = Drawable(layout=layout,
                   width=(figsize[0] - MARGININCHES) * PPI,
                   height=(figsize[1] - MARGININCHES) * PPI)

    #export
    fig.write(path=figpath)
    click.secho("Wrote Cogent3 %s" % figpath, fg="green")
    """
Пример #6
0
def grid(fig_config, figpath, format, no_type3):
    """draws an arbitrary shaped grid of mutation motifs based on fig_config"""
    # we read in the config file and determine number of rows and columns
    # paths, headings, etc ..
    # then create the figure and axes and call the mutation_motif drawing code
    #
    # Parameters:
    #   fig_config -- open config file; its .name locates the default output
    #                 directory and it is parsed by read_plot_array_config
    #   figpath    -- output figure path; when falsy the figure is written
    #                 next to fig_config as "drawn_array.<format>"
    #   no_type3   -- when true, util.exclude_type3_fonts() is called

    # snapshot of the call arguments, taken before any other local exists
    args = locals()
    if no_type3:
        util.exclude_type3_fonts()

    if not figpath:
        dirname = os.path.dirname(fig_config.name)
        figpath = os.path.join(dirname, "drawn_array.%s" % format)
        log_file_path = os.path.join(dirname, "drawn_array.log")
    else:
        figpath = util.abspath(figpath)
        # splitext strips only a real file extension. Splitting on every "."
        # gave ".log" for a name without an extension and cut the path short
        # when only a directory component contained a dot.
        log_file_path = os.path.splitext(figpath)[0] + ".log"

    util.makedirs(os.path.dirname(figpath))
    LOGGER.log_file_path = log_file_path
    LOGGER.log_message(str(args), label='vars')

    ncols, nrows, figsize, col_labels, row_labels, paths, axis_cfg = \
        read_plot_array_config(fig_config)

    fig, axes = pyplot.subplots(nrows=nrows, ncols=ncols, figsize=figsize,
                                sharex=True, sharey=True)
    figwidth = fig.get_figwidth()
    # normalise axes to a 2D array so it can be indexed as axes[row, col]
    try:
        axes[0]
    except TypeError:
        # a single Axes object is not indexable
        axes = numpy.array([[axes]])

    if len(axes.shape) == 1:
        # required for indexing of appropriate axis
        axes = numpy.vstack(axes)
        if nrows == 1:
            axes = axes.T

    # first pass: load every panel and track the largest estimated y limit
    adaptive_y = 0
    plottable = {}
    for coord in paths:
        data = util.load_loglin_stats(paths[coord])
        positions = sorted(data)
        heights, characters, indices = get_plot_data(data, positions)
        adaptive_y = max(adaptive_y, logo.est_ylim(heights))
        plottable[coord] = dict(char_heights=heights,
                                characters=characters,
                                position_indices=indices,
                                figwidth=figwidth,
                                verbose=False)

    # a configured "ylim" wins over the value estimated from the data
    ylim = axis_cfg.get("ylim", adaptive_y)
    # second pass: draw each panel on its own axis with the shared y limit
    for coord in plottable:
        kwargs = plottable[coord]
        kwargs["ax"] = axes[coord]
        kwargs["ylim"] = ylim
        fig = logo.draw_multi_position(**kwargs)

    xformat = FuncFormatter(format_float(1e-3, float_places=2))

    # column titles go on the top row, x labels and ticks on the bottom row
    for col in range(ncols):
        top_ax = axes[0, col]
        top_ax.set_title(col_labels[col], fontsize=axis_cfg["xlabel_fontsize"],
                         weight="bold", y=1.1)
        btm_ax = axes[-1, col]
        for xticklabel in btm_ax.get_xticklabels():
            xticklabel.set_fontsize(axis_cfg["xtick_fontsize"])
            xticklabel.set_rotation(0)
        btm_ax.set_xlabel("Position", fontsize=axis_cfg["xlabel_fontsize"],
                          weight="bold")
        btm_ax.xaxis.labelpad = axis_cfg['xlabel_pad']

    # row labels and y tick formatting go on the left-most column
    for row in range(nrows):
        lft_ax = axes[row, 0]
        for yticklabel in lft_ax.get_yticklabels():
            yticklabel.set_fontsize(axis_cfg["ytick_fontsize"])
            yticklabel.set_rotation(0)

        # xformat is already a FuncFormatter; wrapping it again creates a
        # separate formatter object for each row's y axis
        lft_ax.yaxis.set_major_formatter(FuncFormatter(xformat))
        lft_ax.yaxis.labelpad = axis_cfg['ylabel_pad']
        lft_ax.set_ylabel(row_labels[row], rotation=0,
                          fontsize=axis_cfg['ylabel_fontsize'],
                          weight="bold")

    fig.tight_layout()
    fig.savefig(figpath)
    click.secho("Wrote %s" % figpath, fg="green")
Пример #7
0
def nbr(countsfile, outpath, countsfile2, first_order, strand_symmetry,
        group_label, group_ref, plot_cfg, no_type3, format, verbose, dry_run):
    '''log-linear analysis of neighbouring base influence on point mutation

    Writes estimated statistics, figures and a run log to the specified
    directory outpath.

    See documentation for count table format requirements.
    '''
    # Notes on behaviour visible in this function:
    #   * raises ValueError unless first_order is set or the counts table has
    #     exactly four columns whose names start with 'pos'
    #   * exits with status -1 when strand_symmetry is requested but the
    #     table has no 'strand' column
    #   * with countsfile2, the two tables are labelled '1' and '2' in a
    #     group column and stacked into one table
    #   * format is accepted but not used here
    if no_type3:
        util.exclude_type3_fonts()

    # snapshot of the call arguments, taken before any other local exists
    args = locals()

    outpath = util.abspath(outpath)

    if not dry_run:
        util.makedirs(outpath)
        runlog_path = os.path.join(outpath, "analysis.log")
        LOGGER.log_file_path = runlog_path
        LOGGER.log_message(str(args), label='vars')

    counts_filename = util.abspath(countsfile)
    counts_table = util.load_table_from_delimited_file(counts_filename,
                                                       sep='\t')

    LOGGER.input_file(counts_filename, label="countsfile1_path")

    positions = [c for c in counts_table.header if c.startswith('pos')]
    if not first_order and len(positions) != 4:
        raise ValueError("Requires four positions for analysis")

    # normalise empty values to None
    group_label = group_label or None
    group_ref = group_ref or None
    if strand_symmetry:
        group_label = 'strand'
        group_ref = group_ref or '+'
        if group_label not in counts_table.header:
            print("ERROR: no column named 'strand', exiting.")
            # raise SystemExit directly: the bare exit() name is injected by
            # the site module and is not guaranteed to exist
            raise SystemExit(-1)

    if countsfile2:
        print("Performing 2 group analysis")
        group_label = group_label or 'group'
        group_ref = group_ref or '1'
        counts_table1 = counts_table.with_new_column(group_label,
                                                     lambda x: '1',
                                                     columns=counts_table.header[0])

        fn2 = util.abspath(countsfile2)
        counts_table2 = util.load_table_from_delimited_file(fn2, sep='\t')

        LOGGER.input_file(fn2, label="countsfile2_path")

        counts_table2 = counts_table2.with_new_column(group_label,
                                                      lambda x: '2',
                                                      columns=counts_table2.header[0])
        # now combine: the new group column was appended last, so drop it
        # from the end and put it first. list() keeps the concatenation valid
        # whether header is a list or a tuple.
        header = [group_label] + list(counts_table2.header[:-1])
        raw1 = counts_table1.tolist(header)
        raw2 = counts_table2.tolist(header)
        counts_table = make_table(header=header, rows=raw1 + raw2)

        if not dry_run:
            outfile = os.path.join(outpath, 'group_counts_table.txt')
            counts_table.write(outfile, sep='\t')
            LOGGER.output_file(outfile, label="group_counts")

    if dry_run or verbose:
        print()
        print(counts_table)
        print()

    plot_config = util.get_plot_configs(cfg_path=plot_cfg)

    msg = single_group(counts_table, outpath, group_label, group_ref,
                       positions, plot_config, first_order,
                       dry_run)
    print(msg)