Пример #1
0
def download_idr_tfs(root_dir, metadata):
    """Download the IDR-called peak file described by ``metadata``.

    The IDR record is looked up with ``fetch_idr_record``. Its peak file is
    downloaded into ``<root_dir>/<dataset>/`` and the record itself is saved
    next to it via ``save_metadata_json``.

    Parameters
    ----------
    root_dir: string
        Directory under which a per-dataset sub-directory is created
    metadata:
        Passed unchanged to ``fetch_idr_record``

    Returns
    -------
    dict or None
        ``{'assembly': ..., 'bedfile': ...}`` where ``bedfile`` is the
        download path with ``.gz`` removed, or ``None`` when there is no
        IDR record at all.

    Raises
    ------
    AssertionError
        If more than one IDR record is found.
    """
    idr_records = fetch_idr_record(metadata)
    # Zero records is allowed by the assertion below; return early instead
    # of indexing into an empty sequence.
    if len(idr_records) == 0:
        return None

    ## There is only one IDR per sample
    if len(idr_records) != 1:
        # Show which dataset is ambiguous before the assertion fires.
        print(idr_records[0]['dataset'])
    assert len(idr_records) <= 1

    idr_record = idr_records[0]
    dataset = idr_record['dataset']
    peakfilename = idr_record['peakfilename'] + '.bed.gz'
    dataset_dir = os.path.join(root_dir, dataset)
    safe_makedir(dataset_dir)
    source_url = __base_url__ + idr_record['href']
    print(source_url)
    download_peakfile(source_url, peakfilename, dataset_dir)
    save_metadata_json(idr_record, dataset_dir)
    # NOTE(review): the returned path drops '.gz', so download_peakfile is
    # presumably expected to leave a decompressed file there -- confirm.
    bedfile = os.path.join(dataset_dir, peakfilename.replace('.gz', ''))
    return {'assembly': idr_record['assembly'], 'bedfile': bedfile}
Пример #2
0
def create_plot(meme_file,
                plot_title,
                output_dir=None,
                centrimo_dir=None,
                motif_number=1,
                flank_length=5,
                sample_score_files=None,
                control_score_files=None,
                reg_plot_titles=None,
                annotate=None,
                save=True):
    """Create the summary plots for one motif of a MEME run.

    For every (sample score file, control score file, title) triple two
    figures are drawn: one using the forward logo (``logo<N>.png``) and one
    using the reverse-complement logo (``logo_rc<N>.png``). Each figure holds
    the logo, a stem plot of the sample scores, a bar plot, a regression
    legend and scatter plot, and optionally an enrichment column and an
    annotation column.

    Parameters
    ----------
    meme_file: string
        Path to meme.txt. Its directory is handed to the figure and logo
        helpers.
    plot_title: string
        Figure title; `` \\# <motif_number>`` is appended to it
    output_dir: string
        Directory the figures are written to (created if needed). Defaults
        to ``moca_plots`` next to the directory containing ``meme_file``.
    centrimo_dir: string
        Path to centrimo's output directory. When given, ``centrimo.txt``
        and ``site_counts.txt`` from it are passed to the enrichment plot.
    motif_number: int
        1-based number of motif in the motif file
    flank_length: int
        Flank size forwarded to the plotting helpers; X coordinates are
        generated for ``len(motif) + 2*flank_length`` positions.
    sample_score_files: list
        Path to conservation scores files for sample
    control_score_files: list
        Path to conservation score files for control
    reg_plot_titles: list
        One title per sample/control pair, used as legend title, y label
        and as part of the output file name.
    annotate:
        Forwarded to the annotation plot. An empty string or a single space
        is treated as "no annotation".
    save: bool
        Write each figure to ``output_dir`` when true.

    Returns
    -------
    list
        The figure objects that were created. ``plt.close('all')`` has
        already been called after each one.

    Raises
    ------
    MocaException
        If no sample score files are given, no control score files are
        given, or the two lists differ in length.
    """
    # None stands in for "empty list" so no list object is shared between calls.
    sample_score_files = [] if sample_score_files is None else sample_score_files
    control_score_files = [] if control_score_files is None else control_score_files
    reg_plot_titles = [] if reg_plot_titles is None else reg_plot_titles

    meme_record = read_memefile(meme_file)
    total_sequences = get_total_sequences(meme_file)
    record = meme_record['motif_records'][motif_number-1]
    num_occurrences = getattr(record, 'num_occurrences', 'Unknown')
    all_meme_occurrences = [getattr(motif_record, 'num_occurrences', 'Unknown')
                            for motif_record in meme_record['motif_records']]

    meme_dir = os.path.abspath(os.path.dirname(meme_file))
    if not output_dir:
        output_dir = os.path.join(os.path.join(meme_dir, '..'), 'moca_plots')
    safe_makedir(output_dir)

    if len(sample_score_files) == 0:
        raise MocaException('Found no sample score files')
    elif len(control_score_files) == 0:
        raise MocaException('Found no control score filees')
    elif len(sample_score_files)!=len(control_score_files):
        raise MocaException('Found unequal size of sample and control score files')

    # The regression column is always present; enrichment and annotation
    # each add one more column when requested.
    subplot_ncols = 1

    if annotate == "" or annotate == ' ':
        annotate = None
    if annotate:
        # The annotation plot is drawn in the last column of the sub-grid,
        # so a column is reserved only when there is something to draw.
        subplot_ncols += 1

    max_occur = get_max_occuring_bases(record, max_count=1, count_type=COUNT_TYPE)
    motif_freq = np.asarray([position[0][1] for position in max_occur])

    sample_conservation_scores = [np.loadtxt(score_file)
                                  for score_file in sample_score_files]
    control_conservation_scores = [np.loadtxt(score_file)
                                   for score_file in control_score_files]

    motif_length = record.length
    motif_evalue = record.evalue

    X_values = [40+15] ## this is by trial and error, the position for the first base logo
    ## Generate all other X coordinates
    for _ in range(1, len(record)+2*flank_length):
        X_values.append(X_values[-1]+OFFSET+1.9)

    if centrimo_dir:
        subplot_ncols += 1
        centrimo_dir = os.path.abspath(centrimo_dir)
        centrimo_txt = os.path.join(centrimo_dir, 'centrimo.txt')
        centrimo_stats = os.path.join(centrimo_dir, 'site_counts.txt')

    plot_title += r' \# {}'.format(motif_number)
    title = r'\textbf{' + '\\underline{'+'{}'.format(plot_title)+'}}'

    ##FIXME This is a big dirty hack to generate plots for the reverse complement logo too
    # (logo file name, is reverse complement); the forward logo comes first.
    logo_variants = [('logo{}.png'.format(motif_number), False),
                     ('logo_rc{}.png'.format(motif_number), True)]
    figures = []
    # zip stops at the shortest input, so score pairs without a matching
    # entry in reg_plot_titles are not plotted.
    for sample_score, control_score, subplot_legend_title in zip(sample_conservation_scores,
                                                                 control_conservation_scores,
                                                                 reg_plot_titles):
        for logo_filename, is_reverse_complement in logo_variants:
            setup_matplotlib()
            if is_reverse_complement:
                # NOTE(review): only the sample scores are reversed here;
                # control_score and motif_freq keep their forward order.
                sample_score = sample_score[::-1]
            matplot_dict = init_figure(meme_dir=meme_dir, X_values=X_values,
                                       motif=motif_number,
                                       subplot_ncols=subplot_ncols, annotate=annotate)
            f = matplot_dict['figure']
            gs = matplot_dict['gs']
            right_margin = matplot_dict['right_margin']

            f.suptitle(title, fontsize=LEGEND_FONTSIZE)
            logo_plot = create_logo_plot({'figure':f, 'gridspec': gs[0]}, meme_dir, logo_filename, motif_length)

            subgrid = gridspec.GridSpec(2, subplot_ncols, height_ratios=[1,2], width_ratios=[1]*subplot_ncols)
            subgrid.update(bottom=0.14, right=0.9, left=1-right_margin*0.85, wspace=0.58)
            X_left, X_center, X_right = create_stemplot({'figure': f,
                                                         'gridspec': gs[1],
                                                         'shareX': logo_plot},
                                                        X_values,
                                                        sample_score,
                                                        motif_length,
                                                        flank_length=flank_length,
                                                        legend_title=subplot_legend_title)

            create_bar_plot(logo_plot, X_right, matplot_dict['height_px'],
                            total_sequences, all_meme_occurrences, motif_number, motif_evalue)
            create_ols_legend_plot({'figure':f, 'gridspec': subgrid[0,0]}, motif_freq,
                                   sample_score, control_score,
                                   flank_length, legend_title=subplot_legend_title)
            create_scatter_plot({'figure':f, 'gridspec': subgrid[1,0]}, motif_freq,
                                sample_score, control_score,
                                flank_length, num_occurrences, y_label=subplot_legend_title)

            if centrimo_dir:
                create_enrichment_plot({'figure': f,
                                        'gridspec_header': subgrid[0,1],
                                        'gridspec_body': subgrid[1,1]},
                                       motif_number,
                                       centrimo_txt,
                                       centrimo_stats)

            if annotate:
                create_annnotation_plot({'figure': f,
                                         'gridspec_header': subgrid[0,-1],
                                         'gridspec_body': subgrid[1,-1]},
                                        annotate)

            if is_reverse_complement:
                out_file = os.path.join(output_dir,'moca_{}_{}_rc.png'.format(subplot_legend_title, motif_number))
            else:
                out_file = os.path.join(output_dir,'moca_{}_{}.png'.format(subplot_legend_title, motif_number))

            if save:
                # figsize is not a savefig option; the figure's size is
                # whatever init_figure set up.
                f.savefig(out_file, dpi=DPI)
            figures.append(f)
            plt.close('all')
    return figures