def download_idr_tfs(root_dir, metadata):
    """Download the IDR peak file of a sample together with its metadata.

    The IDR records are obtained from ``fetch_idr_record(metadata)``. For the
    single expected record a directory ``<root_dir>/<dataset>`` is created,
    the peak file is downloaded into it and the record is stored next to it
    through ``save_metadata_json``.

    Parameters
    ----------
    root_dir: string
        Directory under which the per-dataset directory is created
    metadata: object
        Passed unchanged to ``fetch_idr_record``

    Returns
    -------
    dict or None
        ``{'assembly': <record assembly>, 'bedfile': <path>}`` where the path
        is the downloaded file name with ``.gz`` removed, located inside the
        dataset directory. ``None`` when no IDR record was found.

    Raises
    ------
    AssertionError
        If more than one IDR record is returned for the sample
    """
    idr_records = fetch_idr_record(metadata)

    # Nothing to download. Previously this case crashed with an IndexError
    # while trying to print the dataset of a record that does not exist.
    if not idr_records:
        return None

    # There is only one IDR per sample
    if len(idr_records) > 1:
        print(idr_records[0]['dataset'])
        # Raised explicitly (instead of a bare ``assert``) so that the check
        # is still enforced when Python runs with -O.
        raise AssertionError(
            'Expected at most one IDR record, found {}'.format(len(idr_records)))

    idr_record = idr_records[0]
    dataset = idr_record['dataset']
    peakfilename = idr_record['peakfilename'] + '.bed.gz'
    dataset_dir = os.path.join(root_dir, dataset)
    safe_makedir(dataset_dir)

    source_url = __base_url__ + idr_record['href']
    print(source_url)
    download_peakfile(source_url, peakfilename, dataset_dir)
    save_metadata_json(idr_record, dataset_dir)

    return {'assembly': idr_record['assembly'],
            'bedfile': os.path.join(dataset_dir,
                                    peakfilename.replace('.gz', ''))}
def create_plot(meme_file,
                plot_title,
                output_dir=None,
                centrimo_dir=None,
                motif_number=1,
                flank_length=5,
                sample_score_files=[],
                control_score_files=[],
                reg_plot_titles=[],
                annotate=None,
                save=True):
    """Create the summary figures of one motif.

    For every (sample scores, control scores, title) triple two figures are
    built: one for the motif logo (``logo<N>.png``) and one for its reverse
    complement (``logo_rc<N>.png``). Each figure holds the logo, a stem plot
    of the sample scores, a bar plot, an OLS legend, a scatter plot and,
    optionally, a centrimo enrichment panel and an annotation panel.

    Parameters
    ----------
    meme_file: string
        Path to meme.txt. The logo images are looked up in its directory
    plot_title: string
        Figure title; ' \\# <motif_number>' is appended to it
    output_dir: string
        Directory the figures are written to. Defaults to
        ``<meme directory>/../moca_plots``. It is created if missing
    centrimo_dir: string
        Path to centrimo's output directory, expected to contain
        ``centrimo.txt`` and ``site_counts.txt``. When given, an enrichment
        panel is added
    motif_number: int
        1-based number of motif in the motif file
    flank_length: int
        Number of flanking positions on each side of the motif
    sample_score_files: list
        Path to conservation scores files for sample (read with
        ``numpy.loadtxt``)
    control_score_files: list
        Path to conservation score files for control (read with
        ``numpy.loadtxt``). Must have as many entries as
        ``sample_score_files``
    reg_plot_titles: list
        One title per sample/control pair. It is used as legend title, as
        y label of the scatter plot and in the output file name. The three
        lists are combined with ``zip``, so no figure is produced for pairs
        without a title (in particular none at all with the default ``[]``)
    annotate: object
        Annotation handed to ``init_figure`` and ``create_annnotation_plot``.
        ``""`` and ``' '`` are treated like ``None``
    save: bool
        Write every figure to
        ``<output_dir>/moca_<title>_<motif_number>[_rc].png``

    Returns
    -------
    figures: list
        The created figures, in creation order. ``plt.close('all')`` has
        already been called after each of them

    Raises
    ------
    MocaException
        If no sample or control score files are given, or if their numbers
        differ
    """
    # NOTE: the list defaults in the signature are shared between calls; they
    # are only ever read here, never mutated.
    meme_record = read_memefile(meme_file)
    total_sequences = get_total_sequences(meme_file)
    motif_records = meme_record['motif_records']
    record = motif_records[motif_number - 1]
    num_occurrences = getattr(record, 'num_occurrences', 'Unknown')
    all_meme_occurrences = [getattr(motif_record, 'num_occurrences', 'Unknown')
                            for motif_record in motif_records]

    meme_dir = os.path.abspath(os.path.dirname(meme_file))
    if not output_dir:
        output_dir = os.path.join(os.path.join(meme_dir, '..'), 'moca_plots')
    safe_makedir(output_dir)

    subplot_ncols = 1

    if len(sample_score_files) == 0:
        raise MocaException('Found no sample score files')
    elif len(control_score_files) == 0:
        raise MocaException('Found no control score filees')
    elif len(sample_score_files) != len(control_score_files):
        raise MocaException('Found unequal size of sample and control score files')

    if annotate == "" or annotate == ' ':
        annotate = None
        # NOTE(review): the extra column is added for a *blank* annotation
        # only, while a real annotation is later drawn into the last column
        # without reserving one. This looks inverted - confirm the intent
        # before relying on the layout.
        subplot_ncols += 1

    max_occur = get_max_occuring_bases(record, max_count=1, count_type=COUNT_TYPE)
    # Second field of the top entry at every motif position
    motif_freq = np.asarray([position[0][1] for position in max_occur])

    sample_conservation_scores = [np.loadtxt(score_file)
                                  for score_file in sample_score_files]
    control_conservation_scores = [np.loadtxt(score_file)
                                   for score_file in control_score_files]

    motif_length = record.length
    motif_evalue = record.evalue

    ## this is by trial and error, the position for the first base logo
    X_values = [40 + 15]
    ## Generate all other X coordinates
    for _ in range(1, len(record) + 2 * flank_length):
        X_values.append(X_values[-1] + OFFSET + 1.9)

    if centrimo_dir:
        subplot_ncols += 1
        centrimo_dir = os.path.abspath(centrimo_dir)
        centrimo_txt = os.path.join(centrimo_dir, 'centrimo.txt')
        centrimo_stats = os.path.join(centrimo_dir, 'site_counts.txt')

    plot_title += r' \# {}'.format(motif_number)
    # The title does not change between figures, so it is built once
    title = r'\textbf{' + '\\underline{' + '{}'.format(plot_title) + '}}'

    ##FIXME This is a big dirty hack to generate plots for the
    ## reverse complement logo too
    logo_names = ['logo{}.png'.format(motif_number),
                  'logo_rc{}.png'.format(motif_number)]

    figures = []
    score_sets = zip(sample_conservation_scores,
                     control_conservation_scores,
                     reg_plot_titles)
    for sample_score, control_score, subplot_legend_title in score_sets:
        for logo_filename in logo_names:
            is_reverse_complement = 'rc' in logo_filename
            setup_matplotlib()
            if is_reverse_complement:
                # Only the sample scores are flipped; the control scores are
                # used as loaded for both orientations.
                sample_score = sample_score[::-1]

            matplot_dict = init_figure(meme_dir=meme_dir,
                                       X_values=X_values,
                                       motif=motif_number,
                                       subplot_ncols=subplot_ncols,
                                       annotate=annotate)
            f = matplot_dict['figure']
            gs = matplot_dict['gs']
            right_margin = matplot_dict['right_margin']

            f.suptitle(title, fontsize=LEGEND_FONTSIZE)
            logo_plot = create_logo_plot({'figure': f, 'gridspec': gs[0]},
                                         meme_dir,
                                         logo_filename,
                                         motif_length)

            # Two rows (header, body) and one column per enabled panel
            subgrid = gridspec.GridSpec(2, subplot_ncols,
                                        height_ratios=[1, 2],
                                        width_ratios=[1] * subplot_ncols)
            subgrid.update(bottom=0.14,
                           right=0.9,
                           left=1 - right_margin * 0.85,
                           wspace=0.58)

            X_left, X_center, X_right = create_stemplot(
                {'figure': f, 'gridspec': gs[1], 'shareX': logo_plot},
                X_values,
                sample_score,
                motif_length,
                flank_length=flank_length,
                legend_title=subplot_legend_title)

            create_bar_plot(logo_plot,
                            X_right,
                            matplot_dict['height_px'],
                            total_sequences,
                            all_meme_occurrences,
                            motif_number,
                            motif_evalue)

            create_ols_legend_plot({'figure': f, 'gridspec': subgrid[0, 0]},
                                   motif_freq,
                                   sample_score,
                                   control_score,
                                   flank_length,
                                   legend_title=subplot_legend_title)

            create_scatter_plot({'figure': f, 'gridspec': subgrid[1, 0]},
                                motif_freq,
                                sample_score,
                                control_score,
                                flank_length,
                                num_occurrences,
                                y_label=subplot_legend_title)

            if centrimo_dir:
                create_enrichment_plot({'figure': f,
                                        'gridspec_header': subgrid[0, 1],
                                        'gridspec_body': subgrid[1, 1]},
                                       motif_number,
                                       centrimo_txt,
                                       centrimo_stats)

            if is_reverse_complement:
                out_name = 'moca_{}_{}_rc.png'.format(subplot_legend_title,
                                                      motif_number)
            else:
                out_name = 'moca_{}_{}.png'.format(subplot_legend_title,
                                                   motif_number)
            out_file = os.path.join(output_dir, out_name)

            if annotate:
                create_annnotation_plot({'figure': f,
                                         'gridspec_header': subgrid[0, -1],
                                         'gridspec_body': subgrid[1, -1]},
                                        annotate)

            if save:
                # ``figsize`` is not a savefig option: it was silently
                # ignored by old matplotlib and is rejected by recent
                # releases, so only ``dpi`` is passed. The figure size is
                # whatever ``init_figure`` set up.
                f.savefig(out_file, dpi=DPI)

            figures.append(f)
            plt.close('all')

    return figures