def get_structure_motif_fig(filter_weights, filter_outs, out_dir, protein, seq_targets, sample_i=0, structure=None):
    """Write per-filter structure-motif plots and a MEME motif file.

    For every filter ``f`` (first axis of ``filter_weights``) the following
    files are produced through ``structure_motifs``:

    * ``<out_dir>/filter<f>_heat.pdf``   -- heatmap of the filter weights
    * ``<out_dir>/filter<f>_possum.txt`` -- possum motif file
    * ``<out_dir>/filter<f>_logo*``      -- logo of high scoring outputs
    * ``<out_dir>/filters_meme.txt``     -- MEME file with one entry per filter

    Args:
        filter_weights: array indexed as ``filter_weights[f, :, :]``.
        filter_outs: array indexed as ``filter_outs[:, :, f]``.  Its first
            axis is sub-selected with ``sample_i`` together with
            ``structure``, so it is presumably the per-sample axis.
        out_dir: directory path used as prefix for every output file.
        protein: unused; kept so existing callers keep working.
        seq_targets: unused; kept so existing callers keep working.
        sample_i: optional collection (list / index array) of sample indices
            to keep.  The default ``0`` or an empty collection disables
            sampling.
        structure: per-sample structure annotations handed to the MEME
            header and the logo plot (presumably strings -- confirm against
            ``structure_motifs``).

    Returns:
        None.  All results are written to ``out_dir``.
    """
    print('plot motif fig %s' % out_dir)

    seqs = structure

    # ``sample_i`` may be a list or a NumPy index array.  ``if sample_i:`` is
    # ambiguous for multi-element arrays, so test the length and fall back to
    # plain truthiness only for scalars such as the default 0.
    try:
        use_sample = len(sample_i) > 0
    except TypeError:
        use_sample = bool(sample_i)

    if use_sample:
        print('sampling')
        # Index the *original* annotations in ``sample_i`` order so they stay
        # aligned row-for-row with ``filter_outs[sample_i]``.
        seqs = [seqs[i] for i in sample_i]
        filter_outs = filter_outs[sample_i]

    num_filters = filter_weights.shape[0]
    # Passed to plot_filter_logo; hard-coded rather than read from
    # filter_weights.shape[2].
    filter_size = 7

    # Information content per filter (computed here but not returned).
    filters_ic = []
    meme_out = structure_motifs.meme_intro('%s/filters_meme.txt' % out_dir, seqs)
    try:
        for f in range(num_filters):
            print('Filter %d' % f)
            weights = filter_weights[f, :, :]

            # plot filter parameters as a heatmap
            structure_motifs.plot_filter_heat(
                weights, '%s/filter%d_heat.pdf' % (out_dir, f))

            # write possum motif file
            structure_motifs.filter_possum(
                weights, 'filter%d' % f,
                '%s/filter%d_possum.txt' % (out_dir, f), False)

            # plot logo of high scoring outputs
            structure_motifs.plot_filter_logo(
                filter_outs[:, :, f], filter_size, seqs,
                '%s/filter%d_logo' % (out_dir, f), maxpct_t=0.5)

            # make a PWM for the filter from the sites written by the logo step
            filter_pwm, nsites = structure_motifs.make_filter_pwm(
                '%s/filter%d_logo.fa' % (out_dir, f))

            if nsites < 10:
                # too few sites: treat as no information
                filters_ic.append(0)
            else:
                # NOTE(review): unqualified ``info_content`` must exist at
                # module level; a sibling function uses
                # ``structure_motifs.info_content`` -- confirm which is intended.
                filters_ic.append(info_content(filter_pwm))

                # add to the meme motif file
                structure_motifs.meme_add(meme_out, f, filter_pwm, nsites, False)
    finally:
        # Always release the MEME file handle, even if a plot step fails.
        meme_out.close()
def get_motif_fig_new(filter_weights, filter_outs, out_dir, seqs, sample_i=0):
    """Plot, annotate and tabulate the motifs learned by each filter.

    Steps, all writing below ``out_dir``:

    1. Per filter ``f``: ``filter<f>_heat.pdf``, ``filter<f>_possum.txt``,
       ``filter<f>_logo*`` and an entry in ``filters_meme.txt``.
    2. Run ``tomtom`` inside the Docker container ``memesuite`` against
       ``Ray2013_rbp_RNA.meme`` (output in ``<out_dir>/tomtom``) and read the
       resulting annotations with ``structure_motifs.name_filters``.
    3. Write ``table.txt`` (consensus, annotation, information content, mean
       and std of the filter outputs) and ``filter<f>_dens.pdf`` per filter.
    4. Write the filter/sequence heatmap ``filter_seqs.pdf``.

    Args:
        filter_weights: array indexed as ``filter_weights[f, :, :]``.
        filter_outs: array indexed as ``filter_outs[:, :, f]``.  Its first
            axis is sub-selected with ``sample_i`` together with ``seqs``, so
            it is presumably the per-sequence axis.
        out_dir: directory path used as prefix for every output file.
        seqs: sequences matching the first axis of ``filter_outs``.
        sample_i: optional collection (list / index array) of sequence
            indices to keep.  The default ``0`` or an empty collection
            disables sampling.

    Returns:
        None.  All results are written to ``out_dir``.
    """
    print('plot motif fig %s' % out_dir)

    # ``sample_i`` may be a list or a NumPy index array.  ``if sample_i:`` is
    # ambiguous for multi-element arrays, so test the length and fall back to
    # plain truthiness only for scalars such as the default 0.
    try:
        use_sample = len(sample_i) > 0
    except TypeError:
        use_sample = bool(sample_i)

    if use_sample:
        print('sampling')
        # Index the *original* sequences in ``sample_i`` order so they stay
        # aligned row-for-row with ``filter_outs[sample_i]``.
        seqs = [seqs[i] for i in sample_i]
        filter_outs = filter_outs[sample_i]

    num_filters = filter_weights.shape[0]
    # Passed to plot_filter_logo; hard-coded rather than read from
    # filter_weights.shape[2].
    filter_size = 7

    #################################################################
    # individual filter plots
    #################################################################
    # also save information contents
    filters_ic = []
    # NOTE(review): ``meme_intro`` is referenced unqualified while every other
    # helper comes from ``structure_motifs`` -- confirm it exists at module level.
    meme_out = meme_intro('%s/filters_meme.txt' % out_dir, seqs)
    try:
        for f in range(num_filters):
            print('Filter %d' % f)
            weights = filter_weights[f, :, :]

            # plot filter parameters as a heatmap
            structure_motifs.plot_filter_heat(
                weights, '%s/filter%d_heat.pdf' % (out_dir, f))

            # write possum motif file
            structure_motifs.filter_possum(
                weights, 'filter%d' % f,
                '%s/filter%d_possum.txt' % (out_dir, f), False)

            # plot weblogo of high scoring outputs
            structure_motifs.plot_filter_logo(
                filter_outs[:, :, f], filter_size, seqs,
                '%s/filter%d_logo' % (out_dir, f), maxpct_t=0.5)

            # make a PWM for the filter
            filter_pwm, nsites = structure_motifs.make_filter_pwm(
                '%s/filter%d_logo.fa' % (out_dir, f))

            if nsites < 10:
                # too few sites: treat as no information
                filters_ic.append(0)
            else:
                # compute and save information content
                filters_ic.append(structure_motifs.info_content(filter_pwm))

                # add to the meme motif file
                structure_motifs.meme_add(meme_out, f, filter_pwm, nsites, False)
    finally:
        # Always release the MEME file handle, even if a plot step fails.
        meme_out.close()

    #################################################################
    # annotate filters
    #################################################################
    motif_db = 'Ray2013_rbp_RNA.meme'

    # Run tomtom inside the ``memesuite`` container.  Argument lists are used
    # instead of shell strings so an ``out_dir`` containing spaces or shell
    # metacharacters is passed through verbatim.
    sp.check_output(['docker', 'start', 'memesuite'])
    try:
        # ``docker exec`` expects CONTAINER before COMMAND; without the
        # container name docker would treat ``tomtom`` as the container and
        # ``-dist`` as the command to run.
        sp.call([
            'docker', 'exec', '-it', 'memesuite',
            'tomtom', '-dist', 'pearson', '-thresh', '0.05', '-eps',
            '-oc', '%s/tomtom' % out_dir,
            '%s/filters_meme.txt' % out_dir,
            motif_db,
        ])
    finally:
        # Stop the container even if the exec step could not be launched.
        sp.check_output(['docker', 'stop', 'memesuite'])

    # read in annotations
    filter_names = structure_motifs.name_filters(
        num_filters, '%s/tomtom/tomtom.txt' % out_dir, motif_db)

    #################################################################
    # print a table of information
    #################################################################
    with open('%s/table.txt' % out_dir, 'w') as table_out:
        # header for later pandas reading
        header_cols = ('', 'consensus', 'annotation', 'ic', 'mean', 'std')
        table_out.write('%3s %19s %10s %5s %6s %6s' % header_cols + '\n')

        for f in range(num_filters):
            # collapse to a consensus motif
            consensus = structure_motifs.filter_motif(filter_weights[f, :, :])

            # grab annotation: second '_'-separated piece of the filter name,
            # or '.' when the name has no such piece
            name_pieces = filter_names[f].split('_')
            annotation = name_pieces[1] if len(name_pieces) > 1 else '.'

            # plot density of filter output scores
            fmean, fstd = structure_motifs.plot_score_density(
                np.ravel(filter_outs[:, :, f]),
                '%s/filter%d_dens.pdf' % (out_dir, f))

            row_cols = (f, consensus, annotation, filters_ic[f], fmean, fstd)
            table_out.write('%-3d %19s %10s %5.2f %6.4f %6.4f' % row_cols + '\n')

    # Transpose each per-sequence matrix before handing the stack to the
    # filter-sequence heatmap.
    filter_outs = np.array([val.T for val in filter_outs])
    print(filter_outs.shape)

    # plot filter-sequence heatmap
    structure_motifs.plot_filter_seq_heat(filter_outs, '%s/filter_seqs.pdf' % out_dir)