def plot_results(res, path):
    """Save summary plots for a table of mapping results.

    Nothing is done when `res` is None or has length zero. Otherwise the
    results are pivoted into counts indexed by 'name' and 'ref', the mapped
    fractions are computed and printed, and these figures are written
    into `path`:

    * libraries_mapped.png
    * total_per_sample.png
    * top_mapped.png
    * top_<ref>.png for every 'ref' group whose name contains 'mirbase'

    Always returns None.
    """
    if res is None or len(res) == 0:
        return

    counts = base.pivot_count_data(res, idxcols=['name', 'ref'])
    fractions = base.get_fractions_mapped(res)
    print(fractions)

    # seaborn is imported lazily, only once there is something to plot
    import seaborn as sns
    sns.set_style('white')
    sns.set_context("paper", font_scale=1.2)

    def save(figure, filename):
        # every figure lands in the same output folder
        figure.savefig(os.path.join(path, filename))

    save(plotting.plot_fractions(fractions), 'libraries_mapped.png')
    save(plotting.plot_sample_counts(counts), 'total_per_sample.png')
    save(plotting.plot_read_count_dists(counts), 'top_mapped.png')

    # the column names are fetched but only the (disabled) clustermap
    # below would have used them
    _scols, _ncols = base.get_column_names(counts)
    for ref, group in counts.groupby('ref'):
        if 'mirbase' not in ref:
            continue
        save(plotting.plot_read_count_dists(group), 'top_%s.png' % ref)
    # disabled: expression clustermap ('expr_map.png') when there is
    # more than one sample column
    return
def map_genomic_features(self):
    """Map reads to the reference genome and count them per feature.

    A single set of features (`self.features`) is used together with the
    reference genome named by `self.ref_name`; an Ensembl GTF carrying
    biotype information gives the best results.

    When `self.ref_name` is the empty string a message is printed and the
    method returns without mapping. Otherwise the mapped features are
    written to 'features_found.csv' and the pivoted counts to
    'feature_counts.csv' inside `self.output`, and the feature plots are
    produced via `plot_feature_results`. Always returns None.
    """
    out_dir = self.output
    tmp_dir = self.temp_path
    ref_name = self.ref_name
    features = self.features

    # aligner options are looked up per reference, empty when none are set
    aligner_opts = (self.aligner_params[ref_name]
                    if ref_name in self.aligner_params else '')

    if ref_name == '':
        print('you need to provide a reference genome')
        return

    print()
    print('found features files %s' % features)
    print('mapping to reference genome')
    res = base.map_genome_features(
        self.files, ref_name, features,
        outpath=tmp_dir,
        aligner=self.aligner,
        aligner_params=aligner_opts,
    )
    counts = base.pivot_count_data(
        res, idxcols=['name', 'gene_name', 'gene_biotype'])

    found_csv = os.path.join(out_dir, 'features_found.csv')
    res.to_csv(found_csv, index=False)
    counts_csv = os.path.join(out_dir, 'feature_counts.csv')
    counts.to_csv(counts_csv, index=False)
    print('results saved to feature_counts.csv')

    plot_feature_results(res, out_dir)
    return
def plot_results(res, path):
    """Produce the standard result plots and save them under `path`.

    Returns immediately (None) if `res` is None or empty. Otherwise it
    pivots `res` into per-sample counts keyed on 'name' and 'ref', prints
    the mapped fractions, and saves: libraries_mapped.png,
    total_per_sample.png, top_mapped.png and one top_<ref>.png for each
    'ref' group whose label contains 'mirbase'.
    """
    nothing_to_plot = res is None or len(res) == 0
    if nothing_to_plot:
        return

    counts = base.pivot_count_data(res, idxcols=['name', 'ref'])
    mapped = base.get_fractions_mapped(res)
    print(mapped)

    # plotting style is configured only when plots are actually made
    import seaborn as sns
    sns.set_style('white')
    sns.set_context("paper", font_scale=1.2)

    fractions_fig = plotting.plot_fractions(mapped)
    fractions_png = os.path.join(path, 'libraries_mapped.png')
    fractions_fig.savefig(fractions_png)

    totals_fig = plotting.plot_sample_counts(counts)
    totals_png = os.path.join(path, 'total_per_sample.png')
    totals_fig.savefig(totals_png)

    top_fig = plotting.plot_read_count_dists(counts)
    top_png = os.path.join(path, 'top_mapped.png')
    top_fig.savefig(top_png)

    # retrieved for the clustermap step, which is currently switched off
    sample_cols, other_cols = base.get_column_names(counts)

    for label, subset in counts.groupby('ref'):
        if 'mirbase' in label:
            ref_fig = plotting.plot_read_count_dists(subset)
            ref_png = os.path.join(path, 'top_%s.png' % label)
            ref_fig.savefig(ref_png)
    # clustermap of expression ('expr_map.png') intentionally left disabled
    return
def plot_feature_results(res, path):
    """Save the plots that summarise feature counting results.

    Does nothing when `res` is None or has length zero. Otherwise the
    results are pivoted into counts indexed by 'name', 'gene_name' and
    'gene_biotype', the mapped fractions grouped by 'gene_biotype' and
    'label' are printed, and three figures are written into `path`:
    features_mapped.png, total_features_per_sample.png and
    top_feature_counts.png. Always returns None.
    """
    if res is None or len(res) == 0:
        return

    index_columns = ['name', 'gene_name', 'gene_biotype']
    counts = base.pivot_count_data(res, idxcols=index_columns)
    fractions = base.get_fractions_mapped(res, by=['gene_biotype', 'label'])
    print(fractions)

    def target(filename):
        # full path of an output image inside the results folder
        return os.path.join(path, filename)

    figure = plotting.plot_fractions(fractions)
    figure.savefig(target('features_mapped.png'))

    figure = plotting.plot_sample_counts(counts)
    figure.savefig(target('total_features_per_sample.png'))

    figure = plotting.plot_read_count_dists(counts)
    figure.savefig(target('top_feature_counts.png'))
    return
def map_mirnas(self):
    """Map miRNAs using mirbase with isomir counts and do novel prediction
    if a reference genome and index is provided.

    Steps, in order:

    1. Map all files against the configured libraries and the mature
       miRBase set for `self.species`; store the raw results on
       `self.results` and write 'results.csv', 'mirbase_mature_counts.csv'
       and 'all_counts.csv' into `self.output`.
    2. Compute the mapped fractions per sample (kept on `self.samples`),
       print them and save the result plots.
    3. Count isomirs and write 'isomir_counts.csv' when any were counted.
    4. Predict novel miRNAs when a reference fasta, a reference index and
       the Vienna RNA package are all available; writes 'novel_mirna.csv',
       'novel.fa', 'novel.html' and 'novel_mirna_counts.csv'.

    Returns None. The method returns early from step 4 when no novel
    miRNAs are found.
    """
    out = self.output
    libraries = self.libraries
    temp = self.temp_path
    ref_name = self.ref_name
    mat_name = 'mirbase-%s' % self.species
    self.aligner_params[mat_name] = self.mirna_params
    novel.VERBOSE = self.verbose

    # explicit comparison kept: the return contract of check_index is
    # not visible from here
    if self.check_index(ref_name) == False:
        print('no index for reference genome')
        ref_name = ''

    print('mapping miRNAs..')
    res, counts = base.map_mirbase(self.files, outpath=temp, indexes=libraries,
                                   species=self.species, ref_genome=ref_name,
                                   pad5=self.pad5, pad3=self.pad3,
                                   aligner=self.aligner,
                                   samplelabels=self.labels,
                                   params=self.aligner_params,
                                   verbose=self.verbose)
    self.results = res

    # separate out mature counts and save
    matcounts = counts[counts.ref == mat_name]
    res.to_csv(os.path.join(out, 'results.csv'), index=False)
    # reads mapped to the reference genome itself are left out of the plots
    res = res[res.ref != ref_name]
    matcounts.to_csv(os.path.join(out, 'mirbase_mature_counts.csv'),
                     index=False, float_format='%.1f')
    counts.to_csv(os.path.join(out, 'all_counts.csv'),
                  index=False, float_format='%.1f')

    # get fractions per sample and plot results
    self.samples = s = base.get_fractions_mapped(res)
    print(s)
    # plot_results takes (res, path) and derives counts and fractions
    # itself; the earlier three-argument call raised a TypeError
    plot_results(res, out)

    # isomir counting
    print()
    print('counting isomirs..')
    iso, isocounts = base.map_isomirs(self.files, temp, self.species,
                                      samplelabels=self.labels)
    if isocounts is not None:
        isocounts.to_csv(os.path.join(out, 'isomir_counts.csv'),
                         index=False, float_format='%.1f')
    else:
        print('no isomirs could be counted')

    # novel prediction
    # train classifier first if not present
    novel.create_classifier()
    if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
        print('no reference genome file, skipping novel mirna step')
    elif not ref_name:
        print('no index for ref genome, required for novel mirna step')
    elif check_viennarna() == False:
        print('Vienna RNA package not installed')
        print('see https://www.tbi.univie.ac.at/RNA/')
    else:
        print()
        print('predicting novel mirnas..')
        start = time.time()
        # change map_rnas so it can use remaining files from previous run....?
        allreads = utils.combine_aligned_reads(temp, idx=ref_name)
        new, cl = novel.find_mirnas(allreads, self.ref_fasta,
                                    species=self.species,
                                    score_cutoff=float(self.score_cutoff),
                                    read_cutoff=int(self.read_cutoff),
                                    cpus=self.cpus)
        if new is None or len(new) == 0:
            print('Could not find any novel mirnas.')
            print('There may not be sufficient aligned reads or the score cutoff is too high.\n')
            return
        # explicit comparison kept: the type of self.strict is not
        # visible from here
        if self.strict == True:
            new = new[new.mature_check == 'ok']
            print('filtered %s' % len(new))
        new.to_csv(os.path.join(out, 'novel_mirna.csv'), index=False)

        # pad mature novel and write to fasta for counting
        novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature')
        novpad = novpad.drop_duplicates('name')
        utils.dataframe_to_fasta(novpad, os.path.join(out, 'novel.fa'),
                                 seqkey='sequence', idkey='name')
        novel.create_report(new, cl, self.species,
                            outfile=os.path.join(out, 'novel.html'))

        # now count novel mirnas for all samples
        build_indexes(os.path.join(out, 'novel.fa'), self.index_path)
        r, nc = base.map_rnas(self.files, ['novel'], self.temp_path,
                              aligner=self.aligner, samplelabels=self.labels)
        nc.to_csv(os.path.join(out, 'novel_mirna_counts.csv'), index=False)
        end = round(time.time() - start, 1)
        print('took %s seconds' % str(end))
    return