Python pivot_count_data示例，smallrnaseq.base.pivot_count_data Python示例

示例#1

0

显示文件

文件： app.py 项目： dmnfarrell/mirnaseq

def plot_results(res, path):
    """Save summary plots for a set of mapping results as PNG files in ``path``.

    Nothing is drawn when ``res`` is ``None`` or empty. Otherwise the results
    are pivoted into counts indexed by 'name' and 'ref', the mapped fractions
    are printed, and the following files are written:

    * libraries_mapped.png - plot of the mapped fractions
    * total_per_sample.png - plot of the sample counts
    * top_mapped.png - read count distributions over all counts
    * top_<ref>.png - one per 'ref' group whose label contains 'mirbase'

    Args:
        res: results table passed to ``base.pivot_count_data`` and
            ``base.get_fractions_mapped``.
        path: folder the PNG files are written to.
    """

    if res is None or len(res) == 0:
        return

    def save(figure, filename):
        # every plot ends up as a file inside the output folder
        figure.savefig(os.path.join(path, filename))

    counts = base.pivot_count_data(res, idxcols=['name', 'ref'])
    fractions = base.get_fractions_mapped(res)
    print(fractions)

    # seaborn is imported lazily, only once there is something to plot
    import seaborn as sns
    sns.set_style('white')
    sns.set_context("paper", font_scale=1.2)

    save(plotting.plot_fractions(fractions), 'libraries_mapped.png')
    save(plotting.plot_sample_counts(counts), 'total_per_sample.png')
    save(plotting.plot_read_count_dists(counts), 'top_mapped.png')

    # the column names were only consumed by an expression clustermap
    # (expr_map.png) that is currently disabled
    scols, ncols = base.get_column_names(counts)

    for ref_label, subset in counts.groupby('ref'):
        if 'mirbase' not in ref_label:
            continue
        save(plotting.plot_read_count_dists(subset), 'top_%s.png' % ref_label)

示例#2

0

显示文件

文件： app.py 项目： dmnfarrell/mirnaseq

    def map_genomic_features(self):
        """Count reads against the annotated features of the reference genome.

        The original note recommends an ensembl gtf with biotype information
        for best results. When no reference genome name is set, a message is
        printed and nothing else happens. Otherwise the input files are mapped,
        the counts are pivoted by 'name', 'gene_name' and 'gene_biotype', and
        two tables are written to the output folder: features_found.csv and
        feature_counts.csv. The feature plots are drawn last.
        """

        out_dir = self.output
        work_dir = self.temp_path
        ref_name = self.ref_name
        features = self.features
        # aligner options are looked up per reference; empty string if none set
        params = self.aligner_params[ref_name] if ref_name in self.aligner_params else ''
        if ref_name == '':
            print('you need to provide a reference genome')
            return

        print()
        print('found features files %s' % features)
        print('mapping to reference genome')
        found = base.map_genome_features(
            self.files, ref_name, features,
            outpath=work_dir, aligner=self.aligner, aligner_params=params,
        )
        counts = base.pivot_count_data(
            found, idxcols=['name', 'gene_name', 'gene_biotype'])

        found.to_csv(os.path.join(out_dir, 'features_found.csv'), index=False)
        counts.to_csv(os.path.join(out_dir, 'feature_counts.csv'), index=False)
        print('results saved to feature_counts.csv')

        plot_feature_results(found, out_dir)

示例#3

0

显示文件

    def map_genomic_features(self):
        """Count reads against the annotated features of the reference genome.

        The original note recommends an ensembl gtf with biotype information
        for best results. When no reference genome name is set, a message is
        printed and nothing else happens. Otherwise the input files are mapped,
        the counts are pivoted by 'name', 'gene_name' and 'gene_biotype', and
        two tables are written to the output folder: features_found.csv and
        feature_counts.csv. The feature plots are drawn last.
        """

        out_dir = self.output
        work_dir = self.temp_path
        ref_name = self.ref_name
        features = self.features
        # aligner options are looked up per reference; empty string if none set
        params = self.aligner_params[ref_name] if ref_name in self.aligner_params else ''
        if ref_name == '':
            print('you need to provide a reference genome')
            return

        print()
        print('found features files %s' % features)
        print('mapping to reference genome')
        found = base.map_genome_features(
            self.files, ref_name, features,
            outpath=work_dir, aligner=self.aligner, aligner_params=params,
        )
        counts = base.pivot_count_data(
            found, idxcols=['name', 'gene_name', 'gene_biotype'])

        found.to_csv(os.path.join(out_dir, 'features_found.csv'), index=False)
        counts.to_csv(os.path.join(out_dir, 'feature_counts.csv'), index=False)
        print('results saved to feature_counts.csv')

        plot_feature_results(found, out_dir)

示例#4

0

显示文件

文件： app.py 项目： vallurumk/smallrnaseq

def plot_results(res, path):
    """Save summary plots for a set of mapping results as PNG files in ``path``.

    Nothing is drawn when ``res`` is ``None`` or empty. Otherwise the results
    are pivoted into counts indexed by 'name' and 'ref', the mapped fractions
    are printed, and the following files are written:

    * libraries_mapped.png - plot of the mapped fractions
    * total_per_sample.png - plot of the sample counts
    * top_mapped.png - read count distributions over all counts
    * top_<ref>.png - one per 'ref' group whose label contains 'mirbase'

    Args:
        res: results table passed to ``base.pivot_count_data`` and
            ``base.get_fractions_mapped``.
        path: folder the PNG files are written to.
    """

    if res is None or len(res) == 0:
        return

    def save(figure, filename):
        # every plot ends up as a file inside the output folder
        figure.savefig(os.path.join(path, filename))

    counts = base.pivot_count_data(res, idxcols=['name', 'ref'])
    fractions = base.get_fractions_mapped(res)
    print(fractions)

    # seaborn is imported lazily, only once there is something to plot
    import seaborn as sns
    sns.set_style('white')
    sns.set_context("paper", font_scale=1.2)

    save(plotting.plot_fractions(fractions), 'libraries_mapped.png')
    save(plotting.plot_sample_counts(counts), 'total_per_sample.png')
    save(plotting.plot_read_count_dists(counts), 'top_mapped.png')

    # the column names were only consumed by an expression clustermap
    # (expr_map.png) that is currently disabled
    scols, ncols = base.get_column_names(counts)

    for ref_label, subset in counts.groupby('ref'):
        if 'mirbase' not in ref_label:
            continue
        save(plotting.plot_read_count_dists(subset), 'top_%s.png' % ref_label)

示例#5

0

显示文件

文件： app.py 项目： dmnfarrell/mirnaseq

def plot_feature_results(res, path):
    """Save summary plots of feature counting results as PNG files in ``path``.

    Nothing is drawn when ``res`` is ``None`` or empty. Otherwise the results
    are pivoted into counts indexed by 'name', 'gene_name' and 'gene_biotype',
    the fractions mapped by 'gene_biotype' and 'label' are printed, and three
    files are written: features_mapped.png, total_features_per_sample.png and
    top_feature_counts.png.

    Args:
        res: results table passed to ``base.pivot_count_data`` and
            ``base.get_fractions_mapped``.
        path: folder the PNG files are written to.
    """

    if res is None or len(res) == 0:
        return

    def save(figure, filename):
        # every plot ends up as a file inside the output folder
        figure.savefig(os.path.join(path, filename))

    counts = base.pivot_count_data(
        res, idxcols=['name', 'gene_name', 'gene_biotype'])
    fractions = base.get_fractions_mapped(res, by=['gene_biotype', 'label'])
    print(fractions)

    save(plotting.plot_fractions(fractions), 'features_mapped.png')
    save(plotting.plot_sample_counts(counts), 'total_features_per_sample.png')
    save(plotting.plot_read_count_dists(counts), 'top_feature_counts.png')

示例#6

0

显示文件

def plot_feature_results(res, path):
    """Save summary plots of feature counting results as PNG files in ``path``.

    Nothing is drawn when ``res`` is ``None`` or empty. Otherwise the results
    are pivoted into counts indexed by 'name', 'gene_name' and 'gene_biotype',
    the fractions mapped by 'gene_biotype' and 'label' are printed, and three
    files are written: features_mapped.png, total_features_per_sample.png and
    top_feature_counts.png.

    Args:
        res: results table passed to ``base.pivot_count_data`` and
            ``base.get_fractions_mapped``.
        path: folder the PNG files are written to.
    """

    if res is None or len(res) == 0:
        return

    def save(figure, filename):
        # every plot ends up as a file inside the output folder
        figure.savefig(os.path.join(path, filename))

    counts = base.pivot_count_data(
        res, idxcols=['name', 'gene_name', 'gene_biotype'])
    fractions = base.get_fractions_mapped(res, by=['gene_biotype', 'label'])
    print(fractions)

    save(plotting.plot_fractions(fractions), 'features_mapped.png')
    save(plotting.plot_sample_counts(counts), 'total_features_per_sample.png')
    save(plotting.plot_read_count_dists(counts), 'top_feature_counts.png')

示例#7

0

显示文件

    def map_mirnas(self):
        """Map miRNAs using mirbase with isomir counts and do novel prediction
           if a reference genome and index is provided.

           Steps, in order:

           1. Map reads with base.map_mirbase and store the results on
              self.results. Writes results.csv, mirbase_mature_counts.csv
              and all_counts.csv to the output folder.
           2. Compute per-sample mapped fractions (stored on self.samples),
              print them and call plot_results.
           3. Count isomirs with base.map_isomirs and write
              isomir_counts.csv when counts are returned.
           4. Novel miRNA prediction, only if the reference fasta exists, the
              reference index is available and check_viennarna() does not
              return False. Writes novel_mirna.csv, novel.fa, novel.html and
              novel_mirna_counts.csv.

           Side effects: adds an entry to self.aligner_params, sets
           novel.VERBOSE, self.results and self.samples. Returns None.
        """

        out = self.output
        libraries = self.libraries
        temp = self.temp_path
        ref_name = self.ref_name
        #name under which the mirbase mature reference is labelled in the 'ref' column
        mat_name = 'mirbase-%s' %self.species
        #register the miRNA specific aligner parameters under that reference name
        self.aligner_params[mat_name] = self.mirna_params
        novel.VERBOSE = self.verbose

        #without an index the genome is dropped from the mapping step and
        #the novel prediction further down is skipped
        if self.check_index(ref_name) == False:
            print ('no index for reference genome')
            ref_name = ''

        print ('mapping miRNAs..')
        res, counts = base.map_mirbase(self.files, outpath=temp, indexes=libraries,
                                       species=self.species, ref_genome=ref_name,
                                       pad5=self.pad5, pad3=self.pad3, aligner=self.aligner,
                                       samplelabels=self.labels,
                                       params=self.aligner_params,
                                       verbose=self.verbose)

        self.results = res
        #separate out mature counts and save
        matcounts = counts[counts.ref==mat_name]
        #results.csv holds the full table, written before the genome rows are removed
        res.to_csv( os.path.join(out, 'results.csv'),index=False )
        #drop rows mapped to the reference genome for the fraction plots below
        res = res[res.ref!=ref_name]
        matcounts.to_csv( os.path.join(out, 'mirbase_mature_counts.csv'), index=False,
                            float_format='%.1f' )
        counts.to_csv( os.path.join(out, 'all_counts.csv'), index=False, float_format='%.1f')

        #get fractions per sample and plot results
        c = base.pivot_count_data(res, idxcols=['name','ref'])
        self.samples = s = base.get_fractions_mapped(res)
        print (s)
        #NOTE(review): called with three arguments (fractions, counts, folder);
        #confirm this matches the plot_results signature defined in this module
        plot_results(s, c, out)

        #isomir counting
        print ()
        print ('counting isomirs..')
        iso, isocounts = base.map_isomirs(self.files, temp, self.species,
                                          samplelabels=self.labels)
        if isocounts is not None:
            isocounts.to_csv( os.path.join(out, 'isomir_counts.csv'),
                                index=False, float_format='%.1f')
        else:
            print ('no isomirs could be counted')
        #novel prediction
        #train classifier first if not present
        #note this call is made even when the novel step below ends up being skipped
        novel.create_classifier()

        if self.ref_fasta == '' or not os.path.exists(self.ref_fasta):
            print ('no reference genome file, skipping novel mirna step')
        elif ref_name == None or ref_name == '':
            print ('no index for ref genome, required for novel mirna step')
        elif check_viennarna() == False:
            print ('Vienna RNA package not installed')
            print ('see https://www.tbi.univie.ac.at/RNA/')
        else:
            print ()
            print ('predicting novel mirnas..')
            start = time.time()
            #TODO: change map_rnas so it can use remaining files from previous run?

            #collect the reads aligned to the reference genome index from the temp folder
            allreads = utils.combine_aligned_reads(temp, idx=ref_name)
            new,cl = novel.find_mirnas(allreads, self.ref_fasta, species=self.species,
                                       score_cutoff=float(self.score_cutoff),
                                       read_cutoff=int(self.read_cutoff),
                                       cpus=self.cpus)
            if new is None or len(new) == 0:
                print ('Could not find any novel mirnas.')
                print ('There may not be sufficient aligned reads or the score cutoff is too high.\n')
                #early exit: no novel files are written and no timing is printed
                return
            if self.strict == True:
                #strict mode keeps only predictions whose mature_check is 'ok'
                new = new[new.mature_check=='ok']
                print ('filtered %s' %len(new))
            new.to_csv(os.path.join(out,'novel_mirna.csv'), index=False)

            #pad mature novel and write to fasta for counting
            novpad = base.get_mature_padded(new, idkey='mature_id', seqkey='mature')
            novpad = novpad.drop_duplicates('name')
            utils.dataframe_to_fasta(novpad,os.path.join(out,'novel.fa'),
                                     seqkey='sequence', idkey='name')
            novel.create_report(new, cl, self.species, outfile=os.path.join(out, 'novel.html'))

            #now count novel mirnas for all samples
            #build an index from the novel fasta, then map all files against it
            build_indexes(os.path.join(out,'novel.fa'), self.index_path)
            r,nc = base.map_rnas(self.files, ['novel'], self.temp_path,
                                 aligner=self.aligner,
                                 samplelabels=self.labels)
            nc.to_csv( os.path.join(out, 'novel_mirna_counts.csv'), index=False )
            #elapsed time covers the whole novel prediction step
            end = round(time.time()-start,1)
            print ('took %s seconds' %str(end))
        return