Example #1
    def run(self):
        work = self.get_workspace()

        filenames = util.wildcard(self.images,
                                  ['.png', '.tif', '.tiff', '.jpg'])

        index = []
        seen = set()
        for filename in filenames:
            name = os.path.splitext(os.path.basename(filename))[0]

            assert name not in seen, 'Duplicate image name: ' + name
            seen.add(name)

            index.append(name)

        util.clear(work / ('config', 'index.pgz'))

        with nesoni.Stage() as stage:
            for name, filename in zip(index, filenames):
                stage.process(segment_image,
                              work / ('images', name),
                              filename,
                              min_area=self.min_area,
                              blur=self.blur)

        util.save(work / ('config', 'index.pgz'), index)
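Every example in this listing leans on nesoni.Stage, which farms jobs out to parallel processes. A minimal sketch of the fork/join pattern, inferred from the usage above (the worker function here is hypothetical):

import nesoni

def segment_one(value):
    # Hypothetical stand-in for a worker such as segment_image above.
    print(value * value)

def demo():
    # Each stage.process(func, *args, **kwargs) queues func as a
    # parallel job; leaving the `with` block waits for all of them.
    with nesoni.Stage() as stage:
        for value in [1, 2, 3, 4]:
            stage.process(segment_one, value)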
Example #2
    def run(self):
        workspace = self.get_workspace()

        stage = nesoni.Stage()

        if self.panpipes:
            if self.make:
                demakein.Make_panpipe(workspace /
                                      'panpipe').process_make(stage)

        if self.flutes:
            for model_name, model_code, designer in [
                ('folk-flute-straight', 'FFS',
                 demakein.Design_straight_folk_flute),
                ('folk-flute-tapered', 'FFT',
                 demakein.Design_tapered_folk_flute),
                ('pflute-straight', 'PFS', demakein.Design_straight_pflute),
                ('pflute-tapered', 'PFT', demakein.Design_tapered_pflute),
            ]:
                for size_name, size_code, transpose in [
                    ('tenor', 't', 0),
                    ('alto', 'a', 5),
                    ('soprano', 's', 12),
                ]:
                    stage.process(self._do_flute, model_name, model_code,
                                  size_name, size_code, designer, transpose)

        if self.whistles:
            for model_name, model_code in [
                ('folk-whistle', 'FW'),
            ]:
                for size_name, size_code, transpose in [
                    ('tenor', 't', 0),
                    ('alto', 'a', 5),
                    ('soprano', 's', 12),
                    ('sopranino', 'ss', 17),
                ]:
                    stage.process(self._do_folk_whistle, model_name,
                                  model_code, size_name, size_code, transpose)

        if self.shawms:
            for model_name, model_code, designer in [
                ('shawm', 'SH', demakein.Design_shawm),
                ('folk-shawm', 'FSH', demakein.Design_folk_shawm),
            ]:
                for size_name, size_code, transpose, bore in [
                    ('4mm-alto', '4a', 5, 4.0),
                    ('4mm-tenor', '4t', 0, 4.0),
                    ('6mm-tenor', '6t', 0, 6.0),
                    ('6mm-bass', '6b', -7, 6.0),
                ]:
                    stage.process(self._do_shawm, model_name, model_code,
                                  size_name, size_code, designer, transpose,
                                  bore)

        stage.barrier()
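Unlike the other examples, this one creates the stage without a `with` block and synchronises explicitly; from the usage it appears stage.barrier() performs the same join that exiting the context manager does. A hedged sketch of the two forms side by side:

import nesoni

def job():
    pass  # placeholder work

# Form used above: explicit barrier.
stage = nesoni.Stage()
stage.process(job)
stage.barrier()   # wait for every queued job to finish

# Form used in the other examples: the context manager joins on exit.
with nesoni.Stage() as stage:
    stage.process(job)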
Example #3
    def run(self):
        context = self.get_context()

        if context.clip:
            context.clip.make()

        context.align.make()

        if context.filter:
            context.filter.make()

        with nesoni.Stage() as stage:
            if context.reconsensus:
                context.reconsensus.process_make(stage)

            nesoni.Tag(self.output_dir, tags=self.tags).make()

            if context.count:
                context.count.make()
Example #4
    def run(self):
        work = self.get_workspace()
        with nesoni.Stage() as stage:
            for accession in self.genbanks:
                Fetch_genbank(
                    work / 'genbank',
                    accession,
                    self.email,
                ).process_make(stage)

            for accession in self.accessions:
                Fetch_sra(work / 'sra', accession).process_make(stage)

        nesoni.Make_reference(
            output_dir=work / 'TW20',
            filenames=[
                work / ('genbank', accession + '.gbk')
                for accession in self.genbanks
            ],
            genome=True,
            bowtie=True,
            ls=True,
            snpeff=True,
        ).make()

        analyser = nesoni.Analyse_samples
        analyser(work / 'analysis',
                 work / 'TW20',
                 samples=[
                     nesoni.Analyse_sample(
                         accession,
                         pairs=[[
                             work / ('sra', accession + '_1.fastq.bz2'),
                             work / ('sra', accession + '_2.fastq.bz2')
                         ]]) for accession in self.accessions
                 ]).make()
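Two invocation styles appear in this example: .process_make(stage) queues a tool on a stage so it runs concurrently with the other queued tools, while .make() runs a tool to completion before the next statement. A sketch of the ordering this implies, reusing Fetch_sra from above with hypothetical accessions:

with nesoni.Stage() as stage:
    Fetch_sra(work / 'sra', 'SRR_A').process_make(stage)  # queued
    Fetch_sra(work / 'sra', 'SRR_B').process_make(stage)  # queued, runs concurrently
# Exiting the block joins: both downloads have finished here, so the
# synchronous Make_reference(...).make() call can rely on their output.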
Example #5
    def run(self):
        context = self.get_context()

        with nesoni.Stage() as stage:
            for sample in context.samples:
                sample.process_make(stage)

        with nesoni.Stage() as stage:
            if context.variants:
                context.variants.process_make(stage)

            if context.expression:
                context.expression.process_make(stage)

        if self.igv_plots:
            plot_space = workspace.Workspace(context.space / 'plot',
                                             must_exist=False)
            self.igv_plots(
                prefix=plot_space / 'plot',
                genome=context.reference.get_genome_filename(),
                norm_file=context.space /
                ('expression', 'norm.csv') if context.expression else None,
                working_dirs=context.sample_dirs,
            ).make()

        # =================================================================================
        # =================================================================================
        # =================================================================================

        reporter = reporting.Reporter(context.space / 'report',
                                      self.report_title, context.name)

        reporter.report_logs(
            'alignment-statistics',
            [
                sample.get_context().clip.log_filename()
                for sample in context.samples if sample.clip
            ] + [
                sample.get_context().filter.log_filename() if not sample.count
                else sample.get_context().count.log_filename()
                for sample in context.samples if sample.filter or sample.count
            ],
            filter=lambda sample, field: field != 'fragments',
        )

        if self.expression:
            io.symbolic_link(source=context.space / ('expression', 'report'),
                             link_name=context.space /
                             ('report', 'expression'))
            reporter.heading(
                '<a href="expression/index.html">&gt; Expression analysis</a>')

        if self.variants:
            io.symbolic_link(source=context.space / ('variants', 'report'),
                             link_name=context.space / ('report', 'variants'))
            reporter.heading(
                '<a href="variants/index.html">&gt; Variants analysis</a>')

        if self.igv_plots:
            reporter.heading('IGV plots')
            reporter.p(
                'These files show the depth of coverage. They can be viewed with the IGV genome browser.'
            )

            genome_files = []
            if self.include_genome:
                genome_filename = context.reference.get_genome_filename()
                genome_dir = context.reference.get_genome_dir()
                genome_files.append(genome_filename)
                if genome_dir:
                    base = os.path.split(genome_dir)[1]
                    for filename in os.listdir(genome_dir):
                        genome_files.append(
                            (os.path.join(genome_dir, filename),
                             os.path.join(base, filename)))

            reporter.p(
                reporter.tar('igv-plots',
                             genome_files + glob.glob(plot_space / '*.tdf')))

        if self.include_bams:
            reporter.heading('BAM files')

            reporter.p(
                'These BAM files contain the alignments of reads to the reference sequences.'
                ' They can also be viewed using IGV.')

            bam_files = []
            for sample in self.samples:
                name = sample.output_dir
                bam_files.append(
                    (context.space /
                     ('samples', name, 'alignments_filtered_sorted.bam'),
                     name + '.bam'))
                bam_files.append(
                    (context.space /
                     ('samples', name, 'alignments_filtered_sorted.bam.bai'),
                     name + '.bam.bai'))
            reporter.p(reporter.tar('bam-files', bam_files))

        reporter.write('<p/><hr/>\n')
        reporter.p('nesoni version ' + nesoni.VERSION)
        reporter.close()
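Note that reporter.tar above accepts two item forms: a bare path, and what appears to be a (source_path, name_inside_archive) pair that renames the file in the tarball. A sketch of assembling such a mixed list, with hypothetical paths:

files = ['reference/genome.fa']          # stored under its own name
for name in ['sampleA', 'sampleB']:      # hypothetical sample names
    bam = 'samples/%s/alignments_filtered_sorted.bam' % name
    files.append((bam, name + '.bam'))             # renamed in the archive
    files.append((bam + '.bai', name + '.bam.bai'))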
Example #6
    def run(self):
        #assert self.reference is not None, 'No reference directory given.'
        space = self.get_workspace()

        #self.count(
        #    space / 'counts',
        #    filenames=self.samples,
        #    ).make()

        nesoni.Merge_counts(space / 'counts',
                            filenames=[(os.path.join(item, 'counts.csv')
                                        if os.path.isdir(item) else item)
                                       for item in self.samples]).make()

        self.norm_from_counts(space / 'norm', space / 'counts.csv').make()

        similarity = nesoni.Similarity(
            space / 'similarity',
            space / 'counts.csv',
            norm_file=space / 'norm.csv',
        )

        heatmaps = [
            heatmap(
                space / 'heatmap-' + heatmap.prefix,
                space / 'counts.csv',
                norm_file=space / 'norm.csv',
            ) for heatmap in self.heatmap
        ]

        tests = [
            test(
                space / 'test-' + test.prefix,
                space / 'counts.csv',
                norm_file=space / 'norm.csv',
            ) for test in self.test
        ]

        with nesoni.Stage() as stage:
            similarity.process_make(stage)
            for heatmap in heatmaps:
                heatmap.process_make(stage)
            for test in tests:
                test.process_make(stage)

        reporter = reporting.Reporter(space / 'report', self.title)

        similarity.report(reporter)

        #reporter.heading('Sample similarity')
        #
        #reporter.p(
        #    'The following plots attempt to summarize the similarity/differences in expression patterns between samples, '
        #    'based on the glog2-transformed normalized read counts. '
        #    'Samples from the same experimental group should cluster together.'
        #    )
        #
        #reporter.p(
        #    reporter.get(space / 'similarity-plotMDS.png',
        #        title = 'limma\'s "plotMDS" Multi-Dimensional Scaling plot of sample similarity',
        #        image = True
        #        )
        #    )
        #
        #reporter.p(
        #    reporter.get(space / 'similarity.svg',
        #        title = 'Split Network visualization of sample similarity.',
        #        image = True
        #        ) +
        #    '<br>(Visualization of euclidean distances as a split network. '
        #    'Note: This is <i>not</i> a phylogenetic network.)'
        #    )

        if heatmaps:
            reporter.heading('Heatmaps')
            for heatmap in heatmaps:
                reporter.report_heatmap(heatmap)

        if tests:
            reporter.heading('Differential expression analysis')
            for test in tests:
                #reporter.report_test(test)
                test.report(reporter)

        reporter.heading('Raw data')

        reporter.p(reporter.get(space / 'counts.csv'))
        reporter.p(reporter.get(space / 'norm.csv'))

        reporter.close()
Example #7
    def run(self):
        bams = []
        reference = None
        reference2 = None

        extra = []

        for sample in self.samples:
            if sam.is_bam(sample):
                bams.append(sample)
            elif os.path.isdir(sample):
                working = working_directory.Working(sample, True)
                bams.append(working.get_filtered_sorted_bam())
                extra.append('##sampleTags=' + ','.join(working.get_tags()))
                if reference2 is None:
                    reference2 = working.get_reference().reference_fasta_filename()
            elif io.is_sequence_file(sample):
                assert reference is None, 'Only one reference FASTA file allowed.'
                reference = sample

        if reference is None:
            reference = reference2
        if reference is None:
            raise grace.Error('No reference FASTA file given.')

        with nesoni.Stage() as stage:
            tempspace = stage.enter(workspace.tempspace())
            if self.depth_limit:
                with nesoni.Stage() as stage2:
                    for i in xrange(len(bams)):
                        sam.Bam_depth_limit(
                            tempspace / ('%d' % i),
                            bams[i],
                            depth=self.depth_limit).process_make(stage2)
                        bams[i] = tempspace / ('%d.bam' % i)

            # FreeBayes claims to handle multiple bams, but it doesn't actually work
            if len(bams) > 1:
                sam.Bam_merge(tempspace / 'merged', bams=bams,
                              index=False).run()
                bams = [tempspace / 'merged.bam']

            command = [
                'freebayes',
                '-f',
                reference,
                '--ploidy',
                str(self.ploidy),
                '--pvar',
                str(self.pvar),
            ] + self.freebayes_options + bams

            self.log.log('Running: ' + ' '.join(command) + '\n')

            f_out = stage.enter(open(self.prefix + '.vcf', 'wb'))
            f_in = stage.enter(io.pipe_from(command))
            done_extra = False
            for line in f_in:
                if not done_extra and not line.startswith('##'):
                    for extra_line in extra:
                        f_out.write(extra_line + '\n')
                    done_extra = True
                f_out.write(line)

        index_vcf(self.prefix + '.vcf')
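The loop at the end splices the collected ##sampleTags= lines into FreeBayes' output just before the first non-'##' line (the #CHROM column header). The same logic as a standalone generator, a minimal sketch for illustration:

def inject_vcf_headers(lines, extra):
    # Yield VCF lines, inserting the extra '##' header lines
    # immediately before the first line that is not a '##' header.
    done_extra = False
    for line in lines:
        if not done_extra and not line.startswith('##'):
            for extra_line in extra:
                yield extra_line + '\n'
            done_extra = True
        yield line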
Example #8
def _parallel(*items):
    with nesoni.Stage() as stage:
        for item in items:
            stage.process(item)
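Example #9 below composes jobs with _serial and _call helpers that are not part of this listing. From the way they are used there (jobs are zero-argument callables, such as a tool's bound .make), plausible sketches would be:

def _serial(*items):
    # Presumed counterpart of _parallel: run each job to completion,
    # in order.
    for item in items:
        item()

def _call(func, *args, **kwargs):
    # Presumed helper: defer a call, yielding a zero-argument job
    # that can be composed with _serial and _parallel.
    def job():
        return func(*args, **kwargs)
    return job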
Example #9
    def run(self):
        #===============================================
        #                Sanity checks
        #===============================================
        
        assert len(set([ item.output_dir for item in self.samples ])) == len(self.samples), "Duplicate sample name."
        
        all_inputs = [ ]
        for sample in self.samples:
            all_inputs.extend(sample.reads)
        assert len(set(all_inputs)) == len(all_inputs), "Duplicate read filename."
        
        assert len(set([ item.output_dir for item in self.tests ])) == len(self.tests), "Duplicate test name."
        
        for test in self.tests:
            assert not test.analysis, "analysis parameter for tests should not be set, will be filled in automatically"
        
        #===============================================
        #                Run pipeline
        #===============================================
        
        names = [ sample.output_dir for sample in self.samples ]
        
        reference = reference_directory.Reference(self.reference, must_exist=True)
        
        workspace = io.Workspace(self.output_dir, must_exist=False)
        samplespace = io.Workspace(workspace/'samples', must_exist=False)
        expressionspace = io.Workspace(workspace/'expression', must_exist=False)
        testspace = io.Workspace(workspace/'test', must_exist=False)
        
        self._create_json()
                
        file_prefix = self.file_prefix
        if file_prefix and not file_prefix.endswith('-'):
            file_prefix += '-'


        samples = [ ]
        for sample in self.samples:
            samples.append(sample(
                samplespace / sample.output_dir,
                reference = self.reference,
                ))
        
        dirs = [ item.output_dir for item in samples ]
        
        clipper_logs = [ join(item.output_dir, 'clipped_reads_log.txt') for item in samples ]
        filter_logs = [ join(item.output_dir, 'filter_log.txt') for item in samples ]
        filter_polya_logs = [ join(item.output_dir + '-polyA', 'filter_log.txt') for item in samples ]

        analyse_template = tail_lengths.Analyse_tail_counts(
            working_dirs = dirs,
            extension = self.extension,
            annotations = reference/'reference.gff',
            types = self.types,
            parts = self.parts
            )
        
        with nesoni.Stage() as stage:        
            for item in samples:
                item.process_make(stage)

        job_gene_counts = analyse_template(
            output_dir = expressionspace/'genewise',
            extension = self.extension,
            title = 'Genewise expression - ' + self.title,
            file_prefix = file_prefix+'genewise-',
            ).make
        
        job_peaks = _call(self._run_peaks, 
            workspace=workspace, 
            expressionspace=expressionspace, 
            reference=reference, 
            dirs = dirs,
            analyse_template = analyse_template,
            file_prefix=file_prefix,
            )
        
        job_norm = nesoni.Norm_from_samples(
            workspace/'norm',
            working_dirs = dirs
            ).make
            
        job_bigwig = bigwig.Polya_bigwigs(
            workspace/'bigwigs', 
            working_dirs = dirs, 
            norm_file = workspace/"norm.csv",
            peaks_file = workspace/("peaks", "relation-child.gff"),
            title = "IGV tracks - "+self.title
            ).make
        
        job_norm_bigwig = _call(_serial, job_norm, job_bigwig)

        job_utrs = tail_tools.Call_utrs(
            workspace/('peaks','primary-peak'),
            self.reference,
            self.output_dir,
            extension=self.extension
            ).make
            
        job_primpeak_counts = analyse_template(
            expressionspace/'primarypeakwise',
            annotations=workspace/('peaks','primary-peak-peaks.gff'), 
            extension=0,
            types='peak',
            parts='peak',
            title='Primary-peakwise expression - ' + self.title,
            file_prefix=file_prefix+'primarypeakwise-',
            ).make
        
        job_primpeak = _call(_serial, job_utrs, job_primpeak_counts)
        
        job_peak_primpeak_bigwig = _call(_serial, 
            job_peaks, 
            _call(_parallel, job_norm_bigwig, job_primpeak))
        
        job_count = _call(_parallel, job_gene_counts, job_peak_primpeak_bigwig)
            
        test_jobs = [ ]
        for test in self.tests:
            test_jobs.append(test(
                output_dir = testspace/test.output_dir,
                analysis = self.output_dir,
                ).make)

        job_test = _call(_parallel, *test_jobs)

        job_raw = self._extract_raw

        job_all = _call(_serial, job_count, _call(_parallel, job_raw, job_test))        
        
        job_all()



        #===============================================
        #                   Report        
        #===============================================

        r = reporting.Reporter(workspace/'report', self.title, self.file_prefix, style=web.style())
        
        io.symbolic_link(source=workspace/'bigwigs', link_name=r.workspace/'bigwigs')
        r.write('<div style="font-size: 150%; margin-top: 1em; margin-bottom: 1em;"><a href="bigwigs/index.html">&rarr; Load tracks into IGV</a></div>')

        tail_tools.Shiny(workspace/('report','shiny'), self.output_dir, title=self.title, species=self.species).run()
        r.write('<div style="font-size: 150%; margin-top: 1em; margin-bottom: 1em;"><a href="shiny/" target="_blank">&rarr; Interactive report (shiny)</a></div>')
        
        r.heading('Alignment to reference')
        
        r.report_logs('alignment-statistics',
            #[ workspace/'stats.txt' ] +
            clipper_logs + filter_logs + #filter_polya_logs +
            [ expressionspace/('genewise','aggregate-tail-counts_log.txt') ],
            filter=lambda sample, field: (
                field not in [
                    'fragments','fragments aligned to the reference','reads kept',
                    'average depth of coverage, ambiguous',
                    'average depth of coverage, unambiguous',
                    ]
            ),
        )
        

        r.heading('Genewise expression')
        
        r.p("This is based on all reads within each gene (possibly from multiple peaks, or decay products).")
        
        io.symbolic_link(source=expressionspace/('genewise','report'),link_name=r.workspace/'genewise')
        r.p('<a href="genewise/index.html">&rarr; Genewise expression</a>')


        r.heading('Peakwise expression')
        
        r.p("This shows results from all called peaks.")
        
        peak_filename = expressionspace/('peakwise','features-with-data.gff')
        r.p(r.get(peak_filename, name='peaks.gff') + ' - peaks called')        

        self._describe_peaks(r)
        
        io.symbolic_link(source=expressionspace/('peakwise','report'),link_name=r.workspace/'peakwise')
        r.p('<a href="peakwise/index.html">&rarr; Peakwise expression</a>')


        r.subheading('Primary-peakwise expression')
        
        r.p("This is based on the most prominent peak in the 3'UTR for each gene. (Peak can be up to %d bases downstrand of the annotated 3'UTR end, but not inside another gene on the same strand.)" % self.extension)
        
        io.symbolic_link(source=expressionspace/('primarypeakwise','report'),link_name=r.workspace/'primarypeakwise')
        r.p('<a href="primarypeakwise/index.html">&rarr; Primary-peakwise expression</a>')

        r.p(r.get(workspace/('peaks','primary-peak-peaks.gff')) + ' - primary peaks for each gene.')
        r.p(r.get(workspace/('peaks','primary-peak-utrs.gff')) + ' - 3\' UTR regions, based on primary peak call.')
        r.p(r.get(workspace/('peaks','primary-peak-genes.gff')) + ' - full extent of gene, based on primary peak call.')


        if self.tests:
            r.heading('Differential tests')
            for test in self.tests:
                io.symbolic_link(source=testspace/test.output_dir,link_name=r.workspace/('test-'+test.output_dir))
                r.p('<a href="test-%s">&rarr; %s</a> '
                    % (test.output_dir, test.get_title()))


        web.Geneview_webapp(r.workspace/'view').run()        
                
        r.heading('Gene viewers')
        r.p('Having identified interesting genes from heatmaps and differential tests above, '
            'these viewers allow specific genes to be examined in detail.')
        
        if self.groups:
            r.get(workspace/('peak-shift','grouped.json'))
            r.p('<a href="view.html?json=%sgrouped.json">&rarr; Gene viewer, grouped samples</a>' % r.file_prefix)
        r.get(workspace/('peak-shift','individual.json'))
        r.p('<a href="view.html?json=%sindividual.json">&rarr; Gene viewer, individual samples</a>' % r.file_prefix)
        
        
        r.heading('Raw data')
        
        r.p(r.tar('csv-files',glob.glob(workspace/('raw','*.csv'))))
        
        r.write('<ul>\n')
        r.write('<li> -info.csv = gene name and product, etc\n')
        r.write('<li> -count.csv = read count\n')
        r.write('<li> -mlog2-RPM.csv = moderated log2 Reads Per Million\n')
        r.write('<li> -tail.csv = average poly(A) tail length\n')
        r.write('<li> -tail-count.csv = poly(A) read count\n')
        r.write('<li> -proportion.csv = proportion of reads with poly(A)\n')
        r.write('<li> -norm.csv = read count normalization used for log2 transformation, heatmaps, differential tests, etc.\n')
        r.write('</ul>\n')

        r.p('This set of genes was used in the analysis:')
        
        r.p(r.get(reference/'reference.gff') + ' - Reference annotations in GFF3 format')
        r.p(r.get(reference/'utr.gff') + ' - 3\' UTR regions')

        r.p('<b>%d further bases 3\' extension was allowed</b> beyond the GFF files above (but not extending into the next gene on the same strand).' % self.extension)

        r.write('<p/><hr>\n')
        r.subheading('About normalization and log transformation')

        r.p('Counts are converted to '
            'log2 Reads Per Million using Anscombe\'s variance stabilizing transformation '
            'for the negative binomial distribution, implemented in '
            'R package "varistran".')
                
        r.write('<p/><hr>\n')

        r.p('Reference directory '+self.reference)
        r.p('Tail Tools version '+tail_tools.VERSION)
        r.p('Nesoni version '+nesoni.VERSION)
        
        r.close()
Example #10
    def run(self):
        assert self.extension is not None, '--extension must be specified'

        # Also allow simply passing an analyse-polya-batch directory,
        # expanding it to its sample directories
        working_dirs = []
        for item in self.working_dirs:
            state_filename = os.path.join(item, 'analyse-polya-batch.state')
            if not os.path.exists(state_filename):
                working_dirs.append(item)
            else:
                with open(state_filename, 'rb') as f:
                    state = pickle.load(f)

                for sample in state.samples:
                    working_dirs.append(
                        os.path.join(item, 'samples', sample.output_dir))

        work = self.get_workspace()

        if self.reuse:
            pickle_workspace = workspace.Workspace(
                os.path.join(self.reuse, 'pickles'))
        else:
            pickle_workspace = workspace.Workspace(work / 'pickles')
        plot_workspace = workspace.Workspace(work / 'plots')

        pickle_filenames = []

        file_prefix = self.file_prefix
        if file_prefix and not file_prefix.endswith('-'):
            file_prefix += '-'

        with nesoni.Stage() as stage:
            for dir in working_dirs:
                working = working_directory.Working(dir, must_exist=True)
                pickle_filenames.append(pickle_workspace / working.name +
                                        '.pickle.gz')
                if self.reuse: continue
                Tail_count(
                    pickle_workspace / working.name,
                    working_dir=dir,
                    annotations=self.annotations,
                    types=self.types,
                    parts=self.parts,
                    extension=self.extension,
                ).process_make(stage)

        assert len(set(pickle_filenames)) == len(
            pickle_filenames), "Duplicate sample name."

        with nesoni.Stage() as stage:
            Aggregate_tail_counts(output_dir=self.output_dir,
                                  pickles=pickle_filenames,
                                  tail=self.tail,
                                  adaptor=self.adaptor).process_make(stage)

        nesoni.Norm_from_counts(
            prefix=work / 'norm',
            counts_filename=work / 'counts.csv',
        ).make()

        similarity = nesoni.Similarity(
            prefix=plot_workspace / 'similarity',
            counts=work / 'counts.csv',
        )

        plot_pooleds = [
            Plot_pooled(
                prefix=plot_workspace / 'pooled-heatmap',
                aggregate=self.output_dir,
                #min_tails = min_tails,
                min_tails=1,
                top=100,
            )
            #for min_tails in (20,50,100,200,500,1000,2000)
        ]

        #plot_comparisons = [
        #    Plot_comparison(
        #        prefix = plot_workspace/('comparison-min-tails-%d-min-span-%.1f' % (min_tails,min_span)),
        #        aggregate = self.output_dir,
        #        min_tails = min_tails,
        #        min_span = min_span,
        #        )
        #    for min_tails in [50,100,200,500]
        #    for min_span in [2,4,8,10,15,20,25,30]
        #    ]
        #
        heatmaps = [
            nesoni.Heatmap(
                prefix=plot_workspace / ('heatmap-min-fold-%.1f' % fold),
                counts=work / 'counts.csv',
                norm_file=work / 'norm.csv',
                min_span=math.log(fold) / math.log(2.0),
            ) for fold in [1.5, 2.0, 4.0, 6.0, 8.0, 10.0, 20.0, 30.0, 40.0]
        ]

        with nesoni.Stage() as stage:
            similarity.process_make(stage)
            for action in plot_pooleds + heatmaps:  #+ plot_comparisons:
                action.process_make(stage)

        r = reporting.Reporter(
            work / 'report',
            self.title,
            file_prefix,
            style=web.style(),
        )

        similarity.report(r)

        r.heading('Poly(A) tail length distribution')

        r.p('This plot shows the distribution of lengths of poly(A) tail sequence in top expressed features. '
            'Its main purpose is to assess data quality. '
            'If the plot has many bright spots there may be many identical reads, possibly due to non-random digestion.'
            )

        r.p('Only reads with a poly(A) sequence of four or more bases are used.'
            )

        for heatmap in plot_pooleds:
            r.report_heatmap(heatmap)

        r.heading('Heatmaps')

        r.p('Genes were selected based '
            'on there being at least some fold change difference between '
            'some pair of samples.')

        for heatmap in heatmaps:
            r.report_heatmap(heatmap)

        #r.heading('Average poly(A) tail length and its relation to expression levels')
        #
        #r.p(
        #    'Only reads with a poly(A) sequence of four or more bases was included in the averages.'
        #    )
        #
        #r.p(
        #    'Genes were selected based on there being at least a certain number of reads with poly(A) sequence in <i>each</i> sample (min-tails), '
        #    'and on there being at least some amount of difference in average tail length between samples (min-span).'
        #    )
        #
        #for heatmap in plot_comparisons:
        #    r.report_heatmap(heatmap)

        r.close()
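The heatmap loop above converts a fold-change threshold into a span on the log2 scale using the identity log2(fold) = ln(fold)/ln(2); for example, a 4-fold change corresponds to a min_span of 2 (two doublings):

import math

for fold in [1.5, 2.0, 4.0]:
    # math.log(fold) / math.log(2.0) is exactly log2(fold).
    print('%g -> %g' % (fold, math.log(fold) / math.log(2.0)))
# 1.5 -> ~0.585, 2.0 -> 1.0, 4.0 -> 2.0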
Example #11
File: path.py Project: pfh/demakein
 def run(self):
     with nesoni.Stage() as stage:
         for filename in self.stls:
             stage.process(self.do_one, filename)
Example #12
    def run(self):
        working_dirs = []
        peaks_file = self.peaks_file
        for item in self.working_dirs:
            state_filename = os.path.join(item, 'analyse-polya-batch.state')
            if not os.path.exists(state_filename):
                working_dirs.append(item)
            else:
                with open(state_filename, 'rb') as f:
                    state = pickle.load(f)

                for sample in state.samples:
                    working_dirs.append(
                        os.path.join(item, 'samples', sample.output_dir))

                if not peaks_file:
                    peaks_file = os.path.join(self.pipeline_dir, "peaks",
                                              "relation-child.gff")

        sample_names = [os.path.split(dirname)[1] for dirname in working_dirs]
        workspaces = [
            working_directory.Working(dirname, must_exist=True)
            for dirname in working_dirs
        ]

        workspace = self.get_workspace()

        with open(workspace / "index.html", "wb") as f:
            web.emit(
                f, "igv.html",
                dict(
                    SAMPLES=json.dumps(sample_names),
                    HAVE_NORM=json.dumps(bool(self.norm_file)),
                    TITLE=self.title,
                ))

        bams = [item / "alignments_filtered_sorted.bam" for item in workspaces]

        for i in xrange(len(sample_names)):
            io.symbolic_link(bams[i], workspace / (sample_names[i] + ".bam"))
            io.symbolic_link(bams[i] + ".bai",
                             workspace / (sample_names[i] + ".bam.bai"))

        io.symbolic_link(peaks_file, workspace / "peaks.gff")

        if self.norm_file:
            mults = io.read_grouped_table(self.norm_file)['All']
            norm_mult = [
                float(mults[name]['Normalizing.multiplier'])
                for name in sample_names
            ]

        with nesoni.Stage() as stage:
            Bam_to_bigwig(
                workspace / "total",
                bam_files=bams,
                what="ambiguity,span,3p,polyaspan,polya3p",
            ).process_make(stage)

            for i in xrange(len(sample_names)):
                for scale_desc, scale in \
                        [("raw",1.0)] + \
                        ([("norm",norm_mult[i])] if self.norm_file else []):
                    Bam_to_bigwig(workspace /
                                  (sample_names[i] + "-" + scale_desc),
                                  bam_files=[bams[i]],
                                  what='span,3p,polyaspan,polya3p',
                                  scale=scale).process_make(stage)
Example #13
 def run(self):
     with nesoni.Stage() as stage:
         for item in self.what.split(","):
             if item == "cover":
                 stage.process(make_bigwig,
                               self.prefix + "-cover",
                               self.bam_files,
                               fragment_split_coverage,
                               True,
                               scale=self.scale)
             elif item == "span":
                 stage.process(make_bigwig,
                               self.prefix + "-span",
                               self.bam_files,
                               fragment_coverage,
                               True,
                               scale=self.scale)
             elif item == "start":
                 stage.process(make_bigwig,
                               self.prefix + "-start",
                               self.bam_files,
                               read1_starts,
                               False,
                               scale=self.scale)
             elif item == "end":
                 stage.process(make_bigwig,
                               self.prefix + "-end",
                               self.bam_files,
                               read2_starts,
                               False,
                               scale=self.scale)
             elif item == "5p":
                 stage.process(make_bigwig,
                               self.prefix + "-5p",
                               self.bam_files,
                               read_starts,
                               False,
                               scale=self.scale)
             elif item == "3p":
                 stage.process(make_bigwig,
                               self.prefix + "-3p",
                               self.bam_files,
                               read_ends,
                               False,
                               scale=self.scale)
             elif item == "polyaspan":
                 stage.process(make_bigwig,
                               self.prefix + "-polyaspan",
                               self.bam_files,
                               fragment_coverage,
                               True,
                               scale=self.scale,
                               polya=True)
             elif item == "polya3p":
                 stage.process(make_bigwig,
                               self.prefix + "-polya3p",
                               self.bam_files,
                               read_ends,
                               False,
                               scale=self.scale,
                               polya=True)
             elif item == "ambiguity":
                 stage.process(make_ambiguity_bigwig,
                               self.prefix + "-ambiguity",
                               self.bam_files,
                               subsample=self.subsample)
             else:
                 raise config.Error("Don't know how to make: " + item)
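Each branch above differs only in fixed data: the filename suffix, the counting function, whether fragments are split, and optional extra keyword arguments. The chain could therefore be driven by a table instead; a sketch of that alternative, using the same names as the original:

# item -> (suffix, counting function, split flag, extra kwargs)
_TRACKS = {
    "cover":     ("-cover",     fragment_split_coverage, True,  {}),
    "span":      ("-span",      fragment_coverage,       True,  {}),
    "start":     ("-start",     read1_starts,            False, {}),
    "end":       ("-end",       read2_starts,            False, {}),
    "5p":        ("-5p",        read_starts,             False, {}),
    "3p":        ("-3p",        read_ends,               False, {}),
    "polyaspan": ("-polyaspan", fragment_coverage,       True,  {"polya": True}),
    "polya3p":   ("-polya3p",   read_ends,               False, {"polya": True}),
}

def run(self):
    with nesoni.Stage() as stage:
        for item in self.what.split(","):
            if item == "ambiguity":
                stage.process(make_ambiguity_bigwig,
                              self.prefix + "-ambiguity",
                              self.bam_files,
                              subsample=self.subsample)
            elif item in _TRACKS:
                suffix, func, split, extras = _TRACKS[item]
                stage.process(make_bigwig,
                              self.prefix + suffix,
                              self.bam_files, func, split,
                              scale=self.scale, **extras)
            else:
                raise config.Error("Don't know how to make: " + item)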