Python NGSTk.skewer примеры использования

Язык программирования: Python

Пространство имен/Пакет: pypiper.ngstk

Класс/Тип: NGSTk

Метод/Функция: skewer

Примеров на hotexamples.com: 8

Python NGSTk.skewer - 8 примеров найдено. Это лучшие примеры Python кода для pypiper.ngstk.NGSTk.skewer, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

NGSTk(10)

bam2fastq(7)

skewer(6)

merge_bams(6)

trimmomatic(6)

fastqc(4)

fastqc_rename(4)

plot_atacseq_insert_sizes(4)

bowtie2_map(3)

filterReads(3)

filter_reads(3)

genomeWideCoverage(3)

run_spp(3)

indexBam(3)

index_bam(3)

bowtie2Map(3)

peakTools(3)

macs2_call_peaks_atacseq(2)

slurm_submit_job(2)

slurm_header(2)

slurm_footer(2)

macs2CallPeaksATACSeq(2)

genome_wide_coverage(2)

get_mitochondrial_reads(2)

calculate_frip(2)

calculate_FRiP(2)

calculateFRiP(1)

mergeBams(1)

bam_to_bigwig(1)

macs2PlotModel(1)

plotInsertSizesFit(1)

macs2CallPeaks(1)

shiftReads(1)

shift_reads(1)

bamToBigWig(1)

sppCallPeaks(1)

spp_call_peaks(1)

macs2_call_peaks(1)

Пример #1

Показать файл

def process(sample, pipe_manager, args):
    """
    Run the RNA-seq processing steps for a single sample.

    Starting from unmapped BAM input, the steps run in this order:
    create the sample's output directories, merge replicate BAM files
    (only when ``sample.data_path`` holds several space-separated paths),
    run FastQC, convert BAM to FASTQ, trim adapters with the configured
    trimmer, and quantify expression with Kallisto.

    :param sample: sample to process; supplies the input path, the
        ``paired`` flag and all output file locations
    :param pipe_manager: pipeline manager used to run commands, register
        intermediate files for clean-up and read the configuration
    :param args: parsed command-line arguments; only ``args.cores`` is read
    :raises OSError: if one of the sample's output directories cannot be
        created
    """
    print("Start processing RNA-seq sample %s." % sample.sample_name)

    # dict.keys() is a view on Python 3 and cannot be added to a list
    # directly, so it is materialised first.
    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # os.path.exists rejected the value, so it is not a usable path.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # Re-raise with the offending entry named in the message,
                # keeping the original errno for callers that inspect it.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s" %
                    (path, sample.paths[path]))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    input_bams = sample.data_path.split(" ")
    if len(input_bams) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            # this is a list of sample paths
            input_bams=input_bams,
            merged_bam=sample.unmapped)
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        # Downstream steps read the merged file from here on.
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    fastqc_zip = os.path.join(sample.paths.sample_root,
                              sample.sample_name + "_fastqc.zip")
    cmd = tk.fastqc_rename(input_bam=sample.data_path,
                           output_dir=sample.paths.sample_root,
                           sample_name=sample.sample_name)
    pipe_manager.run(cmd, fastqc_zip, shell=True)
    report_dict(pipe_manager, parse_fastqc(fastqc_zip, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None)
    pipe_manager.run(cmd,
                     sample.fastq1 if sample.paired else sample.fastq,
                     shell=True)
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    # NOTE: any trimmer value other than "trimmomatic" or "skewer" skips
    # trimming entirely; no command is run in that case.
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(sample.paths.unmapped,
                                      sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)

        # Trimming statistics are only reported for the skewer branch.
        report_dict(
            pipe_manager,
            parse_trim_stats(sample.trimlog,
                             prefix="trim_",
                             paired_end=sample.paired))

    # Quantify gene expression
    pipe_manager.timestamp("Quantifying expression with Kallisto")
    cmd = kallisto(
        fastq_files=[sample.trimmed1, sample.trimmed2]
        if sample.paired else [sample.trimmed],
        kallisto_index=getattr(pipe_manager.config.resources.kallisto_index,
                               sample.genome),
        read_type=sample.read_type,
        output_dir=sample.kallisto_output_dir,
        threads=args.cores,
        bootstrap_number=pipe_manager.config.parameters.bootstrap_number,
        fragment_size=pipe_manager.config.parameters.fragment_size,
        fragment_std=pipe_manager.config.parameters.fragment_std)
    pipe_manager.run(cmd, sample.kallisto_quantification, shell=True)
    report_dict(pipe_manager,
                parse_kallisto_stats(sample.kallisto_quantification))

    # Finish up
    print(pipe_manager.stats_dict)

    pipe_manager.stop_pipeline()
    print("Finished processing sample %s." % sample.sample_name)

Пример #2

Показать файл

Файл: starrseq.py Проект: maryam1353/open_pipelines

def process(sample, pipe_manager, args):
    """
    Run the STARR-seq processing steps for a single sample.

    This takes unmapped Bam files and makes trimmed, aligned, duplicate marked
    and removed, indexed Bam files along with a UCSC browser track.
    Peaks are called and the fraction of reads in peaks (FRiP) is computed.

    :param sample: sample to process; supplies the input path, the
        ``paired`` flag and all output file locations
    :param pipe_manager: pipeline manager used to run commands, register
        intermediate files for clean-up and read parameters/resources
    :param args: parsed command-line arguments; only ``args.cores`` is read
    :raises OSError: if one of the sample's output directories cannot be
        created
    """

    print("Start processing STARR-seq sample %s." % sample.sample_name)

    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # os.path.exists rejected the value, so it is not a usable path.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # Re-raise with the offending entry named in the message,
                # keeping the original errno for callers that inspect it.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s" %
                    (path, sample.paths[path]))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    input_bams = sample.data_path.split(" ")
    if len(input_bams) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.mergeBams(
            inputBams=input_bams,  # this is a list of sample paths
            outputBam=sample.unmapped)
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        # Downstream steps read the merged file from here on.
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    cmd = tk.fastqc(inputBam=sample.data_path,
                    outputDir=sample.paths.sample_root,
                    sampleName=sample.sample_name)
    pipe_manager.run(cmd,
                     os.path.join(sample.paths.sample_root,
                                  sample.sample_name + "_fastqc.zip"),
                     shell=True)

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None)
    pipe_manager.run(cmd,
                     sample.fastq1 if sample.paired else sample.fastq,
                     shell=True)
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    # NOTE: any trimmer value other than "trimmomatic" or "skewer" skips
    # trimming entirely; no command is run in that case.
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.resources.adapters,
            log=sample.trimlog)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(sample.paths.unmapped,
                                      sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.resources.adapters)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2Map(
        inputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
        inputFastq2=sample.trimmed2 if sample.paired else None,
        outputBam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genomeIndex=getattr(pipe_manager.resources.genomes, sample.genome),
        maxInsert=pipe_manager.parameters.max_insert,
        cpus=args.cores)
    pipe_manager.run(cmd, sample.mapped, shell=True)

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filterReads(inputBam=sample.mapped,
                         outputBam=sample.filtered,
                         metricsFile=sample.dups_metrics,
                         paired=sample.paired,
                         cpus=args.cores,
                         Q=pipe_manager.parameters.read_quality)
    pipe_manager.run(cmd, sample.filtered, shell=True)

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.indexBam(inputBam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.indexBam(inputBam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)

    # Make tracks
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from bam file")
    cmd = tk.bamToBigWig(
        inputBam=sample.filtered,
        outputBigWig=sample.bigwig,
        genomeSizes=getattr(pipe_manager.resources.chromosome_sizes,
                            sample.genome),
        genome=sample.genome,
        tagmented=False,  # by default make extended tracks
        normalize=True)
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    # Plot fragment distribution
    # Only for paired-end samples, and only when the plot is not there yet.
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plotInsertSizesFit(bam=sample.filtered,
                              plot=sample.insertplot,
                              outputCSV=sample.insertdata)
        pipe_manager.report_figure("insert_sizes", sample.insertplot)

    # Count coverage genome-wide
    pipe_manager.timestamp("Calculating genome-wide coverage")
    cmd = tk.genomeWideCoverage(inputBam=sample.filtered,
                                genomeWindows=getattr(
                                    pipe_manager.resources.genome_windows,
                                    sample.genome),
                                output=sample.coverage)
    pipe_manager.run(cmd, sample.coverage, shell=True)

    # Calculate NSC, RSC
    # Run with nofail=True, so a failure here does not stop the pipeline.
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.peakTools(inputBam=sample.filtered,
                       output=sample.qc,
                       plot=sample.qc_plot,
                       cpus=args.cores)
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    pipe_manager.report_figure("cross_correlation", sample.qc_plot)

    # Call peaks
    pipe_manager.timestamp("Calling peaks with MACS2")
    # make dir for output (macs fails if it does not exist)
    if not os.path.exists(sample.paths.peaks):
        os.makedirs(sample.paths.peaks)

    cmd = tk.macs2CallPeaksATACSeq(treatmentBam=sample.filtered,
                                   outputDir=sample.paths.peaks,
                                   sampleName=sample.sample_name,
                                   genome=sample.genome)
    pipe_manager.run(cmd, sample.peaks, shell=True)

    # Calculate fraction of reads in peaks (FRiP)
    pipe_manager.timestamp("Calculating fraction of reads in peaks (FRiP)")
    cmd = tk.calculateFRiP(inputBam=sample.filtered,
                           inputBed=sample.peaks,
                           output=sample.frip)
    pipe_manager.run(cmd, sample.frip, shell=True)

    print("Finished processing sample %s." % sample.sample_name)
    pipe_manager.stop_pipeline()

Пример #3

Показать файл

def process(sample, pipe_manager, args):
    """
    Run the ChIP-seq processing steps for a single sample.

    This takes unmapped Bam files and makes trimmed, aligned, duplicate marked
    and removed, indexed (and shifted if necessary) Bam files
    along with a UCSC browser track.

    Samples without a comparison (control) sample stop after the
    signal/noise assessment; for the others, peaks are called against the
    comparison sample, FRiP is computed and a bigWig track is produced.

    :param Sample sample: individual Sample object to process
    :param pypiper.PipelineManager pipe_manager: PipelineManager to use during
        Sample processing
    :param argparse.Namespace args: binding between command-line option and
        argument, for specifying values various pipeline parameters
    :raises OSError: if one of the sample's output directories cannot be
        created
    """
    print("Start processing ChIP-seq sample %s." % sample.name)

    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # os.path.exists rejected the value, so it is not a usable path.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # Re-raise with the offending entry named in the message,
                # keeping the original errno for callers that inspect it.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s" %
                    (path, sample.paths[path]))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    if len(sample.input_file_paths) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(input_bams=sample.input_file_paths,
                            merged_bam=sample.unmapped)
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        # Downstream steps read the merged file from here on.
        sample.data_source = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    cmd = tk.fastqc(file=sample.data_source,
                    output_dir=sample.paths.sample_root)
    pipe_manager.run(cmd, sample.fastqc_initial_output, shell=False)
    # # rename output
    if os.path.exists(sample.fastqc_initial_output):
        os.rename(sample.fastqc_initial_output, sample.fastqc)
    report_dict(pipe_manager, parse_fastqc(sample.fastqc, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        input_bam=sample.data_source,
        output_fastq=sample.fastq1 if sample.paired else sample.fastq,
        output_fastq2=sample.fastq2 if sample.paired else None,
        unpaired_fastq=sample.fastq_unpaired if sample.paired else None)
    pipe_manager.run(cmd,
                     sample.fastq1 if sample.paired else sample.fastq,
                     shell=True)
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    # NOTE: any trimmer value other than "trimmomatic" or "skewer" skips
    # trimming entirely; no command is run in that case.
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            input_fastq1=sample.fastq1 if sample.paired else sample.fastq,
            input_fastq2=sample.fastq2 if sample.paired else None,
            output_fastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            output_fastq1_unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            output_fastq2=sample.trimmed2 if sample.paired else None,
            output_fastq2_unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        # NOTE(review): the prefix uses sample.sample_name while the rest of
        # this function uses sample.name - confirm both attributes exist.
        cmd = tk.skewer(
            input_fastq1=sample.fastq1 if sample.paired else sample.fastq,
            input_fastq2=sample.fastq2 if sample.paired else None,
            output_prefix=os.path.join(sample.paths.unmapped,
                                       sample.sample_name),
            output_fastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            output_fastq2=sample.trimmed2 if sample.paired else None,
            log=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
        # Trimming statistics are only reported for the skewer branch.
        report_dict(
            pipe_manager,
            parse_trim_stats(sample.trimlog,
                             prefix="trim_",
                             paired_end=sample.paired))

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2_map(
        input_fastq1=sample.trimmed1 if sample.paired else sample.trimmed,
        input_fastq2=sample.trimmed2 if sample.paired else None,
        output_bam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genome_index=getattr(pipe_manager.config.resources.genome_index,
                             sample.genome),
        max_insert=pipe_manager.config.parameters.max_insert,
        cpus=args.cores)
    pipe_manager.run(cmd, sample.mapped, shell=True)
    report_dict(
        pipe_manager,
        parse_mapping_stats(sample.aln_rates, paired_end=sample.paired))

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filter_reads(input_bam=sample.mapped,
                          output_bam=sample.filtered,
                          metrics_file=sample.dups_metrics,
                          paired=sample.paired,
                          cpus=args.cores,
                          Q=pipe_manager.config.parameters.read_quality)
    pipe_manager.run(cmd, sample.filtered, shell=True)
    report_dict(pipe_manager, parse_duplicate_stats(sample.dups_metrics))

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.index_bam(input_bam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.index_bam(input_bam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)

    # Plot fragment distribution
    # Only for paired-end samples, and only when the plot is not there yet.
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plot_atacseq_insert_sizes(bam=sample.filtered,
                                     plot=sample.insertplot,
                                     output_csv=sample.insertdata)

    # Count coverage genome-wide
    pipe_manager.timestamp("Calculating genome-wide coverage")
    cmd = tk.genome_wide_coverage(
        input_bam=sample.filtered,
        genome_windows=getattr(pipe_manager.config.resources.genome_windows,
                               sample.genome),
        output=sample.coverage)
    pipe_manager.run(cmd, sample.coverage, shell=True)

    # Calculate NSC, RSC
    # Run with nofail=True, so a failure here does not stop the pipeline.
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.run_spp(input_bam=sample.filtered,
                     output=sample.qc,
                     plot=sample.qc_plot,
                     cpus=args.cores)
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    report_dict(pipe_manager, parse_nsc_rsc(sample.qc))

    # If the sample is a control, we're finished.
    # The type/value for the comparison Sample in this case should be either
    # absent or a null-indicative/-suggestive value.
    comparison = getattr(sample, CHIP_COMPARE_COLUMN, None)
    if comparison in [None, "", "NA"]:
        pipe_manager.stop_pipeline()
        print("Finished processing sample {}".format(sample.name))
        return

    # The control's filtered BAM path is derived by substituting the
    # comparison name for this sample's name in its own filtered path.
    control_file = sample.filtered.replace(sample.name, comparison)

    # The pipeline will now wait for the comparison sample file to be completed
    pipe_manager._wait_for_file(control_file)

    # Call peaks.
    broad_mode = sample.broad
    peaks_folder = sample.paths.peaks
    treatment_file = sample.filtered
    if not os.path.exists(peaks_folder):
        os.makedirs(peaks_folder)
    # TODO: include the filepaths as caller-neutral positionals/keyword args
    # TODO (cont.) once NGSTK API is tweaked.
    peak_call_kwargs = {
        "output_dir": peaks_folder,
        "broad": broad_mode,
        "qvalue": args.qvalue
    }
    if args.peak_caller == "macs2":
        cmd = tk.macs2_call_peaks(treatment_bams=treatment_file,
                                  control_bams=control_file,
                                  sample_name=sample.name,
                                  pvalue=args.pvalue,
                                  genome=sample.genome,
                                  paired=sample.paired,
                                  **peak_call_kwargs)
    else:
        # NOTE(review): every other call in this function reads args.cores;
        # confirm that args actually defines a "cpus" attribute.
        cmd = tk.spp_call_peaks(treatment_bam=treatment_file,
                                control_bam=control_file,
                                treatment_name=sample.name,
                                control_name=comparison,
                                cpus=args.cpus,
                                **peak_call_kwargs)
    pipe_manager.run(cmd, target=sample.peaks, shell=True)
    report_dict(pipe_manager, parse_peak_number(sample.peaks))

    # Do plotting as desired.
    if args.peak_caller == "macs2" and not broad_mode:
        pipe_manager.timestamp("Plotting MACS2 model")
        model_files_base = sample.name + "_model"

        # Create the command to run the model script.
        name_model_script = model_files_base + ".r"
        path_model_script = os.path.join(peaks_folder, name_model_script)
        exec_model_script = "{} {}".format(
            pipe_manager.config.tools.Rscript, path_model_script)

        # Create the command to create and rename the model plot.
        # The plot is moved from the current working directory into the
        # peaks folder.
        plot_name = model_files_base + ".pdf"
        src_plot_path = os.path.join(os.getcwd(), plot_name)
        dst_plot_path = os.path.join(peaks_folder, plot_name)
        rename_model_plot = "mv {} {}".format(src_plot_path, dst_plot_path)

        # Run the model script and rename the model plot.
        pipe_manager.run([exec_model_script, rename_model_plot],
                         target=dst_plot_path,
                         shell=True,
                         nofail=True)

    # Calculate fraction of reads in peaks (FRiP)
    pipe_manager.timestamp("Calculating fraction of reads in peaks (FRiP)")
    cmd = tk.calculate_frip(input_bam=sample.filtered,
                            input_bed=sample.peaks,
                            output=sample.frip,
                            cpus=args.cores)
    pipe_manager.run(cmd, sample.frip, shell=True)
    # Single-end count plus half of the paired-end count.
    total = (float(pipe_manager.stats_dict["filtered_single_ends"]) +
             (float(pipe_manager.stats_dict["filtered_paired_ends"]) / 2.))
    report_dict(pipe_manager, parse_frip(sample.frip, total))

    # on an oracle peak list
    if hasattr(pipe_manager.config.resources.oracle_peak_regions,
               sample.genome):
        # NOTE(review): calculate_frip is called as a bare name here, not as
        # tk.calculate_frip - confirm it is defined at module level.
        cmd = calculate_frip(
            input_bam=sample.filtered,
            input_bed=getattr(
                pipe_manager.config.resources.oracle_peak_regions,
                sample.genome),
            output=sample.oracle_frip,
            cpus=args.cores)
        pipe_manager.run(cmd, sample.oracle_frip, shell=True)
        report_dict(pipe_manager,
                    parse_frip(sample.oracle_frip, total, prefix="oracle_"))

    # Make tracks
    track_dir = os.path.dirname(sample.bigwig)
    if not os.path.exists(track_dir):
        os.makedirs(track_dir)
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from BAM file")
    # NOTE(review): bam_to_bigwig is a bare name, not a tk method - confirm
    # it is defined at module level.
    cmd = bam_to_bigwig(input_bam=sample.filtered,
                        output_bigwig=sample.bigwig,
                        genome=sample.genome,
                        normalization_method="RPGC")
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    print("Finished processing sample %s." % sample.name)
    pipe_manager.stop_pipeline()

Пример #4

Показать файл

def process(sample, pipe_manager, args):
    """
    This takes unmapped Bam files and makes trimmed, aligned, duplicate marked
    and removed, indexed (and shifted if necessary) Bam files
    along with a UCSC browser track.

    Steps, in order: create output directories, merge technical replicates,
    FastQC, BAM -> FASTQ, adapter trimming (trimmomatic or skewer, chosen by
    ``pipe_manager.config.parameters.trimmer``), Bowtie2 mapping, read
    filtering, BAM indexing, bigWig track, insert-size plot (paired-end
    only), genome-wide coverage and NSC/RSC cross-correlation QC.

    :param sample: Sample object; its path attributes (``data_path``,
        ``fastq*``, ``trimmed*``, ``mapped``, ``filtered``, ...) and the
        ``paths`` container are read, and ``data_path`` is overwritten when
        replicates are merged.
    :param pipe_manager: Pipeline manager used to run and timestamp every
        command and to hold the pipeline configuration.
    :param args: Parsed command-line arguments; only ``args.cores`` is used.
    :returns: The same ``pipe_manager`` that was passed in.
    :raises OSError: If one of the sample's output directories cannot be
        created.
    """
    print("Start processing ChIP-seq sample '{}'.".format(sample.name))

    # Make sure every directory registered on the sample exists.
    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # Entry is not something os.path can handle; skip it.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # `except` needs the exception class, not an instance;
                # re-raise with an informative message, keeping the errno.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s (%s)" %
                    (path, sample.paths[path], err.strerror))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    if len(sample.data_path.split(" ")) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            input_bams=sample.data_path.split(
                " "),  # this is a list of sample paths
            merged_bam=sample.unmapped)
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    fastqc_zip = os.path.join(sample.paths.sample_root,
                              sample.sample_name + "_fastqc.zip")
    cmd = tk.fastqc_rename(input_bam=sample.data_path,
                           output_dir=sample.paths.sample_root,
                           sample_name=sample.sample_name)
    pipe_manager.run(cmd, fastqc_zip, shell=True)
    report_dict(pipe_manager, parse_fastqc(fastqc_zip, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None)
    pipe_manager.run(cmd,
                     sample.fastq1 if sample.paired else sample.fastq,
                     shell=True)
    # Intermediate FASTQ files are registered for conditional clean-up.
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(sample.paths.unmapped,
                                      sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
        # Trimming statistics are only parsed for the skewer log.
        report_dict(
            pipe_manager,
            parse_trim_stats(sample.trimlog,
                             prefix="trim_",
                             paired_end=sample.paired))

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2Map(
        inputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
        inputFastq2=sample.trimmed2 if sample.paired else None,
        outputBam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genomeIndex=getattr(pipe_manager.config.resources.genomes,
                            sample.genome),
        maxInsert=pipe_manager.config.parameters.max_insert,
        cpus=args.cores)
    pipe_manager.run(cmd, sample.mapped, shell=True)
    report_dict(
        pipe_manager,
        parse_mapping_stats(sample.aln_rates, paired_end=sample.paired))

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filterReads(inputBam=sample.mapped,
                         outputBam=sample.filtered,
                         metricsFile=sample.dups_metrics,
                         paired=sample.paired,
                         cpus=args.cores,
                         Q=pipe_manager.config.parameters.read_quality)
    pipe_manager.run(cmd, sample.filtered, shell=True)
    report_dict(pipe_manager, parse_duplicate_stats(sample.dups_metrics))

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.indexBam(inputBam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.indexBam(inputBam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)

    track_dir = os.path.dirname(sample.bigwig)
    if not os.path.exists(track_dir):
        os.makedirs(track_dir)

    # Make tracks
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from bam file")
    cmd = bamToBigWig(
        inputBam=sample.filtered,
        outputBigWig=sample.bigwig,
        genomeSizes=getattr(pipe_manager.config.resources.chromosome_sizes,
                            sample.genome),
        genome=sample.genome,
        # by default make extended tracks
        tagmented=pipe_manager.config.parameters.tagmented,
        normalize=pipe_manager.config.parameters.normalize_tracks,
        norm_factor=pipe_manager.config.parameters.norm_factor)
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    # Plot fragment distribution
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plot_atacseq_insert_sizes(bam=sample.filtered,
                                     plot=sample.insertplot,
                                     output_csv=sample.insertdata)
        pipe_manager.report_figure("insert_sizes", sample.insertplot)

    # Count coverage genome-wide
    pipe_manager.timestamp("Calculating genome-wide coverage")
    cmd = tk.genomeWideCoverage(
        inputBam=sample.filtered,
        genomeWindows=getattr(pipe_manager.config.resources.genome_windows,
                              sample.genome),
        output=sample.coverage)
    pipe_manager.run(cmd, sample.coverage, shell=True)

    # Calculate NSC, RSC
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.peakTools(inputBam=sample.filtered,
                       output=sample.qc,
                       plot=sample.qc_plot,
                       cpus=args.cores)
    # nofail=True: a failure of this QC step does not abort the pipeline.
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    report_dict(pipe_manager, parse_nsc_rsc(sample.qc))
    pipe_manager.report_figure("cross_correlation", sample.qc_plot)

    print("Finished processing sample '{}'.".format(sample.name))
    return pipe_manager

Пример #5

Показать файл

Файл: chipseq.py Проект: epigen/open_pipelines

def process(sample, pipe_manager, args):
    """
    This takes unmapped Bam files and makes trimmed, aligned, duplicate marked
    and removed, indexed (and shifted if necessary) Bam files
    along with a UCSC browser track.

    Steps, in order: create output directories, merge technical replicates,
    FastQC, BAM -> FASTQ, adapter trimming (trimmomatic or skewer, chosen by
    ``pipe_manager.config.parameters.trimmer``), Bowtie2 mapping, read
    filtering, BAM indexing, total-efficiency report, bigWig track,
    insert-size plot (paired-end only), genome-wide coverage and NSC/RSC
    cross-correlation QC.

    :param sample: Sample object; its path attributes and the ``paths``
        container are read, and ``data_path`` is overwritten when replicates
        are merged.
    :param pipe_manager: Pipeline manager used to run and timestamp every
        command, to hold the configuration and the ``stats_dict`` of
        reported statistics.
    :param args: Parsed command-line arguments; only ``args.cores`` is used.
    :returns: The same ``pipe_manager`` that was passed in.
    :raises OSError: If one of the sample's output directories cannot be
        created.
    """
    print("Start processing ChIP-seq sample '{}'.".format(sample.name))

    # Make sure every directory registered on the sample exists.
    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # Entry is not something os.path can handle; skip it.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # `except` needs the exception class, not an instance;
                # re-raise with an informative message, keeping the errno.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s (%s)" % (path, sample.paths[path], err.strerror))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    if len(sample.data_path.split(" ")) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            input_bams=sample.data_path.split(" "),  # this is a list of sample paths
            merged_bam=sample.unmapped
        )
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    fastqc_zip = os.path.join(sample.paths.sample_root, sample.sample_name + "_fastqc.zip")
    cmd = tk.fastqc_rename(
        input_bam=sample.data_path,
        output_dir=sample.paths.sample_root,
        sample_name=sample.sample_name
    )
    pipe_manager.run(cmd, fastqc_zip, shell=True)
    report_dict(pipe_manager, parse_fastqc(fastqc_zip, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None
    )
    pipe_manager.run(cmd, sample.fastq1 if sample.paired else sample.fastq, shell=True)
    # Intermediate FASTQ files are registered for conditional clean-up.
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog
        )
        pipe_manager.run(cmd, sample.trimmed1 if sample.paired else sample.trimmed, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(sample.paths.unmapped, sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters
        )
        pipe_manager.run(cmd, sample.trimmed1 if sample.paired else sample.trimmed, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
        # Trimming statistics are only parsed for the skewer log.
        report_dict(pipe_manager, parse_trim_stats(sample.trimlog, prefix="trim_", paired_end=sample.paired))

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2Map(
        inputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
        inputFastq2=sample.trimmed2 if sample.paired else None,
        outputBam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genomeIndex=getattr(pipe_manager.config.resources.genome_index, sample.genome),
        maxInsert=pipe_manager.config.parameters.max_insert,
        cpus=args.cores
    )
    pipe_manager.run(cmd, sample.mapped, shell=True)
    report_dict(pipe_manager, parse_mapping_stats(sample.aln_rates, paired_end=sample.paired))

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filterReads(
        inputBam=sample.mapped,
        outputBam=sample.filtered,
        metricsFile=sample.dups_metrics,
        paired=sample.paired,
        cpus=args.cores,
        Q=pipe_manager.config.parameters.read_quality
    )
    pipe_manager.run(cmd, sample.filtered, shell=True)
    report_dict(pipe_manager, parse_duplicate_stats(sample.dups_metrics))

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.indexBam(inputBam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.indexBam(inputBam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)

    # Report total efficiency
    # Paired-end reads are halved so that both terms are summed on the
    # same scale before dividing by the FastQC read total.
    usable = (
        float(pipe_manager.stats_dict["filtered_single_ends"]) +
        (float(pipe_manager.stats_dict["filtered_paired_ends"]) / 2.))
    total = float(pipe_manager.stats_dict['fastqc_total_pass_filter_reads'])
    report_dict(
        pipe_manager,
        {"total_efficiency": (usable / total) * 100})

    # Make tracks
    track_dir = os.path.dirname(sample.bigwig)
    if not os.path.exists(track_dir):
        os.makedirs(track_dir)
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from BAM file")
    cmd = bam_to_bigwig(
        input_bam=sample.filtered,
        output_bigwig=sample.bigwig,
        genome=sample.genome,
        normalization_method="RPGC")
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    # Plot fragment distribution
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plot_atacseq_insert_sizes(
            bam=sample.filtered,
            plot=sample.insertplot,
            output_csv=sample.insertdata
        )
        pipe_manager.report_figure("insert_sizes", sample.insertplot)

    # Count coverage genome-wide
    pipe_manager.timestamp("Calculating genome-wide coverage")
    cmd = tk.genomeWideCoverage(
        inputBam=sample.filtered,
        genomeWindows=getattr(pipe_manager.config.resources.genome_windows, sample.genome),
        output=sample.coverage
    )
    pipe_manager.run(cmd, sample.coverage, shell=True)

    # Calculate NSC, RSC
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.peakTools(
        inputBam=sample.filtered,
        output=sample.qc,
        plot=sample.qc_plot,
        cpus=args.cores
    )
    # nofail=True: a failure of this QC step does not abort the pipeline.
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    report_dict(pipe_manager, parse_nsc_rsc(sample.qc))
    pipe_manager.report_figure("cross_correlation", sample.qc_plot)

    print("Finished processing sample '{}'.".format(sample.name))
    return pipe_manager

Пример #6

Показать файл

Файл: atacseq.py Проект: NHLBI-BCB/open_pipelines

def process(sample, pipe_manager, args):
    """
    This takes unmapped Bam files and makes trimmed, aligned, duplicate marked
    and removed, indexed, shifted Bam files along with a UCSC browser track.
    Peaks are called and filtered.

    Steps, in order: create output directories, merge technical replicates,
    FastQC, BAM -> FASTQ, adapter trimming (trimmomatic or skewer, chosen by
    ``pipe_manager.config.parameters.trimmer``), Bowtie2 mapping,
    mitochondrial read statistics, read filtering, read shifting (tagmented
    samples only), BAM indexing, bigWig track, insert-size plot (paired-end
    only), genome-wide coverage, NSC/RSC QC, MACS2 peak calling, blacklist
    filtering of peaks and FRiP calculation. The pipeline is stopped at the
    end.

    :param sample: Sample object; its path attributes and the ``paths``
        container are read, and ``data_path`` is overwritten when replicates
        are merged.
    :param pipe_manager: Pipeline manager used to run and timestamp every
        command, to hold the configuration and the ``stats_dict`` of
        reported statistics.
    :param args: Parsed command-line arguments; only ``args.cores`` is used.
    :raises OSError: If one of the sample's output directories cannot be
        created.
    """
    print("Start processing ATAC-seq sample %s." % sample.sample_name)

    # Make sure every directory registered on the sample exists.
    # list(...) is required: on Python 3 keys() is a view that cannot be
    # concatenated to a list.
    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            exists = os.path.exists(sample.paths[path])
        except TypeError:
            # Entry is not something os.path can handle; skip it.
            continue
        if not exists:
            try:
                os.mkdir(sample.paths[path])
            except OSError as err:
                # `except` needs the exception class, not an instance;
                # re-raise with an informative message, keeping the errno.
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s (%s)" %
                    (path, sample.paths[path], err.strerror))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    if len(sample.data_path.split(" ")) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            input_bams=sample.data_path.split(
                " "),  # this is a list of sample paths
            merged_bam=sample.unmapped)
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    fastqc_zip = os.path.join(sample.paths.sample_root,
                              sample.sample_name + "_fastqc.zip")
    cmd = tk.fastqc_rename(input_bam=sample.data_path,
                           output_dir=sample.paths.sample_root,
                           sample_name=sample.sample_name)
    pipe_manager.run(cmd, fastqc_zip, shell=True)
    report_dict(pipe_manager, parse_fastqc(fastqc_zip, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None)
    pipe_manager.run(cmd,
                     sample.fastq1 if sample.paired else sample.fastq,
                     shell=True)
    # Intermediate FASTQ files are registered for conditional clean-up.
    if not sample.paired:
        pipe_manager.clean_add(sample.fastq, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)

    # Trim reads
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(sample.paths.unmapped,
                                      sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters)
        pipe_manager.run(cmd,
                         sample.trimmed1 if sample.paired else sample.trimmed,
                         shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)

        # Trimming statistics are only parsed for the skewer log.
        report_dict(
            pipe_manager,
            parse_trim_stats(sample.trimlog,
                             prefix="trim_",
                             paired_end=sample.paired))

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2Map(
        inputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
        inputFastq2=sample.trimmed2 if sample.paired else None,
        outputBam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genomeIndex=getattr(pipe_manager.config.resources.genomes,
                            sample.genome),
        maxInsert=pipe_manager.config.parameters.max_insert,
        cpus=args.cores)
    pipe_manager.run(cmd, sample.mapped, shell=True)
    report_dict(
        pipe_manager,
        parse_mapping_stats(sample.aln_rates, paired_end=sample.paired))

    # Get mitochondrial reads
    pipe_manager.timestamp("Getting mitochondrial stats")
    cmd = tk.get_mitochondrial_reads(bam_file=sample.mapped,
                                     output=sample.mitochondrial_stats,
                                     cpus=args.cores)
    # nofail=True: a failure of this step does not abort the pipeline.
    pipe_manager.run(cmd, sample.mitochondrial_stats, shell=True, nofail=True)
    report_dict(
        pipe_manager,
        parse_duplicate_stats(sample.mitochondrial_stats, prefix="MT_"))

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filterReads(inputBam=sample.mapped,
                         outputBam=sample.filtered,
                         metricsFile=sample.dups_metrics,
                         paired=sample.paired,
                         cpus=args.cores,
                         Q=pipe_manager.config.parameters.read_quality)
    pipe_manager.run(cmd, sample.filtered, shell=True)
    report_dict(pipe_manager, parse_duplicate_stats(sample.dups_metrics))

    # Shift reads
    if sample.tagmented:
        pipe_manager.timestamp("Shifting reads of tagmented sample")
        cmd = tk.shiftReads(inputBam=sample.filtered,
                            genome=sample.genome,
                            outputBam=sample.filteredshifted)
        pipe_manager.run(cmd, sample.filteredshifted, shell=True)

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.indexBam(inputBam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.indexBam(inputBam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)
    if sample.tagmented:
        cmd = tk.indexBam(inputBam=sample.filteredshifted)
        pipe_manager.run(cmd, sample.filteredshifted + ".bai", shell=True)

    track_dir = os.path.dirname(sample.bigwig)
    if not os.path.exists(track_dir):
        os.makedirs(track_dir)

    # Make tracks
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from bam file")
    cmd = bamToBigWig(
        inputBam=sample.filtered,
        outputBigWig=sample.bigwig,
        genomeSizes=getattr(pipe_manager.config.resources.chromosome_sizes,
                            sample.genome),
        genome=sample.genome,
        # by default make extended tracks
        tagmented=pipe_manager.config.parameters.tagmented,
        normalize=pipe_manager.config.parameters.normalize_tracks,
        norm_factor=pipe_manager.config.parameters.norm_factor)
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    # Plot fragment distribution
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plot_atacseq_insert_sizes(bam=sample.filtered,
                                     plot=sample.insertplot,
                                     output_csv=sample.insertdata)
        pipe_manager.report_figure("insert_sizes", sample.insertplot)

    # Count coverage genome-wide
    pipe_manager.timestamp("Calculating genome-wide coverage")
    cmd = tk.genomeWideCoverage(
        inputBam=sample.filtered,
        genomeWindows=getattr(pipe_manager.config.resources.genome_windows,
                              sample.genome),
        output=sample.coverage)
    pipe_manager.run(cmd, sample.coverage, shell=True)

    # Calculate NSC, RSC
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.peakTools(inputBam=sample.filtered,
                       output=sample.qc,
                       plot=sample.qc_plot,
                       cpus=args.cores)
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    report_dict(pipe_manager, parse_nsc_rsc(sample.qc))
    pipe_manager.report_figure("cross_correlation", sample.qc_plot)

    # Call peaks
    pipe_manager.timestamp("Calling peaks with MACS2")
    # make dir for output (macs fails if it does not exist)
    if not os.path.exists(sample.paths.peaks):
        os.makedirs(sample.paths.peaks)

    cmd = tk.macs2CallPeaksATACSeq(treatmentBam=sample.filtered,
                                   outputDir=sample.paths.peaks,
                                   sampleName=sample.sample_name,
                                   genome=sample.genome)
    pipe_manager.run(cmd, sample.peaks, shell=True)
    report_dict(pipe_manager, parse_peak_number(sample.peaks))

    # Filter peaks
    # Only done when a blacklist is configured for this sample's genome.
    if hasattr(pipe_manager.config.resources.blacklisted_regions,
               sample.genome):
        pipe_manager.timestamp("Filtering peaks from blacklisted regions")
        cmd = filter_peaks(
            peaks=sample.peaks,
            exclude=getattr(pipe_manager.config.resources.blacklisted_regions,
                            sample.genome),
            filtered_peaks=sample.filtered_peaks)
        pipe_manager.run(cmd, sample.filtered_peaks, shell=True)
        report_dict(
            pipe_manager,
            parse_peak_number(sample.filtered_peaks, prefix="filtered_"))

    # Calculate fraction of reads in peaks (FRiP)
    pipe_manager.timestamp("Calculating fraction of reads in peaks (FRiP)")
    # on the sample's peaks
    cmd = tk.calculate_FRiP(inputBam=sample.filtered,
                            inputBed=sample.peaks,
                            output=sample.frip,
                            cpus=args.cores)
    pipe_manager.run(cmd, sample.frip, shell=True)
    # Paired-end reads are halved before being added to the single-end
    # count; the sum is passed to parse_FRiP as the total.
    total = (float(pipe_manager.stats_dict["filtered_single_ends"]) +
             (float(pipe_manager.stats_dict["filtered_paired_ends"]) / 2.))
    report_dict(pipe_manager, parse_FRiP(sample.frip, total))

    # on an oracle peak list
    if hasattr(pipe_manager.config.resources.oracle_peak_regions,
               sample.genome):
        cmd = tk.calculate_FRiP(
            inputBam=sample.filtered,
            inputBed=getattr(pipe_manager.config.resources.oracle_peak_regions,
                             sample.genome),
            output=sample.oracle_frip,
            cpus=args.cores)
        pipe_manager.run(cmd, sample.oracle_frip, shell=True)
        report_dict(pipe_manager,
                    parse_FRiP(sample.oracle_frip, total, prefix="oracle_"))

    # Finish up
    print(pipe_manager.stats_dict)

    pipe_manager.stop_pipeline()
    print("Finished processing sample %s." % sample.sample_name)

Пример #7

Показать файл

def process(sample, pipe_manager, args):
    """
    Run the ATAC-seq processing steps for a single sample.

    Steps, in order: create the sample's output directories, merge
    replicate BAM files (when ``sample.data_source`` is a list with more
    than one entry), FastQC, BAM to FASTQ conversion, adapter trimming
    (trimmomatic or skewer, chosen by ``config.parameters.trimmer``),
    Bowtie2 mapping, mitochondrial read statistics, read filtering, BAM
    indexing, optional read shifting, TSS enrichment, MACS2 peak calling,
    FRiP (on the sample's peaks and, if configured for the genome, on an
    oracle peak list), insert-size plot for paired-end samples, NSC/RSC
    signal/noise assessment and bigWig track generation.

    Most steps build a command and hand it to ``pipe_manager.run``;
    parsed statistics are recorded through ``report_dict``.

    :param sample: Sample object providing the paths and attributes read
        below (``sample_name``, ``genome``, ``paired``, ``data_source``...).
        ``sample.data_source`` is overwritten with ``sample.unmapped``
        when replicates are merged.
    :param pipe_manager: Pipeline manager providing ``run``, ``timestamp``,
        ``clean_add``, ``stop_pipeline``, ``config`` and ``stats_dict``.
    :param args: Parsed arguments; ``args.cores`` and ``args.shift_reads``
        are read here.
    :raises OSError: If one of the sample directories cannot be created.
    """
    print("Start processing ATAC-seq sample %s." % sample.sample_name)

    # Make sure the sample's output directories exist
    for path in [
            "sample_root",
            "unmapped_dir",
            "mapped_dir",
            "peaks_dir",
            "coverage_dir",
            "tss_dir",
    ]:
        p = getattr(sample, path)
        try:
            exists = os.path.exists(p)
        except TypeError:
            # Value cannot be used as a path (e.g. None): nothing to create
            continue
        if not exists:
            try:
                os.mkdir(p)
            except OSError as err:
                # Re-raise naming the offending path; errno is preserved
                msg = "Cannot create '{}' path: {}".format(path, p)
                raise OSError(err.errno, msg)

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Input/output FASTQ paths depend on whether the sample is paired-end
    if sample.paired:
        fastq1, fastq2 = sample.fastq1, sample.fastq2
        trimmed1, trimmed2 = sample.trimmed1, sample.trimmed2
    else:
        fastq1, fastq2 = sample.fastq, None
        trimmed1, trimmed2 = sample.trimmed, None

    # Merge Bam files if more than one technical replicate.
    # `and` short-circuits, so len() is only evaluated for lists.
    if isinstance(sample.data_source, list) and len(sample.data_source) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            input_bams=sample.data_source,  # this is a list of sample paths
            merged_bam=sample.unmapped,
        )
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        sample.data_source = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    if not os.path.exists(sample.fastqc):
        cmd = tk.fastqc(file=sample.data_source, output_dir=sample.sample_root)
        pipe_manager.run(cmd, sample.fastqc_initial_output, shell=False)
    # Move the FastQC output to its final name, if it is present
    if os.path.exists(sample.fastqc_initial_output):
        os.rename(sample.fastqc_initial_output, sample.fastqc)
    report_dict(pipe_manager, parse_fastqc(sample.fastqc, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        input_bam=sample.data_source,
        output_fastq=fastq1,
        output_fastq2=fastq2,
        unpaired_fastq=sample.fastq_unpaired if sample.paired else None,
    )
    pipe_manager.run(cmd, fastq1, shell=True)
    if sample.paired:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq, conditional=True)

    # Trim reads
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            input_fastq1=fastq1,
            input_fastq2=fastq2,
            output_fastq1=trimmed1,
            output_fastq1_unpaired=sample.trimmed1_unpaired
            if sample.paired else None,
            output_fastq2=trimmed2,
            output_fastq2_unpaired=sample.trimmed2_unpaired
            if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog,
        )
        pipe_manager.run(cmd, trimmed1, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            input_fastq1=fastq1,
            input_fastq2=fastq2,
            output_prefix=pjoin(sample.unmapped_dir, sample.sample_name),
            output_fastq1=trimmed1,
            output_fastq2=trimmed2,
            log=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
        )
        pipe_manager.run(cmd, trimmed1, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)

        # Trimming statistics are only reported for the skewer branch
        report_dict(
            pipe_manager,
            parse_trim_stats(sample.trimlog,
                             prefix="trim_",
                             paired_end=sample.paired),
        )

    # Map
    pipe_manager.timestamp("Mapping reads with Bowtie2")
    cmd = tk.bowtie2_map(
        input_fastq1=trimmed1,
        input_fastq2=trimmed2,
        output_bam=sample.mapped,
        log=sample.aln_rates,
        metrics=sample.aln_metrics,
        genome_index=getattr(pipe_manager.config.resources.genome_index,
                             sample.genome),
        max_insert=pipe_manager.config.parameters.max_insert,
        cpus=args.cores,
    )
    pipe_manager.run(cmd, sample.mapped, shell=True)
    report_dict(
        pipe_manager,
        parse_mapping_stats(sample.aln_rates, paired_end=sample.paired),
    )

    # Get mitochondrial reads
    pipe_manager.timestamp("Getting mitochondrial stats")
    cmd = tk.get_mitochondrial_reads(
        bam_file=sample.mapped,
        output=sample.mitochondrial_stats,
        cpus=args.cores,
    )
    pipe_manager.run(cmd, sample.mitochondrial_stats, shell=True, nofail=True)
    report_dict(
        pipe_manager,
        parse_duplicate_stats(sample.mitochondrial_stats, prefix="MT_"),
    )

    # Filter reads
    pipe_manager.timestamp("Filtering reads for quality")
    cmd = tk.filter_reads(
        input_bam=sample.mapped,
        output_bam=sample.filtered,
        metrics_file=sample.dups_metrics,
        paired=sample.paired,
        cpus=args.cores,
        Q=pipe_manager.config.parameters.read_quality,
    )
    pipe_manager.run(cmd, sample.filtered, shell=True)
    report_dict(pipe_manager, parse_duplicate_stats(sample.dups_metrics))

    # Index bams
    pipe_manager.timestamp("Indexing bamfiles with samtools")
    cmd = tk.index_bam(input_bam=sample.mapped)
    pipe_manager.run(cmd, sample.mapped + ".bai", shell=True)
    cmd = tk.index_bam(input_bam=sample.filtered)
    pipe_manager.run(cmd, sample.filtered + ".bai", shell=True)

    # Shift reads
    if args.shift_reads:
        pipe_manager.timestamp("Shifting reads of tagmented sample")
        cmd = tk.shift_reads(
            input_bam=sample.filtered,
            genome=sample.genome,
            output_bam=sample.filteredshifted,
        )
        pipe_manager.run(cmd, sample.filteredshifted, shell=True)

        cmd = tk.index_bam(input_bam=sample.filteredshifted)
        pipe_manager.run(cmd, sample.filteredshifted + ".bai", shell=True)

    # Run TSS enrichment
    tss_enrichment = run_tss_analysis(
        sample=sample,
        bam_file=sample.filtered,
        chrom_file=getattr(pipe_manager.config.resources.chromosome_sizes,
                           sample.genome),
        tss_file=getattr(pipe_manager.config.resources.unique_tss,
                         sample.genome),
    )
    report_dict(pipe_manager, {"tss_enrichment": tss_enrichment})

    # Call peaks
    pipe_manager.timestamp("Calling peaks with MACS2")
    # make dir for output (macs fails if it does not exist)
    if not os.path.exists(os.path.dirname(sample.peaks)):
        os.makedirs(os.path.dirname(sample.peaks))

    cmd = tk.macs2_call_peaks_atacseq(
        treatment_bam=sample.filtered,
        output_dir=sample.peaks_dir,
        sample_name=sample.sample_name,
        genome=sample.genome,
    )
    pipe_manager.run(cmd, sample.peaks, shell=True)
    report_dict(pipe_manager, parse_peak_number(sample.peaks))

    # Calculate fraction of reads in peaks (FRiP)
    pipe_manager.timestamp("Calculating fraction of reads in peaks (FRiP)")
    cmd = tk.calculate_frip(
        input_bam=sample.filtered,
        input_bed=sample.peaks,
        output=sample.frip,
        cpus=args.cores,
    )
    pipe_manager.run(cmd, sample.frip, shell=True)
    # Denominator passed to parse_frip: single ends plus half the paired ends
    total = float(pipe_manager.stats_dict["filtered_single_ends"]) + (
        float(pipe_manager.stats_dict["filtered_paired_ends"]) / 2.0)
    report_dict(pipe_manager, parse_frip(sample.frip, total))

    # on an oracle peak list
    if hasattr(pipe_manager.config.resources.oracle_peak_regions,
               sample.genome):
        # NOTE(review): this calls a module-level `calculate_frip`, not
        # `tk.calculate_frip` as above -- confirm this is intended.
        cmd = calculate_frip(
            input_bam=sample.filtered,
            input_bed=getattr(
                pipe_manager.config.resources.oracle_peak_regions,
                sample.genome),
            output=sample.oracle_frip,
            cpus=args.cores,
        )
        pipe_manager.run(cmd, sample.oracle_frip, shell=True)
        report_dict(
            pipe_manager,
            parse_frip(sample.oracle_frip, total, prefix="oracle_"),
        )

    # Plot fragment distribution (called directly, not via pipe_manager.run)
    if sample.paired and not os.path.exists(sample.insertplot):
        pipe_manager.timestamp("Plotting insert size distribution")
        tk.plot_atacseq_insert_sizes(
            bam=sample.filtered,
            plot=sample.insertplot,
            output_csv=sample.insertdata,
        )

    # Genome-wide coverage counting (tk.genome_wide_coverage writing to
    # sample.coverage) is currently disabled in this pipeline.

    # Calculate NSC, RSC
    pipe_manager.timestamp("Assessing signal/noise in sample")
    cmd = tk.run_spp(
        input_bam=sample.filtered,
        output=sample.qc,
        plot=sample.qc_plot,
        cpus=args.cores,
    )
    pipe_manager.run(cmd, sample.qc_plot, shell=True, nofail=True)
    report_dict(pipe_manager, parse_nsc_rsc(sample.qc))

    # Make tracks
    track_dir = os.path.dirname(sample.bigwig)
    if not os.path.exists(track_dir):
        os.makedirs(track_dir)
    # right now tracks are only made for bams without duplicates
    pipe_manager.timestamp("Making bigWig tracks from BAM file")
    cmd = bam_to_bigwig(
        input_bam=sample.filtered,
        output_bigwig=sample.bigwig,
        genome=sample.genome,
        normalization_method="RPGC",
    )
    pipe_manager.run(cmd, sample.bigwig, shell=True)

    print(pipe_manager.stats_dict)

    pipe_manager.stop_pipeline()
    print("Finished processing sample %s." % sample.sample_name)

Пример #8

Показать файл

Файл: rnaseq.py Проект: epigen/open_pipelines

def process(sample, pipe_manager, args):
    """
    Run the RNA-seq processing steps for a single sample.

    Steps, in order: create the directories listed in ``sample.paths``,
    merge replicate BAM files (when ``sample.data_path`` holds several
    space-separated paths), FastQC, BAM to FASTQ conversion, adapter
    trimming (trimmomatic or skewer, chosen by
    ``config.parameters.trimmer``) and expression quantification with
    Kallisto.

    Each step builds a command and hands it to ``pipe_manager.run``;
    parsed statistics are recorded through ``report_dict``.

    :param sample: Sample object providing the paths and attributes read
        below (``sample_name``, ``genome``, ``paired``, ``data_path``...).
        ``sample.data_path`` is overwritten with ``sample.unmapped`` when
        replicates are merged.
    :param pipe_manager: Pipeline manager providing ``run``, ``timestamp``,
        ``clean_add``, ``stop_pipeline``, ``config`` and ``stats_dict``.
    :param args: Parsed arguments; ``args.cores`` is read here.
    :raises OSError: If one of the sample directories cannot be created.
    """
    print("Start processing RNA-seq sample %s." % sample.sample_name)

    # Make sure the sample's output directories exist
    for path in ["sample_root"] + list(sample.paths.__dict__.keys()):
        try:
            target = sample.paths[path]
            exists = os.path.exists(target)
        except TypeError:
            # Lookup or value not usable as a path: nothing to create
            continue
        if not exists:
            try:
                os.mkdir(target)
            except OSError as err:
                # Re-raise naming the offending path; errno is preserved
                raise OSError(
                    err.errno,
                    "Cannot create '%s' path: %s" % (path, target))

    # Create NGSTk instance
    tk = NGSTk(pm=pipe_manager)

    # Merge Bam files if more than one technical replicate
    bam_paths = sample.data_path.split(" ")
    if len(bam_paths) > 1:
        pipe_manager.timestamp("Merging bam files from replicates")
        cmd = tk.merge_bams(
            # this is a list of sample paths
            input_bams=bam_paths,
            merged_bam=sample.unmapped
        )
        pipe_manager.run(cmd, sample.unmapped, shell=True)
        sample.data_path = sample.unmapped

    # Fastqc
    pipe_manager.timestamp("Measuring sample quality with Fastqc")
    fastqc_zip = os.path.join(
        sample.paths.sample_root, sample.sample_name + "_fastqc.zip")
    cmd = tk.fastqc_rename(
        input_bam=sample.data_path,
        output_dir=sample.paths.sample_root,
        sample_name=sample.sample_name
    )
    pipe_manager.run(cmd, fastqc_zip, shell=True)
    report_dict(pipe_manager, parse_fastqc(fastqc_zip, prefix="fastqc_"))

    # Convert bam to fastq
    pipe_manager.timestamp("Converting to Fastq format")
    cmd = tk.bam2fastq(
        inputBam=sample.data_path,
        outputFastq=sample.fastq1 if sample.paired else sample.fastq,
        outputFastq2=sample.fastq2 if sample.paired else None,
        unpairedFastq=sample.fastq_unpaired if sample.paired else None
    )
    pipe_manager.run(
        cmd, sample.fastq1 if sample.paired else sample.fastq, shell=True)
    if sample.paired:
        pipe_manager.clean_add(sample.fastq1, conditional=True)
        pipe_manager.clean_add(sample.fastq2, conditional=True)
        pipe_manager.clean_add(sample.fastq_unpaired, conditional=True)
    else:
        pipe_manager.clean_add(sample.fastq, conditional=True)

    # Trim reads
    pipe_manager.timestamp("Trimming adapters from sample")
    if pipe_manager.config.parameters.trimmer == "trimmomatic":
        cmd = tk.trimmomatic(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq1unpaired=sample.trimmed1_unpaired if sample.paired else None,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            outputFastq2unpaired=sample.trimmed2_unpaired if sample.paired else None,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters,
            log=sample.trimlog
        )
        pipe_manager.run(
            cmd, sample.trimmed1 if sample.paired else sample.trimmed, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed1_unpaired, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)
            pipe_manager.clean_add(sample.trimmed2_unpaired, conditional=True)

    elif pipe_manager.config.parameters.trimmer == "skewer":
        cmd = tk.skewer(
            inputFastq1=sample.fastq1 if sample.paired else sample.fastq,
            inputFastq2=sample.fastq2 if sample.paired else None,
            outputPrefix=os.path.join(
                sample.paths.unmapped, sample.sample_name),
            outputFastq1=sample.trimmed1 if sample.paired else sample.trimmed,
            outputFastq2=sample.trimmed2 if sample.paired else None,
            trimLog=sample.trimlog,
            cpus=args.cores,
            adapters=pipe_manager.config.resources.adapters
        )
        pipe_manager.run(
            cmd, sample.trimmed1 if sample.paired else sample.trimmed, shell=True)
        if not sample.paired:
            pipe_manager.clean_add(sample.trimmed, conditional=True)
        else:
            pipe_manager.clean_add(sample.trimmed1, conditional=True)
            pipe_manager.clean_add(sample.trimmed2, conditional=True)

        # Trimming statistics are only reported for the skewer branch
        report_dict(pipe_manager, parse_trim_stats(
            sample.trimlog, prefix="trim_", paired_end=sample.paired))

    # Quantify gene expression
    pipe_manager.timestamp("Quantifying expression with Kallisto")
    cmd = kallisto(
        fastq_files=[sample.trimmed1, sample.trimmed2] if sample.paired else [sample.trimmed],
        kallisto_index=getattr(pipe_manager.config.resources.kallisto_index, sample.genome),
        read_type=sample.read_type,
        output_dir=sample.kallisto_output_dir,
        threads=args.cores,
        bootstrap_number=pipe_manager.config.parameters.bootstrap_number,
        fragment_size=pipe_manager.config.parameters.fragment_size,
        fragment_std=pipe_manager.config.parameters.fragment_std)
    pipe_manager.run(cmd, sample.kallisto_quantification, shell=True)
    report_dict(pipe_manager, parse_kallisto_stats(sample.kallisto_quantification))

    # Finish up
    print(pipe_manager.stats_dict)

    pipe_manager.stop_pipeline()
    print("Finished processing sample %s." % sample.sample_name)