Python PipelineMappingQC示例，CGATPipelines.PipelineMappingQC Python示例

示例#1

0

显示文件

文件： pipeline_exome_cancer.py 项目： gjaime/CGATPipelines

def buildCoverageStats(infile, outfile):
    '''Generate coverage statistics for regions of interest from a
       bed file using Picard.

    A temporary interval file ``<infile>_temp_baits_final.bed`` is
    assembled from the header of ``infile`` (``samtools view -H``) and a
    reformatted copy of the configured baits file.  That file is passed
    to ``PipelineMappingQC.buildPicardCoverageStats`` as both the third
    and the fourth argument and afterwards handed to
    ``IOTools.zapFile``.

    infile
        Alignment file read by ``samtools view -H``; also used as the
        prefix of every temporary file name.
    outfile
        Forwarded unchanged to ``buildPicardCoverageStats``.
    '''

    # TS check whether this is always required or specific to current baits file

    # baits file requires modification to make picard accept it
    # this is performed before CalculateHsMetrics

    # NOTE(review): ``P.run()`` below is called without arguments, so
    # ``statement`` and the other locals are presumably picked up from
    # this frame by name -- do not rename them without confirming.
    to_cluster = USECLUSTER
    baits = PARAMS["roi_baits"]
    modified_baits = infile + "_temp_baits_final.bed"
    # NOTE(review): ``regions`` is not referenced again in this function
    # (the statement only interpolates ``infile``, ``baits`` and
    # ``modified_baits``); confirm whether it was meant to be passed to
    # buildPicardCoverageStats instead of the second ``modified_baits``.
    regions = PARAMS["roi_regions"]
    # Shell steps, in order:
    #   1. write the header of ``infile`` to <infile>_temp_header.txt
    #   2. drop the first two lines of the baits file (awk 'NR>2') and
    #      emit tab-separated columns 1, 2, 3, "+", 4
    #   3. concatenate header + reformatted baits into ``modified_baits``
    #   4. remove the two intermediate files
    # ``checkpoint`` is presumably a helper that aborts the job if an
    # earlier command failed -- TODO confirm.
    statement = '''samtools view -H %(infile)s > %(infile)s_temp_header.txt;
                awk 'NR>2' %(baits)s |
                awk -F '\\t' 'BEGIN { OFS="\\t" } {print $1,$2,$3,"+",$4;}'
                > %(infile)s_temp_baits.bed;
                cat  %(infile)s_temp_header.txt %(infile)s_temp_baits.bed
                > %(modified_baits)s; checkpoint ;
                rm -rf %(infile)s_temp_baits.bed %(infile)s_temp_header.txt
                '''
    P.run()

    # The same interval file is supplied for both trailing arguments.
    PipelineMappingQC.buildPicardCoverageStats(
        infile, outfile, modified_baits, modified_baits)

    # Dispose of the temporary interval file (exact semantics of zapFile
    # are not visible here -- presumably it empties the file; confirm).
    IOTools.zapFile(modified_baits)

示例#2

0

显示文件

def buildCoverageStats(infile, outfile):
    '''Generate coverage statistics for regions of interest from a
       bed file using Picard.

    Builds ``<infile>_temp_baits_final.bed`` from the ``samtools view -H``
    header of ``infile`` plus a reformatted copy of the configured baits
    file, passes it to ``PipelineMappingQC.buildPicardCoverageStats`` as
    both the third and fourth argument, then hands it to
    ``IOTools.zapFile``.

    infile
        Alignment file read by ``samtools view -H``; also the prefix of
        all temporary file names.
    outfile
        Forwarded unchanged to ``buildPicardCoverageStats``.
    '''

    # TS check whether this is always required or specific to current baits
    # file

    # baits file requires modification to make picard accept it
    # this is performed before CalculateHsMetrics

    # NOTE(review): ``P.run()`` takes no arguments here, so ``statement``
    # and the surrounding locals are presumably read from this frame by
    # name -- confirm before renaming anything.
    to_cluster = USECLUSTER
    baits = PARAMS["roi_baits"]
    modified_baits = infile + "_temp_baits_final.bed"
    # NOTE(review): ``regions`` is never used below; verify whether it
    # should replace one of the two ``modified_baits`` arguments.
    regions = PARAMS["roi_regions"]
    # The shell pipeline: dump the header of ``infile``; skip the first
    # two lines of the baits file and rewrite each remaining line as
    # tab-separated columns 1, 2, 3, "+", 4; concatenate header and
    # rewritten baits into ``modified_baits``; delete the intermediates.
    # ``checkpoint`` is presumably an abort-on-error helper -- TODO confirm.
    statement = '''samtools view -H %(infile)s > %(infile)s_temp_header.txt;
                awk 'NR>2' %(baits)s |
                awk -F '\\t' 'BEGIN { OFS="\\t" } {print $1,$2,$3,"+",$4;}'
                > %(infile)s_temp_baits.bed;
                cat  %(infile)s_temp_header.txt %(infile)s_temp_baits.bed
                > %(modified_baits)s; checkpoint ;
                rm -rf %(infile)s_temp_baits.bed %(infile)s_temp_header.txt
                '''
    P.run()

    # The temporary interval file is given for both trailing arguments.
    PipelineMappingQC.buildPicardCoverageStats(infile, outfile, modified_baits,
                                               modified_baits)

    # Dispose of the temporary interval file (zapFile semantics are not
    # visible here -- presumably it empties the file; confirm).
    IOTools.zapFile(modified_baits)

示例#3

0

显示文件

def buildPicardStats(infile, outfile):
    '''Collect Picard alignment statistics for ``infile``.

    Picard reports its numbers as "reads", but what it actually counts
    are alignments.

    The reference FASTA depends on ``PARAMS["pool_reads"]``: when it is
    set, the pooled contig file next to ``infile`` is used; otherwise
    the ``.fa`` file that shares the stem of the ``.bam`` file.
    '''
    if not PARAMS["pool_reads"]:
        # Per-sample reference: same stem as the BAM file.
        fasta = P.snip(infile, ".bam") + ".fa"
    else:
        # Pooled reference living in the same directory as the BAM file.
        bam_dir = os.path.dirname(infile)
        fasta = os.path.join(bam_dir, "agg-agg-agg.filtered.contigs.fa")

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, fasta)

示例#4

0

显示文件

文件： pipeline_exome_cancer.py 项目： hainm/CGATPipelines

def runPicardOnRealigned(infile, outfile):
    '''Collect Picard alignment statistics for a control/tumour pair.

    ``infile`` and ``outfile`` name the control sample; the tumour
    counterparts are derived by replacing the literal ``"Control"`` with
    ``PARAMS["mutect_tumour"]``.  ``buildPicardAlignmentStats`` is then
    called once for each sample against the genome FASTA located in
    ``PARAMS["bwa_index_dir"]``.

    infile
        Path of the control BAM file.
    outfile
        Path of the control statistics output.
    '''
    # NOTE(review): the locals below are not referenced again in this
    # function; they are kept because they may be job-configuration
    # names or have side effects (a temp directory is created) --
    # confirm before removing.
    to_cluster = USECLUSTER
    job_options = getGATKOptions()
    # TS no multithreading so why 6 threads?
    tmpdir_gatk = P.getTempDir('/ifs/scratch')

    outfile_tumor = outfile.replace("Control", PARAMS["mutect_tumour"])
    infile_tumor = infile.replace("Control", PARAMS["mutect_tumour"])

    track = P.snip(os.path.basename(infile), ".bam")
    track_tumor = track.replace("Control", PARAMS["mutect_tumour"])

    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"],
                           PARAMS["genome"])

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, genome)
    PipelineMappingQC.buildPicardAlignmentStats(infile_tumor,
                                                outfile_tumor, genome)

    # The hand-written CollectMultipleMetrics shell statement that used
    # to follow was only formatted, never executed; it duplicated the
    # two calls above and has been removed.

示例#5

0

显示文件

def runPicardOnRealigned(infile, outfile):
    '''Run Picard alignment statistics on a control BAM and its tumour
    partner.

    The tumour file names are obtained from the control ones by
    substituting ``PARAMS["sample_control"]`` with
    ``PARAMS["sample_tumour"]``.  Both samples are measured against the
    genome FASTA in ``PARAMS["bwa_index_dir"]``.
    '''
    # Not referenced again inside this function; kept as in the original.
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]
    tmpdir_gatk = P.getTempDir()

    def as_tumour(name):
        # Swap the control sample label for the tumour one.
        return name.replace(PARAMS["sample_control"],
                            PARAMS["sample_tumour"])

    outfile_tumor = as_tumour(outfile)
    infile_tumor = as_tumour(infile)

    bam_name = os.path.basename(infile)
    track = P.snip(bam_name, ".bam")
    track_tumor = as_tumour(track)

    index_dir = PARAMS["bwa_index_dir"]
    genome = "%s/%s.fa" % (index_dir, PARAMS["genome"])

    # Same statistics for both members of the pair.
    for bam, stats in ((infile, outfile), (infile_tumor, outfile_tumor)):
        PipelineMappingQC.buildPicardAlignmentStats(bam, stats, genome)

示例#6

0

显示文件

文件： pipeline_exome_cancer.py 项目： gjaime/CGATPipelines

def runPicardOnRealigned(infile, outfile):
    '''Gather Picard alignment statistics for the control sample and
    for the matching tumour sample.

    Tumour paths are derived from ``infile``/``outfile`` by replacing
    the control label (``PARAMS["sample_control"]``) with the tumour
    label (``PARAMS["sample_tumour"]``).
    '''
    # Not referenced again inside this function; kept as in the original.
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    tmpdir_gatk = P.getTempDir()

    control_label = PARAMS["sample_control"]
    tumour_label = PARAMS["sample_tumour"]

    outfile_tumor = outfile.replace(control_label, tumour_label)
    infile_tumor = infile.replace(control_label, tumour_label)

    track = P.snip(os.path.basename(infile), ".bam")
    track_tumor = track.replace(control_label, tumour_label)

    # Reference FASTA sits in the BWA index directory.
    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    collect = PipelineMappingQC.buildPicardAlignmentStats
    collect(infile, outfile, genome)
    collect(infile_tumor, outfile_tumor, genome)

示例#7

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def buildBAMStats(infile, outfile):
    '''Compute BAM-level counts (mapped reads, duplicates, ...).

    Delegates to ``PipelineMappingQC.buildBAMStats`` with ``infile`` and
    ``outfile`` passed through unchanged.
    '''
    PipelineMappingQC.buildBAMStats(
        infile,
        outfile,
    )

示例#8

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def buildPicardGCStats(infile, outfile):
    '''Collect GC-bias statistics for a BAM file with Picard.

    The reference passed along is ``<genome>.fa`` inside
    ``PARAMS["bwa_index_dir"]``.
    '''
    fasta_name = PARAMS["genome"] + ".fa"
    reference = os.path.join(PARAMS["bwa_index_dir"], fasta_name)
    PipelineMappingQC.buildPicardGCStats(infile, outfile, reference)

示例#9

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def loadPicardAlignmentStats(infiles, outfile):
    '''Combine the Picard alignment statistics of all ``infiles`` into
    one table and load it into SQLite.

    Thin wrapper around ``PipelineMappingQC.loadPicardAlignmentStats``.
    '''
    PipelineMappingQC.loadPicardAlignmentStats(
        infiles,
        outfile,
    )

示例#10

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def buildPicardAlignmentStats(infile, outfile):
    '''Collect alignment statistics for a BAM file with Picard.

    The reference passed along is ``<genome>.fa`` inside
    ``PARAMS["bwa_index_dir"]``.
    '''
    fasta_name = PARAMS["genome"] + ".fa"
    reference = os.path.join(PARAMS["bwa_index_dir"], fasta_name)
    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, reference)

示例#11

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def loadPicardDuplicateStats(infiles, outfile):
    '''Combine the Picard duplicate statistics of all ``infiles`` into
    one table and load it into SQLite.

    Thin wrapper around ``PipelineMappingQC.loadPicardDuplicateStats``.
    '''
    PipelineMappingQC.loadPicardDuplicateStats(
        infiles,
        outfile,
    )

示例#12

0

显示文件

def loadCoverageStats(infiles, outfile):
    '''Forward ``infiles`` and ``outfile`` to
    ``PipelineMappingQC.loadPicardCoverageStats``.
    '''
    PipelineMappingQC.loadPicardCoverageStats(
        infiles,
        outfile,
    )

示例#13

0

显示文件

文件： pipeline_medip.py 项目： lesheng/cgat

def buildPicardAlignmentStats(infile, outfile):
    '''Run Picard to gather alignment statistics for a BAM file.

    Uses ``<genome>.fa`` from ``PARAMS["bwa_index_dir"]`` as the
    reference sequence argument.
    '''
    index_dir = PARAMS["bwa_index_dir"]
    genome_fasta = os.path.join(index_dir, PARAMS["genome"] + ".fa")

    PipelineMappingQC.buildPicardAlignmentStats(
        infile, outfile, genome_fasta)

示例#14

0

显示文件

文件： pipeline_rnaseqqc.py 项目： gjaime/CGATPipelines

def loadBAMStats(infiles, outfile):
    '''Load BAM statistics into the ``bam_stats`` table.

    Thin wrapper around ``PipelineMappingQC.loadBAMStats``.
    '''
    PipelineMappingQC.loadBAMStats(
        infiles,
        outfile,
    )

示例#15

0

显示文件

文件： pipeline_medip.py 项目： Acribbs/CGATPipelines

def loadBAMStats(infiles, outfile):
    '''Load the BAM statistics in ``infiles`` into SQLite.

    Thin wrapper around ``PipelineMappingQC.loadBAMStats``.
    '''
    PipelineMappingQC.loadBAMStats(
        infiles,
        outfile,
    )

示例#16

0

显示文件

文件： pipeline_exome.py 项目： Charlie-George/cgat

def loadPicardAlignStats(infiles, outfile):
    '''Combine Picard alignment statistics into one table and load it
    into SQLite.

    Thin wrapper around ``PipelineMappingQC.loadPicardAlignmentStats``.
    '''
    PipelineMappingQC.loadPicardAlignmentStats(
        infiles,
        outfile,
    )

示例#17

0

显示文件

文件： pipeline_exome_cancer.py 项目： gjaime/CGATPipelines

def loadCoverageStats(infiles, outfile):
    '''Pass ``infiles`` and ``outfile`` straight through to
    ``PipelineMappingQC.loadPicardCoverageStats``.
    '''
    PipelineMappingQC.loadPicardCoverageStats(
        infiles,
        outfile,
    )

示例#18

0

显示文件

文件： pipeline_medip.py 项目： lesheng/cgat

def buildPicardGCStats(infile, outfile):
    '''Run Picard to gather GC-bias statistics for a BAM file.

    Uses ``<genome>.fa`` from ``PARAMS["bwa_index_dir"]`` as the
    reference sequence argument.
    '''
    index_dir = PARAMS["bwa_index_dir"]
    genome_fasta = os.path.join(index_dir, PARAMS["genome"] + ".fa")

    PipelineMappingQC.buildPicardGCStats(infile, outfile, genome_fasta)

示例#19

0

显示文件

文件： pipeline_medip.py 项目： lesheng/cgat

def loadBAMStats(infiles, outfile):
    '''Import the BAM statistics from ``infiles`` into SQLite.

    Delegates directly to ``PipelineMappingQC.loadBAMStats``.
    '''
    PipelineMappingQC.loadBAMStats(
        infiles,
        outfile,
    )

示例#20

0

显示文件

文件： pipeline_medip.py 项目： lesheng/cgat

def buildBAMStats(infile, outfile):
    '''Count mapped reads, duplicates and similar figures for a BAM
    file.

    Delegates directly to ``PipelineMappingQC.buildBAMStats``.
    '''
    PipelineMappingQC.buildBAMStats(
        infile,
        outfile,
    )

示例#21

0

显示文件

文件： pipeline_windows.py 项目： pombredanne/cgat

def loadPicardDuplicateStats(infiles, outfile):
    '''Combine Picard duplicate statistics into one table and load it
    into SQLite.

    Calls ``PipelineMappingQC.loadPicardDuplicateStats`` with
    ``pipeline_suffix`` set to ``".bed.gz"``.
    '''
    PipelineMappingQC.loadPicardDuplicateStats(
        infiles,
        outfile,
        pipeline_suffix=".bed.gz",
    )

示例#22

0

显示文件

def loadPicardStats(infiles, outfile):
    '''Merge the alignment statistics in ``infiles`` into single tables.

    Thin wrapper around ``PipelineMappingQC.loadPicardAlignmentStats``.
    '''
    PipelineMappingQC.loadPicardAlignmentStats(
        infiles,
        outfile,
    )