def __init__(self, outdir, first, second, third, **kwargs):
        """
        Assemble a three-job pipeline: a Urandom job, an Ifail job and a Cat
        job that takes the outputs of the first two as its input.

        :param outdir: passed to PypedreamPipeline.__init__ and used as the
            prefix of every output path
        :param first: name appended to outdir for the Urandom job output
        :param second: name appended to outdir for the Ifail job output
        :param third: name appended to outdir for the Cat job output
        :param kwargs: forwarded unchanged to PypedreamPipeline.__init__
        """
        PypedreamPipeline.__init__(self, outdir, **kwargs)

        urandom_job = Urandom()
        urandom_job.output = "/".join((outdir, first))
        self.add(urandom_job)

        ifail_job = Ifail()
        ifail_job.output = "/".join((outdir, second))
        self.add(ifail_job)

        # The Cat job reads the two outputs back from the jobs themselves
        # rather than from locally rebuilt paths.
        concat_job = Cat()
        concat_job.input = [urandom_job.output, ifail_job.output]
        concat_job.output = "/".join((outdir, third))
        self.add(concat_job)
示例#2
0
    def __init__(self, outdir, first, second, third, **kwargs):
        """
        Assemble a three-job pipeline: two Urandom jobs followed by a Cat job
        that takes both Urandom outputs as its input.

        :param outdir: passed to PypedreamPipeline.__init__ and used as the
            prefix of every output path
        :param first: name appended to outdir for the first Urandom output;
            also part of that job's name
        :param second: name appended to outdir for the second Urandom output
            (flagged as intermediate); also part of that job's name
        :param third: name appended to outdir for the Cat output; also part
            of that job's name
        :param kwargs: forwarded unchanged to PypedreamPipeline.__init__
        """
        PypedreamPipeline.__init__(self, outdir, **kwargs)

        first_source = Urandom()
        first_source.jobname = "urandom-{}".format(first)
        first_source.output = "/".join((outdir, first))
        first_source.threads = 1
        self.add(first_source)

        second_source = Urandom()
        second_source.jobname = "urandom-{}".format(second)
        second_source.output = "/".join((outdir, second))
        second_source.is_intermediate = True
        self.add(second_source)

        # The Cat job reads the two outputs back from the jobs themselves
        # rather than from locally rebuilt paths.
        concat_job = Cat()
        concat_job.jobname = "cat1-{}".format(third)
        concat_job.input = [first_source.output, second_source.output]
        concat_job.output = "/".join((outdir, third))
        self.add(concat_job)
示例#3
0
def align_se(pipeline, fq1_files, clinseq_barcode, ref, outdir, maxcores, remove_duplicates=True):
    """
    Align single end data.

    Adds one Skewer job per input fastq, one Cat job that takes all Skewer
    outputs as input, and one Bwa job that takes the Cat output as input.

    :param pipeline: object the jobs are added to; its ``scratch`` attribute
        is handed to the Skewer and Bwa jobs
    :param fq1_files: iterable of fastq paths; each is passed through
        ``normpath`` before use
    :param clinseq_barcode: identifier used in job names, in output file
        names and to derive the read group fields
    :param ref: assigned to the Bwa job as ``input_reference_sequence``
    :param outdir: prefix of every output path
    :param maxcores: thread count assigned to each Skewer job and to the Bwa job
    :param remove_duplicates: assigned to the Bwa job
    :return: the Bwa job's output path, ``<outdir>/<clinseq_barcode>.bam``
    """
    logging.debug("Aligning files: {}".format(fq1_files))

    trimmed_fastqs = []
    # All paths are normalised up front, before any job is created.
    for fastq_path in [normpath(path) for path in fq1_files]:
        trim_job = Skewer()
        trim_job.input1 = fastq_path
        trim_job.input2 = None  # single end: no second fastq
        fastq_name = os.path.basename(fastq_path)
        trim_job.output1 = outdir + "/skewer/{}".format(fastq_name)
        # output2 is still assigned even though input2 is None.
        trim_job.output2 = outdir + "/skewer/unused-dummyfq2-{}".format(fastq_name)
        trim_job.stats = outdir + "/skewer/skewer-stats-{}.log".format(fastq_name)
        trim_job.threads = maxcores
        trim_job.jobname = "skewer/{}".format(fastq_name)
        trim_job.scratch = pipeline.scratch
        trim_job.is_intermediate = True
        trimmed_fastqs.append(trim_job.output1)
        pipeline.add(trim_job)

    concat_job = Cat()
    concat_job.input = trimmed_fastqs
    concat_job.output = outdir + "/skewer/{}_1.fastq.gz".format(clinseq_barcode)
    concat_job.jobname = "cat/{}".format(clinseq_barcode)
    concat_job.is_intermediate = False
    pipeline.add(concat_job)

    align_job = Bwa()
    align_job.input_fastq1 = concat_job.output
    align_job.input_reference_sequence = ref
    align_job.remove_duplicates = remove_duplicates

    # Read group: ID is the barcode itself, SM and LB are derived from it.
    prep_id = parse_prep_id(clinseq_barcode)
    sample_name = compose_sample_str(extract_unique_capture(clinseq_barcode))
    align_job.readgroup = "\"@RG\\tID:{rg_id}\\tSM:{rg_sm}\\tLB:{rg_lb}\\tPL:ILLUMINA\"".format(
        rg_id=clinseq_barcode,
        rg_sm=sample_name,
        rg_lb=prep_id,
    )

    align_job.threads = maxcores
    align_job.output = "{}/{}.bam".format(outdir, clinseq_barcode)
    align_job.scratch = pipeline.scratch
    align_job.jobname = "bwa/{}".format(clinseq_barcode)
    align_job.is_intermediate = False
    pipeline.add(align_job)

    return align_job.output
示例#4
0
def fq_trimming(pipeline, fq1_files, fq2_files, clinseq_barcode, ref, outdir, maxcores=1, flag=True):
    """
    Set up trimming of paired fastq files and concatenation of the results.

    Builds one Skewer job per (fq1, fq2) pair and two Cat jobs, one per read
    direction, whose inputs are the Skewer outputs.

    :param pipeline: object the jobs are added to (only when ``flag`` is
        true); its ``scratch`` attribute is handed to the Skewer jobs
    :param fq1_files: first-read fastq paths; each goes through ``normpath``
    :param fq2_files: second-read fastq paths, paired with ``fq1_files`` by
        position. A shorter list raises IndexError; surplus entries are ignored
    :param clinseq_barcode: identifier used in the Cat job names and outputs
    :param ref: not used by this function
    :param outdir: prefix of every output path
    :param maxcores: thread count assigned to each Skewer job
    :param flag: when false the jobs are still configured and the output
        paths still returned, but nothing is added to the pipeline
    :return: tuple of the two Cat output paths (read 1, read 2)
    """
    first_reads = [normpath(path) for path in fq1_files]
    second_reads = [normpath(path) for path in fq2_files]
    logging.debug("Trimming {} and {}".format(first_reads, second_reads))
    # Pair by position, driven by the first list; built before any job exists.
    read_pairs = [(read1, second_reads[idx]) for idx, read1 in enumerate(first_reads)]

    trimmed_first = []
    trimmed_second = []

    for read1, read2 in read_pairs:
        trim_job = Skewer()
        trim_job.input1 = read1
        trim_job.input2 = read2
        read1_name = os.path.basename(read1)
        trim_job.output1 = outdir + "/skewer/libs/{}".format(read1_name)
        trim_job.output2 = outdir + "/skewer/libs/{}".format(os.path.basename(read2))
        trim_job.stats = outdir + "/skewer/libs/skewer-stats-{}.log".format(read1_name)
        trim_job.threads = maxcores
        trim_job.jobname = "skewer/{}".format(read1_name)
        trim_job.scratch = pipeline.scratch
        trim_job.is_intermediate = True
        trimmed_first.append(trim_job.output1)
        trimmed_second.append(trim_job.output2)
        if flag:
            pipeline.add(trim_job)

    concat_first = Cat()
    concat_first.input = trimmed_first
    concat_first.output = outdir + "/skewer/{}-concatenated_1.fastq.gz".format(clinseq_barcode)
    concat_first.jobname = "cat1/{}".format(clinseq_barcode)
    concat_first.is_intermediate = True
    if flag:
        pipeline.add(concat_first)

    concat_second = Cat()
    concat_second.input = trimmed_second
    concat_second.jobname = "cat2/{}".format(clinseq_barcode)
    concat_second.output = outdir + "/skewer/{}-concatenated_2.fastq.gz".format(clinseq_barcode)
    concat_second.is_intermediate = True
    if flag:
        pipeline.add(concat_second)

    return concat_first.output, concat_second.output
示例#5
0
def align_pe(pipeline, fq1_files, fq2_files, clinseq_barcode, ref, outdir, maxcores=1, remove_duplicates=True):
    """
    Align paired end data.

    Adds one Skewer job per (fq1, fq2) pair, two Cat jobs (one per read
    direction) that take the Skewer outputs as input, and one Bwa job that
    takes both Cat outputs as input.

    :param pipeline: object the jobs are added to; its ``scratch`` attribute
        is handed to the Skewer and Bwa jobs
    :param fq1_files: first-read fastq paths; each goes through ``normpath``
    :param fq2_files: second-read fastq paths, paired with ``fq1_files`` by
        position. A shorter list raises IndexError; surplus entries are ignored
    :param clinseq_barcode: identifier used in job names, in output file
        names and to derive the read group fields
    :param ref: assigned to the Bwa job as ``input_reference_sequence``
    :param outdir: prefix of every output path
    :param maxcores: thread count assigned to each Skewer job and to the Bwa job
    :param remove_duplicates: assigned to the Bwa job
    :return: the Bwa job's output path, ``<outdir>/<clinseq_barcode>.bam``
    """
    first_reads = [normpath(path) for path in fq1_files]
    second_reads = [normpath(path) for path in fq2_files]
    logging.debug("Trimming {} and {}".format(first_reads, second_reads))
    # Pair by position, driven by the first list; built before any job exists.
    read_pairs = [(read1, second_reads[idx]) for idx, read1 in enumerate(first_reads)]

    trimmed_first = []
    trimmed_second = []

    for read1, read2 in read_pairs:
        trim_job = Skewer()
        trim_job.input1 = read1
        trim_job.input2 = read2
        read1_name = os.path.basename(read1)
        trim_job.output1 = outdir + "/skewer/libs/{}".format(read1_name)
        trim_job.output2 = outdir + "/skewer/libs/{}".format(os.path.basename(read2))
        trim_job.stats = outdir + "/skewer/libs/skewer-stats-{}.log".format(read1_name)
        trim_job.threads = maxcores
        trim_job.jobname = "skewer/{}".format(read1_name)
        trim_job.scratch = pipeline.scratch
        trim_job.is_intermediate = True
        trimmed_first.append(trim_job.output1)
        trimmed_second.append(trim_job.output2)
        pipeline.add(trim_job)

    concat_first = Cat()
    concat_first.input = trimmed_first
    concat_first.output = outdir + "/skewer/{}-concatenated_1.fastq.gz".format(clinseq_barcode)
    concat_first.jobname = "cat1/{}".format(clinseq_barcode)
    concat_first.is_intermediate = True
    pipeline.add(concat_first)

    concat_second = Cat()
    concat_second.input = trimmed_second
    concat_second.jobname = "cat2/{}".format(clinseq_barcode)
    concat_second.output = outdir + "/skewer/{}-concatenated_2.fastq.gz".format(clinseq_barcode)
    concat_second.is_intermediate = True
    pipeline.add(concat_second)

    align_job = Bwa()
    align_job.input_fastq1 = concat_first.output
    align_job.input_fastq2 = concat_second.output
    align_job.input_reference_sequence = ref
    align_job.remove_duplicates = remove_duplicates

    # Read group: ID is the barcode itself, SM and LB are derived from it.
    prep_id = parse_prep_id(clinseq_barcode)
    sample_name = compose_sample_str(extract_unique_capture(clinseq_barcode))
    align_job.readgroup = "\"@RG\\tID:{rg_id}\\tSM:{rg_sm}\\tLB:{rg_lb}\\tPL:ILLUMINA\"".format(
        rg_id=clinseq_barcode,
        rg_sm=sample_name,
        rg_lb=prep_id,
    )

    align_job.threads = maxcores
    align_job.output = "{}/{}.bam".format(outdir, clinseq_barcode)
    align_job.jobname = "bwa/{}".format(clinseq_barcode)
    align_job.scratch = pipeline.scratch
    align_job.is_intermediate = False
    pipeline.add(align_job)

    return align_job.output