Exemplo n.º 1
0
def gatk(timestamp, path_base, folder, samples, nproc, wt, q, genome_build, args):
    """Write and submit the GATK variant-calling jobs.

    ``args`` is a "|"-separated string. Only when it splits into exactly two
    fields are they used: the first enables multithreading when it equals
    "yes", the second replaces the default filter value "30". Both values are
    forwarded to ``gatk_commands``.

    Samples whose ``<sample>.sorted.bam`` is missing from
    ``results_sam2sortbam/`` are reported with a warning and skipped.
    Returns whatever ``submit_job_super`` returns.
    """
    options = args.split("|")
    if len(options) == 2:
        multithread = options[0] == "yes"
        filt = options[1]
    else:
        multithread = False
        filt = "30"

    output = "results_gatk"
    work_dir = path_base + folder
    secure_mkdir(work_dir, output)
    print("## Variang calling with GATK")
    print("> Writing jobs for GATK...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))

    bam_dir = work_dir + "/results_sam2sortbam/"
    commands = []
    ordered = sortbysize(samples)
    available = os.listdir(bam_dir)
    for sample in ordered:
        bam_name = sample + ".sorted.bam"
        if bam_name not in available:
            print("Warning: [GATK] SORTED BAM output file not found -> " + bam_dir + bam_name)
            continue
        # gatk_commands yields several shell lines; they form one job entry.
        steps = gatk_commands(work_dir, sample, genome_build, multithread, filt)
        commands.append("\n".join(steps))

    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("gatk", work_dir, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 2
0
def picardqc(timestamp, path_base, folder, samples, nproc, wt, q, annots, strand):
    """Write and submit Picard CollectRnaSeqMetrics jobs for STAR alignments.

    For every sample with a ``<sample>_Aligned.out.sam`` in ``results_star/``
    one command is queued per entry of ``config.nannots``, using the
    annotation at the same index in ``annots`` as REF_FLAT. The
    ``sample_checker`` suffix is appended only to the last command of each
    sample. Samples without a SAM file are reported with a warning.

    ``strand`` is the strand flag string (" --stranded=no|yes|reverse") and
    is translated to Picard's STRAND_SPECIFICITY value; an unknown flag
    raises KeyError. Returns whatever ``submit_job_super`` returns.
    """
    # The original literal listed " --stranded=no" twice, so unstranded data
    # was sent as SECOND_READ_TRANSCRIPTION_STRAND and " --stranded=reverse"
    # was missing altogether.
    nstrand = {
        " --stranded=no": "NONE",
        " --stranded=yes": "FIRST_READ_TRANSCRIPTION_STRAND",
        " --stranded=reverse": "SECOND_READ_TRANSCRIPTION_STRAND",
    }
    output = "results_picard"
    secure_mkdir(path_base + folder, output)
    print("## Alignment QC Picard-CollectRnaSeqMetrics")
    print("> Writing jobs for Picard QC...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = list()
    ksamp = sortbysize(samples)
    proc_files = os.listdir(path_base + folder + "/results_star/")
    last = len(config.nannots) - 1
    for sample in ksamp:
        in_file = path_base + folder + "/results_star/" + sample + "_Aligned.out.sam"
        if sample + "_Aligned.out.sam" not in proc_files:
            print("Warning: [Picard] STAR output file not found -> " + in_file)
            continue
        for i, label in enumerate(config.nannots):
            annot = annots[i]
            # <sample>_Aligned.out.sam -> results_picard/<sample>.<label>.qc
            out_file = in_file.replace(".sam", "." + label + ".qc").replace("results_star/", "results_picard/").replace("_Aligned.out", "")
            call = "java -jar " + config.path_picard + "/CollectRnaSeqMetrics.jar REF_FLAT=" + annot + " STRAND_SPECIFICITY=" + nstrand[strand] + " INPUT=" + in_file + " OUTPUT=" + out_file
            if i == last:
                # Only the final annotation run carries the sample checker.
                call = call + sample_checker.replace("#FOLDER", path_base + folder + "/results_picard").replace("#SAMPLE", sample)
            commands.append(call)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("picard", path_base + folder, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 3
0
def jsplice(timestamp, path_base, folder, samples, nproc, wt, q, genomebuild, pheno, extra_args, strand):
    """Write and submit the jSplice jobs and its experiment-design file.

    Writes ``results_jsplice/expdesign.txt`` (tab-separated, one header line)
    with one row per usable sample, queues one ``starJxn2bed.py`` command per
    usable sample, and finally queues a single ``jSplice.py`` command.

    A sample is usable when its STAR junction file and its sorted BAM both
    exist and ``pheno[sample]`` splits on ":" into exactly two fields; other
    samples are reported with a warning. ``pheno[sample]`` is only looked up
    when both files exist, so a sample missing from ``pheno`` raises KeyError
    in that case.

    The incoming ``nproc`` is not used: the resources are always requested
    with the fixed spec '1/NA/NA'. Returns whatever ``submit_job_super``
    returns.
    """
    output_dir = path_base + folder + '/results_jsplice'
    secure_mkdir(path_base + folder, 'results_jsplice')
    print("## jSPLICE")
    print("> Writing jobs for jSPLICE...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg('1/NA/NA', len(samples))
    commands = list()
    ksamp = sortbysize(samples)
    # The context manager closes the design file even if a sample raises.
    with open(output_dir + '/expdesign.txt', 'w') as out:
        out.write('#exp\tcond\tjxnFile\tbamFile\n')
        for sample in ksamp:
            sj_file = path_base + folder + '/results_star/' + sample + '_SJ.out.tab'  # Junction file created by STAR
            sj_out_file = output_dir + '/' + sample + '.SJ.bed'
            bam_file = path_base + folder + '/results_sam2sortbam/' + sample + '.sorted.bam'  # BAM file created by STAR/Picard(AddOrReplaceReadGroups)
            if os.path.exists(sj_file) and os.path.exists(bam_file):
                design = pheno[sample].split(':')
            else:
                design = []
            if len(design) == 2:
                command = 'python ' + config.path_jsplice + '/starJxn2bed.py -f ' + sj_file + ' -o ' + sj_out_file
                commands.append(command + sample_checker.replace("#FOLDER", output_dir).replace("#SAMPLE", sample))
                out.write('\t'.join([design[0], design[1], sj_out_file, bam_file]) + '\n')
            else:
                print("Warning: [JSPLICE] STAR output files not found -> " + sample)
    # NOTE(review): '-s' is added when the strand flag is " --stranded=no";
    # confirm against jSplice's option semantics that this is not inverted.
    if strand == " --stranded=no":
        extra_args = '-s ' + extra_args
    commands.append('python ' + config.path_jsplice + '/jSplice.py -d ' + output_dir + '/expdesign.txt -o ' + output_dir + ' -a ' + config.path_annotation.replace("#LABEL", genomebuild) + ' ' + extra_args)
    create_scripts(nchild, commands, path_base, folder, 'results_jsplice')
    return submit_job_super("jsplice", path_base + folder, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 4
0
def star(timestamp, path_base, folder, samples, nproc, wt, q, path_genome, star_params, tg):
    """Write and submit one STAR alignment job per sample.

    ``samples[sample]`` is indexed positionally: a list of length 2 is
    treated as a single read file (index 0), any other length as a pair
    (indices 0 and 1). When ``tg`` is true the reads are taken from
    ``results_trimgalore/`` by base name (with ".gz" appended if missing) and
    zcat is always requested; otherwise the listed paths are used and zcat is
    requested only for names ending in ".fastq.gz".

    ``star_params`` is appended verbatim to the command when non-empty.
    Returns whatever ``submit_job_super`` returns.
    """
    output = "results_star"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## RNAseq alignment with STAR...")
    print("> Writing jobs for STAR alignment...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    zcat = " --readFilesCommand zcat"
    commands = []
    for sample in sortbysize(samples):
        files = samples[sample]
        single_end = len(files) == 2
        if tg:
            trimmed_dir = root + "/results_trimgalore/"
            suffix = "" if files[0].split("/")[-1].endswith(".gz") else ".gz"
            reads = [trimmed_dir + files[0].split("/")[-1] + suffix]
            if not single_end:
                reads.append(trimmed_dir + files[1].split("/")[-1] + suffix)
            decompress = zcat
        else:
            reads = [files[0]] if single_end else [files[0], files[1]]
            decompress = zcat if files[0].endswith(".fastq.gz") else ""
        command = (config.path_star
                   + " --quantMode TranscriptomeSAM GeneCounts --runThreadN " + str(nproc)
                   + " --genomeDir " + path_genome
                   + " --readFilesIn " + " ".join(reads)
                   + " --outFileNamePrefix " + root + "/results_star/" + sample + "_"
                   + decompress)
        if len(star_params) > 0:
            command += star_params
        checker = sample_checker.replace("#FOLDER", root + "/results_star").replace("#SAMPLE", sample)
        commands.append(command + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("star", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 5
0
def starfusion(timestamp, path_base, folder, samples, nproc, wt, q, path_star_fusion, star_fusion_params, tg):
    """Write and submit one STAR-Fusion job per sample, starting from reads.

    The left/right read files are the first two entries of
    ``samples[sample]``. When ``tg`` is true they are replaced by the files
    of the same base name in ``results_trimgalore/`` (with ".gz" appended if
    the first base name lacks it). ``star_fusion_params`` is appended
    verbatim when non-empty. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_star-fusion"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## Identification of gene fusions with star-fusion")
    print("> Writing jobs for Star-Fusion...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    for sample in sortbysize(samples):
        files = samples[sample]
        if tg:
            trimmed_dir = root + "/results_trimgalore/"
            # The suffix decision is based on the first file only.
            suffix = "" if files[0].split("/")[-1].endswith(".gz") else ".gz"
            pair = [trimmed_dir + files[0].split("/")[-1] + suffix,
                    trimmed_dir + files[1].split("/")[-1] + suffix]
        else:
            pair = files
        out_dir = root + "/results_star-fusion/" + sample
        call = (config.path_starfusion
                + " --output_dir " + out_dir
                + " --genome_lib_dir " + path_star_fusion
                + " --left_fq " + pair[0]
                + " --right_fq " + pair[1]
                + " --CPU " + str(nproc))
        if len(star_fusion_params) > 0:
            call += star_fusion_params
        checker = sample_checker.replace("#FOLDER", root + "/results_star-fusion").replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("star-fusion", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 6
0
def fastqc(timestamp, path_base, folder, samples, nproc, wt, q, tg):
    """Write and submit one FastQC job per sample.

    A ``samples[sample]`` list of length 4 is treated as paired (entries 0
    and 1 are both analysed); any other length uses entry 0 only. When
    ``tg`` is true the files are taken from ``results_trimgalore/`` by base
    name, with ".gz" appended if the first base name lacks it.
    Returns whatever ``submit_job_super`` returns.
    """
    print("## QC: FastQC")
    print("> Quality control with fastQC...")
    output = "results_fastqc"
    root = path_base + folder
    secure_mkdir(root, "results_fastqc")
    output_folder = root + "/results_fastqc"
    print("> Writing jobs for fastqc analysis...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    for sample in sortbysize(samples):
        files = samples[sample]
        paired = len(files) == 4
        if tg:
            trimmed_dir = root + "/results_trimgalore/"
            suffix = "" if files[0].split("/")[-1].endswith(".gz") else ".gz"
            targets = [trimmed_dir + files[0].split("/")[-1] + suffix]
            if paired:
                targets.append(trimmed_dir + files[1].split("/")[-1] + suffix)
        elif paired:
            targets = [files[0], files[1]]
        else:
            targets = [files[0]]
        call = config.path_fastqc + " -q -o " + output_folder + " " + " ".join(targets)
        checker = sample_checker.replace("#FOLDER", output_folder).replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("fastqc", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 7
0
def trimgalore(timestamp, path_base, folder, samples, nproc, wt, q, extra_args):
    """Write and submit one Trim Galore job per sample.

    A ``samples[sample]`` list of length 4 is treated as paired: " --paired"
    is added to ``extra_args`` and entries 0 and 1 are both trimmed; any
    other length trims entry 0 only. A leading space is inserted before the
    arguments when they are non-empty and do not already start with one.

    Each job is followed, on a new line, by the text returned by
    ``rename_tg_output`` for that sample. Returns whatever
    ``submit_job_super`` returns.
    """
    print("## Trim-galore: Quality and adapter trimming")
    print("> Quality and adapter trimming with Trim Galore...")
    output = "results_trimgalore"
    root = path_base + folder
    secure_mkdir(root, "results_trimgalore")
    output_folder = root + "/results_trimgalore"

    print("> Writing jobs for TrimGalore analysis...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    for sample in sortbysize(samples):
        files = samples[sample]
        paired = len(files) == 4
        options = extra_args + " --paired" if paired else extra_args
        inputs = files[0] + " " + files[1] if paired else files[0]
        if options and not options.startswith(" "):
            options = " " + options
        call = (config.path_trimgalore + options
                + " --gzip --path_to_cutadapt " + config.path_cutadapt
                + " -o " + output_folder + " " + inputs)
        checker = sample_checker.replace("#FOLDER", output_folder).replace("#SAMPLE", sample)
        commands.append(call + checker + "\n" + rename_tg_output(sample, files, root))
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("trimgalore", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 8
0
def htseq(timestamp, path_base, folder, samples, path_annotation, nproc, wt, q,
          mode, strand, countmode):
    """Write and submit one htseq-count job per STAR-aligned sample.

    Results go to ``results_htseq-<mode>/<sample>.tab``. For any ``mode``
    other than "gene" the option " -i exon_id" is added to the command.
    ``strand`` is concatenated directly after the executable path and
    ``countmode`` is passed to "-m". Samples without a
    ``<sample>_Aligned.out.sam`` in ``results_star/`` are reported with a
    warning. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_htseq-" + mode
    root = path_base + folder
    secure_mkdir(root, output)
    print("## HTseq-count")
    print("> Writing jobs for HTseq-count " + mode + " analysis...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    star_dir = root + "/results_star/"
    listing = os.listdir(star_dir)
    id_flag = "" if mode == "gene" else " -i exon_id"
    for sample in ordered:
        sam_name = sample + "_Aligned.out.sam"
        if sam_name not in listing:
            print("Warning: [HTseq-" + mode + "] STAR output file not found -> " + star_dir + sam_name)
            continue
        counts_file = root + "/" + output + "/" + sample + ".tab"
        call = (config.path_htseq + strand + " -m " + countmode + id_flag
                + " -q " + star_dir + sam_name + " " + path_annotation
                + " > " + counts_file)
        checker = sample_checker.replace("#FOLDER", root + "/" + output).replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("htseq-" + mode, root, wt, str(nproc), q,
                            len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 9
0
def starfusion(timestamp, path_base, folder, samples, nproc, wt, q,
               genomebuild):
    """Write and submit one STAR-Fusion job per sample from STAR chimeric output.

    A sample is processed only when both ``<sample>_Chimeric.out.junction``
    and ``<sample>_Chimeric.out.sam`` exist in ``results_star/``; otherwise a
    warning naming the junction file is printed. The reference passed to
    "-G" is ``config.path_annotation`` with "#LABEL" replaced by
    ``genomebuild``. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_star-fusion"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## Identification of gene fusions with star-fusion")
    print("> Writing jobs for Star-Fusion...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    star_dir = root + "/results_star/"
    # The listing itself is not used; the call still raises if the STAR
    # results directory is missing.
    os.listdir(star_dir)
    reference = config.path_annotation.replace("#LABEL", genomebuild)
    for sample in ordered:
        junctions = star_dir + sample + "_Chimeric.out.junction"
        chimeric_sam = star_dir + sample + "_Chimeric.out.sam"
        out_prefix = root + "/results_star-fusion/" + sample
        if not (os.path.exists(junctions) and os.path.exists(chimeric_sam)):
            print("Warning: [Star-Fusion] STAR output file not found -> " + junctions)
            continue
        call = (config.path_starfusion + " -J " + junctions + " -S " + chimeric_sam
                + " -G " + reference + " --out_prefix " + out_prefix)
        checker = sample_checker.replace("#FOLDER", root + "/results_star-fusion").replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("star-fusion", root, wt, str(nproc),
                            q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 10
0
def kallisto(timestamp, path_base, folder, samples, path_index, bootstrap,
             nproc, wt, q, tg):
    """Write and submit one Kallisto quantification job per sample.

    A ``samples[sample]`` list of length 4 is treated as paired (entries 0
    and 1 are both used); any other length is single-read and uses entry 0.
    When ``tg`` is true the reads are taken from ``results_trimgalore/`` by
    base name, with ".gz" appended if the first base name lacks it.

    If the first sample's list has length 2, ``compute_mean_std`` is run
    first and its ``stats.txt`` output is read after ``vcrparser.job_wait``
    returns. The first line of that file is skipped; each later line is
    split on single spaces, field 0 is the sample name and fields 2 and 3
    are passed to "-l" and "-s" for single-read samples. A single-read
    sample with no entry in the statistics raises KeyError.

    Returns whatever ``submit_job_super`` returns.
    """
    output = "results_kallisto"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## RNAseq pseudoalignment with Kallisto")
    # Always bound, so a missing entry is a KeyError rather than a NameError.
    stats = dict()
    if len(samples[list(samples)[0]]) == 2:
        print("> Estimating average and STD of fragment lengh required by Kalisto on single-read data...")
        outputT = root + "/" + output + "/stats.txt"
        _, log = compute_mean_std(path_base, folder, samples, outputT, "1",
                                  wt, q)
        vcrparser.job_wait(log, 10)
        # The context manager closes the file even if a line is malformed.
        with open(outputT, 'r') as handle:
            handle.readline()  # skip the first line
            for line in handle:
                fields = line.strip("\n").split(" ")
                stats[fields[0]] = [fields[2], fields[3]]
    print("> Writing jobs for Kallisto pseudoalignment")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = list()
    for sample in sortbysize(samples):
        files = samples[sample]
        paired = len(files) == 4
        if tg:
            trimmed_dir = root + "/results_trimgalore/"
            suf = "" if files[0].split("/")[-1].endswith(".gz") else ".gz"
            reads = [trimmed_dir + files[0].split("/")[-1] + suf]
            if paired:
                reads.append(trimmed_dir + files[1].split("/")[-1] + suf)
        elif paired:
            reads = [files[0], files[1]]
        else:
            reads = [files[0]]
        if paired:
            args = ""
        else:
            # Built by concatenation: substituting "mean"/"var" placeholders
            # with str.replace could rewrite text inside the inserted values.
            mean, std = stats[sample]
            args = " --single -l " + mean + " -s " + std
        cmd = (config.path_kallisto + " quant -b " + bootstrap
               + " -i " + path_index
               + " -o " + root + "/results_kallisto/" + sample
               + args + " " + " ".join(reads))
        commands.append(
            cmd + sample_checker.replace("#FOLDER", root + "/" +
                                         output).replace("#SAMPLE", sample))
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("kallisto", root, wt, str(nproc), q,
                            len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 11
0
def kallisto(timestamp, path_base, folder, samples, path_index, bootstrap, nproc, wt, q, tg):
    """Write and submit one Kallisto quantification job per sample.

    A ``samples[sample]`` list of length 4 is treated as paired (entries 0
    and 1 are both used); any other length is single-read and uses entry 0.
    When ``tg`` is true the reads are taken from ``results_trimgalore/`` by
    base name, with ".gz" appended if the first base name lacks it.

    If the first sample's list has length 2, ``compute_mean_std`` is run
    first and its ``stats.txt`` output is read after ``vcrparser.job_wait``
    returns. The first line of that file is skipped; each later line is
    split on single spaces, field 0 is the sample name and fields 2 and 3
    are passed to "-l" and "-s" for single-read samples. A single-read
    sample with no entry in the statistics raises KeyError.

    Returns whatever ``submit_job_super`` returns.
    """
    output = "results_kallisto"
    base = path_base + folder
    secure_mkdir(base, output)
    print("## RNAseq pseudoalignment with Kallisto")
    # Always bound, so a missing entry is a KeyError rather than a NameError.
    stats = dict()
    if len(samples[list(samples)[0]]) == 2:
        print("> Estimating average and STD of fragment lengh required by Kalisto on single-read data...")
        outputT = base + "/" + output + "/stats.txt"
        _, log = compute_mean_std(path_base, folder, samples, outputT, "1", wt, q)
        vcrparser.job_wait(log, 10)
        # The context manager closes the file even if a line is malformed.
        with open(outputT, 'r') as stats_file:
            stats_file.readline()  # skip the first line
            for row in stats_file:
                cols = row.strip("\n").split(" ")
                stats[cols[0]] = [cols[2], cols[3]]
    print("> Writing jobs for Kallisto pseudoalignment")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = list()
    for sample in sortbysize(samples):
        files = samples[sample]
        paired = len(files) == 4
        if tg:
            tg_dir = base + "/results_trimgalore/"
            suf = "" if files[0].split("/")[-1].endswith(".gz") else ".gz"
            fnames = tg_dir + files[0].split("/")[-1] + suf
            if paired:
                fnames = fnames + " " + tg_dir + files[1].split("/")[-1] + suf
        elif paired:
            fnames = files[0] + " " + files[1]
        else:
            fnames = files[0]
        if paired:
            args = ""
        else:
            # Built by concatenation: substituting "mean"/"var" placeholders
            # with str.replace could rewrite text inside the inserted values.
            args = " --single -l " + stats[sample][0] + " -s " + stats[sample][1]
        cmd = config.path_kallisto + " quant -b " + bootstrap + " -i " + path_index + " -o " + base + "/results_kallisto/" + sample + args + " " + fnames
        commands.append(cmd + sample_checker.replace("#FOLDER", base + "/" + output).replace("#SAMPLE", sample))
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("kallisto", base, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 12
0
def sam2sortbam(timestamp, path_base, folder, samples, nproc, wt, q):
    """Write and submit jobs turning STAR SAM files into sorted BAM files.

    For each sample with a ``<sample>_Aligned.out.sam`` in ``results_star/``
    a Picard AddOrReplaceReadGroups command (SO=coordinate, fixed read-group
    values) is queued, writing ``results_sam2sortbam/<sample>.sorted.bam``
    and redirecting stderr to ``<out>.log``. Other samples are reported with
    a warning. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_sam2sortbam"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## SAM2SORTEDBAM")
    print("> Writing jobs for SAM2SORTEDBAM...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    star_dir = root + "/results_star/"
    listing = os.listdir(star_dir)
    for sample in ordered:
        sam_name = sample + "_Aligned.out.sam"
        sam_path = star_dir + sam_name
        if sam_name not in listing:
            print("Warning: [SAM2SORTEDBAM] STAR output file not found -> " + sam_path)
            continue
        bam_path = root + "/results_sam2sortbam/" + sample + ".sorted.bam"
        com = ("java -jar " + config.path_picard + "/AddOrReplaceReadGroups.jar I=" + sam_path
               + " O=" + bam_path
               + " SO=coordinate RGID=id RGLB=library RGPL=ILLUMINA RGPU=machine RGSM=sample 2> "
               + bam_path + ".log")
        checker = sample_checker.replace("#FOLDER", root + "/results_sam2sortbam").replace("#SAMPLE", sample)
        commands.append(com + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("sam2sortbam", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 13
0
def varscan(timestamp, path_base, folder, samples, nproc, wt, q, genome_build, args):
    """Write and submit one VarScan variant-calling job per sorted BAM.

    Each job pipes ``samtools mpileup -B -f <ref>`` on the sample's
    ``results_sam2sortbam/<sample>.sorted.bam`` into VarScan ``mpileup2cns``
    with ``args`` inserted verbatim, writing
    ``results_varscan/<sample>.vcf``. ``<ref>`` is ``config.path_fasta`` with
    "#LABEL" replaced by ``genome_build``. Samples without a sorted BAM are
    reported with a warning. Returns whatever ``submit_job_super`` returns.
    """
    reference = config.path_fasta.replace("#LABEL", genome_build)
    output = "results_varscan"
    root = path_base + folder
    secure_mkdir(root, output)
    print("## Variang calling with VARSCAN")
    print("> Writing jobs for VARSCAN...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    bam_dir = root + "/results_sam2sortbam/"
    listing = os.listdir(bam_dir)
    for sample in ordered:
        bam_name = sample + ".sorted.bam"
        bam_path = bam_dir + bam_name
        if bam_name not in listing:
            print("Warning: [VARSCAN] SORTED BAM output file not found -> " + bam_path)
            continue
        vcf_path = root + "/results_varscan/" + sample + ".vcf"
        com = (config.path_samtools + " mpileup -B -f " + reference + " " + bam_path
               + " | java -jar " + config.path_varscan + " mpileup2cns " + args
               + " > " + vcf_path)
        checker = sample_checker.replace("#FOLDER", root + "/results_varscan").replace("#SAMPLE", sample)
        commands.append(com + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("varscan", root, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 14
0
def picard_IS(timestamp, path_base, folder, samples, nproc, wt, q):
    """Write and submit one Picard CollectInsertSizeMetrics job per sorted BAM.

    For each sample with a ``<sample>.sorted.bam`` in
    ``results_sam2sortbam/`` a single command is queued that writes
    ``<sample>.txt`` and ``<sample>.pdf`` under ``results_picard_IS/``.
    Other samples are reported with a warning. Returns whatever
    ``submit_job_super`` returns.
    """
    output = "results_picard_IS"
    secure_mkdir(path_base + folder, output)
    print("## Picard-InsertSize")
    print("> Writing jobs for Picard InsertSize...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = list()
    ksamp = sortbysize(samples)
    proc_files = os.listdir(path_base + folder + "/results_sam2sortbam/")
    for sample in ksamp:
        in_file = path_base + folder + "/results_sam2sortbam/" + sample + ".sorted.bam"
        if sample + ".sorted.bam" not in proc_files:
            print("Warning: [Picard] Sorted BAM file not found -> " + in_file)
            continue
        # The command does not depend on the annotation, so it is queued once
        # per sample. Previously it was repeated once per config.nannots
        # entry, re-running the same job onto the same output files.
        out_file = in_file.replace("results_sam2sortbam/", "results_picard_IS/").replace(".sorted.bam", "")
        call = "java -jar " + config.path_picard + "/CollectInsertSizeMetrics.jar I=" + in_file + " O=" + out_file + ".txt H=" + out_file + ".pdf"
        commands.append(call + sample_checker.replace("#FOLDER", path_base + folder + "/results_picard_IS").replace("#SAMPLE", sample))
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("picard_IS", path_base + folder, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 15
0
def starfusion(timestamp, path_base, folder, samples, nproc, wt, q, genomebuild):
    """Write and submit one STAR-Fusion job per sample from STAR chimeric output.

    A sample is processed only when both ``<sample>_Chimeric.out.junction``
    and ``<sample>_Chimeric.out.sam`` exist in ``results_star/``; otherwise a
    warning naming the junction file is printed. The reference passed to
    "-G" is ``config.path_annotation`` with "#LABEL" replaced by
    ``genomebuild``. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_star-fusion"
    work_dir = path_base + folder
    secure_mkdir(work_dir, output)
    print("## Identification of gene fusions with star-fusion")
    print("> Writing jobs for Star-Fusion...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    # The listing itself is not used; the call still raises if the STAR
    # results directory is missing.
    os.listdir(work_dir + "/results_star/")
    annotation = config.path_annotation.replace("#LABEL", genomebuild)
    for sample in ordered:
        stem = work_dir + "/results_star/" + sample
        junction_file = stem + "_Chimeric.out.junction"
        sam_file = stem + "_Chimeric.out.sam"
        prefix = work_dir + "/results_star-fusion/" + sample
        both_present = os.path.exists(junction_file) and os.path.exists(sam_file)
        if not both_present:
            print("Warning: [Star-Fusion] STAR output file not found -> " + junction_file)
            continue
        call = config.path_starfusion + " -J " + junction_file + " -S " + sam_file + " -G " + annotation + " --out_prefix " + prefix
        checker = sample_checker.replace("#FOLDER", work_dir + "/results_star-fusion").replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("star-fusion", work_dir, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)
Exemplo n.º 16
0
def htseq(timestamp, path_base, folder, samples, path_annotation, nproc, wt, q, mode, strand, countmode):
    """Write and submit one htseq-count job per STAR-aligned sample.

    Results go to ``results_htseq-<mode>/<sample>.tab``. For any ``mode``
    other than "gene" the option " -i exon_id" is added to the command.
    ``strand`` is concatenated directly after the executable path and
    ``countmode`` is passed to "-m". Samples without a
    ``<sample>_Aligned.out.sam`` in ``results_star/`` are reported with a
    warning. Returns whatever ``submit_job_super`` returns.
    """
    output = "results_htseq-" + mode
    work_dir = path_base + folder
    secure_mkdir(work_dir, output)
    print("## HTseq-count")
    print("> Writing jobs for HTseq-count " + mode + " analysis...")
    nproc, nchild, bsub_suffix = manager.get_bsub_arg(nproc, len(samples))
    commands = []
    ordered = sortbysize(samples)
    aligned = os.listdir(work_dir + "/results_star/")
    # Only the non-gene mode selects the feature id explicitly.
    feature_opt = " -i exon_id" if mode != "gene" else ""
    for sample in ordered:
        in_file = work_dir + "/results_star/" + sample + "_Aligned.out.sam"
        if sample + "_Aligned.out.sam" not in aligned:
            print("Warning: [HTseq-" + mode + "] STAR output file not found -> " + in_file)
            continue
        table = work_dir + "/" + output + "/" + sample + ".tab"
        call = config.path_htseq + strand + " -m " + countmode + feature_opt + " -q " + in_file + " " + path_annotation + " > " + table
        checker = sample_checker.replace("#FOLDER", work_dir + "/" + output).replace("#SAMPLE", sample)
        commands.append(call + checker)
    create_scripts(nchild, commands, path_base, folder, output)
    return submit_job_super("htseq-" + mode, work_dir, wt, str(nproc), q, len(samples), bsub_suffix, nchild, timestamp)