Пример #1
0
# Read configuration files
config = util.readConfigurationFiles()

# Whether every generated script should start with the PBS header.
header = config.getboolean("server", "PBS_header")

# Get the (merged) sample names
samples = util.getMergedsamples()

# Create scripts directory, if it does not exist yet, and cd to it.
util.makeDirectory(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
util.makeDirectory(outputDirectory)

# Write one deduplicate_bismark script per sample.
for sample in samples:
    scriptName = "deduplicatebismark_" + sample + ".sh"
    # Use the first file of the sample's input directory that ends with the
    # requested extension; raises IndexError when nothing matches. Resolved
    # before the script is opened so a failure leaves no empty script behind.
    inputFile = glob.glob(inputDirectory + "/" + sample + "/*" +
                          args.extension)[0]
    # The context manager closes (and flushes) every script, not only the
    # last one, and does not fail when the sample list is empty.
    with open(scriptName, 'w') as script:
        if header:
            util.writeHeader(script, config, "deduplicate_bismark")
        script.write("deduplicate_bismark " + "\\\n")
        script.write("--paired " + "\\\n")
        script.write(inputFile + " \\\n")
        # Both stdout and stderr of the job go to <script>.log
        script.write("&> " + scriptName + ".log")
Пример #2
0
if not os.path.exists(outputDirectory):
    os.mkdir(outputDirectory)

# Cycle through all the samples and write the bsmap scripts.
# Each line of samplesFile is whitespace-separated:
# R1 file, R2 file, sample name and (when multipleLanes is set) the lane.
for line in samplesFile:
    fields = line.split()
    if not fields:
        # Blank lines (e.g. a trailing newline) carry no sample.
        continue
    sample = fields[2]
    if multipleLanes:
        lane = fields[3]
        sample = sample + "_" + lane
    # Create output directory for the sample.
    if not os.path.exists(outputDirectory + "/" + sample):
        os.mkdir(outputDirectory + "/" + sample)
    file_R1 = fields[0]
    file_R2 = fields[1]
    # Create script file; the context manager guarantees it is closed even
    # if writing fails half-way.
    scriptName = 'bsmap_' + sample + '.sh'
    with open(scriptName, 'w') as script:
        util.writeHeader(script, config, "bsmap")
        script.write("bsmap -r 0 -s 16 -n 1" + " \\\n")
        script.write("-a " + inputDirectory + "/" + file_R1 + " \\\n")
        script.write("-b " + inputDirectory + "/" + file_R2 + " \\\n")
        script.write("-d " + genomeFile + " \\\n")
        script.write("-p " + processors + " \\\n")
        script.write("-o " + outputDirectory + "/" + sample + ".sam" +
                     " \\\n")
        script.write("&> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #3
0
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

########################
# stringtie.sh scripts #
########################
for index, row in samplesFile.iterrows():
    sample = row["sample"]
    # Per-sample output directory that receives all stringtie outputs.
    sampleOutputDirectory = os.path.join(outputDirectory, sample)
    if not os.path.exists(sampleOutputDirectory):
        os.makedirs(sampleOutputDirectory)
    scriptName = "stringtie_" + sample + ".sh"
    # Close every script as soon as it is written: otherwise the last one
    # can still sit in the write buffer when submitJobs.py is launched below.
    with open(scriptName, "w") as script:
        if header:
            util.writeHeader(script, config, "stringtie")
        script.write("stringtie \\\n")
        # Paths are written relative to the scripts directory (current cwd).
        script.write(os.path.relpath(os.path.join(inputDirectory, sample, sample + "Aligned.sortedByCoord.out.bam")) + " \\\n")
        script.write("-G " + gtfFile + " \\\n")
        script.write("-o " + os.path.relpath(os.path.join(sampleOutputDirectory, sample + ".gtf")) + " \\\n")
        script.write("-A " + os.path.relpath(os.path.join(sampleOutputDirectory, sample + "_gene_abund.tab")) + " \\\n")
        script.write("-C " + os.path.relpath(os.path.join(sampleOutputDirectory, sample + "_cov_refs.gtf")) + " \\\n")
        script.write("-p " + processors + " \\\n")
        script.write("-B" + " \\\n")
        script.write("&> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #4
0
    sample = row["sample"]
    if "Lane" in samplesFile.columns:
        sample = sample + "_lane_" + str(row["Lane"])
    # Create output directories
    if not os.path.exists(outputDirectory + "/" + sample):
        os.mkdir(outputDirectory + "/" + sample)
    file_R1 = row["File_R1"]
    # If trimmed with trim_galore, extensions have changed.
    file_R1 = file_R1.replace("R1.fastq.gz", "R1_val_1.fq.gz")
    if "File_R2" in samplesFile.columns:
        file_R2 = row["File_R2"]
        # If trimmed with trim_galore, extensions have changed.
        file_R2 = file_R2.replace("R2.fastq.gz", "R2_val_2.fq.gz")
    # Create script file.
    scriptName = 'bismark_' + sample + '.sh'
    script = open(scriptName, 'w')
    if header:
        util.writeHeader(script, config, "bismark")
    script.write("bismark" + " \\\n")
    script.write("--bowtie2" + " \\\n")
    script.write("--basename " + sample + " \\\n")
    script.write("-output_dir " + outputDirectory + "/" + sample + " \\\n")
    script.write(bisulfiteGenomeFolder + " \\\n")
    script.write("-1 " + inputDirectory + "/" + file_R1 + " \\\n")
    script.write("-2 " + inputDirectory + "/" + file_R2 + " \\\n")
    script.write("&> " + scriptName + ".log")

if (args.submitJobsToQueue.lower() == "yes") | (args.submitJobsToQueue.lower()
                                                == "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #5
0
genomeFile = config.get(genome, "genomeFile")

samples = util.getMergedsamples()

# Both working directories must exist before any script is written.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# Scripts are created inside the scripts directory.
os.chdir(scriptsDirectory)

# One Picard CalculateHsMetrics script per sample.
for sample in samples:
    scriptName = "calculatehsmetrics_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "calculatehsmetrics")

    # Every element is one line of the shell command; lines are chained with
    # a trailing " \" continuation, the final redirect has none.
    commandLines = [
        "java -Xmx4g -Xms4g -jar " + os.path.join(picard_folder, "CalculateHsMetrics.jar"),
        "BAIT_INTERVALS=" + os.path.join("../../results/bait_intervals", sample + "_design_bait_intervals.txt"),
        "TARGET_INTERVALS=" + os.path.join("../../results/target_intervals", sample + "_design_target_intervals.txt"),
        "INPUT=" + os.path.join(inputDirectory, sample + ".filtered.bam"),
        "OUTPUT=" + os.path.join(outputDirectory, sample + "_picard_hs_metrics.txt"),
        "METRIC_ACCUMULATION_LEVEL=ALL_READS",
        "REFERENCE_SEQUENCE=" + genomeFile,
        "VALIDATION_STRINGENCY=LENIENT",
        "&> " + scriptName + ".log",
    ]
    script.write(" \\\n".join(commandLines))

    script.close()
Пример #6
0
samplesFile = util.readsamplesFile()
samples = samplesFile["sample"].tolist()

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Cycle through all the samples and write the bedtools_coverage scripts.
for sample in samples:
    scriptName = "bedtools_coverage_" + sample + ".sh"
    # Close every script as soon as it is written: otherwise the last one
    # can still sit in the write buffer when submitJobs.py is launched below.
    with open(scriptName, 'w') as script:
        if header:
            util.writeHeader(script, config, "bedtools_coverage")
        script.write("bedtools coverage" + " \\\n")
        # Paths are written relative to the scripts directory (current cwd).
        script.write("-a " + os.path.relpath(a) + " \\\n")
        script.write("-b " + os.path.relpath(os.path.join(inputDirectory, sample, sample + ".bam")) + " \\\n")
        script.write("-g " + os.path.relpath(chromSizes) + " \\\n")
        script.write("-hist" + " \\\n")
        script.write("-sorted" + " \\\n")
        # stdout is the coverage table, stderr is the job log.
        script.write("1> " + os.path.relpath(os.path.join(outputDirectory, sample + ".txt")) + " \\\n")
        script.write("2> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #7
0
# Store the list of files with the extensions fastq or fastq.gz
files = glob.glob(inputDirectory + "/*.fastq") + glob.glob(inputDirectory +
                                                           "/*.fastq.gz")
files.sort()

# Files are consumed as sorted (R1, R2) pairs. Fail before writing anything
# when a mate is missing, instead of crashing with an IndexError after some
# scripts have already been generated.
if len(files) % 2 != 0:
    raise ValueError("Expected an even number of FASTQ files in " +
                     inputDirectory + ", found " + str(len(files)))

# Write the script(s)
# Cycle through all the files, 2 by 2.
for pathR1, pathR2 in zip(files[0::2], files[1::2]):
    fileR1 = os.path.basename(pathR1)
    fileR2 = os.path.basename(pathR2)
    # Create script file, named after the R1 file without its "_R1" tag.
    scriptName = 'trimgalore_' + fileR1.replace("_R1", "") + '.sh'
    with open(scriptName, 'w') as script:
        if header:
            # NOTE(review): the header is requested for "trimmomatic" although
            # the script runs trim_galore - confirm this label is intended.
            util.writeHeader(script, config, "trimmomatic")
        script.write("trim_galore" + " \\\n")
        script.write("--fastqc" + " \\\n")
        script.write('--fastqc_args "--outdir ' + fastqcoutputDirectory +
                     '"' + " \\\n")
        script.write("--paired" + " \\\n")
        script.write("--length 50" + " \\\n")
        script.write("--output_dir " + outputDirectory + " \\\n")
        script.write(os.path.join(inputDirectory, fileR1) + " \\\n")
        script.write(os.path.join(inputDirectory, fileR2) + " \\\n")
        script.write("&> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #8
0
                                    event)):
     os.mkdir(os.path.join(outputDirectory, comparison, event))
     # Create script for each count type.
 # For every count type: make sure its output subdirectory exists, then start
 # writing the rmats2sashimiplot script for this comparison / event / count
 # type. (Fragment: the rest of the loop body lies outside this excerpt.)
 for countType in countTypes:
     # Create countType subdirectory in event directory, if it does not exist yet.
     if not os.path.exists(
             os.path.join(outputDirectory, comparison, event,
                          countType.lower())):
         os.mkdir(
             os.path.join(outputDirectory, comparison, event,
                          countType.lower()))
     # Script name: rmats2sashimiplots_<comparison>_<event>_<counttype>.sh
     scriptName = "rmats2sashimiplots_" + comparison + "_" + event + "_" + countType.lower(
     ) + ".sh"
     script = open(scriptName, 'w')
     if header:
         util.writeHeader(script, config, "rmats2sashimiplots")
     # The generated script first sources the python2.7 virtual environment
     # located under toolsFolder.
     script.write(
         "# Deactivate Python 3 virtual environment, and activate Python 2 virtual environment"
         + "\n")
     script.write("source " + os.path.join(
         toolsFolder, "python_environments/python2.7/bin/activate") +
                  "\n")
     script.write("\n")
     script.write("rmats2sashimiplot" + " \\\n")
     # -b1: BAM paths (relative to the cwd) of the samples in samples1; every
     # sample except the last is written here, each followed by a comma.
     script.write("-b1 ")
     for sample in samples1[:-1]:
         script.write(
             os.path.relpath(
                 os.path.join(bamDirectory, sample, sample + ".bam")) +
             ",")
     script.write(
Пример #9
0
# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Write assembly_GTF_list.txt: one absolute path per sample to its cufflinks
# transcripts.gtf. The file must be closed (flushed) before jobs are
# submitted, otherwise cuffmerge can read an empty or partial list.
with open("assembly_GTF_list.txt", "w") as assembly_GTF_list:
    for sample in samples:
        assembly_GTF_list.write(
            os.path.abspath(
                os.path.join("../../results/cufflinks", sample,
                             "transcripts.gtf")) + "\n")

# Write the cuffmerge script.
scriptName = "cuffmerge.sh"
with open(scriptName, 'w') as script:
    if header:
        util.writeHeader(script, config, "cuffmerge")
    script.write("cuffmerge" + " \\\n")
    script.write("--ref-gtf " + gtfFile + " \\\n")
    script.write("--num-threads " + processors + " \\\n")
    script.write("--ref-sequence " + genomeFile + " \\\n")
    script.write("-o " + outputDirectory + " \\\n")
    # The list is referenced by bare name, i.e. relative to the directory
    # the script is run from.
    script.write("assembly_GTF_list.txt" + " \\\n")
    script.write("&> " + scriptName + ".log")

# Submit the generated script when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #10
0
# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.mkdir(outputDirectory)

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Write one GATK HaplotypeCaller script per sample.
for sample in samples:
    scriptName = "haplotypecaller_" + sample + ".sh"
    # The context manager closes every script once it is written; the
    # original never closed any of them.
    with open(scriptName, "w") as script:
        if header:
            util.writeHeader(script, config, "haplotypecaller")
        # xmx is the JVM maximum heap size appended to -Xmx
        script.write("java -Xmx" + xmx + " \\\n")
        script.write("-jar " +
                     os.path.join(toolsFolder, "GenomeAnalysisTK.jar") +
                     " \\\n")
        script.write("--analysis_type HaplotypeCaller" + " \\\n")
        script.write("--emitRefConfidence GVCF" + " \\\n")
        script.write("--variant_index_type LINEAR" + " \\\n")
        script.write("--variant_index_parameter 128000" + " \\\n")
        script.write("--reference_sequence " + genomeFile + " \\\n")
        script.write("--input_file " +
                     os.path.join(inputDirectory, sample, sample +
                                  "_realigned_reads.bam") + " \\\n")
        script.write("--out " +
                     os.path.join(outputDirectory, sample + ".vcf") +
                     " \\\n")
        script.write("&> " + scriptName + ".log")
# Make sure the scripts directory exists, then work from inside it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Make sure the output directory exists.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# One conversion script per row of the samples table.
for index, row in samplesFile.iterrows():
    sample = row["sample"]
    scriptName = 'ensemblbedgraphtoucscbedgraph_' + sample + '.sh'
    script = open(scriptName, 'w')
    util.writeHeader(script, config, "ensemblbedgraphtoucscbedgraph")
    # Input and output share the file name; only the directory differs.
    bedgraphName = sample + ".bedgraph"
    commandLines = [
        "ensemblbedgraphtoucscbedgraph.py",
        "--ensembl_bedgraph " + os.path.join(inputDirectory, bedgraphName),
        "--ucsc_bedgraph " + os.path.join(outputDirectory, bedgraphName),
        "--dictionary " + dictionary,
        "&> " + scriptName + ".log",
    ]
    # Lines are chained with " \" continuations; the redirect ends the command.
    script.write(" \\\n".join(commandLines))
    script.close()
    if stranded:
        # Create positive strand script file.
        scriptName = 'ensemblbedgraphtoucscbedgraph_' + sample + '_positive.sh'
        script = open(scriptName, 'w')
        util.writeHeader(script, config, "ensemblbedgraphtoucscbedgraph")
        script.write("ensemblbedgraphtoucscbedgraph.py " + "\\\n")
        script.write("--ensembl_bedgraph " +
Пример #12
0
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directories, if they do not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Cycle through all the samples and write the markduplicates scripts.
for index, row in samplesFile.iterrows():
    sample = row["sample"]
    if "Lane" in samplesFile.columns:
        sample = sample + "_lane_" + str(row["Lane"])
    scriptName = "markduplicates_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "markduplicates")
    script.write("java -jar " + picard_folder + "/MarkDuplicates.jar " +
                 "\\\n")
    # First file of the sample's input directory ending with `extension`;
    # raises IndexError when nothing matches.
    inputFile = glob.glob(inputDirectory + "/" + sample + "/*" + extension)[0]
    # A space is required before the line continuation: the shell removes
    # backslash-newline entirely, so without it INPUT's value would be glued
    # to the following "OUTPUT=..." argument.
    script.write("INPUT=" + inputFile + " \\\n")
    # Outputs are written directly into outputDirectory (no per-sample
    # subdirectory).
    script.write("OUTPUT=" + outputDirectory + "/" + sample +
                 "_deduplicated.bam " + "\\\n")
    script.write("METRICS_FILE=" + outputDirectory + "/" + sample +
                 "_deduplication_metrics.txt " + "\\\n")
    if remove_duplicates:
        script.write("REMOVE_DUPLICATES=true " + "\\\n")
    else:
        script.write("REMOVE_DUPLICATES=false " + "\\\n")
Пример #13
0
# Create a symbolic link to accepted_hits.bam named sampleName.bam. Useful for TopHat output, which is named accepted_hits.bam, by default
#def createSymbolicLinks():
#    for sample in samples:
#        command = "ln -fs " + os.path.join(inputDirectory, sample, "accepted_hits.bam") + " " + os.path.join(inputDirectory, sample, sample + ".bam")
#        print(command)
#        subprocess.call(command, shell=True)

#if (args.symbolicLinks.lower() == "yes") | (args.symbolicLinks.lower() == "y"):
#    createSymbolicLinks()

strands = ["positive", "negative"]

# Write script for each sample and each strand
for sample in samples:
    for strand in strands:
        scriptName = "separatebamsbystrand_" + sample + "_" + strand + ".sh"
        # Close every script as soon as it is written: otherwise the last
        # one can still sit in the write buffer when submitJobs.py is
        # launched below.
        with open(scriptName, "w") as script:
            if header:
                util.writeHeader(script, config, "separatebambystrand")
            script.write("separatebambystrand.py" + " \\\n")
            # BAM path is written relative to the current working directory.
            script.write("--input_bam " + os.path.relpath(
                os.path.join(inputDirectory, sample, sample + ".bam")) +
                         " \\\n")
            script.write("--strand " + strand + " \\\n")
            script.write("&> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #14
0
     lane = ""
 # Reduce each read file to its base name and insert the lane suffix between
 # the name and its last extension (os.path.splitext splits off only the
 # final extension).
 file_r1 = os.path.splitext(
     os.path.basename(file_r1))[0] + lane + os.path.splitext(
         os.path.basename(file_r1))[1]
 file_r2 = os.path.splitext(
     os.path.basename(file_r2))[0] + lane + os.path.splitext(
         os.path.basename(file_r2))[1]
 # The sample name carries the same lane suffix.
 sample += lane
 # Create output directories
 if not os.path.exists(outputDirectory + "/" + sample):
     os.mkdir(outputDirectory + "/" + sample)
 # Create script file.
 scriptName = "star_" + sample + ".sh"
 script = open(scriptName, 'w')
 if header:
     util.writeHeader(script, config, "star")
 script.write("STAR " + "\\\n")
 script.write("--runMode alignReads" + " \\\n")
 script.write("--runThreadN " + runThreadN + " \\\n")
 script.write("--genomeDir " + starIndex + " \\\n")
 # --sjdbOverhang is written as readLength - 1.
 script.write("--sjdbOverhang " + str(int(readLength) - 1) + " \\\n")
 # The literal string "None" means that no GTF file was configured.
 if not sjdbGTFfile == "None":
     script.write("--sjdbGTFfile " + sjdbGTFfile + " \\\n")
 # Both mates follow --readFilesIn, as paths relative to the cwd.
 script.write("--readFilesIn " + "\\\n")
 script.write(
     os.path.relpath(os.path.join(inputDirectory, file_r1)) + " \\\n")
 script.write(
     os.path.relpath(os.path.join(inputDirectory, file_r2)) + " \\\n")
 # Only a fixed set of decompression commands is passed through.
 # NOTE(review): "bunzip -c" looks like it was meant to be "bunzip2 -c" -
 # confirm against the accepted configuration values.
 if readFilesCommand == "zcat" or readFilesCommand == "gunzip -c" or readFilesCommand == "bunzip -c":
     script.write("--readFilesCommand " + readFilesCommand + " \\\n")
 script.write(
Пример #15
0
    os.makedirs(outputDirectory)

# File-name suffixes to process: the unstranded bedgraph only, or the
# unstranded one plus the positive and negative strand bedgraphs.
if stranded:
    strands = ["", "_positive", "_negative"]
else:
    strands = [""]

# Cycle through all the conditions and write the meanbedgraphs scripts.
for condition in unique_conditions:
    # Samples whose "condition" column equals the current condition.
    samples = samplesFile[samplesFile.condition ==
                          condition]["sample"].tolist()
    # Create script file.
    for strand in strands:
        scriptName = 'meanbedgraphs_' + condition + strand + '.sh'
        script = open(scriptName, 'w')
        util.writeHeader(script, config, "meanbedgraphs")
        if sort:
            # Optional pre-step: one sort command per sample. Input and
            # --output are the same path, i.e. the bedgraph is sorted in
            # place (by column 1, then numerically by column 2).
            for sample in samples:
                script.write("sort -k 1,1 -k2,2n " + "\\\n")
                script.write(
                    os.path.relpath(
                        os.path.join(inputDirectory, sample + strand +
                                     ".bedgraph")) + " \\\n")
                script.write("--output " + os.path.relpath(
                    os.path.join(inputDirectory, sample + strand +
                                 ".bedgraph")) + " \\\n")
                # Every sort command redirects to the same <script>.log.
                script.write("&> " + scriptName + ".log")
                script.write("\n\n")
        # Start of the actual averaging command; its bedgraph arguments are
        # written by the code that follows this excerpt.
        script.write("meanbedgraphs_computation.py " + "\\\n")
        script.write("--bedgraphs " + "\\\n")
        for sample in samples[:-1]:
Пример #16
0
# Make sure the scripts directory exists, then work from inside it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Make sure the output directory exists.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

#########################
# htseqcount.sh scripts #
#########################
for sample in samples:
    scriptName = "htseqcount_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "htseqcount")
    script.write(
        "#Deactivate Python 3 virtual environment, and activate Python 2 virtual environment to be able to use TopHat.\n"
    )
    # The python2.7 environment is looked up next to the active virtual
    # environment (parent directory of $VIRTUAL_ENV).
    virtual_env_directory = os.path.dirname(os.environ["VIRTUAL_ENV"])
    python2Activate = os.path.join(virtual_env_directory,
                                   "python2.7/bin/activate")
    script.write("source " + python2Activate + "\n\n")
    # First command: sort the sample's alignments by read name, writing the
    # result next to the input file. Paths are relative to the cwd.
    sampleInputDirectory = os.path.join(inputDirectory, sample)
    bamSortedByName = os.path.relpath(
        os.path.join(sampleInputDirectory,
                     sample + "_sorted_by_read_name" + extension))
    bamUnsorted = os.path.relpath(
        os.path.join(sampleInputDirectory, sample + extension))
    script.write("samtools sort -n \\\n"
                 + "-o " + bamSortedByName + " \\\n"
                 + bamUnsorted + " \\\n")
Пример #17
0
samples = util.getMergedsamples()
samples = sorted(
    samples, reverse=True
)  # To always put wt first, put the list in reverse alphabetical order

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Write one "samtools view" script per sample that extracts `region`.
for sample in samples:
    scriptName = "samtools_view_" + sample + ".sh"
    # First file of the sample's input directory ending with `extension`;
    # raises IndexError when nothing matches. Resolved before the script is
    # opened so a failure leaves no empty script behind.
    inputFile = glob.glob(inputDirectory + "/" + sample + "/*" + extension)[0]
    # The context manager closes (and flushes) every script, not only the
    # last one, and does not fail when the sample list is empty.
    with open(scriptName, 'w') as script:
        if header:
            # NOTE(review): the header is requested for "samtoolsIndex"
            # although the script runs samtools view - confirm the label.
            util.writeHeader(script, config, "samtoolsIndex")
        script.write("samtools view -h " + "\\\n")
        script.write(inputFile + " \\\n")
        script.write(region + " \\\n")
        # stdout is the extracted output, stderr is the job log.
        script.write("1> " + outputDirectory + "/" + sample + "/" + sample +
                     "_ERCC_only.bam" + " \\\n")
        script.write("2> " + scriptName + ".log")
Пример #18
0
    os.makedirs(outputDirectory)

# Every entry of the input directory is treated as one comparison.
comparisons = os.listdir(inputDirectory)

# Peak-file suffix for each supported value of `peaks`.
peakFileSuffixes = (("narrow", "_peaks.narrowPeak"),
                    ("broad", "_peaks.broadPeak"))

# Write one findMotifsGenome.pl script per comparison.
for comparison in comparisons:
    inputDirectory_comparison = os.path.join(inputDirectory, comparison)
    outputDirectory_comparison = os.path.join(outputDirectory, comparison)
    # Each comparison gets its own output directory.
    if not os.path.exists(outputDirectory_comparison):
        os.makedirs(outputDirectory_comparison)
    scriptName = 'findMotifs_' + comparison + '.sh'
    script = open(scriptName, 'w')
    util.writeHeader(script, config, "findMotifs")
    script.write("findMotifsGenome.pl \\\n")
    # The peak file is only written for a recognised peak type; any other
    # value of `peaks` writes no input file line.
    for peakType, suffix in peakFileSuffixes:
        if peaks == peakType:
            peakFile = os.path.join(inputDirectory_comparison,
                                    comparison + suffix)
            script.write(peakFile + " \\\n")
    script.write(genome + " \\\n")
    script.write(outputDirectory_comparison + " \\\n")
    script.write("-size 150 \\\n")
    script.write("&> " + scriptName + ".log")
    script.close()
Пример #19
0
# Make sure the scripts directory exists, then work from inside it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Make sure the output directory exists.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Collect the *.fastq files first, then the *.fastq.gz files.
files = []
for pattern in ("/*.fastq", "/*.fastq.gz"):
    files.extend(glob.glob(inputDirectory + pattern))

# One trimFastq.py script per FASTQ file.
for fastqPath in files:
    fastqName = os.path.basename(fastqPath)
    scriptName = 'trimfastq_' + fastqName + '.sh'
    script = open(scriptName, 'w')
    if header:
        util.writeHeader(script, config, "trimfastq")
    script.write("#Deactivate Python 3 virtual environment, and activate Python 2 virtual environment to be able to use TopHat.\n")
    # The python2.7 environment is looked up next to the active virtual
    # environment (parent directory of $VIRTUAL_ENV).
    virtual_env_directory = os.path.dirname(os.environ["VIRTUAL_ENV"])
    python2Activate = os.path.join(virtual_env_directory, "python2.7/bin/activate")
    script.write("source " + python2Activate + "\n\n")
    # Command lines chained with " \" continuations: input file, output file
    # (same name in the output directory) and minimum length, then the log.
    commandLines = [
        "trimFastq.py",
        os.path.relpath(os.path.join(inputDirectory, fastqName)),
        os.path.relpath(os.path.join(outputDirectory, fastqName)),
        minlength,
        "&> " + scriptName + ".log",
    ]
    script.write(" \\\n".join(commandLines))
    script.close()

# Submit the generated scripts when the user answered yes/y.
submitAnswer = args.submitJobsToQueue.lower()
if submitAnswer == "yes" or submitAnswer == "y":
    subprocess.call("submitJobs.py", shell=True)
Пример #20
0
# Samples table: tab-separated, read relative to the current directory.
samples = pandas.read_csv("../../scripts/samples.txt", sep="\t")

# For every sample: create and enter its own directory, link the FASTQ file
# and write the mapper script. (Fragment: the loop continues past this
# excerpt; no chdir back to the parent directory is visible here.)
for index, row in samples.iterrows():
    # Create directories
    if not os.path.exists(row["sample"]):
        os.mkdir(row["sample"])
    os.chdir(row["sample"])
    # Symbolic links to FASTQ files
    # The last three characters of file_r1 are dropped - presumably a ".gz"
    # suffix, i.e. the link targets the uncompressed file; TODO confirm.
    subprocess.call("ln -s " +
                    os.path.join(inputDirectory, row["file_r1"][:-3]),
                    shell=True)
    # Mapper script
    mapper_script = open("mapper.sh", "w")
    if header:
        util.writeHeader(mapper_script, config, "mirdeep")
    mapper_script.write("mapper.pl \\\n")
    # The generated command picks up every *.fastq in the sample directory.
    mapper_script.write("*.fastq \\\n")
    mapper_script.write("-h -n -o 4 -e -m -v \\\n")
    mapper_script.write("-p " + bowtieIndex + " \\\n")
    mapper_script.write("-s " + row["sample"] + "_collapsed.fa \\\n")
    mapper_script.write("-t " + row["sample"] + "_collapsed_vs_genome_" +
                        genome + ".arf" + " \\\n")
    # stdout and stderr of the job go to separate files.
    mapper_script.write("1> mapper.sh_output \\\n")
    mapper_script.write("2> mapper.sh_error")
    mapper_script.write("\n\n")
    mapper_script.close()
    # Submission happens inside the loop, i.e. from within the sample's
    # directory, once per sample.
    if (args.submitJobsToQueue.lower()
            == "yes") | (args.submitJobsToQueue.lower() == "y"):
        subprocess.call("submitJobs.py", shell=True)
    # Mirdeep script
Пример #21
0
# Read input file (tab-separated table with one row per run).
samplesFile = pandas.read_csv(samplesFile, sep="\t")

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Cycle through all the runs and write the getsra scripts.
for index, row in samplesFile.iterrows():
    run = row["Run_s"]
    scriptName = "getsra_" + run + ".sh"
    # Close every script as soon as it is written: otherwise the last one
    # can still sit in the write buffer when submitJobs.py is launched below.
    with open(scriptName, 'w') as script:
        if header:
            util.writeHeader(script, config, "getsra")
        script.write("wget" + " \\\n")
        # Remote layout: <first 6 characters of run>/<run>/<run>.sra
        script.write(
            "ftp://ftp.ncbi.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/" +
            os.path.join(run[0:6], run, run + ".sra") + " \\\n")
        script.write("&> " + scriptName + ".log")

# Submit the generated scripts when the user answered yes/y.
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #22
0
samplesFile = util.readSamplesFile()
samples = samplesFile["sample"]

# Both working directories must exist before any script is written.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# Scripts are created inside the scripts directory.
os.chdir(scriptsDirectory)

# One Picard CollectInsertSizeMetrics script per sample.
for sample in samples:
    scriptName = "collectinsertsizemetrics_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "collectinsertsizemetrics")

    picardJar = os.path.join(picard_folder, "CollectInsertSizeMetrics.jar")
    histogramFile = os.path.join(outputDirectory,
                                 sample + "_picard_insert_size_plot.pdf")
    filteredBam = os.path.join(inputDirectory, sample + ".filtered.bam")
    metricsFile = os.path.join(outputDirectory,
                               sample + "_picard_insert_size_metrics.txt")
    # Each argument is written on its own line, ending with a " \"
    # continuation.
    for argument in ("java -Xmx4g -jar " + picardJar,
                     "VALIDATION_STRINGENCY=LENIENT",
                     "HISTOGRAM_FILE=" + histogramFile,
                     "INPUT=" + filteredBam,
                     "OUTPUT=" + metricsFile):
        script.write(argument + " \\\n")
Пример #23
0
# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

############################
# featurecounts.sh scripts #
############################
for index, row in samplesFile.iterrows():
    sample = row["sample"]
    if "lane" in samplesFile.columns:
        # str() keeps this working when the lane column was parsed as a
        # number; concatenating it directly raises TypeError.
        sample += "_" + str(row["lane"])
    scriptName = "featurecounts_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "featurecounts")
    script.write("featureCounts \\\n")
    # T and s are passed through as the values of -T and -s.
    script.write("-T " + T + " \\\n")
    # p, B, C, M and O switch the featureCounts flag of the same name on.
    if p:
        script.write("-p" + " \\\n")
    if B:
        script.write("-B" + " \\\n")
    if C:
        script.write("-C" + " \\\n")
    script.write("-s " + s + " \\\n")
    if M:
        script.write("-M" + " \\\n")
    if O:
        script.write("-O" + " \\\n")

    script.write("-a " + gtfFile + " \\\n")
Пример #24
0
samples = util.getMergedsamples()

# Create script and output directories, if they do not exist yet.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# CD to scripts directory
os.chdir(scriptsDirectory)

# Write scripts
# (Fragment: each script holds several commands; only the first one and the
# start of the second are visible in this excerpt.)
for sample in samples:
    scriptName = "target_interval_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "target_interval")
    # First command: write the header of the sample's filtered BAM
    # (samtools view -H) to <sample>_bam_header.txt; stderr goes to the log.

    script.write("samtools view -H" + " \\\n")
    script.write(
        os.path.join(inputDirectory, sample + ".filtered.bam") + " \\\n")
    script.write("1> " +
                 os.path.join(outputDirectory, sample + "_bam_header.txt") +
                 " \\\n")
    script.write("2> " + scriptName + ".log")

    # Blank line separating the two commands in the generated script.
    script.write("\n\n")

    # Second command: the target BED file is piped into the next program.
    script.write("cat " + target_BED + " | " + "\\\n")
    script.write(
        "gawk '{print $1 \"\\t\" $2+1 \"\\t\" $3 \"\\t+\\tinterval_\" NR}' " +
Пример #25
0
genomeFile = config.get(genome, "genomeFile")

samples = util.getMergedsamples()

# Both working directories must exist before any script is written.
util.makeDirectory(outputDirectory)
util.makeDirectory(scriptsDirectory)

# Scripts are created inside the scripts directory.
os.chdir(scriptsDirectory)

# One script per sample.
# NOTE(review): the script is named and labelled "bsmap_methratio" but the
# command it contains is Picard CalculateHsMetrics - confirm this is intended.
for sample in samples:
    scriptName = "bsmap_methratio_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "bsmap_methratio")

    # KEY=value arguments of the Picard command, in the order they are
    # written.
    picardOptions = [
        ("BAIT_INTERVALS", os.path.join("../../results/bait_intervals", sample + "_design_bait_intervals.txt")),
        ("TARGET_INTERVALS", os.path.join("../../results/target_intervals", sample + "_design_target_intervals.txt")),
        ("INPUT", os.path.join(inputDirectory, sample + ".filtered.bam")),
        ("OUTPUT", os.path.join(outputDirectory, sample + "_picard_hs_metrics.txt")),
        ("METRIC_ACCUMULATION_LEVEL", "ALL_READS"),
        ("REFERENCE_SEQUENCE", genomeFile),
        ("VALIDATION_STRINGENCY", "LENIENT"),
    ]
    commandLines = ["java -Xmx4g -Xms4g -jar " + os.path.join(picard_folder, "CalculateHsMetrics.jar")]
    commandLines += [key + "=" + value for key, value in picardOptions]
    commandLines.append("&> " + scriptName + ".log")
    # Lines are chained with " \" continuations; the redirect ends the command.
    script.write(" \\\n".join(commandLines))

    script.close()
Пример #26
0
    OrderedDict.fromkeys(conditions))  # Remove duplicates.

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

# Write the cuffdiff script.
# (Fragment: the list of BAM files is completed by code after this excerpt.)
scriptName = "cuffdiff.sh"
script = open(scriptName, 'w')
if header:
    util.writeHeader(script, config, "cuffdiff")
script.write("cuffdiff" + " \\\n")
# --labels receives the unique condition names, comma-separated.
script.write("--labels ")
script.write(",".join(unique_conditions) + " \\\n")
script.write("-p " + processors + " \\\n")
script.write("--no-effective-length-correction " + "\\\n")
# The library type is only written for stranded data.
if stranded:
    script.write("--library-type fr-firststrand" + " \\\n")
script.write("-u -b " + genomeFile + " \\\n")
script.write("-o " + os.path.relpath(os.path.join(outputDirectory)) + " \\\n")
script.write(gtfFile + " \\\n")
# First sample's BAM, written without any trailing separator.
script.write(
    os.path.relpath(
        os.path.join(inputDirectory, samples[0], "accepted_hits.bam")))
# Remember the first sample's condition - presumably compared against each
# following sample's condition to pick the separator; TODO confirm.
previous_condition = conditions[0]
for sample, condition in zip(samples[1:], conditions[1:]):
Пример #27
0
    os.makedirs(os.path.join(outputDirectory, "too_long_after_trimming"))

# Collect every file in the input directory whose name ends in .fastq or
# .fastq.gz, ordered by path.
files = sorted(
    glob.glob(inputDirectory + "/*.fastq") +
    glob.glob(inputDirectory + "/*.fastq.gz"))

# Write the script(s)
# Cycle through all the R1 files.
for file in files:
    fileR1 = os.path.basename(file)
    # Create script file.
    scriptName = 'cutadapt_' + fileR1 + '.sh'
    script = open(scriptName, 'w')
    if header:
        util.writeHeader(script, config, "cutadapt")
    script.write("cutadapt" + " \\\n")
    if not adapter == "None":
        script.write("--adapter " + adapter + " \\\n")
    if not minlength == "None":
        script.write("--minimum-length " + minlength + " \\\n")
    if not maxlength == "None":
        script.write("--maximum-length " + maxlength + " \\\n")
    if not qualitycutoff == "None":
        script.write("--quality-cutoff " + qualitycutoff + " \\\n")
    if trimn:
        script.write("--trim-n" + " \\\n")
    if not cut == "None":
        script.write("--cut " + cut + " \\\n")
    if (args.gzip.lower() == "no") | (args.gzip.lower() == "n"):
        script.write(
Пример #28
0
# Read samples file.
samplesDataFrame = util.readSamplesFile()

# Create scripts directory, if it does not exist yet, and cd to it.
if not os.path.exists(scriptsDirectory):
    os.mkdir(scriptsDirectory)
os.chdir(scriptsDirectory)

# Cycle through all the samples and write one cellranger count script each.
for index, row in samplesDataFrame.iterrows():
    sample = row["sample"]
    scriptName = "cellrangercount_" + sample + ".sh"
    # Each script is closed (and therefore flushed to disk) as soon as it is
    # written. Previously the handle was never closed, so the last script
    # could still be sitting in the write buffer when submitJobs.py ran.
    with open(scriptName, 'w') as script:
        if header:
            util.writeHeader(script, config, "cellrangercount")
        script.write("cellranger count " + "\\\n")
        script.write("--localcores=" + localcores + " \\\n")
        script.write("--localmem=" + localmem + " \\\n")
        script.write("--id=" + sample + " \\\n")
        # The FASTQ directory is written relative to the scripts directory.
        script.write("--fastqs " +
                     os.path.relpath(os.path.join(inputDirectory)) + " \\\n")
        script.write("--sample=" + sample + " \\\n")
        script.write("--transcriptome=" + cellrangerTranscriptome + " \\\n")
        # Redirect both stdout and stderr of the command to a per-script log.
        script.write("&> " + scriptName + ".log")

# Submit the generated scripts only when the user answered yes/y
# (case-insensitive).
if args.submitJobsToQueue.lower() in ("yes", "y"):
    subprocess.call("submitJobs.py", shell=True)
Пример #29
0
    fields = file.split(".")
    samples.append(".".join(fields[-3:]))
# Remove duplicates, and sort lanes.
samples = sorted(set(samples))

# For every sample suffix, the sorted list of input files whose path ends
# with that suffix (one entry per sample, in the same order as `samples`).
all_samples_grouped_by_lane = [
    sorted(glob.glob(inputDirectory + "/*" + sample)) for sample in samples
]

# Write the script. The context manager guarantees the file is flushed and
# closed even if writeHeader or one of the writes raises.
scriptName = 'catFASTQFiles.sh'
with open(scriptName, 'w') as script:
    if header:
        util.writeHeader(script, config, "catFASTQFiles")

    # One `cat` command per sample: concatenate all of its files into a
    # single output file named after the sample suffix.
    for sample, one_sample_grouped_by_lane in zip(
            samples, all_samples_grouped_by_lane):
        script.write("cat " + "\\\n")
        script.write(" \\\n".join(one_sample_grouped_by_lane) + " \\\n")
        script.write("1> " +
                     os.path.relpath(os.path.join(outputDirectory, sample)) +
                     " \\\n")
        # stderr of every cat command is appended to one shared log file.
        script.write("2>> " + scriptName + ".log")
        script.write("\n\n")

if (args.submitJobsToQueue.lower() == "yes") | (args.submitJobsToQueue.lower()
                                                == "y"):
Пример #30
0
# Create output directory, if it does not exist yet.
if not os.path.exists(outputDirectory):
    os.makedirs(outputDirectory)

############################
# dexseqcounts.sh scripts #
############################
for index, row in samplesFile.iterrows():
    sample = row["sample"]
    if "Lane" in samplesFile.columns:
        sample = sample + "_lane_" + str(row["lane"])
    scriptName = "dexseqcounts_" + sample + ".sh"
    script = open(scriptName, "w")
    if header:
        util.writeHeader(script, config, "dexseqcounts")
    script.write("source " + os.path.join(
        toolsFolder, "python_environments/python2.7/bin/activate"))
    script.write("\n\n")
    script.write("dexseq_count.py" + " \\\n")
    script.write("--paired=yes" + " \\\n")
    if stranded:
        script.write("--stranded=reverse" + " \\\n")
    else:
        script.write("--stranded=no" + " \\\n")
    script.write("--format=bam" + " \\\n")
    script.write("--order=pos" + " \\\n")
    script.write(dexseq_gtfFile + " \\\n")
    script.write(
        os.path.relpath(
            os.path.join(inputDirectory, sample, "accepted_hits.bam")) +