def runPicard(listOfBams):
    """Build and submit the Picard commands for a collection of BAM files.

    For every input file two shell command strings are produced:

    1. AddOrReplaceReadGroups, reading the input file and writing
       ``<cellLine>_rg_added_sorted.bam``.
    2. MarkDuplicates, reading that ``_rg_added_sorted.bam`` file and writing
       ``<cellLine>_dedupped.bam`` plus ``<cellLine>_dedupped.metrics``.

    All read-group commands are handed to ``s.executeFunctions`` first, then
    all duplicate-marking commands, each with ``parallel=True`` and
    ``simulate=True``.

    Args:
        listOfBams: Iterable of input file names. Each one is passed to
            ``fileUtil.getCellLineFromFilename`` to obtain the cell-line
            label used for output names and for ``RGSM``.

    NOTE(review): ``simulate=True`` presumably makes ``s.executeFunctions``
    a dry run -- confirm against that helper before relying on the outputs.
    """
    # Template: java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=star_output.sam O=rg_added_sorted.bam SO=coordinate RGID=id RGLB=library RGPL=platform RGPU=machine RGSM=sample
    addReadGroupsTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar AddOrReplaceReadGroups I=%s O=%s_rg_added_sorted.bam SO=coordinate RGID=1 RGLB=illumina RGPL=illumina RGPU=illumina RGSM=%s"

    # Template: java -jar MarkDuplicates I=rg_added_sorted.bam O=dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=output.metrics
    markDuplicatesTemplate = "java -Xmx8g -jar ~/software/picard/picard/dist/picard.jar MarkDuplicates I=%s_rg_added_sorted.bam O=%s_dedupped.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT M=%s_dedupped.metrics"

    # Resolve each cell line exactly once and keep it paired with its input,
    # so both command lists can be derived from the same data.
    samples = [
        (bamPath, fileUtil.getCellLineFromFilename(bamPath))
        for bamPath in listOfBams
    ]

    readGroupsCommands = [
        addReadGroupsTemplate % (bamPath, sample, sample)
        for bamPath, sample in samples
    ]
    markDuplicatesCommands = [
        markDuplicatesTemplate % (sample, sample, sample)
        for _, sample in samples
    ]

    # MarkDuplicates reads the files named as AddOrReplaceReadGroups output,
    # so the read-group batch is submitted first.
    s.executeFunctions(readGroupsCommands, parallel=True, simulate=True)
    s.executeFunctions(markDuplicatesCommands, parallel=True, simulate=True)
def runStar(fastq1, fastq2, genome = "/media/Data/genomes/STAR_index_hg19_vGATK/STAR_genomeDir_hg19_vGATK"):
    """Build a STAR alignment command and hand it to ``s.executeFunctions``.

    The command asks STAR for a coordinate-sorted BAM
    (``--outSAMtype BAM SortedByCoordinate``), adds the strand field via
    ``--outSAMstrandField intronMotif``, sets
    ``--outFilterIntronMotifs RemoveNoncanonical`` and runs with 16 threads.
    The original comment states that this output is meant to be compatible
    with "cuff" (presumably Cufflinks -- confirm).

    Args:
        fastq1: First value given to ``--readFilesIn``.
        fastq2: Second value given to ``--readFilesIn``.
        genome: Value given to ``--genomeDir``; defaults to the hg19 index
            path hard-coded in the signature.

    NOTE(review): a single command string is passed to
    ``s.executeFunctions`` here, whereas ``runPicard`` passes lists of
    commands -- verify that the helper accepts both forms.
    """
    starTemplate = "STAR --outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outSAMtype BAM SortedByCoordinate --genomeDir %s --readFilesIn %s %s --runThreadN 16"
    starArguments = (genome, fastq1, fastq2)
    s.executeFunctions(starTemplate % starArguments)