def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    Call HLA haplotypes with PHLAT on a pair of input fastqs of type `sample_type`.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: List of the two input fastq files (read 1 first, then read 2)
    :param str sample_type: Description of the sample type. It is passed to PHLAT as `-tag` and
           is the prefix of the `<sample_type>_HLA.sum` file that gets stored
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict phlat_options: Options specific to PHLAT; `index`, `n` and `version` are read here
    :return: fsID for the HLA haplotype called from the input fastqs
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    requested = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['index'],
    }
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['input_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('input_1.fastq', 'input_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    # Unpack the index tarball
    staged['phlat_index'] = untargz(staged['phlat_index.tar.gz'], work_dir)
    # From here on every path is the one the tool sees inside the container
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    phlat_args = [
        '-1', staged['input_1.fastq' + gz_ext],
        '-2', staged['input_2.fastq' + gz_ext],
        '-index', staged['phlat_index'],
        '-b2url', '/usr/local/bin/bowtie2',
        '-tag', sample_type,
        '-e', '/home/phlat-1.0',  # Phlat directory home
        '-o', '/data',  # Output directory
        '-p', str(phlat_options['n']),  # Number of threads
    ]
    docker_call(tool='phlat', tool_parameters=phlat_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], tool_version=phlat_options['version'])
    summary_path = work_dir + '/' + sample_type + '_HLA.sum'
    return job.fileStore.writeGlobalFile(summary_path)
def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
    """
    Run `bwa mem` on a pair of fastqs and store the resulting sam file.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The two input fastqs for alignment (read 1 first, then read 2)
    :param str sample_type: Description of the sample; the sam is written to
           `<sample_type>.sam` in the working directory
    :param dict univ_options: Dict of universal options; `ref`, `dockerhub` and `patient` are
           read here
    :param dict bwa_options: Options specific to bwa; `index`, `n` and `version` are read here
    :return: fsID for the generated sam
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    requested = {
        'dna_1.fastq': fastqs[0],
        'dna_2.fastq': fastqs[1],
        'bwa_index.tar.gz': bwa_options['index'],
    }
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['dna_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('dna_1.fastq', 'dna_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    # Unpack the index tarball
    staged['bwa_index'] = untargz(staged['bwa_index.tar.gz'], work_dir)
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    bwa_args = [
        'mem',
        '-t', str(bwa_options['n']),
        '-v', '1',  # Don't print INFO messages to the stderr
        '/'.join([staged['bwa_index'], univ_options['ref']]),  # <index dir>/<reference prefix>
        staged['dna_1.fastq' + gz_ext],
        staged['dna_2.fastq' + gz_ext],
    ]
    # The open file handle is handed to docker_call as `outfile`; the same path is stored after.
    sam_path = work_dir + '/' + sample_type + '.sam'
    with open(sam_path, 'w') as sam_handle:
        docker_call(tool='bwa', tool_parameters=bwa_args, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=sam_handle,
                    tool_version=bwa_options['version'])
    aligned_sam = job.fileStore.writeGlobalFile(sam_path)
    job.fileStore.logToMaster('Ran bwa on %s:%s successfully'
                              % (univ_options['patient'], sample_type))
    return aligned_sam
def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
    """
    Run `bwa mem` on the SAMPLE_TYPE dna fastqs and store the resulting sam file.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The two input fastqs for alignment (read 1 first, then read 2)
    :param str sample_type: Description of the sample; the sam is written to
           `<sample_type>_aligned.sam` in the working directory
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict bwa_options: Options specific to bwa; `tool_index` (the index tarball) and `n`
           (number of threads) are read here
    :return: The value returned by `job.fileStore.writeGlobalFile` for the generated sam
    """
    job.fileStore.logToMaster('Running bwa on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    requested = {
        'dna_1.fastq': fastqs[0],
        'dna_2.fastq': fastqs[1],
        'bwa_index.tar.gz': bwa_options['tool_index'],
    }
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['dna_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('dna_1.fastq', 'dna_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    # Unpack the index tarball
    staged['bwa_index'] = untargz(staged['bwa_index.tar.gz'], work_dir)
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    bwa_args = [
        'mem',
        '-t', str(bwa_options['n']),
        '-v', '1',  # Don't print INFO messages to the stderr
        '/'.join([staged['bwa_index'], 'hg19']),  # The index prefix is fixed to hg19
        staged['dna_1.fastq' + gz_ext],
        staged['dna_2.fastq' + gz_ext],
    ]
    # The open file handle is handed to docker_call as `outfile`; the same path is stored after.
    sam_path = work_dir + '/' + sample_type + '_aligned.sam'
    with open(sam_path, 'w') as sam_handle:
        docker_call(tool='bwa', tool_parameters=bwa_args, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=sam_handle)
    return job.fileStore.writeGlobalFile(sam_path)
def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    Run PHLAT on the SAMPLE_TYPE fastqs and store the predicted alleles.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: List of the two input fastq files (read 1 first, then read 2)
    :param str sample_type: Description of the sample type. It is passed to PHLAT as `-tag` and
           is the prefix of the `<sample_type>_HLA.sum` file that gets stored
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict phlat_options: Options specific to PHLAT; `tool_index` (the index tarball) and
           `n` (number of threads) are read here
    :return: The value returned by `job.fileStore.writeGlobalFile` for the allele predictions
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['tool_index'],
    }
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
    if gz:
        for read_file in 'input_1.fastq', 'input_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}
    parameters = [
        '-1', input_files['input_1.fastq' + gz],
        '-2', input_files['input_2.fastq' + gz],
        '-index', input_files['phlat_index'],
        '-b2url', '/usr/local/bin/bowtie2',
        '-tag', sample_type,
        '-e', '/home/phlat-1.0',  # Phlat directory home
        '-o', '/data',  # Output directory
        '-p', str(phlat_options['n']),  # Number of threads
    ]
    docker_call(tool='phlat', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(
        ''.join([work_dir, '/', sample_type, '_HLA.sum']))
    return output_file
def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    Run PHLAT on the SAMPLE_TYPE fastqs and store the predicted alleles.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: List of the two input fastq files (read 1 first, then read 2)
    :param str sample_type: Description of the sample type. It is passed to PHLAT as `-tag` and
           is the prefix of the `<sample_type>_HLA.sum` file that gets stored
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict phlat_options: Options specific to PHLAT; `tool_index` (the index tarball) and
           `n` (number of threads) are read here
    :return: The value returned by `job.fileStore.writeGlobalFile` for the allele predictions
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    requested = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['tool_index'],
    }
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['input_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('input_1.fastq', 'input_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    # Unpack the index tarball
    staged['phlat_index'] = untargz(staged['phlat_index.tar.gz'], work_dir)
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    phlat_args = [
        '-1', staged['input_1.fastq' + gz_ext],
        '-2', staged['input_2.fastq' + gz_ext],
        '-index', staged['phlat_index'],
        '-b2url', '/usr/local/bin/bowtie2',
        '-tag', sample_type,
        '-e', '/home/phlat-1.0',  # Phlat directory home
        '-o', '/data',  # Output directory
        '-p', str(phlat_options['n']),  # Number of threads
    ]
    docker_call(tool='phlat', tool_parameters=phlat_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    summary_path = work_dir + '/' + sample_type + '_HLA.sum'
    return job.fileStore.writeGlobalFile(summary_path)
def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
    """
    Trim adapters from a pair of RNA fastqs with cutadapt.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: List of fsIDs for an input RNA-Seq fastq pair (read 1 first, then read 2)
    :param dict univ_options: Dict of universal options; `dockerhub` and `patient` are read here
    :param dict cutadapt_options: Options specific to cutadapt; `a`, `A` and `version` are read
           here
    :return: List of fsIDs of cutadapted fastqs, R1 first and R2 second
    :rtype: list[toil.fileStore.FileID]
    """
    work_dir = os.getcwd()
    requested = {'rna_1.fastq': fastqs[0], 'rna_2.fastq': fastqs[1]}
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['rna_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('rna_1.fastq', 'rna_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    trimmed_names = ['rna_cutadapt_1.fastq.gz', 'rna_cutadapt_2.fastq.gz']
    cutadapt_args = [
        '-a', cutadapt_options['a'],  # Fwd read 3' adapter
        '-A', cutadapt_options['A'],  # Rev read 3' adapter
        '-m', '35',  # Minimum size of read
        '-o', docker_path(trimmed_names[0]),  # Output for R1
        '-p', docker_path(trimmed_names[1]),  # Output for R2
        staged['rna_1.fastq' + gz_ext],
        staged['rna_2.fastq' + gz_ext],
    ]
    docker_call(tool='cutadapt', tool_parameters=cutadapt_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], tool_version=cutadapt_options['version'])
    trimmed_ids = [job.fileStore.writeGlobalFile('/'.join([work_dir, trimmed]))
                   for trimmed in trimmed_names]
    job.fileStore.logToMaster('Ran cutadapt on %s successfully' % univ_options['patient'])
    return trimmed_ids
def get_patient_vcf(job, patient_dict):
    """
    Convenience function to get the (decompressed) mutation vcf from the patient dict.

    The vcf referenced by `patient_dict['mutation_vcf']` is read from the file store. If it is
    gzipped, it is decompressed, the decompressed copy is written back to the file store and the
    original gzipped entry is deleted. Otherwise the original ID is returned untouched.

    :param job: Job whose `fileStore` is used for reading, writing and deleting files
    :param dict patient_dict: dict of patient info; only `mutation_vcf` is read
    :return: The vcf, always as a file store ID (never a job-local path)
    :rtype: toil.fileStore.FileID
    """
    vcf_id = patient_dict['mutation_vcf']
    local_copy = job.fileStore.readGlobalFile(vcf_id, os.path.join(os.getcwd(), 'temp.gz'))
    if not is_gzipfile(local_copy):
        return vcf_id
    # NOTE(review): gunzip() is assumed to return the path of the decompressed file -- confirm
    # against the helper. That file must be stored before the original is deleted, otherwise the
    # caller is handed something that is not a file store ID while the only global copy is gone.
    unzipped_id = job.fileStore.writeGlobalFile(gunzip(local_copy))
    job.fileStore.deleteGlobalFile(vcf_id)
    return unzipped_id
def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
    """
    Trim adapters from a pair of RNA fastqs with cutadapt.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: List of the two input RNA-Seq fastqs (read 1 first, then read 2)
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict cutadapt_options: Options specific to cutadapt; `a` (3' adapter to trim from the
           fwd read) and `A` (3' adapter to trim from the rev read) are read here
    :return: List with the values returned by `job.fileStore.writeGlobalFile` for
             `rna_cutadapt_1.fastq.gz` and `rna_cutadapt_2.fastq.gz`, in that order
    :rtype: list
    """
    job.fileStore.logToMaster('Running cutadapt on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    requested = {'rna_1.fastq': fastqs[0], 'rna_2.fastq': fastqs[1]}
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['rna_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('rna_1.fastq', 'rna_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    trimmed_names = ['rna_cutadapt_1.fastq.gz', 'rna_cutadapt_2.fastq.gz']
    cutadapt_args = [
        '-a', cutadapt_options['a'],  # Fwd read 3' adapter
        '-A', cutadapt_options['A'],  # Rev read 3' adapter
        '-m', '35',  # Minimum size of read
        '-o', docker_path(trimmed_names[0]),  # Output for R1
        '-p', docker_path(trimmed_names[1]),  # Output for R2
        staged['rna_1.fastq' + gz_ext],
        staged['rna_2.fastq' + gz_ext],
    ]
    docker_call(tool='cutadapt', tool_parameters=cutadapt_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    return [job.fileStore.writeGlobalFile('/'.join([work_dir, trimmed]))
            for trimmed in trimmed_names]
def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
    """
    Align the SAMPLE_TYPE dna fastqs with `bwa mem` and store the resulting sam file.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The two input fastqs for alignment (read 1 first, then read 2)
    :param str sample_type: Description of the sample; the sam is written to
           `<sample_type>_aligned.sam` in the working directory
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict bwa_options: Options specific to bwa; `tool_index` (the index tarball) and `n`
           (number of threads) are read here
    :return: The value returned by `job.fileStore.writeGlobalFile` for the generated sam
    """
    job.fileStore.logToMaster('Running bwa on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    read_names = ('dna_1.fastq', 'dna_2.fastq')
    requested = {
        read_names[0]: fastqs[0],
        read_names[1]: fastqs[1],
        'bwa_index.tar.gz': bwa_options['tool_index'],
    }
    local_files = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    suffix = ''
    if is_gzipfile(local_files['dna_1.fastq']):
        suffix = '.gz'
        for read_name in read_names:
            os.symlink(read_name, read_name + suffix)
            local_files[read_name + suffix] = local_files[read_name] + suffix
    # Unpack the index tarball
    local_files['bwa_index'] = untargz(local_files['bwa_index.tar.gz'], work_dir)
    container_files = {}
    for name, path in local_files.items():
        container_files[name] = docker_path(path)
    bwa_args = [
        'mem',
        '-t', str(bwa_options['n']),
        '-v', '1',  # Don't print INFO messages to the stderr
        '/'.join([container_files['bwa_index'], 'hg19']),  # The index prefix is fixed to hg19
        container_files['dna_1.fastq' + suffix],
        container_files['dna_2.fastq' + suffix],
    ]
    # The open file handle is handed to docker_call as `outfile`; the same path is stored after.
    sam_path = work_dir + '/' + sample_type + '_aligned.sam'
    with open(sam_path, 'w') as sam_handle:
        docker_call(tool='bwa', tool_parameters=bwa_args, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=sam_handle)
    return job.fileStore.writeGlobalFile(sam_path)
def run_star(job, fastqs, univ_options, star_options):
    """
    Align a pair of fastqs with STAR.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The input fastqs for alignment (read 1 first, then read 2)
    :param dict univ_options: Dict of universal options; `dockerhub` and `patient` are read here
           (the dict is also handed to `export_results`)
    :param dict star_options: Options specific to star; `type` ('star' or 'starlong'), `index`,
           `n` and `version` are read here
    :return: Dict containing the transcriptome bam, the genome bam and the chimeric junctions
             output_files:
                 |- 'rnaAligned.toTranscriptome.out.bam': fsID
                 |- 'rnaAligned.out.bam': fsID
                 +- 'rnaChimeric.out.junction': fsID
    :rtype: dict
    """
    assert star_options['type'] in ('star', 'starlong')
    work_dir = os.getcwd()
    input_files = {
        'rna_cutadapt_1.fastq': fastqs[0],
        'rna_cutadapt_2.fastq': fastqs[1],
        'star_index.tar.gz': star_options['index'],
    }
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz = '.gz' if is_gzipfile(input_files['rna_cutadapt_1.fastq']) else ''
    if gz:
        for read_file in 'rna_cutadapt_1.fastq', 'rna_cutadapt_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['star_index'] = untargz(input_files['star_index.tar.gz'], work_dir)
    # Check to see if user is using a STAR-Fusion index. If so, the STAR genome directory is the
    # `ref_genome.fa.star.idx` folder inside the untarred index.
    star_fusion_idx = os.path.join(input_files['star_index'], 'ref_genome.fa.star.idx')
    if os.path.exists(star_fusion_idx):
        input_files['star_index'] = star_fusion_idx
    input_files = {key: docker_path(path, work_dir=work_dir)
                   for key, path in input_files.items()}
    # Using recommended STAR-Fusion parameters:
    # https://github.com/STAR-Fusion/STAR-Fusion/wiki
    parameters = [
        '--runThreadN', str(star_options['n']),
        '--genomeDir', input_files['star_index'],
        '--twopassMode', 'Basic',
        '--outReadsUnmapped', 'None',
        '--chimSegmentMin', '12',
        '--chimJunctionOverhangMin', '12',
        '--alignSJDBoverhangMin', '10',
        '--alignMatesGapMax', '200000',
        '--alignIntronMax', '200000',
        # This option takes a single integer. The literal word 'parameter' that used to precede
        # the value was a copy-paste artefact and is not a valid value.
        '--chimSegmentReadGapMax', '3',
        '--alignSJstitchMismatchNmax', '5', '-1', '5', '5',
        '--outFileNamePrefix', 'rna',
        '--readFilesIn',
        input_files['rna_cutadapt_1.fastq' + gz],
        input_files['rna_cutadapt_2.fastq' + gz],
        '--outSAMattributes', 'NH', 'HI', 'AS', 'NM', 'MD',
        '--outSAMtype', 'BAM', 'Unsorted',
        '--quantMode', 'TranscriptomeSAM',
    ]
    if gz:
        # Lets STAR decompress the gzipped reads itself
        parameters.extend(['--readFilesCommand', 'zcat'])
    # Both tool flavours take the same arguments; only the tool name differs.
    tool = 'star' if star_options['type'] == 'star' else 'starlong'
    docker_call(tool=tool, tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], tool_version=star_options['version'])
    output_files = defaultdict()
    for output_file in ['rnaAligned.toTranscriptome.out.bam',
                        'rnaAligned.out.bam',
                        'rnaChimeric.out.junction']:
        output_files[output_file] = job.fileStore.writeGlobalFile(
            '/'.join([work_dir, output_file]))
    # The genome bam ('rnaAligned.out.bam') is returned but not exported here.
    export_results(job, output_files['rnaAligned.toTranscriptome.out.bam'],
                   'rna_transcriptome.bam', univ_options, subfolder='alignments')
    export_results(job, output_files['rnaChimeric.out.junction'], 'rna_chimeric.junction',
                   univ_options, subfolder='mutations/fusions')
    job.fileStore.logToMaster('Ran STAR on %s successfully' % univ_options['patient'])
    return output_files
def run_star(job, fastqs, univ_options, star_options):
    """
    Align the RNA fastqs to the reference with STAR.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The input fastqs for alignment (read 1 first, then read 2)
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict star_options: Options specific to STAR; `type` ('star' or 'starlong'),
           `tool_index` (the index tarball) and `n` (number of threads) are read here
    :return: Dict of aligned bams, each value being what `job.fileStore.writeGlobalFile` returned
             output_files:
                 |- 'rnaAligned.toTranscriptome.out.bam'
                 +- 'rnaAligned.sortedByCoord.out.bam'
    :rtype: dict
    """
    assert star_options['type'] in ('star', 'starlong')
    job.fileStore.logToMaster('Running STAR on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    requested = {
        'rna_cutadapt_1.fastq': fastqs[0],
        'rna_cutadapt_2.fastq': fastqs[1],
        'star_index.tar.gz': star_options['tool_index'],
    }
    staged = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    gz_ext = ''
    if is_gzipfile(staged['rna_cutadapt_1.fastq']):
        gz_ext = '.gz'
        for fastq_name in ('rna_cutadapt_1.fastq', 'rna_cutadapt_2.fastq'):
            linked_name = fastq_name + gz_ext
            os.symlink(fastq_name, linked_name)
            staged[linked_name] = staged[fastq_name] + gz_ext
    # Unpack the index tarball
    staged['star_index'] = untargz(staged['star_index.tar.gz'], work_dir)
    staged = dict((name, docker_path(path)) for name, path in staged.items())
    star_args = [
        '--runThreadN', str(star_options['n']),
        '--genomeDir', staged['star_index'],
        '--outFileNamePrefix', 'rna',
        '--readFilesIn',
        staged['rna_cutadapt_1.fastq' + gz_ext],
        staged['rna_cutadapt_2.fastq' + gz_ext],
        '--outSAMattributes', 'NH', 'HI', 'AS', 'NM', 'MD',
        '--outSAMtype', 'BAM', 'SortedByCoordinate',
        '--quantMode', 'TranscriptomeSAM',
    ]
    if gz_ext:
        # Lets STAR decompress the gzipped reads itself
        star_args += ['--readFilesCommand', 'zcat']
    # Both tool flavours take the same arguments; only the tool name differs.
    tool_name = 'star' if star_options['type'] == 'star' else 'starlong'
    docker_call(tool=tool_name, tool_parameters=star_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_files = defaultdict()
    for bam_name in ('rnaAligned.toTranscriptome.out.bam', 'rnaAligned.sortedByCoord.out.bam'):
        bam_path = '/'.join([work_dir, bam_name])
        output_files[bam_name] = job.fileStore.writeGlobalFile(bam_path)
    return output_files
def run_star(job, fastqs, univ_options, star_options):
    """
    Use STAR to align the RNA fastqs to the reference.

    :param job: Job whose `fileStore` is used for logging and for reading/writing files
    :param list fastqs: The input fastqs for alignment (read 1 first, then read 2)
    :param dict univ_options: Dict of universal options; `patient` and `dockerhub` are read here
    :param dict star_options: Options specific to STAR; `type` ('star' or 'starlong'),
           `tool_index` (the index tarball) and `n` (number of threads) are read here
    :return: Dict of aligned bams, each value being what `job.fileStore.writeGlobalFile` returned
             output_files:
                 |- 'rnaAligned.toTranscriptome.out.bam'
                 +- 'rnaAligned.sortedByCoord.out.bam'
    :rtype: dict
    """
    assert star_options['type'] in ('star', 'starlong')
    job.fileStore.logToMaster('Running STAR on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    read_names = ('rna_cutadapt_1.fastq', 'rna_cutadapt_2.fastq')
    requested = {
        read_names[0]: fastqs[0],
        read_names[1]: fastqs[1],
        'star_index.tar.gz': star_options['tool_index'],
    }
    local_files = get_files_from_filestore(job, requested, work_dir, docker=False)
    # Gzipped reads are additionally exposed under a `.gz` name. The symlinks are created relative
    # to the current directory, which is also work_dir.
    suffix = ''
    if is_gzipfile(local_files['rna_cutadapt_1.fastq']):
        suffix = '.gz'
        for read_name in read_names:
            os.symlink(read_name, read_name + suffix)
            local_files[read_name + suffix] = local_files[read_name] + suffix
    # Unpack the index tarball
    local_files['star_index'] = untargz(local_files['star_index.tar.gz'], work_dir)
    container_files = {}
    for name, path in local_files.items():
        container_files[name] = docker_path(path)
    star_args = [
        '--runThreadN', str(star_options['n']),
        '--genomeDir', container_files['star_index'],
        '--outFileNamePrefix', 'rna',
        '--readFilesIn',
        container_files['rna_cutadapt_1.fastq' + suffix],
        container_files['rna_cutadapt_2.fastq' + suffix],
        '--outSAMattributes', 'NH', 'HI', 'AS', 'NM', 'MD',
        '--outSAMtype', 'BAM', 'SortedByCoordinate',
        '--quantMode', 'TranscriptomeSAM',
    ]
    if suffix:
        # Lets STAR decompress the gzipped reads itself
        star_args.extend(['--readFilesCommand', 'zcat'])
    # Both tool flavours take the same arguments; only the tool name differs.
    if star_options['type'] == 'star':
        tool_name = 'star'
    else:
        tool_name = 'starlong'
    docker_call(tool=tool_name, tool_parameters=star_args, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_files = defaultdict()
    for bam_name in ('rnaAligned.toTranscriptome.out.bam', 'rnaAligned.sortedByCoord.out.bam'):
        output_files[bam_name] = job.fileStore.writeGlobalFile(work_dir + '/' + bam_name)
    return output_files