Example No. 1
def run_pileup(job, tumor_bam, univ_options, somaticsniper_options):
    """
    Runs a samtools pileup on the tumor bam.

    :param toil.Job job: job
    :param dict tumor_bam: Tumor bam file
    :param dict univ_options: Universal Options
    :param dict somaticsniper_options: Options specific to SomaticSniper
    :returns: jsID for the chromosome pileup file
    :rtype: str
    """
    job.fileStore.logToMaster(
        'Running samtools pileup on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': somaticsniper_options['genome_fasta'],
        'genome.fa.fai.tar.gz': somaticsniper_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['pileup',
                  '-cvi',
                  '-f', docker_path(input_files['genome.fa']),
                  docker_path(input_files['tumor.bam'])]

    with open(os.path.join(work_dir, 'pileup.txt'), 'w') as pileup_file:
        docker_call(tool='samtools:0.1.8', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=pileup_file)
    outfile = job.fileStore.writeGlobalFile(pileup_file.name)
    return outfile
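All of these snippets assume a handful of shared helpers (get_files_from_filestore, docker_path, untargz, docker_call) whose definitions are not shown. A minimal sketch of two of them, inferred from their call sites here; the names are real but the bodies are assumptions, not the pipeline's actual source:

import os
import tarfile

def docker_path(file_path):
    # Map a file in the host work dir to the /data mount that docker_call
    # is assumed to bind inside the container.
    return os.path.join('/data', os.path.basename(file_path))

def untargz(input_targz_file, untar_to_dir):
    # Extract a .tar.gz into untar_to_dir and return the path of its
    # top-level member, as the call sites above expect.
    with tarfile.open(input_targz_file, 'r:gz') as tar:
        top_level = tar.getmembers()[0].name.split('/')[0]
        tar.extractall(path=untar_to_dir)
    return os.path.join(untar_to_dir, top_level)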
Example No. 2
def add_readgroups(job, bamfile, sample_type, univ_options, picard_options):
    """
    Add read groups to the bam.

    :param dict bamfile: The input bam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict picard_options: Options specific to picard
    :return: fsID for the output bam
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        sample_type + '.bam': bamfile}
    get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = ['AddOrReplaceReadGroups',
                  'CREATE_INDEX=false',
                  'I=/data/' + sample_type + '.bam',
                  'O=/data/' + sample_type + '_reheader.bam',
                  'SO=coordinate',
                  'ID=1',
                  ''.join(['LB=', univ_options['patient']]),
                  'PL=ILLUMINA',
                  'PU=12345',
                  # NB: rstrip strips a trailing character set, not a literal
                  # suffix; it behaves as intended for 'tumor_dna'/'normal_dna'.
                  ''.join(['SM=', sample_type.rstrip('_dna')])]
    docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], java_xmx=univ_options['java_Xmx'],
                tool_version=picard_options['version'])
    output_file = job.fileStore.writeGlobalFile(
        '/'.join([work_dir, sample_type + '_reheader.bam']))
    # Delete the old bam file
    job.fileStore.deleteGlobalFile(bamfile)
    job.fileStore.logToMaster('Ran add_read_groups on %s:%s successfully'
                              % (univ_options['patient'], sample_type))
    return output_file
Example No. 3
def mark_duplicates(job, bamfile, sample_type, univ_options, picard_options):
    """
    Mark duplicates within the bam.

    :param dict bamfile: The input bam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict picard_options: Options specific to picard
    :return: fsID for the output bam
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        sample_type + '.bam': bamfile}
    get_files_from_filestore(job, input_files, work_dir, docker=True)

    parameters = ['MarkDuplicates',
                  'I=/data/' + sample_type + '.bam',
                  'O=/data/' + sample_type + '_mkdup.bam',
                  'M=/data/' + sample_type + '_mkdup.metrics',
                  'AS=true',
                  'CREATE_INDEX=true']

    docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], java_xmx=univ_options['java_Xmx'],
                tool_version=picard_options['version'])
    output_file = job.fileStore.writeGlobalFile(
        '/'.join([work_dir, sample_type + '_mkdup.bam']))
    # Delete the old bam file
    job.fileStore.deleteGlobalFile(bamfile)
    job.fileStore.logToMaster('Ran mark_duplicates on %s:%s successfully'
                              % (univ_options['patient'], sample_type))
    return output_file
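In a Toil workflow these Picard steps are wired together with promises; a hypothetical wrapper (the ordering and the wrapper name are illustrative, not from the source):

def picard_preprocess(job, bamfile, sample_type, univ_options, picard_options):
    # Mark duplicates first, then add read groups to the deduplicated bam.
    # mkdup.rv() is a Toil promise resolved to mark_duplicates' return value.
    mkdup = job.addChildJobFn(mark_duplicates, bamfile, sample_type,
                              univ_options, picard_options)
    rehead = mkdup.addFollowOnJobFn(add_readgroups, mkdup.rv(), sample_type,
                                    univ_options, picard_options)
    return rehead.rv()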
Example No. 4
def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options, chrom):
    """
    Run MuSE sump on the output of MuSE call.
    """
    job.fileStore.logToMaster('Running muse sump on %s:%s' % (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'MuSE.txt': muse_output,
        'dbsnp_coding.vcf.gz': muse_options['dbsnp_vcf'],
        'dbsnp_coding.vcf.gz.tbi.tmp': muse_options['dbsnp_tbi']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    tbi = os.path.splitext(input_files['dbsnp_coding.vcf.gz.tbi.tmp'])[0]
    print({x: os.stat(x) for x in os.listdir(work_dir)}, file=sys.stderr)
    # Copy the index into place and touch it so it ends up newer than the
    # vcf.gz (tools complain when an index looks older than its vcf).
    time.sleep(2)
    shutil.copy(input_files['dbsnp_coding.vcf.gz.tbi.tmp'], tbi)
    os.chmod(tbi, 0o777)
    open(tbi, 'a').close()
    input_files = {key: docker_path(path) for key, path in input_files.items()}
    print({x: os.stat(x) for x in os.listdir(work_dir)}, file=sys.stderr)
    output_file = ''.join([work_dir, '/', chrom, '.vcf'])

    parameters = ['sump',
                  '-I', input_files['MuSE.txt'],
                  '-O', docker_path(output_file),
                  '-D', input_files['dbsnp_coding.vcf.gz'],
                  '-E']

    docker_call(tool='muse', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    export_results(job, output_file, univ_options, subfolder='mutations/muse')
    outfile = job.fileStore.writeGlobalFile(output_file)
    return outfile
Example No. 5
def run_pileup(job, tumor_bam, univ_options, somaticsniper_options):
    """
    Runs a samtools pileup on the tumor bam.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict somaticsniper_options: Options specific to SomaticSniper
    :return: fsID for the pileup file
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': somaticsniper_options['genome_fasta'],
        'genome.fa.fai.tar.gz': somaticsniper_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in list(input_files.items())}

    parameters = ['pileup',
                  '-cvi',
                  '-f', docker_path(input_files['genome.fa']),
                  docker_path(input_files['tumor.bam'])]

    with open(os.path.join(work_dir, 'pileup.txt'), 'w') as pileup_file:
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=pileup_file,
                    tool_version=somaticsniper_options['samtools']['version'])
    outfile = job.fileStore.writeGlobalFile(pileup_file.name)
    job.fileStore.logToMaster('Ran samtools pileup on %s successfully' % univ_options['patient'])
    return outfile
Example No. 6
def bam_conversion(job, samfile, sample_type, univ_options):
    """
    This module converts SAMFILE from sam to bam

    ARGUMENTS
    1. samfile: <JSid for a sam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa()
    """
    job.fileStore.logToMaster('Running sam2bam on %s:%s' %
                              (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {sample_type + '_aligned.sam': samfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    bamfile = '/'.join([work_dir, sample_type + '_aligned.bam'])
    parameters = [
        'view', '-bS', '-o',
        docker_path(bamfile), input_files[sample_type + '_aligned.sam']
    ]
    docker_call(tool='samtools',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(bamfile)
    # The samfile is no longer useful so delete it
    job.fileStore.deleteGlobalFile(samfile)
    return output_file
Example No. 7
def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    Run PHLAT on a pair of input fastqs of type `sample_type`.

    :param list fastqs: List of input fastq files
    :param str sample_type: Description of the sample type to inject into the file name.
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict phlat_options: Options specific to PHLAT
    :return: fsID for the HLA haplotype called from the input fastqs
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' %
                              (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['index']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)
    # Handle gzipped files
    gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
    if gz:
        for read_file in 'input_1.fastq', 'input_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'],
                                         work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [
        '-1',
        input_files['input_1.fastq' + gz],
        '-2',
        input_files['input_2.fastq' + gz],
        '-index',
        input_files['phlat_index'],
        '-b2url',
        '/usr/local/bin/bowtie2',
        '-tag',
        sample_type,
        '-e',
        '/home/phlat-1.0',  # Phlat directory home
        '-o',
        '/data',  # Output directory
        '-p',
        str(phlat_options['n'])
    ]  # Number of threads
    docker_call(tool='phlat',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=phlat_options['version'])
    output_file = job.fileStore.writeGlobalFile(''.join(
        [work_dir, '/', sample_type, '_HLA.sum']))
    return output_file
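The gzip branch above depends on the shared helper is_gzipfile; a plausible implementation checks the two-byte gzip magic number (an assumption about the helper, not its verbatim source):

def is_gzipfile(filename):
    # Gzipped files start with the magic bytes 0x1f 0x8b.
    with open(filename, 'rb') as in_file:
        return in_file.read(2) == b'\x1f\x8b'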
Example No. 8
def index_bamfile(job,
                  bamfile,
                  sample_type,
                  univ_options,
                  samtools_options,
                  sample_info=None,
                  export=True):
    """
    Index `bamfile` using samtools

    :param toil.fileStore.FileID bamfile: fsID for the bam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict samtools_options: Options specific to samtools
    :param str sample_info: Information regarding the sample that will be injected into the filename
               as `sample_type`_`sample_info`.bam(.bai)
    :param bool export: Should the bam and bai be exported to the output directory?
    :return: Dict containing input bam and the generated index (.bam.bai)
             output_files:
                 |- '<sample_type>(_<sample_info>).bam': fsID
                 +- '<sample_type>(_<sample_info>).bam.bai': fsID
    :rtype: dict
    """
    work_dir = os.getcwd()
    in_bamfile = sample_type
    if sample_info is not None:
        assert isinstance(sample_info, str)
        in_bamfile = '_'.join([in_bamfile, sample_info])
    in_bamfile += '.bam'
    input_files = {in_bamfile: bamfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    parameters = ['index', input_files[in_bamfile]]
    docker_call(tool='samtools',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=samtools_options['version'])
    out_bai = '/'.join([work_dir, in_bamfile + '.bai'])
    output_files = {
        in_bamfile: bamfile,
        in_bamfile + '.bai': job.fileStore.writeGlobalFile(out_bai)
    }
    if export:
        export_results(job,
                       bamfile,
                       os.path.splitext(out_bai)[0],
                       univ_options,
                       subfolder='alignments')
        export_results(job,
                       output_files[in_bamfile + '.bai'],
                       out_bai,
                       univ_options,
                       subfolder='alignments')

    job.fileStore.logToMaster('Ran samtools-index on %s:%s successfully' %
                              (univ_options['patient'], sample_type))
    return output_files
Example No. 9
def index_bamfile(job, bamfile, sample_type, univ_options):
    """
    This module indexes BAMFILE
    ARGUMENTS
    1. bamfile: <JSid for a bam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa(). This module is the one
                     that generates the files.
    """
    job.fileStore.logToMaster('Running samtools-index on %s:%s' % (univ_options['patient'],
                                                                   sample_type))
    work_dir = os.getcwd()
    in_bamfile = '_'.join([sample_type, 'fix_pg_sorted.bam'])
    input_files = {
        in_bamfile: bamfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = ['index',
                  input_files[in_bamfile]]
    docker_call(tool='samtools', tool_parameters=parameters,
                work_dir=work_dir, dockerhub=univ_options['dockerhub'])
    out_bai = '/'.join([work_dir, in_bamfile + '.bai'])
    output_files = {in_bamfile: bamfile,
                    in_bamfile + '.bai': job.fileStore.writeGlobalFile(out_bai)}
    export_results(job, os.path.splitext(out_bai)[0], univ_options, subfolder='alignments')
    export_results(job, out_bai, univ_options, subfolder='alignments')
    return output_files
Example No. 10
def predict_mhci_binding(job, peptfile, allele, peplen, univ_options, mhci_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhci binding
    prediction tool.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param str peplen: Length of peptides to process
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhci_options: Options specific to mhci binding prediction
    :return: fsID for file containing the predictions
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster('Running mhci on %s:%s:%s' % (univ_options['patient'], allele,
                                                            peplen))
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    if not peptides:
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile())
    parameters = [mhci_options['pred'],
                  allele,
                  peplen,
                  input_files['peptfile.faa']]
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhci', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhci_options['version'])
    output_file = job.fileStore.writeGlobalFile(predfile.name)
    return output_file
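predict_mhci_binding short-circuits on an empty peptide file; a minimal sketch of read_peptide_file, assuming a conventional fasta layout (the real helper may return a richer structure):

def read_peptide_file(in_peptfile):
    # Return {record name: peptide sequence} for every fasta record.
    peptides = {}
    name = None
    with open(in_peptfile) as peptfile:
        for line in peptfile:
            line = line.strip()
            if line.startswith('>'):
                name = line[1:]
                peptides[name] = ''
            elif name is not None:
                peptides[name] += line
    return peptides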
Example No. 11
def bam_conversion(job, samfile, sample_type, univ_options):
    """
    This module converts SAMFILE from sam to bam

    ARGUMENTS
    1. samfile: <JSid for a sam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa()
    """
    job.fileStore.logToMaster('Running sam2bam on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        sample_type + '_aligned.sam': samfile}
    input_files = get_files_from_filestore(job, input_files, work_dir,
                                           docker=True)
    bamfile = '/'.join([work_dir, sample_type + '_aligned.bam'])
    parameters = ['view',
                  '-bS',
                  '-o', docker_path(bamfile),
                  input_files[sample_type + '_aligned.sam']
                  ]
    docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(bamfile)
    # The samfile is no longer useful so delete it
    job.fileStore.deleteGlobalFile(samfile)
    return output_file
Example No. 12
def bam_conversion(job, samfile, sample_type, univ_options, samtools_options):
    """
    Convert a sam to a bam.

    :param dict samfile: The input sam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict samtools_options: Options specific to samtools
    :return: fsID for the generated bam
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        sample_type + '.sam': samfile}
    input_files = get_files_from_filestore(job, input_files, work_dir,
                                           docker=True)
    bamfile = '/'.join([work_dir, sample_type + '.bam'])
    parameters = ['view',
                  '-bS',
                  '-o', docker_path(bamfile),
                  input_files[sample_type + '.sam']
                  ]
    docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], tool_version=samtools_options['version'])
    output_file = job.fileStore.writeGlobalFile(bamfile)
    # The samfile is no longer useful so delete it
    job.fileStore.deleteGlobalFile(samfile)
    job.fileStore.logToMaster('Ran sam2bam on %s:%s successfully'
                              % (univ_options['patient'], sample_type))
    return output_file
Example No. 13
def predict_netmhcii_binding(job, peptfile, allele, univ_options):
    """
    This module will predict MHC:peptide binding for peptides in the files created in node YY to
    ALLELE.  ALLELE represents an MHCII allele.

    This module corresponds to node 19 on the tree
    """
    job.fileStore.logToMaster('Running netmhciipan on %s' % allele)
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    # netMHCIIpan accepts differently formatted alleles so we need to modify the input alleles
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        allele = re.sub(r'[*:]', '', allele)
        allele = re.sub(r'/', '-', allele)
    elif allele.startswith('HLA-DRB'):
        allele = re.sub(r':', '', allele)
        allele = re.sub(r'\*', '_', allele)
        # str.lstrip strips a character set, not a prefix; drop 'HLA-' explicitly
        allele = allele[len('HLA-'):]
    else:
        raise RuntimeError('Unknown allele seen')
    parameters = ['-a', allele,
                  '-xls', '1',
                  '-xlsfile', 'predictions.tsv',
                  '-f', input_files['peptfile.faa']]
    # netMHC writes a lot of useless stuff to sys.stdout so we open /dev/null and dump output there.
    with open(os.devnull, 'w') as output_catcher:
        docker_call(tool='netmhciipan:final', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=output_catcher)
    output_file = job.fileStore.writeGlobalFile('/'.join([work_dir, 'predictions.tsv']))
    return output_file, 'netMHCIIpan'
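The allele munging above is easiest to check in isolation. The same branching restated as a standalone function, with illustrative inputs and outputs (the example strings follow netMHCIIpan's naming as this code implies it, not its documentation):

import re

def reformat_for_netmhciipan(allele):
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        # e.g. 'HLA-DQA1*01:01/DQB1*02:01' -> 'HLA-DQA10101-DQB10201'
        return re.sub(r'/', '-', re.sub(r'[*:]', '', allele))
    elif allele.startswith('HLA-DRB'):
        # e.g. 'HLA-DRB1*01:01' -> 'DRB1_0101'
        allele = re.sub(r'\*', '_', re.sub(r':', '', allele))
        return allele[len('HLA-'):]
    raise RuntimeError('Unknown allele seen')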
Example No. 14
def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
    """
    Run snpeff on an input vcf.

    :param toil.fileStore.FileID merged_mutation_file: fsID for input vcf
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict snpeff_options: Options specific to snpeff
    :return: fsID for the snpeffed vcf
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'merged_mutations.vcf': merged_mutation_file,
        'snpeff_index.tar.gz': snpeff_options['index']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)
    input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'],
                                          work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [
        'eff',
        '-dataDir',
        input_files['snpeff_index'],
        '-c',
        '/'.join([
            input_files['snpeff_index'],
            'snpEff_' + univ_options['ref'] + '_gencode.config'
        ]),
        '-no-intergenic',
        '-no-downstream',
        '-no-upstream',
        # '-canon',
        '-noStats',
        univ_options['ref'] + '_gencode',
        input_files['merged_mutations.vcf']
    ]
    xmx = snpeff_options['java_Xmx'] if snpeff_options[
        'java_Xmx'] else univ_options['java_Xmx']
    with open('/'.join([work_dir, 'mutations.vcf']), 'w') as snpeff_file:
        docker_call(tool='snpeff',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    java_xmx=xmx,
                    outfile=snpeff_file,
                    tool_version=snpeff_options['version'])
    output_file = job.fileStore.writeGlobalFile(snpeff_file.name)
    export_results(job,
                   output_file,
                   snpeff_file.name,
                   univ_options,
                   subfolder='mutations/snpeffed')

    job.fileStore.logToMaster('Ran snpeff on %s successfully' %
                              univ_options['patient'])
    return output_file
Example No. 15
def predict_mhci_binding(job, peptfile, allele, peplen, univ_options,
                         mhci_options):
    """
    This module will predict MHC:peptide binding for peptides in the files created in node XX to
    ALLELE.  ALLELE represents an MHCI allele.

    This module corresponds to node 18 on the tree
    """
    job.fileStore.logToMaster('Running mhci on %s:%s:%s' %
                              (univ_options['patient'], allele, peplen))
    work_dir = os.getcwd()
    input_files = {'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    parameters = [
        mhci_options['pred'], allele, peplen, input_files['peptfile.faa']
    ]
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhci',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=predfile,
                    interactive=True)
    output_file = job.fileStore.writeGlobalFile(predfile.name)
    return output_file
Example No. 16
def sort_bamfile(job, bamfile, sample_type, univ_options, samtools_options):
    """
    Sort `bamfile` using samtools

    :param toil.fileStore.FileID bamfile: fsID for the bam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict samtools_options: Options specific to samtools
    :return: fsID for the sorted bamfile
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    in_bamfile = ''.join([sample_type, '.bam'])
    out_bamfile = '_'.join([sample_type, 'sorted.bam'])
    input_files = {in_bamfile: bamfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    parameters = [
        'sort', '-o',
        docker_path(out_bamfile), '-O', 'bam', '-T', 'temp_sorted', '-@',
        str(samtools_options['n']), input_files[in_bamfile]
    ]
    docker_call(tool='samtools',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=samtools_options['version'])
    job.fileStore.deleteGlobalFile(bamfile)
    job.fileStore.logToMaster('Ran samtools-sort on %s:%s successfully' %
                              (univ_options['patient'], sample_type))
    return job.fileStore.writeGlobalFile(out_bamfile)
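sort_bamfile pairs naturally with the samtools-aware index_bamfile from Example No. 8; a hypothetical follow-on chain:

def sort_and_index(job, bamfile, sample_type, univ_options, samtools_options):
    # Sort, then index the sorted bam once sorting has finished.
    sort = job.addChildJobFn(sort_bamfile, bamfile, sample_type,
                             univ_options, samtools_options)
    index = sort.addFollowOnJobFn(index_bamfile, sort.rv(), sample_type,
                                  univ_options, samtools_options)
    return index.rv()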
Example No. 17
def run_filter_radia(job, bams, radia_file, univ_options, radia_options, chrom):
    """
    This module will run filterradia on the RNA and DNA bams.

    ARGUMENTS
    1. bams: REFER ARGUMENTS of run_radia()
    2. radia_file: <JSid of vcf generated by run_radia()>
    3. univ_options: REFER ARGUMENTS of run_radia()
    4. radia_options: REFER ARGUMENTS of run_radia()
    5. chrom: REFER ARGUMENTS of run_radia()

    RETURN VALUES
    1. output_file: <JSid of radia_filtered_CHROM.vcf>
    """
    job.fileStore.logToMaster('Running filter-radia on %s:%s' % (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'rna.bam': bams['tumor_rna'],
        'rna.bam.bai': bams['tumor_rnai'],
        'tumor.bam': bams['tumor_dna'],
        'tumor.bam.bai': bams['tumor_dnai'],
        'normal.bam': bams['normal_dna'],
        'normal.bam.bai': bams['normal_dnai'],
        'radia.vcf': radia_file,
        'genome.fa.tar.gz': radia_options['genome_fasta'],
        'genome.fa.fai.tar.gz': radia_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    filterradia_log = ''.join([work_dir, '/radia_filtered_', chrom, '_radia.log'])
    parameters = [univ_options['patient'],  # shortID
                  chrom.lstrip('chr'),
                  input_files['radia.vcf'],
                  '/data',
                  '/home/radia/scripts',
                  '-d', '/home/radia/data/hg19/snp135',
                  '-r', '/home/radia/data/hg19/retroGenes/',
                  '-p', '/home/radia/data/hg19/pseudoGenes/',
                  '-c', '/home/radia/data/hg19/cosmic/',
                  '-t', '/home/radia/data/hg19/gaf/2_1',
                  '--noSnpEff',
                  '--noBlacklist',
                  '--noTargets',
                  '--noRnaBlacklist',
                  '-f', input_files['genome.fa'],
                  '--log=INFO',
                  '-g', docker_path(filterradia_log)]
    docker_call(tool='filterradia', tool_parameters=parameters,
                work_dir=work_dir, dockerhub=univ_options['dockerhub'])
    output_file = ''.join([work_dir, '/', chrom, '.vcf'])
    os.rename(''.join([work_dir, '/', univ_options['patient'], '_', chrom, '.vcf']), output_file)
    export_results(job, output_file, univ_options, subfolder='mutations/radia')
    output_file = job.fileStore.writeGlobalFile(output_file)
    return output_file
Example No. 18
def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options, chrom):
    """
    Run MuTect call on a single chromosome in the input bams.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mutect_options: Options specific to MuTect
    :param str chrom: Chromosome to process
    :return: fsID for the chromosome vcf
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': mutect_options['genome_fasta'],
        'genome.fa.fai.tar.gz': mutect_options['genome_fai'],
        'genome.dict.tar.gz': mutect_options['genome_dict'],
        'cosmic.vcf.tar.gz': mutect_options['cosmic_vcf'],
        'cosmic.vcf.idx.tar.gz': mutect_options['cosmic_idx'],
        'dbsnp.vcf.gz': mutect_options['dbsnp_vcf'],
        'dbsnp.vcf.idx.tar.gz': mutect_options['dbsnp_idx']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # dbsnp.vcf should be bgzipped, but all others should be tar.gz'd
    input_files['dbsnp.vcf'] = gunzip(input_files['dbsnp.vcf.gz'])
    for key in ('genome.fa', 'genome.fa.fai', 'genome.dict', 'cosmic.vcf', 'cosmic.vcf.idx',
                'dbsnp.vcf.idx'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    mutout = ''.join([work_dir, '/', chrom, '.out'])
    mutvcf = ''.join([work_dir, '/', chrom, '.vcf'])
    parameters = ['-R', input_files['genome.fa'],
                  '--cosmic', input_files['cosmic.vcf'],
                  '--dbsnp', input_files['dbsnp.vcf'],
                  '--input_file:normal', input_files['normal.bam'],
                  '--input_file:tumor', input_files['tumor.bam'],
                  # '--tumor_lod', str(10),
                  # '--initial_tumor_lod', str(4.0),
                  '-L', chrom,
                  '--out', docker_path(mutout),
                  '--vcf', docker_path(mutvcf)
                  ]
    java_xmx = mutect_options['java_Xmx'] if mutect_options['java_Xmx'] \
        else univ_options['java_Xmx']
    docker_call(tool='mutect', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], java_xmx=java_xmx,
                tool_version=mutect_options['version'])
    output_file = job.fileStore.writeGlobalFile(mutvcf)
    export_results(job, output_file, mutvcf, univ_options, subfolder='mutations/mutect')

    job.fileStore.logToMaster('Ran MuTect on %s:%s successfully' % (univ_options['patient'], chrom))
    return output_file
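run_mutect_perchrom is built to be scattered across chromosomes; a sketch of the scatter step (the chromosome list, naming scheme, and wrapper name are assumptions):

def run_mutect(job, tumor_bam, normal_bam, univ_options, mutect_options):
    # One MuTect child job per chromosome; callers merge the per-chrom vcfs.
    chromosomes = ['chr' + str(c) for c in list(range(1, 23)) + ['X', 'Y']]
    return {chrom: job.addChildJobFn(run_mutect_perchrom, tumor_bam, normal_bam,
                                     univ_options, mutect_options, chrom).rv()
            for chrom in chromosomes}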
Example No. 19
def run_strelka_full(job, tumor_bam, normal_bam, univ_options,
                     strelka_options):
    """
    Run strelka on the DNA bams.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict strelka_options: Options specific to strelka
    :return: Dict of fsIDs snv and indel prediction files
             output_dict:
                 |-'snvs': fsID
                 +-'indels': fsID
    :rtype: dict
    """
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': strelka_options['genome_fasta'],
        'genome.fa.fai.tar.gz': strelka_options['genome_fai'],
        'config.ini.tar.gz': strelka_options['config_file']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    for key in ('genome.fa', 'genome.fa.fai', 'config.ini'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {
        key: docker_path(path)
        for key, path in list(input_files.items())
    }

    parameters = [
        input_files['config.ini'], input_files['tumor.bam'],
        input_files['normal.bam'], input_files['genome.fa'],
        str(job.cores)
    ]
    docker_call(tool='strelka',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=strelka_options['version'])
    output_dict = {}
    for mutation_type in ['snvs', 'indels']:
        output_dict[mutation_type] = job.fileStore.writeGlobalFile(
            os.path.join(work_dir, 'strelka_out', 'results',
                         'passed.somatic.' + mutation_type + '.vcf'))
    job.fileStore.logToMaster('Ran strelka on %s successfully' %
                              univ_options['patient'])
    return output_dict
Example No. 20
def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
    """
    Align a pair of fastqs with bwa.

    :param list fastqs: The input fastqs for alignment
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict bwa_options: Options specific to bwa
    :return: fsID for the generated sam
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'dna_1.fastq': fastqs[0],
        'dna_2.fastq': fastqs[1],
        'bwa_index.tar.gz': bwa_options['index']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)
    # Handle gzipped file
    gz = '.gz' if is_gzipfile(input_files['dna_1.fastq']) else ''
    if gz:
        for read_file in 'dna_1.fastq', 'dna_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['bwa_index'] = untargz(input_files['bwa_index.tar.gz'],
                                       work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [
        'mem',
        '-t',
        str(bwa_options['n']),
        '-v',
        '1',  # Don't print INFO messages to the stderr
        '/'.join([input_files['bwa_index'], univ_options['ref']]),
        input_files['dna_1.fastq' + gz],
        input_files['dna_2.fastq' + gz]
    ]
    with open(''.join([work_dir, '/', sample_type, '.sam']), 'w') as samfile:
        docker_call(tool='bwa',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=samfile,
                    tool_version=bwa_options['version'])
    # samfile.name retains the path info
    output_file = job.fileStore.writeGlobalFile(samfile.name)

    job.fileStore.logToMaster('Ran bwa on %s:%s successfully' %
                              (univ_options['patient'], sample_type))
    return output_file
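Taken together, the alignment examples here form a chain: bwa mem, then sam-to-bam conversion, sorting, and indexing. A hypothetical wiring (the nested bwa_options['samtools'] dict is an assumption about how the options are structured):

def align_dna(job, fastqs, sample_type, univ_options, bwa_options):
    # bwa mem -> sam2bam -> sort -> index, each step gated on the previous one.
    sam = job.addChildJobFn(run_bwa, fastqs, sample_type,
                            univ_options, bwa_options)
    bam = sam.addFollowOnJobFn(bam_conversion, sam.rv(), sample_type,
                               univ_options, bwa_options['samtools'])
    sorted_bam = bam.addFollowOnJobFn(sort_bamfile, bam.rv(), sample_type,
                                      univ_options, bwa_options['samtools'])
    index = sorted_bam.addFollowOnJobFn(index_bamfile, sorted_bam.rv(),
                                        sample_type, univ_options,
                                        bwa_options['samtools'])
    return index.rv()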
Example No. 21
def run_bwa(job, fastqs, sample_type, univ_options, bwa_options):
    """
    This module aligns the SAMPLE_TYPE dna fastqs to the reference

    ARGUMENTS -- <ST> depicts the sample type. Substitute with 'tumor'/'normal'
    1. fastqs: Dict of list of input WGS/WXS fastqs
         fastqs
              +- '<ST>_dna': [<JSid for 1.fastq> , <JSid for 2.fastq>]
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    4. bwa_options: Dict of parameters specific to bwa
         bwa_options
              |- 'tool_index': <JSid for the bwa index tarball>
              +- 'n': <number of threads to allocate>

    RETURN VALUES
    1. output_file: <JSid for the aligned sam file>

    This module corresponds to nodes 3 and 4 on the tree
    """
    job.fileStore.logToMaster('Running bwa on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        'dna_1.fastq': fastqs[0],
        'dna_2.fastq': fastqs[1],
        'bwa_index.tar.gz': bwa_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Handle gzipped file
    gz = '.gz' if is_gzipfile(input_files['dna_1.fastq']) else ''
    if gz:
        for read_file in 'dna_1.fastq', 'dna_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['bwa_index'] = untargz(input_files['bwa_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['mem',
                  '-t', str(bwa_options['n']),
                  '-v', '1',  # Don't print INFO messages to the stderr
                  '/'.join([input_files['bwa_index'], 'hg19']),
                  input_files['dna_1.fastq' + gz],
                  input_files['dna_2.fastq' + gz]]
    with open(''.join([work_dir, '/', sample_type, '_aligned.sam']), 'w') as samfile:
        docker_call(tool='bwa', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=samfile)
    # samfile.name retains the path info
    output_file = job.fileStore.writeGlobalFile(samfile.name)
    return output_file
Example No. 22
def run_rsem(job, rna_bam, univ_options, rsem_options):
    """
    Run rsem on the input RNA bam.

    :param toil.fileStore.FileID rna_bam: fsID of a transcriptome bam generated by STAR
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict rsem_options: Options specific to rsem
    :return: Dict of gene- and isoform-level expression calls
             output_files:
                 |- 'rsem.genes.results': fsID
                 +- 'rsem.isoforms.results': fsID
    :rtype: dict
    """
    work_dir = os.getcwd()
    input_files = {
        'star_transcriptome.bam': rna_bam,
        'rsem_index.tar.gz': rsem_options['index']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    input_files['rsem_index'] = untargz(input_files['rsem_index.tar.gz'],
                                        work_dir)
    input_files = {
        key: docker_path(path)
        for key, path in list(input_files.items())
    }

    parameters = [
        '--paired-end', '-p',
        str(20), '--bam', input_files['star_transcriptome.bam'],
        '--no-bam-output',
        '/'.join([input_files['rsem_index'], univ_options['ref']]), 'rsem'
    ]
    docker_call(tool='rsem',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=rsem_options['version'])
    output_files = {}
    for filename in ('rsem.genes.results', 'rsem.isoforms.results'):
        output_files[filename] = job.fileStore.writeGlobalFile('/'.join(
            [work_dir, filename]))
        export_results(job,
                       output_files[filename],
                       '/'.join([work_dir, filename]),
                       univ_options,
                       subfolder='expression')
    job.fileStore.logToMaster('Ran rsem on %s successfully' %
                              univ_options['patient'])
    return output_files
Example No. 23
def run_rsem(job, rna_bam, univ_options, rsem_options):
    """
    This module will run rsem on the RNA Bam file.

    ARGUMENTS
    1. rna_bam: <JSid of rnaAligned.toTranscriptome.out.bam>
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    3. rsem_options: Dict of parameters specific to rsem
         rsem_options
              |- 'tool_index': <JSid for the rsem index tarball>
              +- 'n': <number of threads to allocate>

    RETURN VALUES
    1. output_file: <Jsid of rsem.isoforms.results>

    This module corresponds to node 9 on the tree
    """
    job.fileStore.logToMaster('Running rsem on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'star_transcriptome.bam': rna_bam,
        'rsem_index.tar.gz': rsem_options['tool_index']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    input_files['rsem_index'] = untargz(input_files['rsem_index.tar.gz'],
                                        work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [
        '--paired-end', '-p',
        str(rsem_options['n']), '--bam', input_files['star_transcriptome.bam'],
        '--no-bam-output', '/'.join([input_files['rsem_index'],
                                     'hg19']), 'rsem'
    ]
    docker_call(tool='rsem',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_files = {}
    for filename in ('rsem.genes.results', 'rsem.isoforms.results'):
        output_files[filename] = job.fileStore.writeGlobalFile('/'.join(
            [work_dir, filename]))
        export_results(job,
                       output_files[filename],
                       '/'.join([work_dir, filename]),
                       univ_options,
                       subfolder='expression')
    return output_files
Example No. 24
def fix_bam_header(job, bamfile, sample_type, univ_options):
    """
    This module modifies the header in BAMFILE

    ARGUMENTS
    1. bamfile: <JSid for a bam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa()
    """
    job.fileStore.logToMaster('Running reheader on %s:%s' %
                              (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {sample_type + '_aligned.bam': bamfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    parameters = ['view', '-H', input_files[sample_type + '_aligned.bam']]
    with open('/'.join([work_dir, sample_type + '_aligned_bam.header']),
              'w') as headerfile:
        docker_call(tool='samtools',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=headerfile)
    with open(headerfile.name, 'r') as headerfile, \
            open('/'.join([work_dir, sample_type + '_output_bam.header']), 'w') as outheaderfile:
        for line in headerfile:
            if line.startswith('@PG'):
                line = '\t'.join([
                    x for x in line.strip().split('\t')
                    if not x.startswith('CL')
                ])
            print(line.strip(), file=outheaderfile)
    parameters = [
        'reheader',
        docker_path(outheaderfile.name),
        input_files[sample_type + '_aligned.bam']
    ]
    with open('/'.join([work_dir, sample_type + '_aligned_fixPG.bam']),
              'w') as fixpg_bamfile:
        docker_call(tool='samtools',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=fixpg_bamfile)
    output_file = job.fileStore.writeGlobalFile(fixpg_bamfile.name)
    # The old bam file is now useless.
    job.fileStore.deleteGlobalFile(bamfile)
    return output_file
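The header rewrite above drops the CL (command line) tag from every @PG record before reheadering; the transform on a single header line, for illustration:

line = '@PG\tID:bwa\tPN:bwa\tCL:bwa mem ref.fa r1.fq r2.fq\tVN:0.7.9a'
fixed = '\t'.join(x for x in line.strip().split('\t') if not x.startswith('CL'))
# fixed == '@PG\tID:bwa\tPN:bwa\tVN:0.7.9a'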
Example No. 25
def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options):
    """
    This module will run transgene on the input vcf file from the aggregator and produce the
    peptides for MHC prediction

    ARGUMENTS
    1. snpeffed_file: <JSid for snpeffed vcf>
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    3. transgene_options: Dict of parameters specific to transgene
         transgene_options
                +- 'gencode_peptide_fasta': <JSid for the gencode protein fasta>

    RETURN VALUES
    1. output_files: Dict of transgened n-mer peptide fastas
         output_files
                |- 'transgened_tumor_9_mer_snpeffed.faa': <JSid>
                |- 'transgened_tumor_10_mer_snpeffed.faa': <JSid>
                +- 'transgened_tumor_15_mer_snpeffed.faa': <JSid>

    This module corresponds to node 17 on the tree
    """
    job.fileStore.logToMaster('Running transgene on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    rna_bam_key = 'rnaAligned.sortedByCoord.out.bam'  # to reduce next line size
    input_files = {
        'snpeffed_muts.vcf': snpeffed_file,
        'rna.bam': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam'],
        'rna.bam.bai': rna_bam[rna_bam_key]['rna_fix_pg_sorted.bam.bai'],
        'pepts.fa.tar.gz': transgene_options['gencode_peptide_fasta']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    input_files['pepts.fa'] = untargz(input_files['pepts.fa.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['--peptides', input_files['pepts.fa'],
                  '--snpeff', input_files['snpeffed_muts.vcf'],
                  '--rna_file', input_files['rna.bam'],
                  '--prefix', 'transgened',
                  '--pep_lens', '9,10,15']
    docker_call(tool='transgene', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_files = defaultdict()
    for peplen in ['9', '10', '15']:
        peptfile = '_'.join(['transgened_tumor', peplen, 'mer_snpeffed.faa'])
        mapfile = '_'.join(['transgened_tumor', peplen, 'mer_snpeffed.faa.map'])
        export_results(job, peptfile, univ_options, subfolder='peptides')
        export_results(job, mapfile, univ_options, subfolder='peptides')
        output_files[peptfile] = job.fileStore.writeGlobalFile(os.path.join(work_dir, peptfile))
        output_files[mapfile] = job.fileStore.writeGlobalFile(os.path.join(work_dir, mapfile))
    os.rename('transgened_transgened.vcf', 'mutations.vcf')
    export_results(job, 'mutations.vcf', univ_options, subfolder='mutations/transgened')
    return output_files
Example No. 26
def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    This module will run PHLAT on SAMPLE_TYPE fastqs.

    ARGUMENTS -- <ST> depicts the sample type. Substitute with 'tumor_dna',
                 'normal_dna', or 'tumor_rna'
    1. fastqs: Dict of list of input WGS/WXS fastqs
         fastqs
              +- '<ST>': [<JSid for 1.fastq> , <JSid for 2.fastq>]
    2. sample_type: string of 'tumor' or 'normal'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    4. phlat_options: Dict of parameters specific to phlat
         phlat_options
              |- 'tool_index': <JSid for the PHLAT index tarball>
              +- 'n': <number of threads to allocate>

    RETURN VALUES
    1. output_file: <JSid for the allele predictions for ST>

    This module corresponds to nodes 5, 6 and 7 on the tree
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
    print(phlat_options, file=sys.stderr)
    work_dir = os.getcwd()
    input_files = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Handle gzipped files
    gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
    if gz:
        for read_file in 'input_1.fastq', 'input_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['-1', input_files['input_1.fastq' + gz],
                  '-2', input_files['input_2.fastq' + gz],
                  '-index', input_files['phlat_index'],
                  '-b2url', '/usr/local/bin/bowtie2',
                  '-tag', sample_type,
                  '-e', '/home/phlat-1.0',  # Phlat directory home
                  '-o', '/data',  # Output directory
                  '-p', str(phlat_options['n'])]  # Number of threads
    docker_call(tool='phlat', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(''.join([work_dir, '/', sample_type, '_HLA.sum']))
    return output_file
Example No. 27
def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhcii binding
    prediction tool.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhcii_options: Options specific to mhcii binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used
    :rtype: tuple(toil.fileStore.FileID, str|None)
    """
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    parameters = [mhcii_options['pred'],
                  allele,
                  input_files['peptfile.faa']]
    if not peptides:
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile()), None
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhcii', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhcii_options['version'])
    run_netmhciipan = True
    predictor = None
    with open(predfile.name, 'r') as predfile:
        for line in predfile:
            if not line.startswith('HLA'):
                continue
            if line.strip().split('\t')[5] == 'NetMHCIIpan':
                break
            # If the predictor type is sturniolo then it needs to be processed differently
            elif line.strip().split('\t')[5] == 'Sturniolo':
                predictor = 'Sturniolo'
            else:
                predictor = 'Consensus'
            run_netmhciipan = False
            break
    if run_netmhciipan:
        netmhciipan = job.addChildJobFn(predict_netmhcii_binding, peptfile, allele, univ_options,
                                        mhcii_options['netmhciipan'], disk='100M', memory='100M',
                                        cores=1)
        job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                                  % (univ_options['patient'], allele))
        return netmhciipan.rv()
    else:
        output_file = job.fileStore.writeGlobalFile(predfile.name)
        job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                                  % (univ_options['patient'], allele))
        return output_file, predictor
Example No. 28
def run_phlat(job, fastqs, sample_type, univ_options, phlat_options):
    """
    This module will run PHLAT on SAMPLE_TYPE fastqs.

    ARGUMENTS -- <ST> depicts the sample type. Substitute with 'tumor_dna',
                 'normal_dna', or 'tumor_rna'
    1. fastqs: Dict of list of input WGS/WXS fastqs
         fastqs
              +- '<ST>': [<JSid for 1.fastq> , <JSid for 2.fastq>]
    2. sample_type: string of 'tumor' or 'normal'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    4. phlat_options: Dict of parameters specific to phlat
         phlat_options
              |- 'tool_index': <JSid for the PHLAT index tarball>
              +- 'n': <number of threads to allocate>

    RETURN VALUES
    1. output_file: <JSid for the allele predictions for ST>

    This module corresponds to nodes 5, 6 and 7 on the tree
    """
    job.fileStore.logToMaster('Running phlat on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        'input_1.fastq': fastqs[0],
        'input_2.fastq': fastqs[1],
        'phlat_index.tar.gz': phlat_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Handle gzipped files
    gz = '.gz' if is_gzipfile(input_files['input_1.fastq']) else ''
    if gz:
        for read_file in 'input_1.fastq', 'input_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['phlat_index'] = untargz(input_files['phlat_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['-1', input_files['input_1.fastq' + gz],
                  '-2', input_files['input_2.fastq' + gz],
                  '-index', input_files['phlat_index'],
                  '-b2url', '/usr/local/bin/bowtie2',
                  '-tag', sample_type,
                  '-e', '/home/phlat-1.0',  # Phlat directory home
                  '-o', '/data',  # Output directory
                  '-p', str(phlat_options['n'])]  # Number of threads
    docker_call(tool='phlat', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(''.join([work_dir, '/', sample_type, '_HLA.sum']))
    return output_file
Example no. 29
def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
    """
    Runs cutadapt on the input RNA fastq files.

    :param list fastqs: List of fsIDs for the input RNA-Seq fastq pair
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict cutadapt_options: Options specific to cutadapt
    :return: List of fsIDs of cutadapted fastqs
    :rtype: list[toil.fileStore.FileID]
    """
    work_dir = os.getcwd()
    input_files = {'rna_1.fastq': fastqs[0], 'rna_2.fastq': fastqs[1]}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)
    # Handle gzipped file
    gz = '.gz' if is_gzipfile(input_files['rna_1.fastq']) else ''
    if gz:
        for read_file in 'rna_1.fastq', 'rna_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    input_files = {
        key: docker_path(path)
        for key, path in list(input_files.items())
    }
    parameters = [
        '-a',
        cutadapt_options['a'],  # Fwd read 3' adapter
        '-A',
        cutadapt_options['A'],  # Rev read 3' adapter
        '-m',
        '35',  # Minimum size of read
        '-o',
        docker_path('rna_cutadapt_1.fastq.gz'),  # Output for R1
        '-p',
        docker_path('rna_cutadapt_2.fastq.gz'),  # Output for R2
        input_files['rna_1.fastq' + gz],
        input_files['rna_2.fastq' + gz]
    ]
    docker_call(tool='cutadapt',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=cutadapt_options['version'])
    output_files = []
    for fastq_file in ['rna_cutadapt_1.fastq.gz', 'rna_cutadapt_2.fastq.gz']:
        output_files.append(
            job.fileStore.writeGlobalFile('/'.join([work_dir, fastq_file])))
    job.fileStore.logToMaster('Ran cutadapt on %s successfully' %
                              univ_options['patient'])
    return output_files
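Like every job in this listing, run_cutadapt is meant to be wrapped and launched through Toil rather than called directly. A usage sketch, assuming run_cutadapt is importable; the paths, patient name, dockerhub user, adapter sequences, and resource figures below are placeholders:

from toil.common import Toil
from toil.job import Job

if __name__ == '__main__':
    options = Job.Runner.getDefaultOptions('./jobstore')
    options.logLevel = 'INFO'
    with Toil(options) as toil:
        fastqs = [toil.importFile('file:///path/to/rna_1.fastq.gz'),
                  toil.importFile('file:///path/to/rna_2.fastq.gz')]
        univ_options = {'patient': 'TEST_PATIENT', 'dockerhub': 'some_dockerhub_user'}
        cutadapt_options = {'a': 'AGATCGGAAGAG', 'A': 'AGATCGGAAGAG', 'version': '1.9.1'}
        root = Job.wrapJobFn(run_cutadapt, fastqs, univ_options, cutadapt_options,
                             disk='60G', memory='6G', cores=1)
        output_fsids = toil.start(root)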
Example no. 30
def run_muse_sump_perchrom(job, muse_output, univ_options, muse_options,
                           chrom):
    """
    Run MuSE sump on the MuSE call generated vcf.

    :param toil.fileStore.FileID muse_output: vcf generated by MuSE call
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict muse_options: Options specific to MuSE
    :param str chrom: Chromosome to process
    :return: fsID for the chromosome vcf
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'MuSE.txt': muse_output,
        'dbsnp_coding.vcf.gz': muse_options['dbsnp_vcf'],
        'dbsnp_coding.vcf.gz.tbi.tmp': muse_options['dbsnp_tbi']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)
    tbi = os.path.splitext(input_files['dbsnp_coding.vcf.gz.tbi.tmp'])[0]
    # Copy the index into place and touch it so its mtime is newer than the
    # vcf.gz, keeping tabix from treating the index as stale.
    time.sleep(2)
    shutil.copy(input_files['dbsnp_coding.vcf.gz.tbi.tmp'], tbi)
    os.chmod(tbi, 0o777)
    open(tbi, 'a').close()
    input_files = {key: docker_path(path) for key, path in input_files.items()}
    output_file = ''.join([work_dir, '/', chrom, '.vcf'])

    parameters = [
        'sump', '-I', input_files['MuSE.txt'], '-O',
        docker_path(output_file), '-D', input_files['dbsnp_coding.vcf.gz'],
        '-E'
    ]

    docker_call(tool='muse',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=muse_options['version'])
    outfile = job.fileStore.writeGlobalFile(output_file)
    export_results(job,
                   outfile,
                   output_file,
                   univ_options,
                   subfolder='mutations/muse')

    job.fileStore.logToMaster('Ran MuSE sump on %s:%s successfully' %
                              (univ_options['patient'], chrom))
    return outfile
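docker_path shows up in every example here; given the '-o', '/data' style arguments throughout, a reasonable assumption is that docker_call bind-mounts work_dir at /data inside the container, which makes the helper a one-liner:

import os

def docker_path(filepath):
    """Return the in-container path for a file living in the mounted work_dir."""
    # Assumes docker_call bind-mounts work_dir at /data.
    return os.path.join('/data', os.path.basename(filepath))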
Example no. 31
def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options,
                      chrom):
    """
    This module will run MuSE on the DNA bams.

    ARGUMENTS
    1. tumor_bam: REFER ARGUMENTS of spawn_muse()
    2. normal_bam: REFER ARGUMENTS of spawn_muse()
    3. univ_options: REFER ARGUMENTS of spawn_muse()
    4. muse_options: REFER ARGUMENTS of spawn_muse()
    5. chrom: String containing the chromosome name with 'chr' prefixed

    RETURN VALUES
    1. output_files: <JSid for CHROM.MuSe.txt>

    This module corresponds to node 12 on the tree
    """
    job.fileStore.logToMaster('Running muse on %s:%s' %
                              (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': muse_options['genome_fasta'],
        'genome.fa.fai.tar.gz': muse_options['genome_fai']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    output_prefix = os.path.join(work_dir, chrom)

    parameters = [
        'call', '-f', input_files['genome.fa'], '-r', chrom, '-O',
        docker_path(output_prefix), input_files['tumor.bam'],
        input_files['normal.bam']
    ]
    docker_call(tool='muse',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    outfile = job.fileStore.writeGlobalFile(''.join(
        [output_prefix, '.MuSE.txt']))
    return outfile
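Perchrom jobs like this one are fanned out by a spawn_* parent that adds one child per chromosome and collects the promised return values. A sketch of that pattern; the chromosome list and resource figures are placeholder assumptions, and ProTECT's real spawn_muse may differ:

def spawn_muse(job, tumor_bam, normal_bam, univ_options, muse_options):
    """Fan run_muse_perchrom out over the canonical chromosomes (sketch)."""
    chromosomes = ['chr' + str(c) for c in list(range(1, 23)) + ['X', 'Y']]
    perchrom_outputs = {}
    for chrom in chromosomes:
        child = job.addChildJobFn(run_muse_perchrom, tumor_bam, normal_bam,
                                  univ_options, muse_options, chrom,
                                  disk='60G', memory='6G')
        perchrom_outputs[chrom] = child.rv()  # promise, resolved when the child runs
    return perchrom_outputs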
Example no. 32
def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
    """
    Run RADIA call on a single chromosome in the input bams.

    :param dict bams: Dict of bam and bai for tumor DNA-Seq, normal DNA-Seq and tumor RNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict radia_options: Options specific to RADIA
    :param str chrom: Chromosome to process
    :return: fsID for the chromosome vcf
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'rna.bam': bams['tumor_rna'],
        'rna.bam.bai': bams['tumor_rnai'],
        'tumor.bam': bams['tumor_dna'],
        'tumor.bam.bai': bams['tumor_dnai'],
        'normal.bam': bams['normal_dna'],
        'normal.bam.bai': bams['normal_dnai'],
        'genome.fa.tar.gz': radia_options['genome_fasta'],
        'genome.fa.fai.tar.gz': radia_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    radia_output = ''.join([work_dir, '/radia_', chrom, '.vcf'])
    radia_log = ''.join([work_dir, '/radia_', chrom, '_radia.log'])
    parameters = [univ_options['patient'],  # shortID
                  chrom,
                  '-n', input_files['normal.bam'],
                  '-t', input_files['tumor.bam'],
                  '-r', input_files['rna.bam'],
                  ''.join(['--rnaTumorFasta=', input_files['genome.fa']]),
                  '-f', input_files['genome.fa'],
                  '-o', docker_path(radia_output),
                  '-i', univ_options['ref'],
                  '-m', input_files['genome.fa'],
                  '-d', '*****@*****.**',
                  '-q', 'Illumina',
                  '--disease', 'CANCER',
                  '-l', 'INFO',
                  '-g', docker_path(radia_log)]
    docker_call(tool='radia', tool_parameters=parameters,
                work_dir=work_dir, dockerhub=univ_options['dockerhub'],
                tool_version=radia_options['version'])
    output_file = job.fileStore.writeGlobalFile(radia_output)

    job.fileStore.logToMaster('Ran radia on %s:%s successfully' % (univ_options['patient'], chrom))
    return output_file
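The contract of get_files_from_filestore is visible from its call sites: localize each fsID into work_dir under the given name and, with docker=True, hand back the path the container will see. A self-contained sketch under the /data-mount assumption:

import os

def get_files_from_filestore(job, input_files, work_dir, docker=False):
    """Copy fsIDs into work_dir; optionally return container-side (/data) paths."""
    localized = {}
    for name, fsid in input_files.items():
        local_path = job.fileStore.readGlobalFile(fsid, os.path.join(work_dir, name))
        localized[name] = os.path.join('/data', name) if docker else local_path
    return localized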
Example no. 33
def run_strelka_full(job, tumor_bam, normal_bam, univ_options,
                     strelka_options):
    """
    This module will run strelka on the DNA bams.

    ARGUMENTS
    :param dict tumor_bam: REFER ARGUMENTS of spawn_strelka()
    :param dict normal_bam: REFER ARGUMENTS of spawn_strelka()
    :param dict univ_options: REFER ARGUMENTS of spawn_strelka()
    :param dict strelka_options: REFER ARGUMENTS of spawn_strelka()

    RETURN VALUES
    :returns: dict of output vcfs for each chromosome
    :rtype: dict
    """
    job.fileStore.logToMaster('Running strelka on %s' %
                              univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': strelka_options['genome_fasta'],
        'genome.fa.fai.tar.gz': strelka_options['genome_fai'],
        'config.ini.tar.gz': strelka_options['strelka_config']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    for key in ('genome.fa', 'genome.fa.fai', 'config.ini'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [
        input_files['config.ini'], input_files['tumor.bam'],
        input_files['normal.bam'], input_files['genome.fa'],
        str(job.cores)
    ]
    docker_call(tool='strelka',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_dict = {}
    for mutation_type in ['snvs', 'indels']:
        output_dict[mutation_type] = job.fileStore.writeGlobalFile(
            os.path.join(work_dir, 'strelka_out', 'results',
                         'passed.somatic.' + mutation_type + '.vcf'))
    return output_dict
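untargz evidently unpacks a reference tarball into work_dir and returns the extracted entry's path. A minimal sketch, assuming each tarball has a single top-level member:

import os
import tarfile

def untargz(tarball, work_dir):
    """Extract a .tar.gz into work_dir and return the top-level member's path."""
    with tarfile.open(tarball, 'r:gz') as tar:
        top_level = tar.getnames()[0]
        tar.extractall(path=work_dir)
    return os.path.join(work_dir, top_level)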
Example no. 34
def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options,
                      chrom):
    """
    Run MuSE call on a single chromosome in the input bams.

    :param dict tumor_bam: Dict of bam and bai for tumor DNA-Seq
    :param dict normal_bam: Dict of bam and bai for normal DNA-Seq
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict muse_options: Options specific to MuSE
    :param str chrom: Chromosome to process
    :return: fsID for the chromosome vcf
    :rtype: toil.fileStore.FileID
    """
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': muse_options['genome_fasta'],
        'genome.fa.fai.tar.gz': muse_options['genome_fai']
    }
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    output_prefix = os.path.join(work_dir, chrom)

    parameters = [
        'call', '-f', input_files['genome.fa'], '-r', chrom, '-O',
        docker_path(output_prefix), input_files['tumor.bam'],
        input_files['normal.bam']
    ]
    docker_call(tool='muse',
                tool_parameters=parameters,
                work_dir=work_dir,
                dockerhub=univ_options['dockerhub'],
                tool_version=muse_options['version'])
    outfile = job.fileStore.writeGlobalFile(''.join(
        [output_prefix, '.MuSE.txt']))

    job.fileStore.logToMaster('Ran MuSE on %s:%s successfully' %
                              (univ_options['patient'], chrom))
    return outfile
Example no. 35
def fix_bam_header(job, bamfile, sample_type, univ_options, samtools_options, retained_chroms=None):
    """
    Fix the bam header to remove the command line call.  Failing to do this causes Picard to reject
    the bam.

    :param dict bamfile: The input bam file
    :param str sample_type: Description of the sample to inject into the filename
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict samtools_options: Options specific to samtools
    :param list retained_chroms: A list of chromosomes to retain
    :return: fsID for the output bam
    :rtype: toil.fileStore.FileID
    """
    if retained_chroms is None:
        retained_chroms = []

    work_dir = os.getcwd()
    input_files = {
        sample_type + '.bam': bamfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = ['view',
                  '-H',
                  input_files[sample_type + '.bam']]
    with open('/'.join([work_dir, sample_type + '_input_bam.header']), 'w') as headerfile:
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=headerfile,
                    tool_version=samtools_options['version'])
    with open(headerfile.name, 'r') as headerfile, \
            open('/'.join([work_dir, sample_type + '_output_bam.header']), 'w') as outheaderfile:
        for line in headerfile:
            if line.startswith('@PG'):
                line = '\t'.join([x for x in line.strip().split('\t') if not x.startswith('CL')])
            if retained_chroms and line.startswith('@SQ'):
                if line.strip().split()[1][len('SN:'):] not in retained_chroms:  # 'SN:' prefix stripped
                    continue
            print(line.strip(), file=outheaderfile)
    parameters = ['reheader',
                  docker_path(outheaderfile.name),
                  input_files[sample_type + '.bam']]
    with open('/'.join([work_dir, sample_type + '_fixPG.bam']), 'w') as fixpg_bamfile:
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=fixpg_bamfile,
                    tool_version=samtools_options['version'])
    output_file = job.fileStore.writeGlobalFile(fixpg_bamfile.name)
    # The old bam file is now useless.
    job.fileStore.deleteGlobalFile(bamfile)
    job.fileStore.logToMaster('Ran reheader on %s:%s successfully'
                              % (univ_options['patient'], sample_type))
    return output_file
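Both header transformations are pure string edits and easy to check in isolation. A small sketch with worked inputs (the @PG line here is a made-up bwa example):

def strip_cl_field(pg_line):
    """Remove the CL: field from an @PG header line."""
    return '\t'.join(x for x in pg_line.strip().split('\t') if not x.startswith('CL'))

def keep_sq_line(sq_line, retained_chroms):
    """True if this @SQ line's SN: value is in retained_chroms."""
    return sq_line.strip().split()[1][len('SN:'):] in retained_chroms

assert strip_cl_field('@PG\tID:bwa\tPN:bwa\tCL:bwa mem ref.fa r1.fq') == '@PG\tID:bwa\tPN:bwa'
assert keep_sq_line('@SQ\tSN:chr1\tLN:249250621', {'chr1', 'chr2'})
assert not keep_sq_line('@SQ\tSN:chrM\tLN:16571', {'chr1', 'chr2'})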
Example no. 36
def predict_netmhcii_binding(job, peptfile, allele, univ_options,
                             netmhciipan_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using netMHCIIpan.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict netmhciipan_options: Options specific to netmhciipan binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used (netMHCIIpan)
    :rtype: tuple(toil.fileStore.FileID, str)
    """
    work_dir = os.getcwd()
    input_files = {'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    if not peptides:
        return job.fileStore.writeGlobalFile(
            job.fileStore.getLocalTempFile()), None
    # netMHCIIpan accepts differently formatted alleles so we need to modify the input alleles
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        allele = re.sub(r'[*:]', '', allele)
        allele = re.sub(r'/', '-', allele)
    elif allele.startswith('HLA-DRB'):
        allele = re.sub(r':', '', allele)
        allele = re.sub(r'\*', '_', allele)
        # Use replace, not lstrip: lstrip('HLA-') strips characters, not the prefix.
        allele = allele.replace('HLA-', '', 1)
    else:
        raise RuntimeError('Unknown allele seen')
    parameters = [
        '-a', allele, '-xls', '1', '-xlsfile', 'predictions.tsv', '-f',
        input_files['peptfile.faa']
    ]
    # netMHC writes a lot of useless stuff to sys.stdout so we open /dev/null and dump output there.
    with open(os.devnull, 'w') as output_catcher:
        docker_call(tool='netmhciipan',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=output_catcher,
                    tool_version=netmhciipan_options['version'])
    output_file = job.fileStore.writeGlobalFile('/'.join(
        [work_dir, 'predictions.tsv']))

    job.fileStore.logToMaster('Ran netmhciipan on %s successfully' % allele)
    return output_file, 'netMHCIIpan'
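The allele munging is easiest to follow with concrete inputs. A sketch that mirrors the regexes above, with worked examples; note that the exact naming netMHCIIpan expects can vary by version:

import re

def reformat_mhcii_allele(allele):
    """Reproduce the allele rewriting from predict_netmhcii_binding (sketch)."""
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        return re.sub(r'/', '-', re.sub(r'[*:]', '', allele))
    elif allele.startswith('HLA-DRB'):
        return re.sub(r'\*', '_', re.sub(r':', '', allele)).replace('HLA-', '', 1)
    raise RuntimeError('Unknown allele seen')

assert reformat_mhcii_allele('HLA-DQA1*01:02/DQB1*03:01') == 'HLA-DQA10102-DQB10301'
assert reformat_mhcii_allele('HLA-DRB1*15:01') == 'DRB1_1501'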
Example no. 37
def run_cutadapt(job, fastqs, univ_options, cutadapt_options):
    """
    This module runs cutadapt on the input RNA fastq files and then calls the RNA aligners.

    ARGUMENTS
    1. fastqs: List of input RNA-Seq fastqs [<JSid for 1.fastq> , <JSid for 2.fastq>]
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
              +- 'dockerhub': <dockerhub to use>
    3. cutadapt_options: Dict of parameters specific to cutadapt
         cutadapt_options
              |- 'a': <sequence of 3' adapter to trim from fwd read>
              +- 'A': <sequence of 3' adapter to trim from rev read>
    RETURN VALUES
    1. output_files: Dict of cutadapted fastqs
         output_files
             |- 'rna_cutadapt_1.fastq': <JSid>
             +- 'rna_cutadapt_2.fastq': <JSid>

    This module corresponds to node 2 on the tree
    """
    job.fileStore.logToMaster('Running cutadapt on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'rna_1.fastq': fastqs[0],
        'rna_2.fastq': fastqs[1]}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # Handle gzipped file
    gz = '.gz' if is_gzipfile(input_files['rna_1.fastq']) else ''
    if gz:
        for read_file in 'rna_1.fastq', 'rna_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    input_files = {key: docker_path(path) for key, path in input_files.items()}
    parameters = ['-a', cutadapt_options['a'],  # Fwd read 3' adapter
                  '-A', cutadapt_options['A'],  # Rev read 3' adapter
                  '-m', '35',  # Minimum size of read
                  '-o', docker_path('rna_cutadapt_1.fastq.gz'),  # Output for R1
                  '-p', docker_path('rna_cutadapt_2.fastq.gz'),  # Output for R2
                  input_files['rna_1.fastq' + gz],
                  input_files['rna_2.fastq' + gz]]
    docker_call(tool='cutadapt', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_files = []
    for fastq_file in ['rna_cutadapt_1.fastq.gz', 'rna_cutadapt_2.fastq.gz']:
        output_files.append(job.fileStore.writeGlobalFile('/'.join([work_dir, fastq_file])))
    return output_files
Example no. 39
def run_snpeff(job, merged_mutation_file, univ_options, snpeff_options):
    """
    This module will run snpeff on the aggregated mutation calls.  Currently the only mutations
    called are SNPs, hence SnpEff suffices.  This node will be replaced in the future with another
    translator.

    ARGUMENTS
    1. merged_mutation_file: <JSid for merged vcf>
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    3. snpeff_options: Dict of parameters specific to snpeff
         snpeff_options
                +- 'tool_index': <JSid for the snpEff index tarball>

    RETURN VALUES
    1. output_file: <JSid for the snpeffed vcf>

    This node corresponds to node 16 on the tree
    """
    job.fileStore.logToMaster('Running snpeff on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'merged_mutations.vcf': merged_mutation_file,
        'snpeff_index.tar.gz': snpeff_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    input_files['snpeff_index'] = untargz(input_files['snpeff_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['eff',
                  '-dataDir', input_files['snpeff_index'],
                  '-c', '/'.join([input_files['snpeff_index'], 'snpEff_hg19_gencode.config']),
                  '-no-intergenic',
                  '-no-downstream',
                  '-no-upstream',
                  # '-canon',
                  '-noStats',
                  'hg19_gencode',
                  input_files['merged_mutations.vcf']]
    xmx = snpeff_options['java_Xmx'] if snpeff_options['java_Xmx'] else univ_options['java_Xmx']
    with open('/'.join([work_dir, 'mutations.vcf']), 'w') as snpeff_file:
        docker_call(tool='snpeff', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], java_opts=xmx, outfile=snpeff_file)
    export_results(job, snpeff_file.name, univ_options, subfolder='mutations/snpeffed')
    output_file = job.fileStore.writeGlobalFile(snpeff_file.name)
    return output_file
Example no. 40
def run_rsem(job, rna_bam, univ_options, rsem_options):
    """
    This module will run rsem on the RNA Bam file.

    ARGUMENTS
    1. rna_bam: <JSid of rnaAligned.toTranscriptome.out.bam>
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    3. rsem_options: Dict of parameters specific to rsem
         rsem_options
              |- 'tool_index': <JSid for the rsem index tarball>
              +- 'n': <number of threads to allocate>

    RETURN VALUES
    1. output_file: <Jsid of rsem.isoforms.results>

    This module corresponds to node 9 on the tree
    """
    job.fileStore.logToMaster('Running rsem on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'star_transcriptome.bam': rna_bam,
        'rsem_index.tar.gz': rsem_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    input_files['rsem_index'] = untargz(input_files['rsem_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    print(os.listdir('.'), file=sys.stderr)
    parameters = ['--paired-end',
                  '-p', str(rsem_options['n']),
                  '--bam',
                  input_files['star_transcriptome.bam'],
                  '--no-bam-output',
                  '/'.join([input_files['rsem_index'], 'hg19']),
                  'rsem']
    print(parameters, file=sys.stderr)
    docker_call(tool='rsem', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    print(os.listdir('.'), file=sys.stderr)
    output_files = {}
    for filename in ('rsem.genes.results', 'rsem.isoforms.results'):
        output_files[filename] = job.fileStore.writeGlobalFile('/'.join([work_dir, filename]))
        export_results(job, '/'.join([work_dir, filename]), univ_options, subfolder='expression')
    return output_files
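Downstream modules usually want expression values out of rsem.isoforms.results. A hedged parsing sketch, assuming RSEM's standard tab-separated layout with transcript_id and TPM header columns:

import csv

def read_isoform_tpm(results_path):
    """Map transcript_id -> TPM from an RSEM isoforms results file."""
    with open(results_path) as results:
        reader = csv.DictReader(results, delimiter='\t')
        return {row['transcript_id']: float(row['TPM']) for row in reader}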
Example no. 41
def run_strelka_full(job, tumor_bam, normal_bam, univ_options, strelka_options):
    """
    This module will run strelka on the DNA bams.

    ARGUMENTS
    :param dict tumor_bam: REFER ARGUMENTS of spawn_strelka()
    :param dict normal_bam: REFER ARGUMENTS of spawn_strelka()
    :param dict univ_options: REFER ARGUMENTS of spawn_strelka()
    :param dict strelka_options: REFER ARGUMENTS of spawn_strelka()

    RETURN VALUES
    :returns: dict of output vcfs for each chromosome
    :rtype: dict
    """
    job.fileStore.logToMaster('Running strelka on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': strelka_options['genome_fasta'],
        'genome.fa.fai.tar.gz': strelka_options['genome_fai'],
        'config.ini.tar.gz': strelka_options['strelka_config']
    }
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai', 'config.ini'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = [input_files['config.ini'],
                  input_files['tumor.bam'],
                  input_files['normal.bam'],
                  input_files['genome.fa'],
                  str(job.cores)
                  ]
    docker_call(tool='strelka', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_dict = {}
    for mutation_type in ['snvs', 'indels']:
        output_dict[mutation_type] = job.fileStore.writeGlobalFile(os.path.join(
            work_dir, 'strelka_out', 'results', 'passed.somatic.' + mutation_type + '.vcf'))
    return output_dict
Example no. 42
def boost_ranks(job, isoform_expression, merged_mhc_calls, transgene_out, univ_options,
                rank_boost_options):
    """
    This is the final module in the pipeline.  It will call the rank boosting R
    script.

    This module corresponds to node 21 in the tree
    """
    job.fileStore.logToMaster('Running boost_ranks on %s' % univ_options['patient'])
    work_dir = os.path.abspath(univ_options['patient'])
    os.mkdir(work_dir)
    input_files = {
        'rsem_quant.tsv': isoform_expression,
        'mhci_merged_files.tsv': merged_mhc_calls['mhci_merged_files.list'],
        'mhcii_merged_files.tsv': merged_mhc_calls['mhcii_merged_files.list'],
        'mhci_peptides.faa': transgene_out['transgened_tumor_10_mer_snpeffed.faa'],
        'mhcii_peptides.faa': transgene_out['transgened_tumor_15_mer_snpeffed.faa']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    output_files = {}
    for mhc in ('mhci', 'mhcii'):
        parameters = [mhc,
                      input_files[''.join([mhc, '_merged_files.tsv'])],
                      input_files['rsem_quant.tsv'],
                      input_files[''.join([mhc, '_peptides.faa'])],
                      rank_boost_options[''.join([mhc, '_combo'])]
                      ]
        docker_call(tool='rankboost', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'])
        mhc_concise = ''.join([work_dir, '/', mhc, '_merged_files_concise_results.tsv'])
        mhc_detailed = ''.join([work_dir, '/', mhc, '_merged_files_detailed_results.tsv'])
        if os.path.exists(mhc_concise):
            output_files[os.path.basename(mhc_concise)] = job.fileStore.writeGlobalFile(mhc_concise)
            export_results(job, mhc_concise, univ_options, subfolder='rankboost')
        else:
            output_files[os.path.basename(mhc_concise)] = None
        if os.path.exists(mhc_detailed):
            output_files[os.path.basename(mhc_detailed)] = \
                job.fileStore.writeGlobalFile(mhc_detailed)
            export_results(job, mhc_detailed, univ_options, subfolder='rankboost')
        else:
            output_files[os.path.basename(mhc_detailed)] = None
    return output_files
Example no. 43
def run_muse_perchrom(job, tumor_bam, normal_bam, univ_options, muse_options, chrom):
    """
    This module will run MuSE on the DNA bams.

    ARGUMENTS
    1. tumor_bam: REFER ARGUMENTS of spawn_muse()
    2. normal_bam: REFER ARGUMENTS of spawn_muse()
    3. univ_options: REFER ARGUMENTS of spawn_muse()
    4. muse_options: REFER ARGUMENTS of spawn_muse()
    5. chrom: String containing the chromosome name with 'chr' prefixed

    RETURN VALUES
    1. output_files: <JSid for CHROM.MuSe.txt>

    This module corresponds to node 12 on the tree
    """
    job.fileStore.logToMaster('Running muse on %s:%s' % (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': muse_options['genome_fasta'],
        'genome.fa.fai.tar.gz': muse_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    output_prefix = os.path.join(work_dir, chrom)

    parameters = ['call',
                  '-f', input_files['genome.fa'],
                  '-r', chrom,
                  '-O', docker_path(output_prefix),
                  input_files['tumor.bam'],
                  input_files['normal.bam']]
    docker_call(tool='muse', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    outfile = job.fileStore.writeGlobalFile(''.join([output_prefix, '.MuSE.txt']))
    return outfile
Example no. 44
def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
    """
    This module will predict MHC:peptide binding for peptides in the files created in node YY to
    ALLELE.  ALLELE represents an MHCII allele.

    The module returns (PREDFILE, PREDICTOR) where PREDFILE contains the predictions and PREDICTOR
    is the predictor used (Consensus, NetMHCIIpan, or Sturniolo).

    This module corresponds to node 19 on the tree
    """
    job.fileStore.logToMaster('Running mhcii on %s:%s' % (univ_options['patient'], allele))
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = [mhcii_options['pred'],
                  allele,
                  input_files['peptfile.faa']]
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhcii', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True)
    run_netmhciipan = True
    predictor = None
    with open(predfile.name, 'r') as predfile:
        for line in predfile:
            if not line.startswith('HLA'):
                continue
            if line.strip().split('\t')[5] == 'NetMHCIIpan':
                break
            # If the predictor type is Sturniolo, it needs to be processed differently
            elif line.strip().split('\t')[5] == 'Sturniolo':
                predictor = 'Sturniolo'
            else:
                predictor = 'Consensus'
            run_netmhciipan = False
            break
    if run_netmhciipan:
        netmhciipan = job.addChildJobFn(predict_netmhcii_binding, peptfile, allele, univ_options,
                                        disk='100M', memory='100M', cores=1)
        return netmhciipan.rv()
    else:
        output_file = job.fileStore.writeGlobalFile(predfile.name)
        return output_file, predictor
Example no. 45
def run_somaticsniper_full(job, tumor_bam, normal_bam, univ_options, somaticsniper_options):
    """
    This module will run somaticsniper on the DNA bams.

    ARGUMENTS
    :param dict tumor_bam: REFER ARGUMENTS of spawn_somaticsniper()
    :param dict normal_bam: REFER ARGUMENTS of spawn_somaticsniper()
    :param dict univ_options: REFER ARGUMENTS of spawn_somaticsniper()
    :param dict somaticsniper_options: REFER ARGUMENTS of spawn_somaticsniper()

    RETURN VALUES
    :returns: dict of output vcfs for each chromosome
    :rtype: dict
    """
    job.fileStore.logToMaster('Running somaticsniper on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': somaticsniper_options['genome_fasta'],
        'genome.fa.fai.tar.gz': somaticsniper_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    output_file = os.path.join(work_dir, 'somatic-sniper_full.vcf')
    parameters = ['-f', input_files['genome.fa'],
                  '-F', 'vcf',
                  '-G',
                  '-L',
                  '-q', '1',
                  '-Q', '15',
                  input_files['tumor.bam'],
                  input_files['normal.bam'],
                  docker_path(output_file)]
    docker_call(tool='somaticsniper', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    outfile = job.fileStore.writeGlobalFile(output_file)
    return outfile
Example no. 46
def fix_bam_header(job, bamfile, sample_type, univ_options):
    """
    This module modifies the header in BAMFILE.

    ARGUMENTS
    1. bamfile: <JSid for a bam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa()
    """
    job.fileStore.logToMaster('Running reheader on %s:%s' % (univ_options['patient'], sample_type))
    work_dir = os.getcwd()
    input_files = {
        sample_type + '_aligned.bam': bamfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = ['view',
                  '-H',
                  input_files[sample_type + '_aligned.bam']]
    with open('/'.join([work_dir, sample_type + '_aligned_bam.header']), 'w') as headerfile:
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=headerfile)
    with open(headerfile.name, 'r') as headerfile, \
            open('/'.join([work_dir, sample_type + '_output_bam.header']), 'w') as outheaderfile:
        for line in headerfile:
            if line.startswith('@PG'):
                line = '\t'.join([x for x in line.strip().split('\t') if not x.startswith('CL')])
            print(line.strip(), file=outheaderfile)
    parameters = ['reheader',
                  docker_path(outheaderfile.name),
                  input_files[sample_type + '_aligned.bam']]
    with open('/'.join([work_dir, sample_type + '_aligned_fixPG.bam']), 'w') as fixpg_bamfile:
        docker_call(tool='samtools', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=fixpg_bamfile)
    output_file = job.fileStore.writeGlobalFile(fixpg_bamfile.name)
    # The old bam file is now useless.
    job.fileStore.deleteGlobalFile(bamfile)
    return output_file
Example no. 47
def predict_mhci_binding(job, peptfile, allele, peplen, univ_options,
                         mhci_options):
    """
    This module will predict MHC:peptide binding for peptides in the files created in node XX to
    ALLELE.  ALLELE represents an MHCI allele.

    This module corresponds to node 18 on the tree
    """
    job.fileStore.logToMaster('Running mhci on %s:%s:%s' % (univ_options['patient'], allele,
                                                            peplen))
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = [mhci_options['pred'],
                  allele,
                  peplen,
                  input_files['peptfile.faa']]
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhci', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True)
    output_file = job.fileStore.writeGlobalFile(predfile.name)
    return output_file
Example no. 48
def add_readgroups(job, bamfile, sample_type, univ_options):
    """
    This module adds the appropriate read groups to the bam file
    ARGUMENTS
    1. bamfile: <JSid for a bam file>
    2. sample_type: string of 'tumor_dna' or 'normal_dna'
    3. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                |- 'dockerhub': <dockerhub to use>
                +- 'java_Xmx': value for max heap passed to java
    RETURN VALUES
    1. output_files: REFER output_files in run_bwa()
    """
    job.fileStore.logToMaster('Running add_read_groups on %s:%s' % (univ_options['patient'],
                                                                    sample_type))
    work_dir = os.getcwd()
    input_files = {
        sample_type + '_aligned_fixpg.bam': bamfile}
    get_files_from_filestore(job, input_files, work_dir, docker=True)
    parameters = ['AddOrReplaceReadGroups',
                  'CREATE_INDEX=false',
                  'I=/data/' + sample_type + '_aligned_fixpg.bam',
                  'O=/data/' + sample_type + '_aligned_fixpg_sorted_reheader.bam',
                  'SO=coordinate',
                  'ID=1',
                  ''.join(['LB=', univ_options['patient']]),
                  'PL=ILLUMINA',
                  'PU=12345',
                  ''.join(['SM=', sample_type.rstrip('_dna')])]
    docker_call(tool='picard', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], java_opts=univ_options['java_Xmx'])
    output_file = job.fileStore.writeGlobalFile(
        '/'.join([work_dir, sample_type + '_aligned_fixpg_sorted_reheader.bam']))
    # Delete the old bam file
    job.fileStore.deleteGlobalFile(bamfile)
    return output_file
Example no. 49
def filter_somaticsniper(job, tumor_bam, somaticsniper_output, tumor_pileup, univ_options,
                         somaticsniper_options):
    """
    This module will filter the somaticsniper output for a single chromosome.

    :param toil.Job job: Job
    :param dict tumor_bam: Tumor bam file and its bai
    :param str somaticsniper_output: jsID from somatic sniper
    :param str tumor_pileup: jsID for the pileup file for this chromosome
    :param dict univ_options: Universal options
    :param dict somaticsniper_options: Options specific to Somatic Sniper
    :returns: filtered chromosome vcf
    :rtype: str
    """
    job.fileStore.logToMaster('Filtering somaticsniper for %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'input.vcf': somaticsniper_output,
        'pileup.txt': tumor_pileup,
        'genome.fa.tar.gz': somaticsniper_options['genome_fasta'],
        'genome.fa.fai.tar.gz': somaticsniper_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    # Run snpfilter.pl
    parameters = ['snpfilter.pl',
                  '--snp-file', input_files['input.vcf'],
                  '--indel-file', input_files['pileup.txt']]
    # Creates /data/input.vcf.SNPfilter
    docker_call(tool='somaticsniper-addons', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])

    # Run prepare_for_readcount.pl
    parameters = ['prepare_for_readcount.pl',
                  '--snp-file', input_files['input.vcf'] + '.SNPfilter']
    # Creates /data/input.vcf.SNPfilter.pos
    docker_call(tool='somaticsniper-addons', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])

    # Run  bam-readcount
    parameters = ['-b', '15',
                  '-f', input_files['genome.fa'],
                  '-l', input_files['input.vcf'] + '.SNPfilter.pos',
                  '-w', '1',
                  input_files['tumor.bam']]
    # Creates the read counts file
    with open(os.path.join(work_dir, 'readcounts.txt'), 'w') as readcounts_file:
        docker_call(tool='bam-readcount', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=readcounts_file)

    # Run fpfilter.pl
    parameters = ['fpfilter.pl',
                  '--snp-file', input_files['input.vcf'] + '.SNPfilter',
                  '--readcount-file', docker_path(readcounts_file.name)]

    # Creates input.vcf.SNPfilter.fp_pass and input.vcf.SNPfilter.fp_fail
    docker_call(tool='somaticsniper-addons', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])

    # Run highconfidence.pl
    parameters = ['highconfidence.pl',
                  '--snp-file', input_files['input.vcf'] + '.SNPfilter.fp_pass']

    # Creates input.vcf.SNPfilter.fp_pass.hc
    docker_call(tool='somaticsniper-addons', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])

    outfile = job.fileStore.writeGlobalFile(os.path.join(os.getcwd(),
                                                         'input.vcf.SNPfilter.fp_pass.hc'))
    return outfile
Example no. 50
def run_mutect_perchrom(job, tumor_bam, normal_bam, univ_options, mutect_options, chrom):
    """
    This module will run MuTect on the DNA bams.

    ARGUMENTS
    1. tumor_bam: REFER ARGUMENTS of spawn_mutect()
    2. normal_bam: REFER ARGUMENTS of spawn_mutect()
    3. univ_options: REFER ARGUMENTS of spawn_mutect()
    4. mutect_options: REFER ARGUMENTS of spawn_mutect()
    5. chrom: String containing the chromosome name with 'chr' prefixed

    RETURN VALUES
    1. output_files: Dict of results of mutect for chromosome
            output_files
              |- 'mutect_CHROM.vcf': <JSid>
              +- 'mutect_CHROM.out': <JSid>

    This module corresponds to node 12 on the tree
    """
    job.fileStore.logToMaster('Running mutect on %s:%s' % (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'tumor.bam': tumor_bam['tumor_dna_fix_pg_sorted.bam'],
        'tumor.bam.bai': tumor_bam['tumor_dna_fix_pg_sorted.bam.bai'],
        'normal.bam': normal_bam['normal_dna_fix_pg_sorted.bam'],
        'normal.bam.bai': normal_bam['normal_dna_fix_pg_sorted.bam.bai'],
        'genome.fa.tar.gz': mutect_options['genome_fasta'],
        'genome.fa.fai.tar.gz': mutect_options['genome_fai'],
        'genome.dict.tar.gz': mutect_options['genome_dict'],
        'cosmic.vcf.tar.gz': mutect_options['cosmic_vcf'],
        'cosmic.vcf.idx.tar.gz': mutect_options['cosmic_idx'],
        'dbsnp.vcf.gz': mutect_options['dbsnp_vcf'],
        'dbsnp.vcf.idx.tar.gz': mutect_options['dbsnp_idx']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)
    # dbsnp.vcf should be bgzipped, but all others should be tar.gz'd
    input_files['dbsnp.vcf'] = gunzip(input_files['dbsnp.vcf.gz'])
    for key in ('genome.fa', 'genome.fa.fai', 'genome.dict', 'cosmic.vcf', 'cosmic.vcf.idx',
                'dbsnp.vcf.idx'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    mutout = ''.join([work_dir, '/', chrom, '.out'])
    mutvcf = ''.join([work_dir, '/', chrom, '.vcf'])
    parameters = ['-R', input_files['genome.fa'],
                  '--cosmic', input_files['cosmic.vcf'],
                  '--dbsnp', input_files['dbsnp.vcf'],
                  '--input_file:normal', input_files['normal.bam'],
                  '--input_file:tumor', input_files['tumor.bam'],
                  # '--tumor_lod', str(10),
                  # '--initial_tumor_lod', str(4.0),
                  '-L', chrom,
                  '--out', docker_path(mutout),
                  '--vcf', docker_path(mutvcf)
                  ]
    print(parameters, file=sys.stderr)
    java_xmx = mutect_options['java_Xmx'] if mutect_options['java_Xmx'] \
        else univ_options['java_Xmx']
    docker_call(tool='mutect:1.1.7', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'], java_opts=java_xmx)
    export_results(job, mutvcf, univ_options, subfolder='mutations/mutect')
    output_file = job.fileStore.writeGlobalFile(mutvcf)
    return output_file
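A perchrom caller like this one is normally followed by a merge step that concatenates the chromosome vcfs while writing the header block only once. A generic sketch of that step (not ProTECT's actual merge job):

def merge_perchrom_vcfs(vcf_paths, merged_path):
    """Concatenate per-chromosome vcfs, keeping a single header block."""
    with open(merged_path, 'w') as merged:
        for index, vcf_path in enumerate(vcf_paths):
            with open(vcf_path) as vcf:
                for line in vcf:
                    if index and line.startswith('#'):
                        continue  # header lines already copied from the first vcf
                    merged.write(line)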
Example no. 51
def run_star(job, fastqs, univ_options, star_options):
    """
    This module uses STAR to align the RNA fastqs to the reference.

    ARGUMENTS
    1. fastqs: REFER RETURN VALUE of run_cutadapt()
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
              +- 'dockerhub': <dockerhub to use>
    3. star_options: Dict of parameters specific to STAR
         star_options
             |- 'tool_index': <JSid for the STAR index tarball>
             +- 'n': <number of threads to allocate>
    RETURN VALUES
    1. output_files: Dict of aligned bams
         output_files
             |- 'rnaAligned.toTranscriptome.out.bam': <JSid>
             +- 'rnaAligned.sortedByCoord.out.bam': Dict of genome bam + bai
                                |- 'rna_fix_pg_sorted.bam': <JSid>
                                +- 'rna_fix_pg_sorted.bam.bai': <JSid>

    This module corresponds to node 9 on the tree
    """
    assert star_options['type'] in ('star', 'starlong')
    job.fileStore.logToMaster('Running STAR on %s' % univ_options['patient'])
    work_dir = os.getcwd()
    input_files = {
        'rna_cutadapt_1.fastq': fastqs[0],
        'rna_cutadapt_2.fastq': fastqs[1],
        'star_index.tar.gz': star_options['tool_index']}
    input_files = get_files_from_filestore(job, input_files, work_dir,
                                           docker=False)
    # Handle gzipped file
    gz = '.gz' if is_gzipfile(input_files['rna_cutadapt_1.fastq']) else ''
    if gz:
        for read_file in 'rna_cutadapt_1.fastq', 'rna_cutadapt_2.fastq':
            os.symlink(read_file, read_file + gz)
            input_files[read_file + gz] = input_files[read_file] + gz
    # Untar the index
    input_files['star_index'] = untargz(input_files['star_index.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    parameters = ['--runThreadN', str(star_options['n']),
                  '--genomeDir', input_files['star_index'],
                  '--outFileNamePrefix', 'rna',
                  '--readFilesIn',
                  input_files['rna_cutadapt_1.fastq' + gz],
                  input_files['rna_cutadapt_2.fastq' + gz],
                  '--outSAMattributes', 'NH', 'HI', 'AS', 'NM', 'MD',
                  '--outSAMtype', 'BAM', 'SortedByCoordinate',
                  '--quantMode', 'TranscriptomeSAM']
    if gz:
        parameters.extend(['--readFilesCommand', 'zcat'])
    if star_options['type'] == 'star':
        docker_call(tool='star', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'])
    else:
        docker_call(tool='starlong', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'])
    output_files = {}
    for bam_file in ['rnaAligned.toTranscriptome.out.bam',
                     'rnaAligned.sortedByCoord.out.bam']:
        output_files[bam_file] = job.fileStore.writeGlobalFile('/'.join([
            work_dir, bam_file]))
    return output_files
Example no. 52
def run_radia_perchrom(job, bams, univ_options, radia_options, chrom):
    """
    This module will run RADIA on the RNA and DNA bams.

    ARGUMENTS
    1. bams: Dict of bams and their indexes
        bams
         |- 'tumor_rna': <JSid>
         |- 'tumor_rnai': <JSid>
         |- 'tumor_dna': <JSid>
         |- 'tumor_dnai': <JSid>
         |- 'normal_dna': <JSid>
         +- 'normal_dnai': <JSid>
    2. univ_options: Dict of universal arguments used by almost all tools
         univ_options
                +- 'dockerhub': <dockerhub to use>
    3. radia_options: Dict of parameters specific to radia
         radia_options
              |- 'dbsnp_vcf': <JSid for dnsnp vcf file>
              +- 'genome': <JSid for genome fasta file>
    4. chrom: String containing chromosome name with chr appended

    RETURN VALUES
    1. Dict of filtered radia output vcf and logfile (Nested return)
        |- 'radia_filtered_CHROM.vcf': <JSid>
        +- 'radia_filtered_CHROM_radia.log': <JSid>
    """
    job.fileStore.logToMaster('Running radia on %s:%s' % (univ_options['patient'], chrom))
    work_dir = os.getcwd()
    input_files = {
        'rna.bam': bams['tumor_rna'],
        'rna.bam.bai': bams['tumor_rnai'],
        'tumor.bam': bams['tumor_dna'],
        'tumor.bam.bai': bams['tumor_dnai'],
        'normal.bam': bams['normal_dna'],
        'normal.bam.bai': bams['normal_dnai'],
        'genome.fa.tar.gz': radia_options['genome_fasta'],
        'genome.fa.fai.tar.gz': radia_options['genome_fai']}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=False)

    for key in ('genome.fa', 'genome.fa.fai'):
        input_files[key] = untargz(input_files[key + '.tar.gz'], work_dir)
    input_files = {key: docker_path(path) for key, path in input_files.items()}

    radia_output = ''.join([work_dir, '/radia_', chrom, '.vcf'])
    radia_log = ''.join([work_dir, '/radia_', chrom, '_radia.log'])
    parameters = [univ_options['patient'],  # shortID
                  chrom,
                  '-n', input_files['normal.bam'],
                  '-t', input_files['tumor.bam'],
                  '-r', input_files['rna.bam'],
                  ''.join(['--rnaTumorFasta=', input_files['genome.fa']]),
                  '-f', input_files['genome.fa'],
                  '-o', docker_path(radia_output),
                  '-i', 'hg19_M_rCRS',
                  '-m', input_files['genome.fa'],
                  '-d', '*****@*****.**',
                  '-q', 'Illumina',
                  '--disease', 'CANCER',
                  '-l', 'INFO',
                  '-g', docker_path(radia_log)]
    docker_call(tool='radia', tool_parameters=parameters, work_dir=work_dir,
                dockerhub=univ_options['dockerhub'])
    output_file = job.fileStore.writeGlobalFile(radia_output)
    return output_file