# Example 1
def predict_mhci_binding(job, peptfile, allele, peplen, univ_options, mhci_options):
    """
    Run the IEDB MHCI binding predictor over every peptide in `peptfile` against `allele`.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param str peplen: Length of peptides to process
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhci_options: Options specific to mhci binding prediction
    :return: fsID for file containing the predictions
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster(
        'Running mhci on %s:%s:%s' % (univ_options['patient'], allele, peplen))
    work_dir = os.getcwd()
    fetched = get_files_from_filestore(job, {'peptfile.faa': peptfile}, work_dir, docker=True)
    # Nothing to predict on: hand back an empty file so downstream merging still works.
    if not read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa')):
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile())
    pred_args = [mhci_options['pred'],
                 allele,
                 peplen,
                 fetched['peptfile.faa']]
    pred_path = os.path.join(work_dir, 'predictions.tsv')
    # The IEDB tool prints its predictions on stdout; capture them into predictions.tsv.
    with open(pred_path, 'w') as predfile:
        docker_call(tool='mhci', tool_parameters=pred_args, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhci_options['version'])
    return job.fileStore.writeGlobalFile(predfile.name)
# Example 2
def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
    """
    Run the IEDB MHCII binding predictor over every peptide in `peptfile` against `allele`,
    spawning a standalone netMHCIIpan child job when IEDB defers to NetMHCIIpan.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhcii_options: Options specific to mhcii binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used
    :rtype: tuple(toil.fileStore.FileID, str|None)
    """
    work_dir = os.getcwd()
    fetched = get_files_from_filestore(job, {'peptfile.faa': peptfile}, work_dir, docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    pred_args = [mhcii_options['pred'],
                 allele,
                 fetched['peptfile.faa']]
    # Nothing to predict on: hand back an empty file and no predictor.
    if not peptides:
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile()), None
    pred_path = os.path.join(work_dir, 'predictions.tsv')
    with open(pred_path, 'w') as predfile:
        docker_call(tool='mhcii', tool_parameters=pred_args, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhcii_options['version'])
    # Peek at the first result row to learn which method IEDB actually used.
    predictor = None
    run_netmhciipan = True
    with open(pred_path, 'r') as results:
        for line in results:
            if not line.startswith('HLA'):
                continue
            method = line.strip().split('\t')[5]
            if method == 'NetMHCIIpan':
                # IEDB deferred to NetMHCIIpan; rerun with the standalone tool below.
                break
            # Sturniolo output is processed differently downstream, so record which it was.
            predictor = 'Sturniolo' if method == 'Sturniolo' else 'Consensus'
            run_netmhciipan = False
            break
    if run_netmhciipan:
        netmhciipan = job.addChildJobFn(predict_netmhcii_binding, peptfile, allele, univ_options,
                                        mhcii_options['netmhciipan'], disk='100M', memory='100M',
                                        cores=1)
        job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                                  % (univ_options['patient'], allele))
        return netmhciipan.rv()
    output_file = job.fileStore.writeGlobalFile(pred_path)
    job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                              % (univ_options['patient'], allele))
    return output_file, predictor
# Example 3
def predict_netmhcii_binding(job, peptfile, allele, univ_options,
                             netmhciipan_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using netMHCIIpan.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict netmhciipan_options: Options specific to netmhciipan binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used
             ('netMHCIIpan', or None if there were no peptides to process)
    :rtype: tuple(toil.fileStore.FileID, str|None)
    :raises RuntimeError: if `allele` is not an HLA-DQA/DPA/DRB allele
    """
    work_dir = os.getcwd()
    input_files = {'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job,
                                           input_files,
                                           work_dir,
                                           docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    if not peptides:
        # Nothing to predict on: return an empty file and no predictor.
        return job.fileStore.writeGlobalFile(
            job.fileStore.getLocalTempFile()), None
    # netMHCIIpan accepts differently formatted alleles so we need to modify the input alleles
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        allele = re.sub(r'[*:]', '', allele)
        allele = re.sub(r'/', '-', allele)
    elif allele.startswith('HLA-DRB'):
        allele = re.sub(r':', '', allele)
        allele = re.sub(r'\*', '_', allele)
        # Drop the literal 'HLA-' prefix.  The previous allele.lstrip('HLA-') was a
        # str.lstrip misuse: it strips any leading run of the characters {H, L, A, -},
        # not the prefix string, and only worked by luck because 'D' follows the prefix.
        allele = allele[len('HLA-'):]
    else:
        raise RuntimeError('Unknown allele seen')
    parameters = [
        '-a', allele, '-xls', '1', '-xlsfile', 'predictions.tsv', '-f',
        input_files['peptfile.faa']
    ]
    # netMHC writes a lot of useless stuff to sys.stdout so we open /dev/null and dump output there.
    with open(os.devnull, 'w') as output_catcher:
        docker_call(tool='netmhciipan',
                    tool_parameters=parameters,
                    work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'],
                    outfile=output_catcher,
                    tool_version=netmhciipan_options['version'])
    # The '-xlsfile' option above makes the tool write its table to predictions.tsv.
    output_file = job.fileStore.writeGlobalFile('/'.join(
        [work_dir, 'predictions.tsv']))

    job.fileStore.logToMaster('Ran netmhciipan on %s successfully' % allele)
    return output_file, 'netMHCIIpan'
# Example 4
def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files,
                            univ_options):
    """
    Merge all the calls generated by spawn_antigen_predictors.

    :param dict antigen_predictions: The return value from running :meth:`spawn_antigen_predictors`
    :param dict transgened_files: The transgened peptide files
    :param dict univ_options: Universal options for ProTECT
    :return: merged binding predictions
             output_files:
                 |- 'mhcii_merged_files.list': fsID
                 +- 'mhci_merged_files.list': fsID
    :rtype: dict
    :raises RuntimeError: if neither the MHCI nor the MHCII predictions yielded any calls
    """
    job.fileStore.logToMaster('Merging MHC calls')
    work_dir = os.getcwd()
    # 10-mers feed the MHCI merge, 15-mers the MHCII merge; the .map files map
    # peptide names back to their source records.
    pept_files = {
        '10_mer.faa':
        transgened_files['transgened_tumor_10_mer_snpeffed.faa'],
        '10_mer.faa.map':
        transgened_files['transgened_tumor_10_mer_snpeffed.faa.map'],
        '15_mer.faa':
        transgened_files['transgened_tumor_15_mer_snpeffed.faa'],
        '15_mer.faa.map':
        transgened_files['transgened_tumor_15_mer_snpeffed.faa.map']
    }
    pept_files = get_files_from_filestore(job, pept_files, work_dir)
    mhci_preds, mhcii_preds = antigen_predictions

    # Track whether either arm produced at least one usable call (checked at the end).
    mhci_called = mhcii_called = False
    # Merge MHCI calls
    # Read 10-mer pepts into memory
    peptides = read_peptide_file(pept_files['10_mer.faa'])
    with open(pept_files['10_mer.faa.map'], 'r') as mapfile:
        pepmap = json.load(mapfile)
    with open('/'.join([work_dir, 'mhci_merged_files.list']),
              'w') as mhci_resfile:
        for key in mhci_preds:
            tumor_file = job.fileStore.readGlobalFile(mhci_preds[key]['tumor'])
            # NOTE(review): eval() on file contents before pandas.read_json.  The file is
            # pipeline-internal, but eval on anything externally writable is unsafe --
            # confirm the stored format and whether json.loads would suffice.
            with open(tumor_file) as t_f:
                tumor_df = pandas.read_json(eval(t_f.read()))
            if tumor_df.empty:
                continue
            mhci_called = True
            # TODO: There must be a better way of doing this
            normal_df = _process_mhci(job.fileStore.readGlobalFile(
                mhci_preds[key]['normal']),
                                      normal=True)
            # Look up each tumor row's matched normal peptide prediction by peptide sequence.
            normal_dict = normal_df.set_index('pept')['tumor_pred']
            normal_preds = [
                normal_dict[x] for x in list(tumor_df['normal_pept'])
            ]
            tumor_df['normal_pred'] = normal_preds
            for pred in tumor_df.itertuples():
                print_mhc_peptide(pred, peptides, pepmap, mhci_resfile)
    # Merge MHCII calls
    # read 15-mer pepts into memory
    peptides = read_peptide_file(pept_files['15_mer.faa'])
    with open(pept_files['15_mer.faa.map'], 'r') as mapfile:
        pepmap = json.load(mapfile)
    # Incorporate peptide names into the merged calls
    with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \
            mhcii_resfile:
        for key in mhcii_preds:
            # A None predictor means the upstream job had no peptides to process.
            if mhcii_preds[key]['predictor'] is None:
                continue
            # NOTE(review): mhcii_called is set BEFORE the empty-dataframe check, unlike
            # the MHCI loop above which sets mhci_called only after it -- confirm the
            # asymmetry is intended.
            mhcii_called = True
            tumor_file = job.fileStore.readGlobalFile(
                mhcii_preds[key]['tumor'])
            # NOTE(review): same eval()-then-read_json pattern as the MHCI loop above.
            with open(tumor_file) as t_f:
                tumor_df = pandas.read_json(eval(t_f.read()))
            if tumor_df.empty:
                continue
            # TODO: There must be a better way of doing this
            # Each MHCII predictor emits a different table layout, so dispatch on the
            # predictor recorded by the upstream job.
            if mhcii_preds[key]['predictor'] == 'Consensus':
                normal_df = _process_consensus_mhcii(
                    job.fileStore.readGlobalFile(
                        mhcii_preds[key]['normal'][0]),
                    normal=True)
            elif mhcii_preds[key]['predictor'] == 'Sturniolo':
                normal_df = _process_sturniolo_mhcii(
                    job.fileStore.readGlobalFile(
                        mhcii_preds[key]['normal'][0]),
                    normal=True)
            elif mhcii_preds[key]['predictor'] == 'netMHCIIpan':
                normal_df = _process_net_mhcii(job.fileStore.readGlobalFile(
                    mhcii_preds[key]['normal'][0]),
                                               normal=True)
            else:
                # Unreachable unless an upstream job reports an unknown predictor name.
                assert False
            normal_dict = normal_df.set_index('pept')['tumor_pred']
            normal_preds = [
                normal_dict[x] for x in list(tumor_df['normal_pept'])
            ]
            tumor_df['normal_pred'] = normal_preds
            for pred in tumor_df.itertuples():
                print_mhc_peptide(
                    pred,
                    peptides,
                    pepmap,
                    mhcii_resfile,
                    netmhc=mhcii_preds[key]['predictor'] == 'netMHCIIpan')
    if not (mhci_called or mhcii_called):
        raise RuntimeError('No peptides available for ranking')
    # NOTE(review): defaultdict() with no factory behaves like a plain dict here.
    output_files = defaultdict()
    # The handles are closed, but .name (the path) is still valid for upload/export.
    for mhc_file in [mhci_resfile.name, mhcii_resfile.name]:
        output_files[os.path.split(mhc_file)
                     [1]] = job.fileStore.writeGlobalFile(mhc_file)
        export_results(job,
                       output_files[os.path.split(mhc_file)[1]],
                       mhc_file,
                       univ_options,
                       subfolder='binding_predictions')

    return output_files