def predict_mhci_binding(job, peptfile, allele, peplen, univ_options, mhci_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhci binding
    prediction tool.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param str peplen: Length of peptides to process
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhci_options: Options specific to mhci binding prediction
    :return: fsID for file containing the predictions
    :rtype: toil.fileStore.FileID
    """
    job.fileStore.logToMaster('Running mhci on %s:%s:%s' % (univ_options['patient'],
                                                            allele, peplen))
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    # Reuse work_dir (already the cwd) instead of calling os.getcwd() again.
    peptides = read_peptide_file(os.path.join(work_dir, 'peptfile.faa'))
    if not peptides:
        # No peptides to predict on: return an empty result file so downstream
        # merging still has something to read.
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile())
    parameters = [mhci_options['pred'],
                  allele,
                  peplen,
                  input_files['peptfile.faa']]
    # The IEDB tool writes its predictions to stdout; capture them into predictions.tsv.
    # os.path.join replaces the hand-rolled '/'.join path construction.
    with open(os.path.join(work_dir, 'predictions.tsv'), 'w') as predfile:
        docker_call(tool='mhci', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhci_options['version'])
    # The with-block has closed (flushed) predfile before it is written to the file store.
    output_file = job.fileStore.writeGlobalFile(predfile.name)
    return output_file
def predict_mhcii_binding(job, peptfile, allele, univ_options, mhcii_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using the IEDB mhcii binding
    prediction tool.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict mhcii_options: Options specific to mhcii binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used
    :rtype: tuple(toil.fileStore.FileID, str|None)
    """
    work_dir = os.getcwd()
    input_files = {
        'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    peptides = read_peptide_file(os.path.join(os.getcwd(), 'peptfile.faa'))
    parameters = [mhcii_options['pred'], allele, input_files['peptfile.faa']]
    if not peptides:
        # No peptides to process: hand back an empty file and no predictor.
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile()), None
    # The IEDB tool writes its predictions to stdout; capture them into predictions.tsv.
    with open('/'.join([work_dir, 'predictions.tsv']), 'w') as predfile:
        docker_call(tool='mhcii', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=predfile, interactive=True,
                    tool_version=mhcii_options['version'])
    run_netmhciipan = True
    predictor = None
    # Inspect the first prediction row (column 6 names the method the IEDB consensus
    # tool actually used) to decide whether we must rerun with standalone netMHCIIpan.
    # NOTE: `predfile` is deliberately rebound here to a read handle on the same file;
    # its .name is still valid after this block for writeGlobalFile below.
    with open(predfile.name, 'r') as predfile:
        for line in predfile:
            if not line.startswith('HLA'):
                continue
            if line.strip().split('\t')[5] == 'NetMHCIIpan':
                # IEDB fell back to NetMHCIIpan internally; rerun with the standalone
                # tool instead (run_netmhciipan stays True).
                break
            # If the predictor type is sturniolo then it needs to be processed differently
            elif line.strip().split('\t')[5] == 'Sturniolo':
                predictor = 'Sturniolo'
            else:
                predictor = 'Consensus'
            run_netmhciipan = False
            break
    if run_netmhciipan:
        netmhciipan = job.addChildJobFn(predict_netmhcii_binding, peptfile, allele, univ_options,
                                        mhcii_options['netmhciipan'], disk='100M', memory='100M',
                                        cores=1)
        job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                                  % (univ_options['patient'], allele))
        # Child job returns the (fsID, predictor) tuple on our behalf.
        return netmhciipan.rv()
    else:
        output_file = job.fileStore.writeGlobalFile(predfile.name)
        job.fileStore.logToMaster('Ran mhcii on %s:%s successfully'
                                  % (univ_options['patient'], allele))
        return output_file, predictor
def predict_netmhcii_binding(job, peptfile, allele, univ_options, netmhciipan_options):
    """
    Predict binding for each peptide in `peptfile` to `allele` using netMHCIIpan.

    :param toil.fileStore.FileID peptfile: The input peptide fasta
    :param str allele: Allele to predict binding against
    :param dict univ_options: Dict of universal options used by almost all tools
    :param dict netmhciipan_options: Options specific to netmhciipan binding prediction
    :return: tuple of fsID for file containing the predictions and the predictor used
             (netMHCIIpan)
    :rtype: tuple(toil.fileStore.FileID, str)
    """
    work_dir = os.getcwd()
    input_files = {'peptfile.faa': peptfile}
    input_files = get_files_from_filestore(job, input_files, work_dir, docker=True)
    peptides = read_peptide_file(os.path.join(work_dir, 'peptfile.faa'))
    if not peptides:
        # Nothing to predict on: return an empty file and no predictor name.
        return job.fileStore.writeGlobalFile(job.fileStore.getLocalTempFile()), None
    # netMHCIIpan accepts differently formatted alleles so we need to modify the input alleles
    if allele.startswith('HLA-DQA') or allele.startswith('HLA-DPA'):
        # Drop '*' and ':' and join the alpha/beta chains with '-'.
        allele = re.sub(r'[*:]', '', allele)
        allele = re.sub(r'/', '-', allele)
    elif allele.startswith('HLA-DRB'):
        # Drop ':' and turn '*' into '_', then strip the literal 'HLA-' prefix.
        allele = re.sub(r':', '', allele)
        allele = re.sub(r'\*', '_', allele)
        # BUGFIX: the original used allele.lstrip('HLA-'), which strips a character
        # *set* {H, L, A, -} rather than the prefix and only worked by accident
        # because the following character is 'D'.  An anchored substitution removes
        # exactly the prefix.
        allele = re.sub(r'^HLA-', '', allele)
    else:
        raise RuntimeError('Unknown allele seen')
    parameters = ['-a', allele,
                  '-xls', '1',
                  '-xlsfile', 'predictions.tsv',
                  '-f', input_files['peptfile.faa']]
    # netMHC writes a lot of useless stuff to sys.stdout so we open /dev/null and dump
    # output there.
    with open(os.devnull, 'w') as output_catcher:
        docker_call(tool='netmhciipan', tool_parameters=parameters, work_dir=work_dir,
                    dockerhub=univ_options['dockerhub'], outfile=output_catcher,
                    tool_version=netmhciipan_options['version'])
    output_file = job.fileStore.writeGlobalFile(os.path.join(work_dir, 'predictions.tsv'))
    job.fileStore.logToMaster('Ran netmhciipan on %s successfully' % allele)
    return output_file, 'netMHCIIpan'
def merge_mhc_peptide_calls(job, antigen_predictions, transgened_files, univ_options):
    """
    Merge all the calls generated by spawn_antigen_predictors.

    :param dict antigen_predictions: The return value from running
           :meth:`spawn_antigen_predictors`
    :param dict transgened_files: The transgened peptide files
    :param dict univ_options: Universal options for ProTECT
    :return: merged binding predictions
             output_files:
                 |- 'mhcii_merged_files.list': fsID
                 +- 'mhci_merged_files.list': fsID
    :rtype: dict
    """
    job.fileStore.logToMaster('Merging MHC calls')
    work_dir = os.getcwd()
    pept_files = {
        '10_mer.faa': transgened_files['transgened_tumor_10_mer_snpeffed.faa'],
        '10_mer.faa.map': transgened_files['transgened_tumor_10_mer_snpeffed.faa.map'],
        '15_mer.faa': transgened_files['transgened_tumor_15_mer_snpeffed.faa'],
        '15_mer.faa.map': transgened_files['transgened_tumor_15_mer_snpeffed.faa.map']}
    pept_files = get_files_from_filestore(job, pept_files, work_dir)
    mhci_preds, mhcii_preds = antigen_predictions
    # Track whether at least one MHCI / MHCII prediction produced usable rows.
    mhci_called = mhcii_called = False
    # Merge MHCI calls
    # Read 10-mer pepts into memory
    peptides = read_peptide_file(pept_files['10_mer.faa'])
    with open(pept_files['10_mer.faa.map'], 'r') as mapfile:
        pepmap = json.load(mapfile)
    with open('/'.join([work_dir, 'mhci_merged_files.list']), 'w') as mhci_resfile:
        for key in mhci_preds:
            tumor_file = job.fileStore.readGlobalFile(mhci_preds[key]['tumor'])
            with open(tumor_file) as t_f:
                # NOTE(review): eval() on file contents is dangerous if the file could
                # ever be untrusted -- presumably it holds a repr'd JSON string written
                # by an upstream job; confirm and consider json/ast.literal_eval.
                tumor_df = pandas.read_json(eval(t_f.read()))
            if tumor_df.empty:
                continue
            mhci_called = True
            # TODO: There must be a better way of doing this
            # Align normal predictions to the tumor rows via the 'normal_pept' column.
            normal_df = _process_mhci(job.fileStore.readGlobalFile(mhci_preds[key]['normal']),
                                      normal=True)
            normal_dict = normal_df.set_index('pept')['tumor_pred']
            normal_preds = [normal_dict[x] for x in list(tumor_df['normal_pept'])]
            tumor_df['normal_pred'] = normal_preds
            for pred in tumor_df.itertuples():
                print_mhc_peptide(pred, peptides, pepmap, mhci_resfile)
    # Merge MHCII calls
    # read 15-mer pepts into memory
    peptides = read_peptide_file(pept_files['15_mer.faa'])
    with open(pept_files['15_mer.faa.map'], 'r') as mapfile:
        pepmap = json.load(mapfile)
    # Incorporate peptide names into the merged calls
    with open('/'.join([work_dir, 'mhcii_merged_files.list']), 'w') as \
            mhcii_resfile:
        for key in mhcii_preds:
            if mhcii_preds[key]['predictor'] is None:
                # The mhcii job produced no predictions for this allele.
                continue
            mhcii_called = True
            tumor_file = job.fileStore.readGlobalFile(mhcii_preds[key]['tumor'])
            with open(tumor_file) as t_f:
                # NOTE(review): same eval()-on-file-contents pattern as the MHCI
                # branch above; confirm the input is trusted.
                tumor_df = pandas.read_json(eval(t_f.read()))
            if tumor_df.empty:
                continue
            # TODO: There must be a better way of doing this
            # Each predictor writes a different output format, hence the dispatch.
            if mhcii_preds[key]['predictor'] == 'Consensus':
                normal_df = _process_consensus_mhcii(
                    job.fileStore.readGlobalFile(mhcii_preds[key]['normal'][0]), normal=True)
            elif mhcii_preds[key]['predictor'] == 'Sturniolo':
                normal_df = _process_sturniolo_mhcii(
                    job.fileStore.readGlobalFile(mhcii_preds[key]['normal'][0]), normal=True)
            elif mhcii_preds[key]['predictor'] == 'netMHCIIpan':
                normal_df = _process_net_mhcii(
                    job.fileStore.readGlobalFile(mhcii_preds[key]['normal'][0]), normal=True)
            else:
                # Unreachable unless a new predictor name is introduced upstream.
                assert False
            normal_dict = normal_df.set_index('pept')['tumor_pred']
            normal_preds = [normal_dict[x] for x in list(tumor_df['normal_pept'])]
            tumor_df['normal_pred'] = normal_preds
            for pred in tumor_df.itertuples():
                print_mhc_peptide(pred, peptides, pepmap, mhcii_resfile,
                                  netmhc=mhcii_preds[key]['predictor'] == 'netMHCIIpan')
    if not (mhci_called or mhcii_called):
        raise RuntimeError('No peptides available for ranking')
    output_files = defaultdict()
    for mhc_file in [mhci_resfile.name, mhcii_resfile.name]:
        output_files[os.path.split(mhc_file)[1]] = job.fileStore.writeGlobalFile(mhc_file)
        export_results(job, output_files[os.path.split(mhc_file)[1]], mhc_file, univ_options,
                       subfolder='binding_predictions')
    return output_files