def check_netmhc_pan(program_name, fail_if_no_such_program=True):
    """Smoke-test a NetMHCpan executable against ``protein_sequence_dict``.

    Builds a ``NetMHCpan`` predictor for ``DEFAULT_ALLELE`` that invokes
    ``program_name``, predicts 9-mers over the module-level
    ``protein_sequence_dict`` and checks that exactly 4 predictions come back
    and that every reported peptide equals the slice of its source sequence
    given by the prediction's offset and length.

    If constructing the predictor raises ``FileNotFoundError`` the error is
    re-raised when ``fail_if_no_such_program`` is true; otherwise a "Skipping"
    message is printed and the check returns without asserting anything.
    """
    try:
        predictor = NetMHCpan(
            alleles=[DEFAULT_ALLELE],
            program_name=program_name)
    except FileNotFoundError:
        if not fail_if_no_such_program:
            print("Skipping because no such program: %s" % program_name)
            return
        raise

    binding_predictions = predictor.predict_subsequences(
        protein_sequence_dict,
        peptide_lengths=[9])
    assert len(binding_predictions) == 4, \
        "Expected 4 binding predictions from %s" % (binding_predictions,)

    for prediction in binding_predictions:
        # Recompute the peptide from the offset and the starting sequence and
        # make sure it matches. This is currently wrong in netMHCpan-3.0 and
        # we want to test our wrapper's fix for that.
        start = prediction.offset
        stop = start + prediction.length
        source_sequence = protein_sequence_dict[prediction.source_sequence_name]
        expected_peptide = source_sequence[start:stop]
        mismatch_message = (
            "Peptide mismatch: expected %s but got %s in binding prediction '%s'" % (
                expected_peptide,
                prediction.peptide,
                prediction,
            ))
        eq_(expected_peptide, prediction.peptide, mismatch_message)
def test_netmhc_pan():
    """Check that NetMHCpan yields 4 epitopes for two 10-residue sequences.

    The predictor is built for the normalized ``DEFAULT_ALLELE`` with
    ``epitope_lengths=[9]`` and run through ``predict(fasta_dictionary=...)``.
    """
    sequences_by_name = {
        "SMAD4-001": "ASIINFKELA",
        "TP53-001": "ASILLLVFYW",
    }
    normalized_allele = normalize_allele_name(DEFAULT_ALLELE)
    pan_predictor = NetMHCpan(
        alleles=[normalized_allele],
        epitope_lengths=[9])
    epitope_collection = pan_predictor.predict(
        fasta_dictionary=sequences_by_name)
    assert len(epitope_collection) == 4, \
        "Expected 4 epitopes from %s" % (epitope_collection,)
def run_wt_pep_single_netMHCpan():
    """Print the predicted affinity of one hard-coded 8-mer for A*02:01.

    For each allele a ``NetMHCpan`` predictor is created and
    ``predict_subsequences`` is called once per peptide with
    ``peptide_lengths=[8]``; the ``affinity`` of every returned prediction is
    printed.
    """
    peptide_length = 8
    mutant_peptides = {
        "chr3_52533601_ACGCCGGGGCAGTGGG/A-L6-WT25-MUT25-M1": "NLYIRSSG",
    }
    for hla in ["A*02:01"]:
        # The allele is handed over as a bare string, exactly as written in
        # the original snippet (not wrapped in a list).
        predictor = NetMHCpan(alleles=hla)
        for variant_id, peptide in mutant_peptides.items():
            predictions = predictor.predict_subsequences(
                {variant_id: peptide},
                peptide_lengths=[peptide_length])
            for prediction in predictions:
                print(prediction.affinity)
def run_single_netMHCpan():
    """Print NetMHCpan predictions for a wild-type and a mutant 8-mer.

    Both peptides are keyed by the same variant identifier and scored against
    A*02:01 with ``peptide_lengths=[8]``. Every wild-type prediction is
    printed; for every mutant prediction the prediction and its allele are
    printed.
    """
    variant_id = "chr3_52533601_ACGCCGGGGCAGTGGG/A-L6-WT25-MUT25-M1"
    predictor = NetMHCpan(alleles=["A*02:01"])

    wt_predictions = predictor.predict_subsequences(
        {variant_id: "NLYIQSSG"},
        peptide_lengths=[8])
    for wt_prediction in wt_predictions:
        print("Binder: %s" % (wt_prediction, ))

    mut_predictions = predictor.predict_subsequences(
        {variant_id: "NLYIRSSG"},
        peptide_lengths=[8])
    for mut_prediction in mut_predictions:
        # The prediction is deliberately NOT wrapped in a tuple here, which
        # mirrors the original formatting call for the mutant peptide.
        print("Binder: %s" % mut_prediction)
        print("Binder allele: %s" % mut_prediction.allele)
def run_netMHCpan():
    """Run NetMHCpan over four short example proteins and print the results.

    A ``NetMHCpan`` predictor is built for the alleles ``"A*02:01"`` and
    ``"hla-a0101"``, 9-mers are predicted for every entry of the hard-coded
    sequence dictionary, and each prediction is printed.

    Returns:
        The result of ``binding_predictions.to_dataframe()`` (the predictions
        flattened into a Pandas DataFrame). Earlier versions computed this
        value and discarded it, so callers that ignore the return value are
        unaffected.
    """
    # Run NetMHCpan for alleles HLA-A*01:01 and HLA-A*02:01.
    # The predictor used to be overwritten by a NetMHC(...) instance on the
    # very next line, so NetMHCpan was constructed but never actually run.
    predictor = NetMHCpan(alleles=["A*02:01", "hla-a0101"])

    # Scan the example proteins for epitopes.
    protein_sequences = {
        "chr3_52533601_ACGCCGGGGCAGTGGG/A-L6-WT25-MUT25-M1": "NLYIQSSGRPPPSWLKDGGP",
        "chr8_127738959_G/A-1": "NLYIQSSGRPPPSWLKDGGP",
        "1L2Y": "NLYIQWLKDGGPSSGRPPPS",
        "1L3Y": "ECDTINCERYNGQVCGGPGRGLCFCGKCRCHPGFEGSACQA",
    }
    binding_predictions = predictor.predict_subsequences(
        protein_sequences,
        peptide_lengths=[9])

    # Flatten binding predictions into a Pandas DataFrame.
    df = binding_predictions.to_dataframe()

    for binding_prediction in binding_predictions:
        print("Binder: %s" % (binding_prediction, ))

    return df
start=32861682, ref='G', alt='A', ensembl=ensembl_grch37)]) alleles = [ 'A02:01', 'a0204', 'B*07:02', 'HLA-B14:02', 'HLA-C*07:02', 'hla-c07:01' ] mhc_model = NetMHCpan( alleles=alleles, default_peptide_lengths=[9]) def test_epitope_prediction_without_padding(): output_without_padding = TopiaryPredictor( mhc_model=mhc_model, only_novel_epitopes=True).predict_from_variants(variants=variants) # one prediction for each variant * number of alleles strong_binders = output_without_padding[output_without_padding.affinity <= 500] eq_(len(strong_binders), 5) @raises(ValueError) def test_epitope_prediction_with_invalid_padding(): TopiaryPredictor( mhc_model=mhc_model,
# Downloaded from https://github.com/hammerlab/mhctools#Example
from mhctools import NetMHCpan

# Run NetMHCpan for alleles HLA-A*01:01 and HLA-A*02:01
predictor = NetMHCpan(alleles=["A*02:01", "hla-a0101"])

# scan the short proteins 1L2Y and 1L3Y for epitopes
protein_sequences = {
    "1L2Y": "NLYIQWLKDGGPSSGRPPPS",
    "1L3Y": "ECDTINCERYNGQVCGGPGRGLCFCGKCRCHPGFEGSACQA"
}

epitope_collection = predictor.predict(protein_sequences)

# flatten binding predictions into a Pandas DataFrame
df = epitope_collection.dataframe()

# epitope collection is sorted by percentile rank
# of binding predictions
strongest_predicted_binder = epitope_collection[0]

# should be NLYIQWLKDGGPSSGRPPPS
# Parenthesized call form: the bare `print x` statement is a SyntaxError on
# Python 3, while a single-argument print(...) behaves the same on 2 and 3.
print(strongest_predicted_binder.source_sequence)
alt='T', ensembl=ensembl_grch37), Variant(contig=11, start=32861682, ref='G', alt='A', ensembl=ensembl_grch37) ]) alleles = [ 'A02:01', 'a0204', 'B*07:02', 'HLA-B14:02', 'HLA-C*07:02', 'hla-c07:01' ] epitope_lengths = [9] mhc_model = NetMHCpan(alleles=alleles, epitope_lengths=epitope_lengths) def test_epitope_prediction_without_padding(): output_without_padding = predict_epitopes_from_variants( variants=variants, mhc_model=mhc_model, transcript_expression_dict=None, only_novel_epitopes=True) # one prediction for each variant * number of alleles strong_binders = [ epitope_prediction for epitope_prediction in output_without_padding if epitope_prediction.value <= 500.0 ] eq_(len(strong_binders), 4)
def _classify_bind_level(percentile_rank):
    """Map a percentile rank to "SB" (<= 0.5), "WB" (<= 2.0) or "N/A"."""
    if percentile_rank <= 0.5:
        return "SB"
    if percentile_rank <= 2.0:
        return "WB"
    return "N/A"


def predict_binding(hla_alleles, WT_peps, MUT_peps, size, bind_pred_software, extra_flag=""):
    """Predict mutant (and, where available, wild-type) peptide binding.

    For every allele a predictor is built and every mutant peptide is scored
    with ``peptide_lengths=[size]``. Unless the variant is a frameshift or its
    wild-type entry is the ``"X" * size`` placeholder, the wild-type peptide is
    scored as well and paired position-by-position with the mutant
    predictions. One tuple per prediction is collected and the whole list is
    handed to ``write_netMHC_type_file``.

    Each collected tuple is
    ``(var_id, percentile_rank, bind_level, wt_peptide, mut_peptide,
    wt_affinity, mut_affinity, dai, allele, size, software_label)`` where
    fields that do not apply are the string ``"N/A"`` and ``dai`` is
    ``wt_affinity - mut_affinity``.

    Args:
        hla_alleles: Comma-separated string of allele names.
        WT_peps: Mapping from variant id to wild-type peptide sequence.
        MUT_peps: Mapping from variant id to mutant peptide sequence.
        size: Peptide length to predict for.
        bind_pred_software: ``"netmhcpan"`` or ``"netmhc"`` (case-insensitive).
        extra_flag: ``""`` or, for netmhcpan, ``"-BA"``; it is passed to the
            predictor via ``extra_flags`` and appended to the software label.

    Raises:
        Exception: If the software / extra-flag combination matches none of
            the supported cases.
    """
    software = bind_pred_software.lower()
    run_label = bind_pred_software + extra_flag
    logging.info("Starting binding prediction for '%s', size: %i.", run_label, size)
    logging.info("Alleles: '%s'.", hla_alleles)

    # NOTE(review): the predictor branch below accepts the flag in any case
    # ("-ba"), but affinities are only reported for the exact spelling "-BA".
    # Kept as in the original; confirm which spelling callers really pass.
    report_affinity = extra_flag == "-BA" or software == "netmhc"

    data_netMHC = []
    for allele in hla_alleles.split(","):
        if software == "netmhcpan" and extra_flag == "":
            predictor = NetMHCpan(alleles=allele)
        elif software == "netmhcpan" and extra_flag.lower() == "-ba":
            predictor = NetMHCpan(alleles=allele, extra_flags=[extra_flag])
        elif software == "netmhc":
            try:
                predictor = NetMHC(alleles=allele)
            except UnsupportedAllele:
                logging.warning("Unsupported Allele: '%s'", allele)
                # Skip only this allele. The previous `break` silently dropped
                # every allele listed after the unsupported one, so results
                # depended on the order of the allele list.
                continue
        else:
            raise Exception("Unknown binding prediction software: {}".format(bind_pred_software))

        for var_id, MUT_pep in MUT_peps.items():
            is_frameshift = helper.check_if_frameshift(var_id.split("-")[0])
            mut_binding_prediction = predictor.predict_subsequences(
                {var_id: MUT_pep}, peptide_lengths=[size])

            # A sequence of the form "X" * size comes from the case of an empty_WT
            if not is_frameshift and WT_peps[var_id] != "X" * size:
                wt_binding_prediction = predictor.predict_subsequences(
                    {var_id: WT_peps[var_id]}, peptide_lengths=[size])
                for wt_bp, mut_bp in zip(wt_binding_prediction, mut_binding_prediction):
                    bind_level = _classify_bind_level(mut_bp.percentile_rank)
                    if report_affinity:
                        dai = wt_bp.affinity - mut_bp.affinity
                        data_netMHC.append((
                            var_id, mut_bp.percentile_rank,
                            bind_level, WT_peps[var_id], MUT_pep,
                            wt_bp.affinity, mut_bp.affinity, dai,
                            allele, size, run_label))
                    else:
                        data_netMHC.append((
                            var_id, mut_bp.percentile_rank,
                            bind_level, WT_peps[var_id], MUT_pep,
                            "N/A", "N/A", "N/A",
                            allele, size, run_label))
            else:
                # No usable wild-type peptide: report the mutant on its own.
                for mut_bp in mut_binding_prediction:
                    bind_level = _classify_bind_level(mut_bp.percentile_rank)
                    if report_affinity:
                        data_netMHC.append((
                            var_id, mut_bp.percentile_rank,
                            bind_level, "N/A", MUT_pep,
                            "N/A", mut_bp.affinity, "N/A",
                            allele, size, run_label))
                    else:
                        data_netMHC.append((
                            var_id, mut_bp.percentile_rank,
                            bind_level, "N/A", MUT_pep,
                            "N/A", "N/A", "N/A",
                            allele, size, run_label))

    write_netMHC_type_file(data_netMHC, size, bind_pred_software, extra_flag)
    logging.info("Finished binding prediction for '%s', size: %i.", run_label, size)