def test_bad_type_raises_exception(self):
    """Models carrying a wrongly typed field must be rejected by ModelValidator.validate."""
    # patient_identifier should be a string instead of an integer
    integer_identifier = Neoantigen(patient_identifier=1234, rna_expression=0.45)
    with self.assertRaises(NeofoxDataValidationException):
        ModelValidator.validate(integer_identifier)

    # rna_expression should be a float instead of a string
    text_expression = Neoantigen(patient_identifier="1234", rna_expression="0.45")
    with self.assertRaises(NeofoxDataValidationException):
        ModelValidator.validate(text_expression)

    # is_rna_available should be a boolean instead of a string
    text_flag = Patient(identifier="1234", is_rna_available="Richtig")
    with self.assertRaises(NeofoxDataValidationException):
        ModelValidator.validate(text_flag)

    # TODO: make validation capture this data types errors!
    # patient_identifier should be a string instead of a list of strings;
    # this call is only expected not to raise for now
    list_identifier = Neoantigen(patient_identifier=["12345"], rna_expression=0.45)
    ModelValidator.validate(list_identifier)
def build_neoantigen(wild_type_xmer=None, mutated_xmer=None, patient_identifier=None, gene=None,
                     rna_expression=None, rna_variant_allele_frequency=None,
                     dna_variant_allele_frequency=None, imputed_gene_expression=None, **kw):
    """
    Builds a Neoantigen from plain values and validates it.

    :param wild_type_xmer: value stored in mutation.wild_type_xmer
    :param mutated_xmer: value stored in mutation.mutated_xmer
    :param patient_identifier: value stored in neoantigen.patient_identifier
    :param gene: value stored in neoantigen.gene
    :param rna_expression: value stored in neoantigen.rna_expression
    :param rna_variant_allele_frequency: value stored in neoantigen.rna_variant_allele_frequency
    :param dna_variant_allele_frequency: value stored in neoantigen.dna_variant_allele_frequency
    :param imputed_gene_expression: value stored in neoantigen.imputed_gene_expression
    :param kw: any other keyword; those whose snake-cased name is not a Neoantigen field are kept
        as external annotations with their value converted to str
    :return: the Neoantigen after it went through ModelValidator.validate_neoantigen
    """
    neoantigen = Neoantigen()
    neoantigen.patient_identifier = patient_identifier
    neoantigen.gene = gene
    neoantigen.rna_expression = rna_expression
    neoantigen.rna_variant_allele_frequency = rna_variant_allele_frequency
    neoantigen.dna_variant_allele_frequency = dna_variant_allele_frequency
    neoantigen.imputed_gene_expression = imputed_gene_expression

    mutation = Mutation()
    mutation.wild_type_xmer = wild_type_xmer
    mutation.mutated_xmer = mutated_xmer
    # the position is derived from the two sequences already set on the mutation
    mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation)
    neoantigen.mutation = mutation

    # the set of model field names is built once instead of once per keyword; keyword names are
    # unique by construction so no further deduplication is needed
    model_fields = set(Neoantigen.__annotations__)
    neoantigen.external_annotations = [
        Annotation(name=name, value=str(value))
        for name, value in kw.items()
        if stringcase.snakecase(name) not in model_fields
    ]

    ModelValidator.validate_neoantigen(neoantigen)
    return neoantigen
def test_annotations2short_wide_df(self):
    """The wide table has one row per neoantigen, 13 columns and no list-valued position cell."""
    annotation_names = ("this_name", "that_name", "diese_name", "das_name")

    def annotated_neoantigen(wild_type, mutated, position, values):
        # pairs every annotation name with the value given for this neoantigen
        return Neoantigen(
            mutation=Mutation(wild_type_xmer=wild_type, mutated_xmer=mutated, position=position),
            neofox_annotations=NeoantigenAnnotations(
                annotations=[
                    Annotation(name=name, value=value)
                    for name, value in zip(annotation_names, values)
                ]
            ),
        )

    neoantigens = [
        annotated_neoantigen(
            "AAAAAAA", "AAACAAA", [],
            ("this_value", "that_value", "diese_value", "das_value")),
        annotated_neoantigen(
            "AAAGAAA", "AAAZAAA", [1, 2, 3],
            ("0", "1", "2", "3")),
    ]

    df = ModelConverter.annotations2table(neoantigens=neoantigens)

    row_count, column_count = df.shape
    self.assertEqual(row_count, 2)
    self.assertEqual(column_count, 13)

    # no cell of the position column may still hold a Python list
    holds_list = df["mutation.position"].transform(lambda x: isinstance(x, list))
    self.assertEqual(0, df[holds_list].shape[0])
def test_neoantigen_in_proteome(self):
    """Smoke test: NeoFox returns annotations for a neoantigen that only has a mutated sequence."""
    patient_identifier = "12345"

    mutation = Mutation(mutated_xmer="PKLLENLLSKGETISFLECF")
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1_alleles = [
        "HLA-A*24:106", "HLA-A*02:200",
        "HLA-B*08:33", "HLA-B*40:94",
        "HLA-C*02:20", "HLA-C*07:86",
    ]
    mhc2_alleles = [
        "HLA-DRB1*07:14", "HLA-DRB1*04:18",
        "HLA-DPA1*01:05", "HLA-DPA1*03:01",
        "HLA-DPB1*17:01", "HLA-DPB1*112:01",
        "HLA-DQA1*01:06", "HLA-DQA1*01:09",
        "HLA-DQB1*03:08", "HLA-DQB1*06:01",
    ]
    patient = PatientFactory.build_patient(
        identifier=patient_identifier,
        mhc_alleles=mhc1_alleles,
        mhc2_alleles=mhc2_alleles,
        mhc_database=self.references.get_mhc_database())

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # the only expectation is that the run completes and hands back a result
    self.assertIsNotNone(annotations)
def test_neoantigen_without_9mer_netmhcpan_results(self):
    """Smoke test: NeoFox returns annotations for this wild type / mutated sequence pair."""
    patient_identifier = "12345"

    mutation = Mutation(
        wild_type_xmer="HLAQHQRVHTGEKPYKCNECGKTFRQT",
        mutated_xmer="HLAQHQRVHTGEKAYKCNECGKTFRQT")
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1_alleles = [
        "HLA-A*24:106", "HLA-A*02:200",
        "HLA-B*08:33", "HLA-B*40:94",
        "HLA-C*02:20", "HLA-C*07:86",
    ]
    mhc2_alleles = [
        "HLA-DRB1*07:14", "HLA-DRB1*04:18",
        "HLA-DPA1*01:05", "HLA-DPA1*03:01",
        "HLA-DPB1*17:01", "HLA-DPB1*112:01",
        "HLA-DQA1*01:06", "HLA-DQA1*01:09",
        "HLA-DQB1*03:08", "HLA-DQB1*06:01",
    ]
    patient = PatientFactory.build_patient(
        identifier=patient_identifier,
        mhc_alleles=mhc1_alleles,
        mhc2_alleles=mhc2_alleles,
        mhc_database=self.references.get_mhc_database())

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # it must not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_good_data_does_not_raise_exceptions(self):
    """Correctly typed models go through ModelValidator.validate without raising."""
    # a string identifier and a float expression
    ModelValidator.validate(Neoantigen(patient_identifier="1234", rna_expression=0.45))
    # a string identifier and a boolean flag
    ModelValidator.validate(Patient(identifier="1234", is_rna_available=True))
def annotate_neoantigen(neoantigen: Neoantigen, patient: Patient, reference_folder: ReferenceFolder,
                        configuration: DependenciesConfiguration, tcell_predictor: TcellPrediction,
                        self_similarity: SelfSimilarityCalculator, log_file_name: str,
                        affinity_threshold=AFFINITY_THRESHOLD_DEFAULT):
    """
    Annotates one neoantigen for one patient and logs how long it took.

    :param neoantigen: the neoantigen to annotate
    :param patient: the patient passed along to the annotator
    :param reference_folder: forwarded to NeoantigenAnnotator
    :param configuration: forwarded to NeoantigenAnnotator
    :param tcell_predictor: forwarded to NeoantigenAnnotator
    :param self_similarity: forwarded to NeoantigenAnnotator
    :param log_file_name: forwarded to NeoFox._initialise_logs
    :param affinity_threshold: forwarded to NeoantigenAnnotator
    :return: whatever NeoantigenAnnotator.get_annotation returns
    :raises Exception: any error from the annotator is logged with both inputs and re-raised
    """
    # the logs need to be initialised inside every dask job
    NeoFox._initialise_logs(log_file_name)
    logger.info("Starting neoantigen annotation with peptide={}".format(
        neoantigen.mutation.mutated_xmer))

    started_at = time.time()
    try:
        annotator = NeoantigenAnnotator(
            reference_folder,
            configuration,
            tcell_predictor=tcell_predictor,
            self_similarity=self_similarity,
            affinity_threshold=affinity_threshold)
        annotated_neoantigen = annotator.get_annotation(neoantigen, patient)
    except Exception as e:
        # dump both inputs so that the failing case can be reproduced from the logs
        logger.error("Error processing neoantigen {}".format(neoantigen.to_dict()))
        logger.error("Error processing patient {}".format(patient.to_dict()))
        raise e
    finished_at = time.time()

    logger.info(
        "Elapsed time for annotating neoantigen for peptide={}: {} seconds".format(
            neoantigen.mutation.mutated_xmer, int(finished_at - started_at)))
    return annotated_neoantigen
def test_model2dict(self):
    """Neoantigens survive a round trip through to_dict and from_dict."""
    expected_count = 5
    originals = [get_random_neoantigen() for _ in range(expected_count)]

    as_dicts = [original.to_dict() for original in originals]
    self.assertIsInstance(as_dicts, list)
    self.assertEqual(expected_count, len(as_dicts))

    restored = [Neoantigen().from_dict(payload) for payload in as_dicts]
    self._assert_lists_equal(originals, restored)
def _get_test_neoantigen(self):
    """Returns a fixed Neoantigen fixture with a gene, a mutation, a patient and an expression."""
    mutation = Mutation(
        mutated_xmer="AAAAAAAIAAAAAAAA",
        wild_type_xmer="AAAAAAALAAAAAAAA",
    )
    return Neoantigen(
        gene="GENE",
        mutation=mutation,
        patient_identifier="12345",
        rna_expression=0.12345,
    )
def parse_neoantigens_json_file(
        neoantigens_json_file: str) -> List[Neoantigen]:
    """
    Reads a JSON file and builds one Neoantigen per element of the top level JSON value.

    :param neoantigens_json_file: the path to the neoantigens data JSON file
    :return: the parsed JSON into model objects
    """
    # the context manager closes the file handle even when the JSON parsing fails
    with open(neoantigens_json_file) as json_handle:
        records = json.load(json_handle)
    return [Neoantigen().from_dict(record) for record in records]
def validate_neoantigen(neoantigen: Neoantigen):
    """
    Validates a neoantigen: generic model validation first, then the patient identifier,
    the mutation and the expression values.

    :param neoantigen: the neoantigen to validate
    :raises NeofoxDataValidationException: when the patient identifier is missing or empty, or
        when one of the nested checks raises an AssertionError; the offending neoantigen is
        logged as JSON beforehand. Whatever ModelValidator.validate raises propagates unchanged.
    """
    # checks format consistency first
    ModelValidator.validate(neoantigen)

    try:
        # raised explicitly rather than through an `assert` statement so that this check is not
        # stripped when Python runs with optimisations enabled (-O)
        if neoantigen.patient_identifier is None or len(neoantigen.patient_identifier) == 0:
            raise AssertionError(
                "A patient identifier is missing. Please provide patientIdentifier in the input file")

        # checks mutation
        ModelValidator._validate_mutation(neoantigen.mutation)

        # check the expression values
        ModelValidator._validate_expression_values(neoantigen)
    except AssertionError as e:
        logger.error(neoantigen.to_json(indent=3))
        # keep the original assertion as the explicit cause of the validation error
        raise NeofoxDataValidationException(e) from e
def neoantigen(self, patient_identifier=None, wildtype=True) -> Neoantigen:
    """
    Generates random neoantigens until one passes ModelValidator.validate_neoantigen.

    :param patient_identifier: identifier to use; when None a unique uuid4 is generated
    :param wildtype: forwarded to self.mutation; when falsy the gene is left unset
    :return: the first generated neoantigen that passes validation
    """
    while True:
        try:
            # the values are drawn in the same order as the model fields are listed below
            if patient_identifier is None:
                identifier = self.generator.unique.uuid4()
            else:
                identifier = patient_identifier
            # no gene if no wildtype provided
            gene = "BRCA2" if wildtype else None
            mutation = self.mutation(wildtype=wildtype)
            rna_expression = float(self.random_number(digits=4, fix_len=True))/100
            dna_vaf = float(self.random_number(digits=3, fix_len=True))/1000
            rna_vaf = float(self.random_number(digits=3, fix_len=True))/1000

            candidate = Neoantigen(
                patient_identifier=identifier,
                gene=gene,
                mutation=mutation,
                rna_expression=rna_expression,
                dna_variant_allele_frequency=dna_vaf,
                rna_variant_allele_frequency=rna_vaf
            )
            ModelValidator.validate_neoantigen(candidate)
        except NeofoxDataValidationException:
            # invalid candidate, draw a new one
            continue
        return candidate
def test_neoantigen_no_wt_failing(self):
    """Smoke test: NeoFox returns annotations for a neoantigen without wild type sequence."""
    patient_identifier = "12345"

    mutation = Mutation(mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS")
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1_alleles = [
        "HLA-A*02:24", "HLA-A*36:04",
        "HLA-B*58:25", "HLA-B*35:102",
        "HLA-C*02:30", "HLA-C*07:139",
    ]
    mhc1 = MhcFactory.build_mhc1_alleles(
        mhc1_alleles, mhc_database=self.references.get_mhc_database())
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # it must not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def test_neoantigen_failing(self):
    """Smoke test: NeoFox returns annotations for this wild type / mutated sequence pair."""
    patient_identifier = "12345"

    mutation = Mutation(
        wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ",
        mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ")
    neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

    mhc1_alleles = [
        "HLA-A*03:01", "HLA-A*29:02",
        "HLA-B*07:02", "HLA-B*44:03",
        "HLA-C*07:02", "HLA-C*16:01",
    ]
    mhc1 = MhcFactory.build_mhc1_alleles(
        mhc1_alleles, mhc_database=self.references.get_mhc_database())
    patient = Patient(identifier=patient_identifier, mhc1=mhc1)

    neofox = NeoFox(neoantigens=[neoantigen], patients=[patient], num_cpus=1)
    annotations = neofox.get_annotations()

    # it must not crash even though there are no best 9mers
    self.assertIsNotNone(annotations)
def get_annotation(self, neoantigen: Neoantigen, patient: Patient) -> Neoantigen:
    """
    Computes the annotations of every annotator for one neoantigen and stores them on it.

    The neoantigen's `neofox_annotations` attribute is replaced by a fresh NeoantigenAnnotations
    object (annotator name, version, timestamp and resources versions) whose `annotations` list
    is then filled step by step. Steps that depend on the NetMHCpan or NetMHCIIpan results are
    skipped, or receive None, when the corresponding result is falsy.

    :param neoantigen: the neoantigen to annotate; it is modified in place
    :param patient: the patient; `is_rna_available` and `identifier` are read here, and the
        object is forwarded to the long running predictions
    :return: the same neoantigen object that was passed in
    """
    neoantigen.neofox_annotations = NeoantigenAnnotations(
        annotator="NeoFox",
        annotator_version=neofox.VERSION,
        timestamp="{:%Y%m%d%H%M%S%f}".format(datetime.now()),
        resources=self.resources_versions,
        annotations=[]
    )

    # Runs netmhcpan, netmhc2pan, mixmhcpred, mixmhc2pred and prime
    # (per the original comment these run in parallel inside _compute_long_running_tasks)
    (
        mixmhc2pred_annotations,
        mixmhcpred_annotations,
        netmhc2pan,
        netmhcpan,
        prime_annotations
    ) = self._compute_long_running_tasks(neoantigen, patient)

    # HLA I predictions: NetMHCpan
    if netmhcpan:
        neoantigen.neofox_annotations.annotations.extend(netmhcpan.get_annotations(mutation=neoantigen.mutation))

    # HLA II predictions: NetMHCIIpan
    if netmhc2pan:
        neoantigen.neofox_annotations.annotations.extend(netmhc2pan.get_annotations())

    # MixMHCpred
    if mixmhcpred_annotations is not None:
        neoantigen.neofox_annotations.annotations.extend(mixmhcpred_annotations)

    # PRIME
    if prime_annotations is not None:
        neoantigen.neofox_annotations.annotations.extend(prime_annotations)

    # MixMHC2pred
    if mixmhc2pred_annotations is not None:
        neoantigen.neofox_annotations.annotations.extend(mixmhc2pred_annotations)

    # decides which VAF to use: the RNA VAF by default, the DNA VAF when the patient has no RNA
    # available and a DNA VAF exists
    vaf_rna = neoantigen.rna_variant_allele_frequency
    if not patient.is_rna_available and neoantigen.dna_variant_allele_frequency is not None:
        logger.warning(
            "Using the DNA VAF to estimate the RNA VAF as the patient does not have RNA available"
        )
        # TODO: overwrite value in the neoantigen object
        vaf_rna = neoantigen.dna_variant_allele_frequency

    # MHC binding independent features
    start = time.time()
    expression_calculator = Expression(
        transcript_expression=neoantigen.rna_expression, vaf_rna=vaf_rna
    )
    neoantigen.neofox_annotations.annotations.extend(expression_calculator.get_annotations())
    end = time.time()
    logger.info(
        "Expression annotation elapsed time {} seconds".format(
            round(end - start, 3)
        )
    )

    # Uniprot; the flag computed here is reused further down by the priority score
    start = time.time()
    sequence_not_in_uniprot = self.uniprot.is_sequence_not_in_uniprot(
        neoantigen.mutation.mutated_xmer
    )
    neoantigen.neofox_annotations.annotations.extend(
        self.uniprot.get_annotations(sequence_not_in_uniprot)
    )
    end = time.time()
    logger.info(
        "Uniprot annotation elapsed time {} seconds".format(round(end - start, 3))
    )

    # Amplitude; self.amplitude keeps the state set by run() and is read again by the
    # neoantigen fitness and differential binding steps below
    start = time.time()
    self.amplitude.run(netmhcpan=netmhcpan, netmhc2pan=netmhc2pan)
    neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations())
    neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations_mhc2())
    end = time.time()
    logger.info(
        "Amplitude annotation elapsed time {} seconds".format(round(end - start, 3))
    )

    # Neoantigen fitness
    start = time.time()
    neoantigen.neofox_annotations.annotations.extend(
        self.neoantigen_fitness_calculator.get_annotations(
            mutated_peptide_mhci=netmhcpan.best_ninemer_epitope_by_affinity if netmhcpan else None,
            mutation_in_anchor=netmhcpan.mutation_in_anchor_9mer if netmhcpan else None,
            amplitude=self.amplitude.amplitude_mhci_affinity_9mer,
            mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None
        )
    )
    end = time.time()
    # NOTE(review): this message says "Neoantigen annotation" although it times the neoantigen
    # fitness step only
    logger.info(
        "Neoantigen annotation elapsed time {} seconds".format(
            round(end - start, 3)
        )
    )

    # Differential Binding
    start = time.time()
    if netmhcpan:
        # NOTE(review): the keyword is named wt_peptide_mhcii but receives the NetMHCpan wild
        # type epitope - confirm against the signature of get_annotations_dai
        neoantigen.neofox_annotations.annotations.extend(
            self.differential_binding.get_annotations_dai(
                mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                wt_peptide_mhcii=netmhcpan.best_wt_epitope_by_affinity
            )
        )
        neoantigen.neofox_annotations.annotations.extend(
            self.differential_binding.get_annotations(mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                                                      amplitude=self.amplitude)
        )
    if netmhc2pan:
        neoantigen.neofox_annotations.annotations.extend(
            self.differential_binding.get_annotations_mhc2(mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_rank,
                                                           amplitude=self.amplitude)
        )
    end = time.time()
    logger.info(
        "Differential binding annotation elapsed time {} seconds".format(
            round(end - start, 3)
        )
    )

    # T cell predictor
    if netmhcpan:
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.tcell_predictor.get_annotations(
                neoantigen=neoantigen, netmhcpan=netmhcpan
            )
        )
        end = time.time()
        logger.info(
            "T-cell predictor annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

    # self-similarity
    start = time.time()
    neoantigen.neofox_annotations.annotations.extend(
        self.self_similarity.get_annnotations(
            mutated_peptide_mhci=netmhcpan.best_epitope_by_rank if netmhcpan else None,
            wt_peptide_mhci=netmhcpan.best_wt_epitope_by_rank if netmhcpan else None,
            mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None,
            wt_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity_wt if netmhc2pan else None,
        )
    )
    end = time.time()
    logger.info(
        "Self similarity annotation elapsed time {} seconds".format(
            round(end - start, 3)
        )
    )

    # number of mismatches and priority score
    # NOTE(review): the condition tests netmhcpan twice; possibly netmhc2pan was intended for one
    # of the operands - confirm. Only netmhcpan is used inside the branch.
    if netmhcpan and netmhcpan:
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.priority_score_calculator.get_annotations(
                netmhcpan=netmhcpan,
                vaf_transcr=vaf_rna,
                vaf_tum=neoantigen.dna_variant_allele_frequency,
                expr=neoantigen.rna_expression,
                mut_not_in_prot=sequence_not_in_uniprot,
            )
        )
        end = time.time()
        # NOTE(review): "Priotity" is misspelled in the emitted log message
        logger.info(
            "Priotity score annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

    # neoag immunogenicity model
    # NOTE(review): only best_epitope_by_affinity is guarded; best_wt_epitope_by_affinity is
    # dereferenced below as well - presumably it is always set together, verify
    if netmhcpan and netmhcpan.best_epitope_by_affinity:
        start = time.time()
        peptide_variant_position = EpitopeHelper.position_of_mutation_epitope(
            wild_type=netmhcpan.best_wt_epitope_by_affinity.peptide,
            mutation=netmhcpan.best_epitope_by_affinity.peptide,
        )
        # a single annotation is appended here, unlike the other steps that extend with a list
        neoantigen.neofox_annotations.annotations.append(
            self.neoag_calculator.get_annotation(
                sample_id=patient.identifier,
                mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                wt_peptide_mhci=netmhcpan.best_wt_epitope_by_affinity,
                peptide_variant_position=peptide_variant_position,
                mutation=neoantigen.mutation)
        )
        end = time.time()
        logger.info(
            "Neoag annotation elapsed time {} seconds".format(round(end - start, 3))
        )

    # IEDB immunogenicity, only computed for the human organism
    if self.organism == ORGANISM_HOMO_SAPIENS:
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.iedb_immunogenicity.get_annotations(
                mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
                mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None
            )
        )
        end = time.time()
        logger.info(
            "IEDB annotation elapsed time {} seconds".format(round(end - start, 3))
        )

    # dissimilarity to self-proteome
    start = time.time()
    neoantigen.neofox_annotations.annotations.extend(
        self.dissimilarity_calculator.get_annotations(
            mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
            mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None)
    )
    end = time.time()
    logger.info(
        "Dissimilarity annotation elapsed time {} seconds".format(
            round(end - start, 3)
        )
    )

    # vaxrank, only when NetMHCpan produced epitope affinities
    if netmhcpan and netmhcpan.epitope_affinities:
        start = time.time()
        vaxrankscore = vaxrank.VaxRank()
        vaxrankscore.run(
            mutation_scores=netmhcpan.epitope_affinities,
            expression_score=expression_calculator.expression,
        )
        neoantigen.neofox_annotations.annotations.extend(vaxrankscore.get_annotations())
        end = time.time()
        logger.info(
            "Vaxrank annotation elapsed time {} seconds".format(round(end - start, 3))
        )

    # hex
    # TODO: hex is failing for mouse with the current IEDB fasta with only 2 entries
    if self.organism == ORGANISM_HOMO_SAPIENS:
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.hex.get_annotation(
                mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
                mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None)
        )
        end = time.time()
        logger.info(
            "Hex annotation elapsed time {} seconds".format(round(end - start, 3))
        )

    return neoantigen