Exemplo n.º 1
0
    def test_bad_type_raises_exception(self):
        """
        Checks that models holding a value of the wrong type are rejected by
        ModelValidator.validate with a NeofoxDataValidationException.

        The last case (a list where a string is expected) is only validated without
        expecting a failure, see the TODO below.
        """
        # every model is built before the validator is invoked, so only the call to
        # ModelValidator.validate is expected to raise
        integer_identifier = Neoantigen(
            patient_identifier=1234,  # this should be a string instead of an integer
            rna_expression=0.45,
        )
        with self.assertRaises(NeofoxDataValidationException):
            ModelValidator.validate(integer_identifier)

        string_expression = Neoantigen(
            patient_identifier="1234",
            rna_expression="0.45",  # this should be a float
        )
        with self.assertRaises(NeofoxDataValidationException):
            ModelValidator.validate(string_expression)

        string_flag = Patient(
            identifier="1234",
            is_rna_available="Richtig",  # this should be a boolean
        )
        with self.assertRaises(NeofoxDataValidationException):
            ModelValidator.validate(string_flag)

        # TODO: make validation capture this data types errors!
        list_identifier = Neoantigen(
            patient_identifier=["12345"],  # this should be a string instead of a list of strings
            rna_expression=0.45,
        )
        ModelValidator.validate(list_identifier)
Exemplo n.º 2
0
    def build_neoantigen(wild_type_xmer=None,
                         mutated_xmer=None,
                         patient_identifier=None,
                         gene=None,
                         rna_expression=None,
                         rna_variant_allele_frequency=None,
                         dna_variant_allele_frequency=None,
                         imputed_gene_expression=None,
                         **kw):
        """
        Builds a neoantigen from flat values and validates it.

        :param wild_type_xmer: stored in the mutation as wild_type_xmer
        :param mutated_xmer: stored in the mutation as mutated_xmer
        :param patient_identifier: stored in the neoantigen as is
        :param gene: stored in the neoantigen as is
        :param rna_expression: stored in the neoantigen as is
        :param rna_variant_allele_frequency: stored in the neoantigen as is
        :param dna_variant_allele_frequency: stored in the neoantigen as is
        :param imputed_gene_expression: stored in the neoantigen as is
        :param kw: any additional named values; those whose snake-cased name is not a
            field of Neoantigen are kept as external annotations, with the value
            converted to a string
        :return: the neoantigen, once ModelValidator.validate_neoantigen has accepted it
        """
        neoantigen = Neoantigen()
        neoantigen.patient_identifier = patient_identifier
        neoantigen.gene = gene
        neoantigen.rna_expression = rna_expression
        neoantigen.rna_variant_allele_frequency = rna_variant_allele_frequency
        neoantigen.dna_variant_allele_frequency = dna_variant_allele_frequency
        neoantigen.imputed_gene_expression = imputed_gene_expression

        mutation = Mutation()
        mutation.wild_type_xmer = wild_type_xmer
        mutation.mutated_xmer = mutated_xmer
        # the position is derived from the two sequences already set in the mutation
        mutation.position = EpitopeHelper.mut_position_xmer_seq(mutation)
        neoantigen.mutation = mutation

        # the model fields do not change while iterating, so the lookup set is built
        # once instead of once per keyword
        model_fields = set(Neoantigen.__annotations__)
        neoantigen.external_annotations = [
            Annotation(name=name, value=str(value))
            for name, value in kw.items()
            if stringcase.snakecase(name) not in model_fields
        ]

        ModelValidator.validate_neoantigen(neoantigen)

        return neoantigen
    def test_annotations2short_wide_df(self):
        """
        Converts two annotated neoantigens into a table and checks that it has
        2 rows, 13 columns and no list value in the "mutation.position" column.
        """
        annotation_names = ("this_name", "that_name", "diese_name", "das_name")
        text_values = ("this_value", "that_value", "diese_value", "das_value")
        digit_values = ("0", "1", "2", "3")

        # first neoantigen: empty list of positions
        first = Neoantigen(
            mutation=Mutation(wild_type_xmer="AAAAAAA", mutated_xmer="AAACAAA", position=[]),
            neofox_annotations=NeoantigenAnnotations(
                annotations=[
                    Annotation(name=name, value=value)
                    for name, value in zip(annotation_names, text_values)
                ]
            ),
        )
        # second neoantigen: several positions
        second = Neoantigen(
            mutation=Mutation(wild_type_xmer="AAAGAAA", mutated_xmer="AAAZAAA", position=[1, 2, 3]),
            neofox_annotations=NeoantigenAnnotations(
                annotations=[
                    Annotation(name=name, value=value)
                    for name, value in zip(annotation_names, digit_values)
                ]
            ),
        )

        df = ModelConverter.annotations2table(neoantigens=[first, second])

        row_count = df.shape[0]
        column_count = df.shape[1]
        self.assertEqual(row_count, 2)
        self.assertEqual(column_count, 13)

        # no row may still hold a Python list in the position column
        holds_list = df["mutation.position"].transform(lambda x: isinstance(x, list))
        rows_with_list = df[holds_list]
        self.assertEqual(0, rows_with_list.shape[0])
Exemplo n.º 4
0
    def test_neoantigen_in_proteome(self):
        """
        Runs NeoFox on one neoantigen that only provides a mutated sequence and
        checks that a result is returned.
        """
        mhc1_names = [
            "HLA-A*24:106", "HLA-A*02:200", "HLA-B*08:33", "HLA-B*40:94",
            "HLA-C*02:20", "HLA-C*07:86"
        ]
        mhc2_names = [
            "HLA-DRB1*07:14", "HLA-DRB1*04:18", "HLA-DPA1*01:05",
            "HLA-DPA1*03:01", "HLA-DPB1*17:01", "HLA-DPB1*112:01",
            "HLA-DQA1*01:06", "HLA-DQA1*01:09", "HLA-DQB1*03:08",
            "HLA-DQB1*06:01"
        ]
        patient_identifier = "12345"

        mutation = Mutation(mutated_xmer="PKLLENLLSKGETISFLECF")
        neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)
        patient = PatientFactory.build_patient(
            identifier=patient_identifier,
            mhc_alleles=mhc1_names,
            mhc2_alleles=mhc2_names,
            mhc_database=self.references.get_mhc_database())

        neofox_runner = NeoFox(
            neoantigens=[neoantigen],
            patients=[patient],
            num_cpus=1,
        )
        annotations = neofox_runner.get_annotations()

        # it does not crash even though there are no best 9mers
        self.assertIsNotNone(annotations)
Exemplo n.º 5
0
    def test_neoantigen_without_9mer_netmhcpan_results(self):
        """
        Runs NeoFox on one neoantigen with wild type and mutated sequences and
        checks that a result is returned.
        """
        mhc1_names = [
            "HLA-A*24:106", "HLA-A*02:200", "HLA-B*08:33", "HLA-B*40:94",
            "HLA-C*02:20", "HLA-C*07:86"
        ]
        mhc2_names = [
            "HLA-DRB1*07:14", "HLA-DRB1*04:18", "HLA-DPA1*01:05",
            "HLA-DPA1*03:01", "HLA-DPB1*17:01", "HLA-DPB1*112:01",
            "HLA-DQA1*01:06", "HLA-DQA1*01:09", "HLA-DQB1*03:08",
            "HLA-DQB1*06:01"
        ]
        patient_identifier = "12345"

        mutation = Mutation(
            wild_type_xmer="HLAQHQRVHTGEKPYKCNECGKTFRQT",
            mutated_xmer="HLAQHQRVHTGEKAYKCNECGKTFRQT")
        neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)
        patient = PatientFactory.build_patient(
            identifier=patient_identifier,
            mhc_alleles=mhc1_names,
            mhc2_alleles=mhc2_names,
            mhc_database=self.references.get_mhc_database())

        neofox_runner = NeoFox(
            neoantigens=[neoantigen],
            patients=[patient],
            num_cpus=1,
        )
        annotations = neofox_runner.get_annotations()

        # it does not crash even though there are no best 9mers
        self.assertIsNotNone(annotations)
Exemplo n.º 6
0
    def test_good_data_does_not_raise_exceptions(self):
        """Validates a neoantigen and a patient with correctly typed fields; passes if nothing is raised."""
        # a string identifier and a float expression
        ModelValidator.validate(
            Neoantigen(patient_identifier="1234", rna_expression=0.45))

        # a string identifier and a boolean flag
        ModelValidator.validate(
            Patient(identifier="1234", is_rna_available=True))
Exemplo n.º 7
0
 def annotate_neoantigen(neoantigen: Neoantigen,
                         patient: Patient,
                         reference_folder: ReferenceFolder,
                         configuration: DependenciesConfiguration,
                         tcell_predictor: TcellPrediction,
                         self_similarity: SelfSimilarityCalculator,
                         log_file_name: str,
                         affinity_threshold=AFFINITY_THRESHOLD_DEFAULT):
     """
     Annotates a single neoantigen for the given patient and logs the elapsed time.

     :param neoantigen: the neoantigen to annotate
     :param patient: the patient passed to the annotator together with the neoantigen
     :param reference_folder: passed to NeoantigenAnnotator
     :param configuration: passed to NeoantigenAnnotator
     :param tcell_predictor: passed to NeoantigenAnnotator
     :param self_similarity: passed to NeoantigenAnnotator
     :param log_file_name: used to initialise the logs before anything else is done
     :param affinity_threshold: passed to NeoantigenAnnotator
     :return: whatever NeoantigenAnnotator.get_annotation returns
     :raises Exception: any error from building or running the annotator is re-raised
         after logging the neoantigen and the patient
     """
     # the logs need to be initialised inside every dask job
     NeoFox._initialise_logs(log_file_name)
     logger.info(
         "Starting neoantigen annotation with peptide={}".format(neoantigen.mutation.mutated_xmer))
     started_at = time.time()
     try:
         annotator = NeoantigenAnnotator(
             reference_folder,
             configuration,
             tcell_predictor=tcell_predictor,
             self_similarity=self_similarity,
             affinity_threshold=affinity_threshold)
         annotated_neoantigen = annotator.get_annotation(neoantigen, patient)
     except Exception as e:
         # log both inputs so that the failing case can be identified from the logs
         failed_neoantigen = neoantigen.to_dict()
         logger.error("Error processing neoantigen {}".format(failed_neoantigen))
         failed_patient = patient.to_dict()
         logger.error("Error processing patient {}".format(failed_patient))
         raise e
     finished_at = time.time()
     elapsed_message = "Elapsed time for annotating neoantigen for peptide={}: {} seconds"
     logger.info(
         elapsed_message.format(
             neoantigen.mutation.mutated_xmer, int(finished_at - started_at)))
     return annotated_neoantigen
 def test_model2dict(self):
     """Converts five random neoantigens to dictionaries and back, expecting equal lists."""
     expected_count = 5
     originals = []
     for _ in range(expected_count):
         originals.append(get_random_neoantigen())

     as_dicts = [original.to_dict() for original in originals]
     self.assertIsInstance(as_dicts, list)
     self.assertEqual(5, len(as_dicts))

     # rebuild every model from its dictionary
     rebuilt = []
     for data in as_dicts:
         rebuilt.append(Neoantigen().from_dict(data))
     self._assert_lists_equal(originals, rebuilt)
Exemplo n.º 9
0
 def _get_test_neoantigen(self):
     """Returns a fixed neoantigen with gene, mutation, patient identifier and RNA expression set."""
     # the two sequences differ in a single residue (L in the wild type, I in the mutated one)
     mutation = Mutation(
         mutated_xmer="AAAAAAAIAAAAAAAA",
         wild_type_xmer="AAAAAAALAAAAAAAA",
     )
     test_neoantigen = Neoantigen(
         gene="GENE",
         mutation=mutation,
         patient_identifier="12345",
         rna_expression=0.12345,
     )
     return test_neoantigen
Exemplo n.º 10
0
 def parse_neoantigens_json_file(
         neoantigens_json_file: str) -> List[Neoantigen]:
     """
     Parses a JSON file holding a list of neoantigens.

     :param neoantigens_json_file: the path to the neoantigens data JSON file
     :return: the parsed JSON into model objects, one per entry in the file
     """
     # the context manager closes the file handle even when reading or parsing fails
     with open(neoantigens_json_file) as json_handle:
         neoantigens_data = json.load(json_handle)
     return [Neoantigen().from_dict(n) for n in neoantigens_data]
Exemplo n.º 11
0
    def validate_neoantigen(neoantigen: Neoantigen):
        """
        Validates a neoantigen: format consistency first, then the patient identifier,
        the mutation and the expression values.

        :param neoantigen: the neoantigen to validate
        :raises NeofoxDataValidationException: when any of the checks inside the try
            block raises an AssertionError; the neoantigen is logged as JSON beforehand.
            Whatever ModelValidator.validate raises is propagated unchanged.
        """
        # checks format consistency first
        ModelValidator.validate(neoantigen)

        try:
            # raised explicitly instead of through an assert statement so that the
            # check is still enforced when Python runs with -O
            if neoantigen.patient_identifier is None or len(neoantigen.patient_identifier) == 0:
                raise AssertionError(
                    "A patient identifier is missing. Please provide patientIdentifier in the input file")

            # checks mutation
            ModelValidator._validate_mutation(neoantigen.mutation)

            # check the expression values
            ModelValidator._validate_expression_values(neoantigen)
        except AssertionError as e:
            logger.error(neoantigen.to_json(indent=3))
            # keep the failed assertion as the explicit cause of the validation error
            raise NeofoxDataValidationException(e) from e
Exemplo n.º 12
0
    def neoantigen(self, patient_identifier=None, wildtype=True) -> Neoantigen:
        """
        Generates random neoantigens until one passes ModelValidator.validate_neoantigen.

        :param patient_identifier: the identifier to use; a unique uuid4 is generated when None
        :param wildtype: passed to self.mutation; the gene is only set when it is truthy
        :return: the first generated neoantigen that passes the validation
        """
        # NOTE: loops for as long as the validation keeps failing
        while True:
            try:
                candidate = Neoantigen(
                    patient_identifier=(
                        patient_identifier
                        if patient_identifier is not None
                        else self.generator.unique.uuid4()
                    ),
                    # no gene if no wildtype provided
                    gene="BRCA2" if wildtype else None,
                    mutation=self.mutation(wildtype=wildtype),
                    rna_expression=float(self.random_number(digits=4, fix_len=True)) / 100,
                    dna_variant_allele_frequency=float(self.random_number(digits=3, fix_len=True)) / 1000,
                    rna_variant_allele_frequency=float(self.random_number(digits=3, fix_len=True)) / 1000,
                )
                ModelValidator.validate_neoantigen(candidate)
            except NeofoxDataValidationException:
                # this random draw is not valid, try with a new one
                continue
            return candidate
Exemplo n.º 13
0
    def test_neoantigen_no_wt_failing(self):
        """
        Runs NeoFox on one neoantigen without a wild type sequence, for a patient with
        only MHC I alleles, and checks that a result is returned.
        """
        patient_identifier = "12345"
        mutation = Mutation(mutated_xmer="SPSFPLEPDDEVFTAIAKAMEEMVEDS")
        neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

        mhc1_names = [
            "HLA-A*02:24", "HLA-A*36:04", "HLA-B*58:25",
            "HLA-B*35:102", "HLA-C*02:30", "HLA-C*07:139"
        ]
        mhc1 = MhcFactory.build_mhc1_alleles(
            mhc1_names, mhc_database=self.references.get_mhc_database())
        patient = Patient(identifier=patient_identifier, mhc1=mhc1)

        neofox_runner = NeoFox(
            neoantigens=[neoantigen],
            patients=[patient],
            num_cpus=1,
        )
        annotations = neofox_runner.get_annotations()

        # it does not crash even though there are no best 9mers
        self.assertIsNotNone(annotations)
Exemplo n.º 14
0
    def test_neoantigen_failing(self):
        """
        Runs NeoFox on one neoantigen with wild type and mutated sequences, for a patient
        with only MHC I alleles, and checks that a result is returned.
        """
        patient_identifier = "12345"
        mutation = Mutation(
            wild_type_xmer="ARPDMFCLFHGKRYFPGESWHPYLEPQ",
            mutated_xmer="ARPDMFCLFHGKRHFPGESWHPYLEPQ")
        neoantigen = Neoantigen(mutation=mutation, patient_identifier=patient_identifier)

        mhc1_names = [
            "HLA-A*03:01", "HLA-A*29:02", "HLA-B*07:02", "HLA-B*44:03",
            "HLA-C*07:02", "HLA-C*16:01"
        ]
        mhc1 = MhcFactory.build_mhc1_alleles(
            mhc1_names, mhc_database=self.references.get_mhc_database())
        patient = Patient(identifier=patient_identifier, mhc1=mhc1)

        neofox_runner = NeoFox(
            neoantigens=[neoantigen],
            patients=[patient],
            num_cpus=1,
        )
        annotations = neofox_runner.get_annotations()

        # it does not crash even though there are no best 9mers
        self.assertIsNotNone(annotations)
Exemplo n.º 15
0
    def get_annotation(self, neoantigen: Neoantigen, patient: Patient) -> Neoantigen:
        """
        Calculates the neoantigen features and stores them in the annotations of the
        neoantigen.

        The annotations of the given neoantigen are replaced by a fresh NeoantigenAnnotations
        object to which every annotator appends its results. Annotators that depend on the
        NetMHCpan or NetMHCIIpan results are either skipped or receive None when the
        corresponding result is not available.

        :param neoantigen: the neoantigen to annotate; it is modified in place
        :param patient: the patient of the neoantigen; its RNA availability decides which
            VAF is used and its identifier is passed to the neoag calculator
        :return: the same neoantigen object received, with neofox_annotations filled in
        """
        neoantigen.neofox_annotations = NeoantigenAnnotations(
            annotator="NeoFox",
            annotator_version=neofox.VERSION,
            timestamp="{:%Y%m%d%H%M%S%f}".format(datetime.now()),
            resources=self.resources_versions,
            annotations=[]
        )

        # Runs netmhcpan, netmhc2pan, mixmhcpred and mixmhc2prd in parallel
        (
            mixmhc2pred_annotations,
            mixmhcpred_annotations,
            netmhc2pan,
            netmhcpan,
            prime_annotations
        ) = self._compute_long_running_tasks(neoantigen, patient)

        # HLA I predictions: NetMHCpan
        if netmhcpan:
            neoantigen.neofox_annotations.annotations.extend(netmhcpan.get_annotations(mutation=neoantigen.mutation))

        # HLA II predictions: NetMHCIIpan
        if netmhc2pan:
            neoantigen.neofox_annotations.annotations.extend(netmhc2pan.get_annotations())

        # MixMHCpred
        if mixmhcpred_annotations is not None:
            neoantigen.neofox_annotations.annotations.extend(mixmhcpred_annotations)

        # PRIME
        if prime_annotations is not None:
            neoantigen.neofox_annotations.annotations.extend(prime_annotations)

        # MixMHC2pred
        if mixmhc2pred_annotations is not None:
            neoantigen.neofox_annotations.annotations.extend(mixmhc2pred_annotations)

        # decides which VAF to use
        vaf_rna = neoantigen.rna_variant_allele_frequency
        if not patient.is_rna_available and neoantigen.dna_variant_allele_frequency is not None:
            logger.warning(
                "Using the DNA VAF to estimate the RNA VAF as the patient does not have RNA available"
            )
            # TODO: overwrite value in the neoantigen object
            vaf_rna = neoantigen.dna_variant_allele_frequency

        # MHC binding independent features
        start = time.time()
        # kept in a variable as its expression value is used again by vaxrank below
        expression_calculator = Expression(
            transcript_expression=neoantigen.rna_expression, vaf_rna=vaf_rna
        )
        neoantigen.neofox_annotations.annotations.extend(expression_calculator.get_annotations())
        end = time.time()
        logger.info(
            "Expression annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

        start = time.time()
        # kept in a variable as it is used again by the priority score below
        sequence_not_in_uniprot = self.uniprot.is_sequence_not_in_uniprot(
            neoantigen.mutation.mutated_xmer
        )
        neoantigen.neofox_annotations.annotations.extend(
            self.uniprot.get_annotations(sequence_not_in_uniprot)
        )
        end = time.time()
        logger.info(
            "Uniprot annotation elapsed time {} seconds".format(round(end - start, 3))
        )

        # Amplitude
        # the amplitude calculator is run here and read again by the neoantigen fitness
        # and differential binding annotations below, hence it needs to stay before them
        start = time.time()
        self.amplitude.run(netmhcpan=netmhcpan, netmhc2pan=netmhc2pan)
        neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations())
        neoantigen.neofox_annotations.annotations.extend(self.amplitude.get_annotations_mhc2())
        end = time.time()
        logger.info(
            "Amplitude annotation elapsed time {} seconds".format(round(end - start, 3))
        )

        # Neoantigen fitness
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.neoantigen_fitness_calculator.get_annotations(
                mutated_peptide_mhci=netmhcpan.best_ninemer_epitope_by_affinity if netmhcpan else None,
                mutation_in_anchor=netmhcpan.mutation_in_anchor_9mer if netmhcpan else None,
                amplitude=self.amplitude.amplitude_mhci_affinity_9mer,
                mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None
            )
        )
        end = time.time()
        logger.info(
            "Neoantigen annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

        # Differential Binding
        start = time.time()
        if netmhcpan:
            # NOTE(review): the keyword is named wt_peptide_mhcii although it receives the
            # MHC I wild type epitope; the name belongs to the callee and is kept as is
            neoantigen.neofox_annotations.annotations.extend(
                self.differential_binding.get_annotations_dai(
                    mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                    wt_peptide_mhcii=netmhcpan.best_wt_epitope_by_affinity
                )
            )
            neoantigen.neofox_annotations.annotations.extend(
                self.differential_binding.get_annotations(
                    mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                    amplitude=self.amplitude
                )
            )
        if netmhc2pan:
            neoantigen.neofox_annotations.annotations.extend(
                self.differential_binding.get_annotations_mhc2(
                    mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_rank,
                    amplitude=self.amplitude
                )
            )
        end = time.time()
        logger.info(
            "Differential binding annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

        # T cell predictor
        if netmhcpan:
            start = time.time()
            neoantigen.neofox_annotations.annotations.extend(
                self.tcell_predictor.get_annotations(
                    neoantigen=neoantigen, netmhcpan=netmhcpan
                )
            )
            end = time.time()
            logger.info(
                "T-cell predictor annotation elapsed time {} seconds".format(
                    round(end - start, 3)
                )
            )

        # self-similarity
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.self_similarity.get_annnotations(
                mutated_peptide_mhci=netmhcpan.best_epitope_by_rank if netmhcpan else None,
                wt_peptide_mhci=netmhcpan.best_wt_epitope_by_rank if netmhcpan else None,
                mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None,
                wt_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity_wt if netmhc2pan else None,
            )
        )
        end = time.time()
        logger.info(
            "Self similarity annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

        # number of mismatches and priority score
        # only the NetMHCpan results are passed to the calculator, so a single check on
        # them is enough (the condition used to test netmhcpan twice)
        if netmhcpan:
            start = time.time()
            neoantigen.neofox_annotations.annotations.extend(
                self.priority_score_calculator.get_annotations(
                    netmhcpan=netmhcpan,
                    vaf_transcr=vaf_rna,
                    vaf_tum=neoantigen.dna_variant_allele_frequency,
                    expr=neoantigen.rna_expression,
                    mut_not_in_prot=sequence_not_in_uniprot,
                )
            )
            end = time.time()
            logger.info(
                "Priotity score annotation elapsed time {} seconds".format(
                    round(end - start, 3)
                )
            )

        # neoag immunogenicity model
        if netmhcpan and netmhcpan.best_epitope_by_affinity:
            start = time.time()
            peptide_variant_position = EpitopeHelper.position_of_mutation_epitope(
                wild_type=netmhcpan.best_wt_epitope_by_affinity.peptide,
                mutation=netmhcpan.best_epitope_by_affinity.peptide,
            )
            # this calculator returns a single annotation, hence append instead of extend
            neoantigen.neofox_annotations.annotations.append(
                self.neoag_calculator.get_annotation(
                    sample_id=patient.identifier,
                    mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity,
                    wt_peptide_mhci=netmhcpan.best_wt_epitope_by_affinity,
                    peptide_variant_position=peptide_variant_position,
                    mutation=neoantigen.mutation)
            )
            end = time.time()
            logger.info(
                "Neoag annotation elapsed time {} seconds".format(round(end - start, 3))
            )

        # IEDB immunogenicity
        if self.organism == ORGANISM_HOMO_SAPIENS:
            start = time.time()
            neoantigen.neofox_annotations.annotations.extend(
                self.iedb_immunogenicity.get_annotations(
                    mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
                    mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None
                )
            )
            end = time.time()
            logger.info(
                "IEDB annotation elapsed time {} seconds".format(round(end - start, 3))
            )

        # dissimilarity to self-proteome
        start = time.time()
        neoantigen.neofox_annotations.annotations.extend(
            self.dissimilarity_calculator.get_annotations(
                mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
                mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None)
        )
        end = time.time()
        logger.info(
            "Dissimilarity annotation elapsed time {} seconds".format(
                round(end - start, 3)
            )
        )

        # vaxrank
        if netmhcpan and netmhcpan.epitope_affinities:
            start = time.time()
            vaxrankscore = vaxrank.VaxRank()
            vaxrankscore.run(
                mutation_scores=netmhcpan.epitope_affinities,
                expression_score=expression_calculator.expression,
            )
            neoantigen.neofox_annotations.annotations.extend(vaxrankscore.get_annotations())
            end = time.time()
            logger.info(
                "Vaxrank annotation elapsed time {} seconds".format(round(end - start, 3))
            )

        # hex
        # TODO: hex is failing for mouse with the current IEDB fasta with only 2 entries
        if self.organism == ORGANISM_HOMO_SAPIENS:
            start = time.time()
            neoantigen.neofox_annotations.annotations.extend(
                self.hex.get_annotation(
                    mutated_peptide_mhci=netmhcpan.best_epitope_by_affinity if netmhcpan else None,
                    mutated_peptide_mhcii=netmhc2pan.best_predicted_epitope_affinity if netmhc2pan else None)
            )
            end = time.time()
            logger.info(
                "Hex annotation elapsed time {} seconds".format(round(end - start, 3))
            )

        return neoantigen