def test_affinity_threshold(self):
     """Check that no score is returned with ``affinity_threshold=1`` and ``score=5``."""
     predictor = TcellPrediction(affinity_threshold=1)
     outcome = predictor._calculate_tcell_predictor_score(
         gene="BRCA2", substitution="CCCCVCCCC", epitope="CCCCCCCCC", score=5
     )
     # NOTE(review): presumably None because the score (5) is above the
     # threshold (1) -- confirm against TcellPrediction.
     self.assertIsNone(outcome)
 def test_existing_gene(self):
     """Check the predictor score for BRCA2 with ``affinity_threshold=10``."""
     predictor = TcellPrediction(affinity_threshold=10)
     prediction = predictor._calculate_tcell_predictor_score(
         gene="BRCA2", substitution="CCCCVCCCC", epitope="CCCCCCCCC", score=5
     )
     # Reference value the test pins for this exact input.
     expected_score = 0.2453409331088489
     self.assertAlmostEqual(expected_score, float(prediction))
 def test_non_existing_gene(self):
     """Check that no score is returned for the gene name ``"BLAH"``.

     The test name treats ``"BLAH"`` as a gene that does not exist for the
     predictor; the expected outcome is ``None``.
     """
     result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score(
         gene="BLAH",
         substitution="blaaaah",
         epitope="BLAHBLAH",
         score=5
     )
     # assertIsNone checks identity rather than ``==`` and matches the
     # assertion style already used in test_affinity_threshold.
     self.assertIsNone(result)
 def test_existing_gene_with_too_long_epitope(self):
     """Check that no score is returned for BRCA2 with a 10-residue epitope.

     The test name flags the 10-character epitope as too long; the expected
     outcome is ``None``.
     """
     result = TcellPrediction(affinity_threshold=10)._calculate_tcell_predictor_score(
         gene="BRCA2", substitution="C", epitope="CCCCCCCCCC", score=5
     )
     # assertIsNone checks identity rather than ``==`` and matches the
     # assertion style already used in test_affinity_threshold.
     self.assertIsNone(result)
 def setUp(self) -> None:
     """Create a ``TcellPrediction`` with default arguments for each test."""
     default_predictor = TcellPrediction()
     self.tcell_predictor = default_predictor
Пример #6
0
    def __init__(
            self,
            neoantigens: List[Neoantigen],
            patients: List[Patient],
            num_cpus: int = 1,
            patient_id: str = None,
            work_folder=None,
            output_prefix=None,
            reference_folder: ReferenceFolder = None,
            configuration: DependenciesConfiguration = None,
            verbose=True,
            configuration_file=None,
            affinity_threshold=AFFINITY_THRESHOLD_DEFAULT):
        """Set up logging, references and helpers, then validate the input data.

        :param neoantigens: neoantigens to validate and keep; a neoantigen
            without a patient identifier receives ``patient_id`` and every
            mutation position is recomputed
        :param patients: patients to validate; stored in ``self.patients``
            keyed by their identifier
        :param num_cpus: stored in ``self.num_cpus``
        :param patient_id: identifier assigned to neoantigens whose
            ``patient_identifier`` is None
        :param work_folder: passed, with ``output_prefix``, to
            ``_get_log_file_name``
        :param output_prefix: passed, with ``work_folder``, to
            ``_get_log_file_name``
        :param reference_folder: used as is when truthy, otherwise a
            ``ReferenceFolder`` is created
        :param configuration: used as is when truthy, otherwise a
            ``DependenciesConfiguration`` is created
        :param verbose: forwarded to the log initialisation and to the
            default ``ReferenceFolder``
        :param configuration_file: when truthy, loaded through
            ``dotenv.load_dotenv`` with ``override=True``
        :param affinity_threshold: stored and forwarded to ``TcellPrediction``
        :raises NeofoxConfigurationException: if ``neoantigens`` or
            ``patients`` is None or empty
        """
        self.affinity_threshold = affinity_threshold

        if configuration_file:
            dotenv.load_dotenv(configuration_file, override=True)

        # initialise logs
        self.log_file_name = self._get_log_file_name(output_prefix, work_folder)
        self._initialise_logs(self.log_file_name, verbose)

        # initialise the references folder and the configuration
        # NOTE: a reference folder or configuration passed as a parameter takes precedence, which makes this
        # testable with fake objects
        self.reference_folder = reference_folder or ReferenceFolder(verbose=verbose)
        # NOTE: this call forces the loading of the available alleles here
        self.reference_folder.get_available_alleles()
        self.configuration = configuration or DependenciesConfiguration()
        self.tcell_predictor = TcellPrediction(
            affinity_threshold=self.affinity_threshold)
        self.self_similarity = SelfSimilarityCalculator()
        self.num_cpus = num_cpus

        # both inputs are mandatory; neoantigens are checked before patients
        for required_input in (neoantigens, patients):
            if required_input is None or len(required_input) == 0:
                raise NeofoxConfigurationException(
                    "Missing input data to run Neofox")

        # validates neoantigens
        self.neoantigens = neoantigens
        for candidate in self.neoantigens:
            if candidate.patient_identifier is None:
                candidate.patient_identifier = patient_id
            # NOTE: the position of the mutation is not expected from the user; if provided, the value is ignored
            candidate.mutation.position = EpitopeHelper.mut_position_xmer_seq(
                mutation=candidate.mutation)
            ModelValidator.validate_neoantigen(candidate)

        # validates patients and indexes them by identifier
        self.patients = {}
        for candidate_patient in patients:
            ModelValidator.validate_patient(
                candidate_patient, organism=self.reference_folder.organism)
            self.patients[candidate_patient.identifier] = candidate_patient

        self._validate_input_data()

        # work out from the data whether RNA-seq was available and
        # record this information on the patient model
        expression_per_patient = {
            known_patient.identifier: []
            for known_patient in self.patients.values()
        }
        for candidate in self.neoantigens:
            expressions = expression_per_patient[candidate.patient_identifier]
            expressions.append(candidate.rna_expression)

        for known_patient in self.patients.values():
            expressions = expression_per_patient[known_patient.identifier]
            known_patient.is_rna_available = not any(
                value is None for value in expressions)

        # only performs the expression imputation for humans
        if self.reference_folder.organism == ORGANISM_HOMO_SAPIENS:
            # impute expression from TCGA, ONLY if is_rna_available is False for a given patient,
            # otherwise the original value is reported
            # NOTE: this must happen after validation to avoid uncaptured errors due to missing patients
            # NOTE: adds gene expression to the neoantigen candidate model
            self.neoantigens = self._conditional_expression_imputation()

        logger.info("Data loaded")