Пример #1
0
    def sample_posterior(self):
        """ sample the posterior distribution of the mortality probability """

        # find values of mortality probability at which the posterior should be evaluated
        self._mortalitySamples = np.random.uniform(low=CalibSets.POST_L,
                                                   high=CalibSets.POST_U,
                                                   size=CalibSets.POST_N)

        # create a multi cohort
        multiCohort = SurvivalCls.MultiCohort(
            ids=self._cohortIDs,
            mortality_probs=self._mortalitySamples,
            pop_sizes=[CalibSets.SIM_POP_SIZE] * CalibSets.POST_N)

        # simulate the multi cohort
        multiCohort.simulate(CalibSets.TIME_STEPS)

        # calculate the likelihood of each simulated cohort
        for cohort_id in self._cohortIDs:

            # get the average survival time for this cohort
            mean = multiCohort.get_cohort_mean_survival(cohort_id)
            #survivaltimes = multiCohort.get_survival_times(cohort_id)
            fiveyearlist = multiCohort.get_five_year_survival(cohort_id)
            fiveyearrate = sum(fiveyearlist) / CalibSets.OBS_N

            # construct a gaussian likelihood
            # with mean calculated from the simulated data and standard deviation from the clinical study.
            # evaluate this pdf (probability density function) at the mean reported in the clinical study.
            weight = stat.binom.pmf(k=CalibSets.OBS_FIVE_YEAR_RATE,
                                    n=CalibSets.OBS_N,
                                    p=fiveyearrate)

            # store the weight
            self._weights.append(weight)

        # normalize the likelihood weights
        sum_weights = np.sum(self._weights)
        self._normalizedWeights = np.divide(self._weights, sum_weights)

        # re-sample mortality probability (with replacement) according to likelihood weights
        self._mortalityResamples = np.random.choice(
            a=self._mortalitySamples,
            size=CalibSets.NUM_SIM_COHORTS,
            replace=True,
            p=self._normalizedWeights)

        # produce the list to report the results
        for i in range(0, len(self._mortalitySamples)):
            self._csvRows.append([
                self._cohortIDs[i], self._normalizedWeights[i],
                self._mortalitySamples[i]
            ])

        # write the calibration result into a csv file
        InOutSupport.write_csv('CalibrationResults.csv', self._csvRows)
Пример #2
0
    def simulate(self,
                 num_of_simulated_cohorts,
                 cohort_size,
                 time_steps,
                 cohort_ids=None):
        """ simulate the specified number of cohorts based on their associated likelihood weight
        :param num_of_simulated_cohorts: number of cohorts to simulate
        :param cohort_size: the population size of cohorts
        :param time_steps: simulation length
        :param cohort_ids: ids of cohort to simulate
        """
        # resample cohort IDs and mortality probabilities based on their likelihood weights
        # sample (with replacement) from indices [0, 1, 2, ..., number of weights] based on the likelihood weights
        sampled_row_indices = np.random.choice(a=range(0, len(self._weights)),
                                               size=num_of_simulated_cohorts,
                                               replace=True,
                                               p=self._weights)

        # use the sampled indices to populate the list of cohort IDs and mortality probabilities
        resampled_ids = []
        resampled_probs = []
        for i in sampled_row_indices:
            resampled_ids.append(self._cohortIDs[i])
            resampled_probs.append(self._mortalityProbs[i])

        # simulate the desired number of cohorts
        if cohort_ids is None:
            # if cohort ids are not provided, use the ids stored in the calibration results
            self._multiCohorts = SurvivalCls.MultiCohort(
                ids=resampled_ids,
                pop_sizes=[cohort_size] * num_of_simulated_cohorts,
                mortality_probs=resampled_probs)
        else:
            # if cohort ids are provided, use them instead of the ids stored in the calibration results
            self._multiCohorts = SurvivalCls.MultiCohort(
                ids=cohort_ids,
                pop_sizes=[cohort_size] * num_of_simulated_cohorts,
                mortality_probs=resampled_probs)

        # simulate all cohorts
        self._multiCohorts.simulate(time_steps)