# Python example: Distribution.get_probabilities

class TestStatisticDistribution:
    """Exact distribution of a two-sample test statistic over all meshings.

    A "meshing" is one way of attributing ``sample_size_y`` positions of the
    shared combined sample to sample Y (all other positions go to sample X).
    For each meshing the statistic is::

        (m * n / gcd(m, n)) * max_i |x_cdf[i] - y_cdf[i]|

    where ``m`` and ``n`` are the X and Y sample sizes and the two CDFs are
    the counts reported by ``CountAlgorithm`` divided by the respective
    sample size.  The approximate variant multiplies that value by
    ``gcd(m, n) / sqrt(m * n * (m + n))``, i.e. it yields
    ``sqrt(m * n / (m + n)) * max_i |x_cdf[i] - y_cdf[i]|``.

    NOTE(review): this looks like a two-sample Kolmogorov-Smirnov permutation
    test; confirm against ``CountAlgorithm`` and ``Distribution``, which are
    defined elsewhere.
    """

    # "Test" here means a statistical test; keep pytest from trying to
    # collect this class as a test case.
    __test__ = False

    def __init__(self, samples_x_y: "SamplesXY",
                 shared_combined_sample: "CombinedSample"):
        """Derive the sample sizes and scaling factors and reset all state.

        Args:
            samples_x_y: Only ``sample_size_y`` is read from it.
            shared_combined_sample: Combined sample whose attributions are
                rewritten while the distribution is enumerated.  Its
                ``combined_sample_size`` is read here.
        """
        self.shared_combined_sample = shared_combined_sample
        self.shared_combined_sample_size = self.shared_combined_sample.combined_sample_size
        self.sample_size_y = samples_x_y.sample_size_y
        # Everything in the combined sample that is not Y counts as X.
        self.sample_size_x = self.shared_combined_sample_size - self.sample_size_y
        self.sample_size_x_y_gcd = gcd(self.sample_size_x, self.sample_size_y)
        # The gcd divides the product, so integer division is exact; keeping
        # the factor an int avoids float rounding in the statistic.
        self.test_statistic_factor = (
            self.sample_size_x * self.sample_size_y // self.sample_size_x_y_gcd)
        # Rescales the integer statistic for the large-sample approximation.
        self.test_statistic_factor_approximate = self.sample_size_x_y_gcd / sqrt(
            self.sample_size_x * self.sample_size_y * self.shared_combined_sample_size)
        self.algorithm = CountAlgorithm()
        # Counts produced by the most recent algorithm run.
        self.combined_sample_counts = []
        self.x_meshing_cdf = []
        self.y_meshing_cdf = []
        self.max_cumulative_difference = 0
        self.meshing_test_statistic = 0
        # Iterator over index tuples, one tuple of Y positions per meshing.
        self.y_item_positions = []
        self.number_of_y_item_positions = 0
        self.combined_sample_attributions_cache = []

        self.distribution = Distribution()

    def make_y_item_indices(self):
        """Prepare the iterator over all meshings and count them.

        Stores a fresh ``itertools.combinations`` iterator in
        ``y_item_positions`` and the number of combinations in
        ``number_of_y_item_positions`` (used for progress output).
        """
        print("Calculating meshings... ", end=" ", flush=True)
        item_indices = range(self.shared_combined_sample_size)
        # Counting consumes an iterator, so count on a throw-away one.
        self.number_of_y_item_positions = sum(
            1 for _ in combinations(item_indices, self.sample_size_y))
        self.y_item_positions = combinations(item_indices, self.sample_size_y)

    def get_y_item_positions(self):
        """Return the iterator of Y index tuples built by make_y_item_indices."""
        return self.y_item_positions

    def show_progress(self, step):
        """Rewrite the progress line on every 1000th step."""
        if step % 1000 == 0:
            # No newline is written, so flush explicitly or a line-buffered
            # terminal would not show the update.
            print("\rCalculating meshings... {}/{}".format(
                step, self.number_of_y_item_positions),
                  end=" ",
                  flush=True)

    @staticmethod
    def finish_progress():
        """Overwrite the progress line with the completion message."""
        print("\rCalculating meshings... Done")

    def update_combined_sample_attributions(self, y_item_indices):
        """Attribute the given positions to sample Y and all others to X.

        Args:
            y_item_indices: Iterable of positions in the combined sample.
        """
        next_attributions = (
            [Source.FORM_SAMPLE_X] * self.shared_combined_sample_size)
        for index in y_item_indices:
            next_attributions[index] = Source.FORM_SAMPLE_Y
        self.shared_combined_sample.set_attributions(next_attributions)

    def make_cdf_from_counts_x(self, count):
        """Return ``count / sample_size_x`` as an exact Fraction."""
        return Fraction(count, self.sample_size_x)

    def make_cdf_from_counts_y(self, count):
        """Return ``count / sample_size_y`` as an exact Fraction."""
        return Fraction(count, self.sample_size_y)

    def save_counts_as_x_meshing_cdf(self):
        """Store the current counts, divided by the X sample size."""
        # Materialize now: a lazy map would be single-use and would observe
        # any later in-place change of the counts list.
        self.x_meshing_cdf = [
            self.make_cdf_from_counts_x(count)
            for count in self.combined_sample_counts
        ]

    def save_counts_as_y_meshing_cdf(self):
        """Store the current counts, divided by the Y sample size."""
        self.y_meshing_cdf = [
            self.make_cdf_from_counts_y(count)
            for count in self.combined_sample_counts
        ]

    def find_max_cumulative_difference(self):
        """Store the largest absolute pointwise gap between the two CDFs.

        Raises:
            ValueError: If either CDF is empty.
        """
        self.max_cumulative_difference = max(
            abs(probability_x - probability_y)
            for probability_x, probability_y in zip(self.x_meshing_cdf,
                                                    self.y_meshing_cdf))

    def calculate_test_statistic(self):
        """Scale the maximum CDF gap into the integer test statistic."""
        # Exact rational arithmetic: a float product such as
        # 49.0 * (1/49) == 0.9999999999999999 would be truncated to 0.
        self.meshing_test_statistic = int(
            Fraction(self.test_statistic_factor) *
            self.max_cumulative_difference)

    def approximate_test_statistic(self):
        """Rescale the stored statistic for the large-sample approximation."""
        self.meshing_test_statistic = self.test_statistic_factor_approximate * self.meshing_test_statistic

    def append_test_statistic_to_distribution(self):
        """Record the current statistic in the distribution."""
        self.distribution.add_test_statistic(self.meshing_test_statistic)

    def calculate_meshing_test_statistic(self):
        """Compute the statistic for the current attributions.

        The order matters: each count run overwrites
        ``combined_sample_counts``, so every CDF is saved right after its
        own run.
        """
        self.count_x_in_combined_sample()
        self.save_counts_as_x_meshing_cdf()
        self.count_y_in_combined_sample()
        self.save_counts_as_y_meshing_cdf()
        self.find_max_cumulative_difference()
        self.calculate_test_statistic()

    def append_meshing_to_distribution(self):
        """Compute the current meshing's statistic and record it."""
        self.calculate_meshing_test_statistic()
        self.append_test_statistic_to_distribution()

    def count_x_in_combined_sample(self):
        """Run the count algorithm configured for sample X."""
        self.algorithm.configure_count_x()
        self.run_algorithm()

    def count_y_in_combined_sample(self):
        """Run the count algorithm configured for sample Y."""
        self.algorithm.configure_count_y()
        self.run_algorithm()

    def run_algorithm(self):
        """Run the algorithm on the combined sample and keep its counts."""
        self.shared_combined_sample.run(self.algorithm)
        self.combined_sample_counts = (
            self.algorithm.get_combined_sample_counts())

    def make_probabilities(self):
        """Ask the distribution to compute its probabilities."""
        self.distribution.calculate_probabilities()

    def make_test_statistic_distribution(self):
        """Enumerate every meshing and build the statistic's distribution.

        Overwrites the attributions of the shared combined sample; callers
        that need the original attributions must cache and restore them.
        """
        self.make_y_item_indices()
        for step, y_item_indices in enumerate(self.get_y_item_positions()):
            self.show_progress(step)
            self.update_combined_sample_attributions(y_item_indices)
            self.append_meshing_to_distribution()
        self.finish_progress()
        self.make_probabilities()

    def make_test_statistic(self):
        """Compute the statistic for the current attributions."""
        self.calculate_meshing_test_statistic()

    def make_test_statistic_approximate(self):
        """Compute the statistic and rescale it for the approximation."""
        self.calculate_meshing_test_statistic()
        self.approximate_test_statistic()

    def cache_attributions(self):
        """Save a copy of the combined sample's current attributions."""
        self.combined_sample_attributions_cache = (
            self.shared_combined_sample.get_attributions_copy())

    def restore_attributions(self):
        """Write the cached attributions back to the combined sample."""
        self.shared_combined_sample.set_attributions(
            self.combined_sample_attributions_cache)

    def get_test_statistic(self):
        """Return the most recently computed test statistic."""
        return self.meshing_test_statistic

    def get_probabilities(self):
        """Return the probabilities held by the distribution."""
        return self.distribution.get_probabilities()

    def get_calculate_p_values_for(self, observed_test_statistic):
        """Return the total probability of statistics >= the observed one.

        Args:
            observed_test_statistic: Value to compare each statistic against.

        Returns:
            The sum of the probabilities of all statistics that are at least
            ``observed_test_statistic`` (0 if there are none).
        """
        return sum(
            probability
            for test_statistic, probability in self.get_probabilities().items()
            if test_statistic >= observed_test_statistic)

    def perform_test(self):
        """Run the exact test and print the observed statistic and p-value."""
        # Statistic of the combined sample with its original attributions.
        self.make_test_statistic()
        observed_value_of_test_statistic = self.get_test_statistic()
        self.cache_attributions()

        # Building the distribution rewrites the shared sample's
        # attributions; restore them even if the enumeration fails.
        try:
            self.make_test_statistic_distribution()
            p_value = self.get_calculate_p_values_for(
                observed_value_of_test_statistic)
        finally:
            self.restore_attributions()

        print("Observed value of test statistic is {}".format(
            observed_value_of_test_statistic))
        print("Exact p-value is {}\n".format(p_value))

    def perform_test_approximate(self):
        """Print the rescaled statistic next to the fixed critical value."""
        self.make_test_statistic_approximate()
        observed_value_of_test_statistic = self.get_test_statistic()

        print("Observed approximate value of test statistic is {}".format(
            observed_value_of_test_statistic))
        print("Critical value for 0.05-rejection region is 1.358\n")