示例#1
0
    def e_step(self, observation):
        '''Perform the E-step on a single observation: compute the posterior over
        mixture components for this data point and accumulate it into the
        running totals
        self.log_likelihood ,
        self.expected_component_counts , and
        self.expected_observation_counts

        :param observation: The observation
        '''
        # Joint log-probability of (component, observation) for each component:
        # log mixture weight + log geometric probability of the observation.
        joint_log_probs = [
            self.mixture_weights[k] +
            self.component_distributions[k].log_prob(observation)
            for k in range(self.num_components)
        ]

        # Marginal log-probability of the observation (log-sum over components).
        marginal_log_prob = log_add_list(joint_log_probs)
        self.log_likelihood += marginal_log_prob

        for k in range(self.num_components):
            # Posterior responsibility of component k, still in log space.
            log_posterior = joint_log_probs[k] - marginal_log_prob
            self.expected_component_counts[k] = log_add(
                self.expected_component_counts[k], log_posterior)

            # Expected observation count: posterior * x in log space.
            # For x == 0 the contribution is log(0) = -inf, which leaves the
            # accumulator unchanged under log_add.
            if observation == 0:
                self.expected_observation_counts[k] = log_add(
                    self.expected_observation_counts[k], -float("inf"))
            else:
                self.expected_observation_counts[k] = log_add(
                    self.expected_observation_counts[k],
                    log(observation) + log_posterior)
示例#2
0
    def m_step(self):
        '''Perform the M-step. This step updates
        self.mixture_weights
        self.component_distributions
        '''

        # Sanity check: the summed expected component counts should roughly
        # equal the total number of observations processed in the E-step.
        sum_obs_counts = log_add_list(self.expected_component_counts.values())
        print("Sum of expected component counts: {}".format(exp(sum_obs_counts)))

        param_list = list()
        weight_list = list()
        for component in range(self.num_components):
            # New mixture weight = expected component count / total count,
            # kept in LOG space: the E-step adds self.mixture_weights[k]
            # directly to a log-probability, so a log value must be stored.
            # (Previously exp(...) was stored here, i.e. a probability, which
            # is inconsistent with how the E-step consumes the weights.)
            new_weight = self.expected_component_counts[component] - sum_obs_counts
            weight_list.append(new_weight)

            # Denominator of the geometric MLE, log(n + sum(x)): a log-space
            # SUM, computed with log_add (not a product).
            # p_hat = n / (n + sum(x)); see
            # (https://www.projectrhea.org/rhea/index.php/MLE_Examples:_Exponential_and_Geometric_Distributions_Old_Kiwi)
            nSigmaX = log_add(self.expected_component_counts[component],
                              self.expected_observation_counts[component])

            # New geometric parameter, converted back to probability space.
            new_param = exp(self.expected_component_counts[component] - nSigmaX)
            param_list.append(new_param)

        for component in range(self.num_components):
            # Install the updated parameters.
            self.mixture_weights[component] = weight_list[component]
            self.component_distributions[component] = GeometricDistribution(param_list[component])

            # Reset accumulators to log(0) for the next EM iteration.
            self.expected_component_counts[component] = -float("inf")
            self.expected_observation_counts[component] = -float("inf")
    def m_step(self):
        '''Perform the M-step. This step updates
        self.mixture_weights
        self.component_distributions
        '''

        # Sanity check: the summed expected component counts should roughly
        # equal the total number of observations processed in the E-step.
        sum_obs_counts = log_add_list(self.expected_component_counts.values())
        print("Sum of expected component counts: {}".format(
            exp(sum_obs_counts)))

        weight_list = list()
        param_list = list()

        for component in range(self.num_components):
            # New mixture weight, kept in LOG space: the E-step adds
            # self.mixture_weights[k] directly to log-probabilities, so a log
            # value must be stored here. (Previously exp(...) was stored,
            # i.e. a probability, inconsistent with the E-step.)
            new_weight = (self.expected_component_counts[component] -
                          sum_obs_counts)
            weight_list.append(new_weight)

            # Geometric MLE denominator log(n + sum(x)) ("noemer" is Dutch
            # for denominator), computed as a log-space sum via log_add.
            noemer = log_add(self.expected_component_counts[component],
                             self.expected_observation_counts[component])
            # New parameter p = n / (n + sum(x)), back in probability space.
            new_param = exp(self.expected_component_counts[component] - noemer)
            param_list.append(new_param)

        for component in range(self.num_components):
            self.mixture_weights[component] = weight_list[component]
            self.component_distributions[component] = GeometricDistribution(
                param_list[component])

            # Reset accumulators to log(0) for the next EM iteration.
            self.expected_component_counts[component] = -float("inf")
            self.expected_observation_counts[component] = -float("inf")
示例#4
0
    def e_step(self, observation):
        '''Perform the E-step on a single observation. That is, compute the posterior of mixture components
        and add the expected occurrence of each component to the running totals
        self.log_likelihood ,
        self.expected_component_counts , and
        self.expected_observation_counts

        :param observation: The observation (x_i)
        '''
        # Per-component joint log-likelihood of this data point:
        # log prior (mixture weight, already in log space) + component log-prob.
        joint = []
        for k in range(self.num_components):
            joint.append(self.mixture_weights[k] +
                         self.component_distributions[k].log_prob(observation))

        # Marginal log-likelihood of this data point (log-sum over components).
        marginal = log_add_list(joint)

        # Accumulate the total data log-likelihood for this EM iteration.
        self.log_likelihood += marginal

        for k in range(self.num_components):
            # Posterior responsibility of component k, in log space.
            posterior = joint[k] - marginal

            # Expected component counts: cumulative log-addition of posteriors.
            self.expected_component_counts[k] = log_add(
                self.expected_component_counts[k], posterior)

            # A zero-valued observation contributes nothing to the expected
            # observation counts (and log(0) would crash), so skip it.
            if observation == 0:
                continue

            # Expected observation count of this data point for component k.
            self.expected_observation_counts[k] = log_add(
                self.expected_observation_counts[k],
                log(observation) + posterior)
    def e_step(self, observation):
        '''Perform the E-step on a single observation. That is, compute the posterior of mixture components
        and add the expected occurrence of each component to the running totals
        self.log_likelihood ,
        self.expected_component_counts , and
        self.expected_observation_counts

        :param observation: The observation
        '''
        loglikelihood_list = list()

        for component in range(self.num_components):
            # Prior component weight (already stored in log space).
            log_prior = self.mixture_weights[component]

            # Joint log-likelihood of (component, observation).
            prob_geo_obs = self.component_distributions[component].log_prob(
                observation)
            loglikelihood_list.append(prob_geo_obs + log_prior)

        # Marginal log-likelihood of this observation (log-sum over components).
        marginal_likelihood = log_add_list(loglikelihood_list)

        # Accumulate the total data log-likelihood for this iteration.
        self.log_likelihood += marginal_likelihood

        for component in range(self.num_components):
            # Posterior responsibility of this component, in log space.
            posterior = loglikelihood_list[component] - marginal_likelihood
            self.expected_component_counts[component] = log_add(
                self.expected_component_counts[component], posterior)

            # Expected observation count, computed entirely in log space as
            # log(x) + posterior. The previous version left log space via
            # observation * exp(posterior), which silently underflows to zero
            # for very unlikely components and loses precision.
            if observation == 0:
                # log(0) = -inf would contribute nothing; skip it.
                continue
            self.expected_observation_counts[component] = log_add(
                self.expected_observation_counts[component],
                log(observation) + posterior)
示例#6
0
    def m_step(self):
        '''Perform the M-step. This step updates
        self.mixture_weights
        self.component_distributions
        '''

        # Sanity check: the summed expected component counts should roughly
        # equal the total number of observations processed in the E-step.
        sum_obs_counts = log_add_list(self.expected_component_counts.values())
        print("Sum of expected component counts: {}".format(
            exp(sum_obs_counts)))

        for component in range(self.num_components):
            count = self.expected_component_counts[component]

            # New log mixture weight: log(n_k) - log(N).
            self.mixture_weights[component] = count - sum_obs_counts

            # Geometric MLE p = n_k / (n_k + sum(x)), with the denominator
            # computed as a log-space sum via log_add.
            denominator = log_add(count,
                                  self.expected_observation_counts[component])
            self.component_distributions[component] = GeometricDistribution(
                exp(count - denominator))

            # Reset accumulators to log(0) for the next EM iteration.
            self.expected_component_counts[component] = -float('inf')
            self.expected_observation_counts[component] = -float('inf')
示例#7
0
    def e_step(self, observation):
        '''Perform the E-step on a single observation. That is, compute the posterior of mixture components
        and add the expected occurrence of each component to the running totals
        self.log_likelihood ,
        self.expected_component_counts , and
        self.expected_observation_counts

        :param observation: The observation
        '''
        # Joint log-probabilities: mixture weights are log values already, so
        # they are added to the components' log-probs of the observation.
        joint_log_probs = [
            self.mixture_weights[k] +
            self.component_distributions[k].log_prob(observation)
            for k in range(self.num_components)
        ]

        # Marginal log-probability of the observation, then accumulate it
        # into the running data log-likelihood.
        marginal = log_add_list(joint_log_probs)
        self.log_likelihood += marginal

        for k in range(self.num_components):
            # Normalise to get the log posterior of component k.
            posterior = joint_log_probs[k] - marginal
            self.expected_component_counts[k] = log_add(
                self.expected_component_counts[k], posterior)

            # Contribution to the expected observation count: log(x) + posterior,
            # or log(0) = -inf when x == 0 (leaves the accumulator unchanged).
            if observation != 0:
                contribution = log(observation) + posterior
            else:
                contribution = -float('inf')
            self.expected_observation_counts[k] = log_add(
                self.expected_observation_counts[k], contribution)