def e_step(self, observation):
    '''Perform the E-step on a single observation.

    Computes the posterior over the mixture components for the observation
    and folds the result into the running totals self.log_likelihood,
    self.expected_component_counts and self.expected_observation_counts.

    :param observation: The observation
    '''
    # Unnormalised score per component: mixture weight + log p(x | component).
    joint_scores = [
        self.mixture_weights[k]
        + self.component_distributions[k].log_prob(observation)
        for k in range(self.num_components)
    ]

    # Marginal score of the observation over all components.
    evidence = log_add_list(joint_scores)
    self.log_likelihood += evidence

    for k, joint in enumerate(joint_scores):
        # Normalised component score (component / total).
        responsibility = joint - evidence
        self.expected_component_counts[k] = log_add(
            self.expected_component_counts[k], responsibility)

        # log(0) is undefined, so a zero observation contributes -inf.
        if observation == 0:
            weighted_value = -float("inf")
        else:
            weighted_value = responsibility + log(observation)
        self.expected_observation_counts[k] = log_add(
            self.expected_observation_counts[k], weighted_value)
def m_step(self):
    '''Perform the M-step.

    Re-estimates self.mixture_weights and self.component_distributions from
    the accumulated expected counts, then resets
    self.expected_component_counts and self.expected_observation_counts
    to -inf for every component.

    The mixture weights are stored in the log domain, because e_step adds
    self.mixture_weights[component] directly to a log-probability.
    '''
    # Sanity check: the summed expected component counts should be roughly
    # equal to the total number of observations.
    sum_obs_counts = log_add_list(self.expected_component_counts.values())
    print("Sum of expected component counts: {}".format(exp(sum_obs_counts)))

    # NOTE(review): a component whose count is still -inf makes the
    # subtractions below evaluate to nan -- confirm whether that can happen.
    for component in range(self.num_components):
        component_count = self.expected_component_counts[component]

        # New log weight = log(expected component count / total count).
        # Kept as a log value; exponentiating here would corrupt the next
        # E-step, which treats the weights as logs.
        self.mixture_weights[component] = component_count - sum_obs_counts

        # Geometric MLE: p = n / (n + sum of observations), with both terms
        # held as logs and combined through log_add.
        log_denominator = log_add(
            component_count, self.expected_observation_counts[component])
        self.component_distributions[component] = GeometricDistribution(
            exp(component_count - log_denominator))

        # Reset the counters for the next EM iteration.
        self.expected_component_counts[component] = -float("inf")
        self.expected_observation_counts[component] = -float("inf")
def m_step(self):
    '''Perform the M-step.

    Updates self.mixture_weights and self.component_distributions from the
    accumulated expected counts, then resets
    self.expected_component_counts and self.expected_observation_counts
    to -inf for every component.

    The mixture weights are written as log values, since e_step adds
    self.mixture_weights[component] to a component log-probability.
    '''
    # Sanity check: the summed expected component counts should be roughly
    # equal to the total number of observations.
    sum_obs_counts = log_add_list(self.expected_component_counts.values())
    print("Sum of expected component counts: {}".format(
        exp(sum_obs_counts)))

    components = range(self.num_components)

    # Log weight per component: log(expected component count / total count).
    # These stay in the log domain instead of being exponentiated.
    log_weights = [
        self.expected_component_counts[component] - sum_obs_counts
        for component in components
    ]

    # Geometric parameter per component: n / (n + sum of observations),
    # evaluated with log-domain counts.
    params = []
    for component in components:
        log_denominator = log_add(
            self.expected_component_counts[component],
            self.expected_observation_counts[component])
        params.append(
            exp(self.expected_component_counts[component] - log_denominator))

    # Apply the new estimates and reset the counters for the next iteration.
    for component in components:
        self.mixture_weights[component] = log_weights[component]
        self.component_distributions[component] = GeometricDistribution(
            params[component])
        self.expected_component_counts[component] = -float("inf")
        self.expected_observation_counts[component] = -float("inf")
def e_step(self, observation):
    '''Perform the E-step on a single observation.

    Computes the posterior over the mixture components for the observation
    and adds the expected occurrence of each component to the running totals
    self.log_likelihood, self.expected_component_counts and
    self.expected_observation_counts.

    :param observation: The observation (x_i)
    '''
    # Per-component score: prior weight + log-probability of the datapoint.
    joint = []
    for k in range(self.num_components):
        weight = self.mixture_weights[k]
        joint.append(weight + self.component_distributions[k].log_prob(observation))

    # Score of this datapoint summed over all components.
    total = log_add_list(joint)

    # Accumulate into the likelihood of the data for this iteration.
    self.log_likelihood += total

    for k in range(self.num_components):
        # Posterior of component k for the current datapoint.
        resp = joint[k] - total

        # Running total of posteriors per component.
        self.expected_component_counts[k] = log_add(self.expected_component_counts[k], resp)

        # Zero observations add nothing and log(0) would raise, so only
        # non-zero values reach the observation counts.
        if observation != 0:
            weighted = log(observation) + resp
            self.expected_observation_counts[k] = log_add(self.expected_observation_counts[k], weighted)
def e_step(self, observation):
    '''Perform the E-step on a single observation.

    Computes the posterior over the mixture components for the observation
    and adds the expected occurrence of each component to the running totals
    self.log_likelihood, self.expected_component_counts and
    self.expected_observation_counts.

    :param observation: The observation
    '''
    # Per-component score: log prior weight + log-probability of the
    # observation under that component.
    joint_scores = [
        self.component_distributions[component].log_prob(observation)
        + self.mixture_weights[component]
        for component in range(self.num_components)
    ]

    # Marginal score of the observation (log_add_list instead of a plain sum).
    marginal = log_add_list(joint_scores)
    self.log_likelihood += marginal

    neg_inf = -float("inf")
    for component in range(self.num_components):
        posterior = joint_scores[component] - marginal
        self.expected_component_counts[component] = log_add(
            self.expected_component_counts[component], posterior)

        # A zero observation, or a component with zero posterior mass,
        # contributes nothing to the expected observation count.
        if observation == 0 or posterior == neg_inf:
            continue

        # Stay in the log domain: log(x * posterior) = log(x) + log-posterior.
        # Going through exp(posterior) would underflow to 0 for very small
        # posteriors and drop their contribution.
        self.expected_observation_counts[component] = log_add(
            self.expected_observation_counts[component],
            posterior + log(observation))
def m_step(self):
    '''Perform the M-step.

    Updates self.mixture_weights and self.component_distributions from the
    accumulated expected counts, then resets both count tables to -inf.
    '''
    # Sanity check: the summed expected component counts should be roughly
    # equal to the total number of observations.
    total_log_count = log_add_list(self.expected_component_counts.values())
    print("Sum of expected component counts: {}".format(
        exp(total_log_count)))

    for component in range(self.num_components):
        log_count = self.expected_component_counts[component]

        # The new mixture weight stays in the log domain.
        self.mixture_weights[component] = log_count - total_log_count

        # New parameter: count / (count + observation total), using logs.
        log_denominator = log_add(
            log_count, self.expected_observation_counts[component])
        self.component_distributions[component] = GeometricDistribution(
            exp(log_count - log_denominator))

        # Reset the counters for the next iteration.
        self.expected_component_counts[component] = -float('inf')
        self.expected_observation_counts[component] = -float('inf')
def e_step(self, observation):
    '''Perform the E-step on a single observation.

    Computes the posterior over the mixture components for the observation
    and adds the expected occurrence of each component to the running totals
    self.log_likelihood, self.expected_component_counts and
    self.expected_observation_counts.

    :param observation: The observation
    '''
    # Score per component: the (already logarithmic) mixture weight plus the
    # log-probability reported by the component distribution.
    scores = []
    for component in range(self.num_components):
        distribution = self.component_distributions[component]
        scores.append(
            self.mixture_weights[component] + distribution.log_prob(observation))

    normaliser = log_add_list(scores)
    self.log_likelihood += normaliser

    # Update the expected component and observation counts.
    for component, score in enumerate(scores):
        posterior = score - normaliser
        self.expected_component_counts[component] = log_add(
            self.expected_component_counts[component], posterior)

        # A zero observation would need log(0); it contributes -inf instead.
        contribution = (
            posterior + log(observation) if observation != 0 else -float('inf'))
        self.expected_observation_counts[component] = log_add(
            self.expected_observation_counts[component], contribution)