def _help_do_mstep(self, stats):
    """M-step helper: re-estimate the two emission rate vectors from the
    accumulated sufficient statistics, then merge the distributions.

    ``_add_pseudo_counts`` (defined elsewhere in this module) presumably
    guards degenerate (nan/zero) entries so the divisions are safe.
    """
    # Guard the shared denominator once, outside the per-signal updates.
    safe_post = _add_pseudo_counts(stats['post'])
    for sig in (0, 1):
        rate = stats['post_emission'][sig] / (self.n[sig] * safe_post)
        # Guard the freshly estimated rates themselves as well.
        self.p[sig] = _add_pseudo_counts(rate)
    self.merge_distr()
def _help_do_mstep(self, stats):
    """M-step helper: update emission rates p[0] and p[1].

    Each rate is the posterior-weighted emission sum scaled by the signal
    size n and the (pseudo-count guarded) total posterior mass; the
    resulting rates are guarded again before the distributions are merged.
    """
    denom = _add_pseudo_counts(stats['post'])
    rate_0 = stats['post_emission'][0] / (self.n[0] * denom)
    rate_1 = stats['post_emission'][1] / (self.n[1] * denom)
    self.p[0] = _add_pseudo_counts(rate_0)
    self.p[1] = _add_pseudo_counts(rate_1)
    self.merge_distr()
def _help_do_mstep(self, stats):
    """M-step: re-estimate mixture coefficients ``c`` and rates ``p`` for
    every (feature, component, state) triple, then merge distributions
    using the accumulated weights.

    Only component 0's rate is estimated from data; the rates of higher
    components are tied to it via the fixed ``self.factors`` scaling.
    The comp-outer loop order guarantees ``p[dim][0][state]`` is already
    updated before any tied component reads it.
    """
    post_l = stats['post_sum_l']
    emis_l = stats['post_sum_l_emisson']
    fact_l = stats['post_sum_l_factor']
    for dim in range(self.n_features):
        for comp in range(self.distr_magnitude):
            for state in range(self.n_components):
                # Mixture coefficient: component mass / total state mass.
                self.c[dim][comp][state] = (
                    post_l[dim][comp][state]
                    / _add_pseudo_counts(stats['post'][state]))
                if comp != 0:
                    # Tied rate: scale the base component's fresh estimate.
                    self.p[dim][comp][state] = (
                        self.factors[comp] * self.p[dim][0][state])
                    continue
                base_rate = (emis_l[dim][comp][state]
                             / _add_pseudo_counts(fact_l[dim][comp][state]))
                self.p[dim][comp][state] = _add_pseudo_counts(base_rate)
    self.merge_distr(stats['weights'])
def _help_do_mstep(self, stats):
    """M-step helper: re-estimate emission rates ``p`` and mixture
    coefficients ``c`` from accumulated sufficient statistics.

    Pseudo counts (via ``_add_pseudo_counts``, defined elsewhere in this
    module) guard every denominator and the resulting rates against
    nan/zero entries.
    """
    for dim in range(self.n_features):
        for state in range(self.n_components):
            for comp in range(self.distr_magnitude):
                self.p[dim][state][comp] = (
                    stats['post_sum_l_emisson'][dim][state][comp]
                    / (self.n[dim]
                       * _add_pseudo_counts(stats['post_sum_l'][dim][state][comp])))
                self.p[dim][state][comp] = _add_pseudo_counts(self.p[dim][state][comp])
                # BUGFIX: guard stats['post'][state] with pseudo counts, as the
                # sibling _help_do_mstep implementations do — a state with zero
                # accumulated posterior mass previously produced a
                # ZeroDivisionError (or nan/inf for numpy scalars) here.
                self.c[dim][state][comp] = (
                    stats['post_sum_l'][dim][state][comp]
                    / _add_pseudo_counts(stats['post'][state]))
def _help_accumulate_sufficient_statistics(self, obs, stats, posteriors):
    """E-step helper: accumulate sufficient statistics over all observations.

    For each timestep and each (feature, component) pair, computes the
    posterior responsibility of the component and adds it into the
    ``post_sum_l*`` accumulators in ``stats``; also counts gain/loss
    weight votes and stores a copy of the (validated) posteriors.

    NOTE(review): reconstructed formatting — original file was collapsed
    onto one line; the placement of the weights block after the comp loop
    is the most plausible reading, verify against the original layout.
    """
    posteriors = _valid_posteriors(posteriors, obs)
    i = 0  # timestep*dim*comp counter, only used for the error dump below
    print("run...! start at " + str(time()), file=sys.stderr)
    for t, symbol in enumerate(obs):
        stats['post'] += posteriors[t]
        for dim in range(self.n_features):
            for comp in range(self.distr_magnitude):
                #lookup
                # Memo cache keyed by (count, current rate vector);
                # lookup_poisson_state is presumably a module-level dict —
                # note it grows unboundedly across calls.
                index = (symbol[dim], tuple([
                    self.p[dim][comp][state]
                    for state in range(self.n_components)
                ]))
                if index not in lookup_poisson_state:
                    tmp = np.array([
                        self._get_poisson(symbol[dim], self.p[dim][comp][state])
                        for state in range(self.n_components)
                    ])
                    lookup_poisson_state[index] = tmp
                h = lookup_poisson_state[index]
                enum = self.c[dim][comp] * h
                denum = np.array([
                    self._get_value(state, symbol, dim)
                    for state in range(self.n_components)
                ])
                i += 1
                try:
                    # 'help' shadows the builtin; it is the per-state
                    # responsibility of this component.
                    help = posteriors[t] * enum / _add_pseudo_counts(denum)
                except:
                    # NOTE(review): bare except silently swallows every error
                    # kind (including KeyboardInterrupt) — should be narrowed.
                    print("%s \n" % i, file=sys.stderr)
                    print("%s %s %s \n" % (denum, symbol, dim), file=sys.stderr)
                    print("%s \n" % (self.c), file=sys.stderr)
                    print("%s \n" % (self.p), file=sys.stderr)
                    print("%s \n" % (posteriors[t]), file=sys.stderr)
                    print("%s \n" % (enum), file=sys.stderr)
                    # Fallback: uniform responsibility. NOTE(review): the
                    # hard-coded 3 entries assume distr_magnitude == 3.
                    help = np.array([
                        1.0 / self.distr_magnitude,
                        1.0 / self.distr_magnitude,
                        1.0 / self.distr_magnitude
                    ])
                stats['post_sum_l'][dim][comp] += help
                stats['post_sum_l_emisson'][dim][
                    comp] += help * symbol[dim]
                stats['post_sum_l_factor'][dim][
                    comp] += help * self.factors[comp]
            # Indexing posteriors[t][1] / [2] assumes at least 3 states
            # (background / gain / loss) — TODO confirm.
            if posteriors[t][1] > 0.5 or posteriors[t][2] > 0.5:
                if posteriors[t][1] >= posteriors[t][2]:
                    # NOTE(review): BUG under Python 3 — 'state' only binds
                    # inside the list comprehensions above and does not leak,
                    # so this line raises NameError when reached (it relied on
                    # Python 2 comprehension-variable leakage). Intended index
                    # unclear from here; must be fixed against the caller.
                    stats['weights'][dim][state][0] += 1
                if posteriors[t][2] > posteriors[t][1]:
                    stats['weights'][dim][state][1] += 1
    #print(self.p)
    stats['posterior'] = np.copy(posteriors)
def _help_do_mstep(self, stats):
    """M-step: update mixture coefficients ``c`` and rates ``p`` for every
    (feature, component, state) triple, then merge the distributions with
    the accumulated weights.

    Unlike the factor-tied variant, every component's rate is estimated
    independently here (the factor-tying branch was disabled upstream).
    """
    for dim in range(self.n_features):
        for comp in range(self.distr_magnitude):
            for state in range(self.n_components):
                # Mixture coefficient: component mass over guarded state mass.
                total_mass = _add_pseudo_counts(stats['post'][state])
                self.c[dim][comp][state] = (
                    stats['post_sum_l'][dim][comp][state] / total_mass)
                # Rate estimate, with both denominator and result guarded.
                raw_rate = (
                    stats['post_sum_l_emisson'][dim][comp][state]
                    / _add_pseudo_counts(
                        stats['post_sum_l_factor'][dim][comp][state]))
                self.p[dim][comp][state] = _add_pseudo_counts(raw_rate)
    self.merge_distr(stats['weights'])
def _help_accumulate_sufficient_statistics(self, obs, stats, posteriors):
    """E-step helper: accumulate per-(feature, component) sufficient
    statistics from the validated posteriors of every observation, tally
    gain/loss weight votes, and store a copy of the posteriors in stats.

    NOTE(review): formatting reconstructed from a one-line original; the
    indentation level of the weights block should be checked against the
    pristine source.
    """
    posteriors = _valid_posteriors(posteriors, obs)
    i = 0  # running counter, only printed in the error-dump path
    print("run...! start at " + str(time()), file=sys.stderr)
    for t, symbol in enumerate(obs):
        stats['post'] += posteriors[t]
        for dim in range(self.n_features):
            for comp in range(self.distr_magnitude):
                #lookup
                # Cache key: observed count plus the current rate vector.
                # lookup_poisson_state appears to be a module-level memo dict.
                index = (symbol[dim], tuple([self.p[dim][comp][state] for state in range(self.n_components)]))
                if index not in lookup_poisson_state:
                    tmp = np.array([self._get_poisson(symbol[dim], self.p[dim][comp][state]) for state in range(self.n_components)])
                    lookup_poisson_state[index] = tmp
                h = lookup_poisson_state[index]
                enum = self.c[dim][comp] * h
                denum = np.array([self._get_value(state, symbol, dim) for state in range(self.n_components)])
                i += 1
                try:
                    # Component responsibility per state ('help' shadows the builtin).
                    help = posteriors[t] * enum / _add_pseudo_counts(denum)
                except:
                    # NOTE(review): bare except hides the real error type;
                    # should catch the specific numeric exception instead.
                    print("%s \n" %i, file=sys.stderr)
                    print("%s %s %s \n" %(denum, symbol, dim), file=sys.stderr)
                    print("%s \n" %(self.c), file=sys.stderr)
                    print("%s \n" %(self.p), file=sys.stderr)
                    print("%s \n" %(posteriors[t]), file=sys.stderr)
                    print("%s \n" %(enum), file=sys.stderr)
                    # Uniform fallback; hard-coded length 3 — assumes
                    # distr_magnitude == 3, TODO confirm.
                    help = np.array([1.0/self.distr_magnitude, 1.0/self.distr_magnitude, 1.0/self.distr_magnitude])
                stats['post_sum_l'][dim][comp] += help
                stats['post_sum_l_emisson'][dim][comp] += help * symbol[dim]
                stats['post_sum_l_factor'][dim][comp] += help * self.factors[comp]
            # Assumes >= 3 hidden states (indices 1 and 2) — TODO confirm.
            if posteriors[t][1] > 0.5 or posteriors[t][2] > 0.5:
                if posteriors[t][1] >= posteriors[t][2]:
                    # NOTE(review): 'state' is undefined here on Python 3 —
                    # comprehension variables no longer leak out of their scope,
                    # so this raises NameError when executed. This is a latent
                    # Python 2 -> 3 porting bug; the intended index cannot be
                    # inferred from this chunk alone.
                    stats['weights'][dim][state][0] += 1
                if posteriors[t][2] > posteriors[t][1]:
                    stats['weights'][dim][state][1] += 1
    #print(self.p)
    stats['posterior'] = np.copy(posteriors)