def run_em(self, num_iterations=10):
    """Run EM over ``self.sentences`` and pickle the resulting multinomials.

    In every iteration each non-blank sentence is parsed with ``ParsingAlgo``
    using the current dependency and stop ``mult_list`` values; the log of
    the parse's ``total_potentials`` is added to that iteration's total and
    the marginals plus hypergraph edges are handed to ``update_counts``.
    After the sentences are processed the total is checked against the
    previous iteration, ``update_parameters`` is called and both multinomial
    holders are validated.  Once all iterations are done, both ``mult_list``
    values are written through a ``PickleHandler`` created from
    ``self.final_value_path`` and the per-iteration totals are pretty-printed.

    NOTE(review): the statement nesting below was reconstructed from a
    whitespace-collapsed source -- confirm that the pickle write and the
    final ``pprint`` are meant to run once, after the last iteration.

    Args:
        num_iterations: Number of EM iterations to run.  Defaults to 10,
            the count that used to be hard-coded.

    Raises:
        AssertionError: If an iteration's total is lower than the total of
            the iteration before it.
    """
    # Missing keys start at 1.0, so every reported total is 1.0 plus the
    # summed logs.  The offset is kept so printed values stay comparable with
    # earlier runs; it cancels out in the comparison below.
    sum_probs = defaultdict(lambda: 1.0)

    for i in range(num_iterations):
        # Same text as the Python 2 statement ``print "iteration ", i``,
        # written so that it also parses under Python 3.
        print("%s %s" % ("iteration ", i))

        for sentence in self.sentences:
            if not sentence.strip():
                continue
            parsing_algo = ParsingAlgo(
                sentence,
                self.dep_multinomial_holder.mult_list,
                self.stop_multinomial_holder.mult_list)
            marginals = parsing_algo.get_marginals()
            sum_probs[i] += math.log(parsing_algo.total_potentials)
            edges = parsing_algo.hypergraph.edges
            self.update_counts(marginals, edges)

        # A membership test does not trigger the defaultdict factory, so no
        # spurious ``-1`` entry is created on the first iteration, and a real
        # total that happens to equal the 1.0 default is still checked.
        if (i - 1) in sum_probs:
            # Equal totals (a plateau) are accepted; only a decrease is
            # treated as an error.
            assert sum_probs[i] >= sum_probs[i - 1], \
                "The prob are %r, %r" % (sum_probs[i], sum_probs[i - 1])

        self.update_parameters()
        self.validate_multinomials(self.dep_multinomial_holder)
        self.validate_multinomials(self.stop_multinomial_holder)

    pickle_hand = PickleHandler(self.final_value_path)
    pickle_hand.write_to_pickle(
        self.dep_multinomial_holder.mult_list,
        self.stop_multinomial_holder.mult_list)
    pprint.pprint(sum_probs)
def initialize_dep(self):
    """Build a dependency ``MultinomialHolder`` from random counts.

    For every ``(cond_key, mult)`` pair in ``self.harmonic_dep_mult`` and
    every key of ``mult.prob`` one ``random.random()`` value is recorded with
    ``inc_counts``; ``estimate()`` is then called on the holder.

    Returns:
        The populated ``MultinomialHolder``.
    """
    holder = MultinomialHolder()
    for condition, multinomial in self.harmonic_dep_mult.iteritems():
        for outcome in multinomial.prob:
            weight = random.random()
            holder.inc_counts(outcome, condition, weight)
    holder.estimate()
    return holder


def initialize_stop_mult_cont(self):
    """Build a stop/continue ``MultinomialHolder`` from random counts.

    For every conditioning key in ``self.harmonic_stop_cont_mult`` a single
    ``random.random()`` value ``r`` is drawn; ``r`` is recorded under
    outcome ``0`` and ``1 - r`` under outcome ``1``.  ``estimate()`` is then
    called on the holder.

    Returns:
        The populated ``MultinomialHolder``.
    """
    holder = MultinomialHolder()
    # Only the conditioning keys are used; the stored multinomials are ignored.
    for condition, _unused_mult in self.harmonic_stop_cont_mult.iteritems():
        draw = random.random()
        holder.inc_counts(0, condition, draw)
        holder.inc_counts(1, condition, 1 - draw)
    holder.estimate()
    return holder


if __name__ == "__main__":
    # Load the dictionaries from "data/dummy", run the random initialisation
    # and write both resulting mult_list values to "data/random_init".
    pickle_handler = PickleHandler("data/dummy")
    dep_mult, stop_cont_mult = pickle_handler.init_all_dicts()

    random_init = RandomInitializer(dep_mult, stop_cont_mult)
    random_init.initialize_multinomials()

    pickle_handler = PickleHandler("data/random_init")
    dep_mult_list = random_init.dep_mult_holder.mult_list
    stop_cont_mult_list = random_init.stop_cont_mult_holder.mult_list
    pickle_handler.write_to_pickle(dep_mult_list, stop_cont_mult_list)
# NOTE(review): the next four statements are the tail of a definition that
# begins before this span; they are kept unchanged, one statement per line.
self.root_val_file_name = root_val_file_name
self.dep_creator = DepCreator()
self.stop_cont_creator = ContStopCreator()
np.seterr(divide='ignore', invalid='ignore')


def sentences(self):
    """Return the lines of the harmonic file followed by the root-value file.

    Both ``self.harmonic_file_name`` and ``self.root_val_file_name`` are
    opened in text read mode and read with ``readlines()``.

    Returns:
        A single list: the first file's lines, then the second file's lines.
    """
    with open(self.harmonic_file_name, "r") as harmonic_fp:
        lines = [] + harmonic_fp.readlines()
    with open(self.root_val_file_name, "r") as root_fp:
        lines.extend(root_fp.readlines())
    return lines


def initialize_harmonic_values(self):
    """Feed every input line to the creator(s) whose keyword it contains.

    The keywords are tested independently and in a fixed order, so a line
    containing several of them is added once per matching keyword.
    """
    lines = self.sentences()
    # (keyword, creator) pairs in the order in which they are tested.
    routes = (
        ("attach", self.dep_creator),
        ("continue", self.stop_cont_creator),
        ("stop", self.stop_cont_creator),
        ("root", self.dep_creator),
    )
    for line in lines:
        for keyword, creator in routes:
            if keyword in line:
                creator.add_entry(line)


if __name__ == "__main__":
    # Build the harmonic values from the two data files and pickle them.
    initializer = HarmonicInitializer("data/harmonic",
                                      "data/root_val_file.txt")
    initializer.initialize_harmonic_values()

    pickle_handler = PickleHandler("data/harmonic_values_numpy")
    attach_probs = initializer.dep_creator.prob_attach
    cont_probs = initializer.stop_cont_creator.prob_cont
    pickle_handler.write_to_pickle(attach_probs, cont_probs,
                                   "data/harmonic_values_numpy")
sentences += fp.readlines() with open(self.root_val_file_name,"r") as fp: sentences += fp.readlines() return sentences def create_dict(self): sentences = self.sentences() for sent in sentences: if "attach" in sent: self.dep_creator.add_entry(sent) if "continue" in sent: self.stop_cont_creator.add_entry(sent) if "stop" in sent: self.stop_cont_creator.add_entry(sent) if "root" in sent: self.dep_creator.add_entry(sent) self.dep_creator.mult_holder.estimate() self.stop_cont_creator.mult_holder.estimate() if __name__ == "__main__": initializer = InitDict("data/harmonic", "data/root_val_file.txt") initializer.create_dict() pickle_handler = PickleHandler("data/harmonic_values_mult") dep_mult_list = initializer.dep_creator.mult_holder.mult_list stop_cont_mult_list = initializer.stop_cont_creator.\ mult_holder.mult_list pickle_handler.write_to_pickle(dep_mult_list, stop_cont_mult_list)