Exemplo n.º 1
0
    def run_em(self, num_iterations=10):
        """Run EM over ``self.sentences`` and pickle the final multinomials.

        Each iteration builds a ``ParsingAlgo`` for every non-blank sentence,
        passes its marginals and hypergraph edges to ``self.update_counts``,
        then calls ``self.update_parameters`` and validates both multinomial
        holders. After the last iteration the two multinomial lists are
        written through ``PickleHandler`` (constructed with
        ``self.final_value_path``) and the per-iteration log-likelihoods are
        pretty-printed.

        Args:
            num_iterations: Number of EM iterations to run. Defaults to 10,
                the value that was previously hard-coded.

        Raises:
            AssertionError: If the summed log-likelihood of an iteration is
                lower than that of the previous iteration.
        """
        # Plain dict keyed by iteration index. The earlier
        # ``defaultdict(lambda: 1.0)`` sentinel shifted every total by 1.0
        # and inserted a bogus ``-1`` entry when ``i - 1`` was looked up.
        log_likelihoods = {}
        for i in range(num_iterations):
            # Single pre-formatted string: prints the same text on Python 2
            # and Python 3.
            print("iteration  %d" % i)

            iteration_total = 0.0
            for sentence in self.sentences:
                if not sentence.strip():
                    continue
                parsing_algo = ParsingAlgo(
                    sentence,
                    self.dep_multinomial_holder.mult_list,
                    self.stop_multinomial_holder.mult_list)
                marginals = parsing_algo.get_marginals()
                iteration_total += math.log(parsing_algo.total_potentials)
                edges = parsing_algo.hypergraph.edges
                self.update_counts(marginals, edges)
            log_likelihoods[i] = iteration_total

            if i > 0:
                # EM only guarantees a non-decreasing likelihood, so equality
                # (a converged run) must not trip the check.
                assert iteration_total >= log_likelihoods[i - 1], \
                    "The prob are %r, %r" % (iteration_total,
                                             log_likelihoods[i - 1])

            self.update_parameters()
            self.validate_multinomials(self.dep_multinomial_holder)
            self.validate_multinomials(self.stop_multinomial_holder)

        pickle_hand = PickleHandler(self.final_value_path)
        pickle_hand.write_to_pickle(
            self.dep_multinomial_holder.mult_list,
            self.stop_multinomial_holder.mult_list)
        pprint.pprint(log_likelihoods)
Exemplo n.º 2
0
    def initialize_dep(self):
        """Return a new MultinomialHolder filled with random counts.

        For each (conditioning key, multinomial) pair yielded by
        ``self.harmonic_dep_mult.iteritems()`` and each key in that
        multinomial's ``prob``, one ``random.random()`` draw is passed to
        ``inc_counts``. ``estimate()`` is called on the holder before it is
        returned.
        """
        holder = MultinomialHolder()
        for condition, multinomial in self.harmonic_dep_mult.iteritems():
            for outcome in multinomial.prob:
                # One independent draw per (outcome, condition) pair.
                holder.inc_counts(outcome, condition, random.random())

        holder.estimate()
        return holder

    def initialize_stop_mult_cont(self):
        """Return a new MultinomialHolder with random counts for keys 0 and 1.

        For each conditioning key in ``self.harmonic_stop_cont_mult`` a single
        ``random.random()`` value ``r`` is drawn; key 0 receives ``r`` and
        key 1 receives ``1 - r``, so the two counts sum to one. ``estimate()``
        is called on the holder before it is returned.
        """
        holder = MultinomialHolder()
        # Only the conditioning key is needed; the paired multinomial is
        # ignored.
        for condition, _ignored in self.harmonic_stop_cont_mult.iteritems():
            draw = random.random()
            for outcome, weight in ((0, draw), (1, 1 - draw)):
                holder.inc_counts(outcome, condition, weight)

        holder.estimate()
        return holder

if __name__ == "__main__":
    # Load the starting dictionaries from the "data/dummy" pickle.
    pickle_handler = PickleHandler("data/dummy")
    dep_mult, stop_cont_mult = pickle_handler.init_all_dicts()

    # Build the randomly initialised multinomials from them.
    random_init = RandomInitializer(dep_mult, stop_cont_mult)
    random_init.initialize_multinomials()

    # Write both multinomial lists to "data/random_init".
    pickle_handler = PickleHandler("data/random_init")
    dep_mult_list = random_init.dep_mult_holder.mult_list
    stop_cont_mult_list = random_init.stop_cont_mult_holder.mult_list
    pickle_handler.write_to_pickle(dep_mult_list, stop_cont_mult_list)
        self.root_val_file_name = root_val_file_name
        self.dep_creator = DepCreator()
        self.stop_cont_creator = ContStopCreator()
        np.seterr(divide='ignore', invalid='ignore')

    def sentences(self):
        """Return the lines of the harmonic file followed by the root file.

        Both ``self.harmonic_file_name`` and ``self.root_val_file_name`` are
        opened in text read mode; lines are returned as produced by
        ``readlines()``, i.e. with their line endings intact.
        """
        with open(self.harmonic_file_name, "r") as harmonic_fp:
            collected = harmonic_fp.readlines()
        with open(self.root_val_file_name, "r") as root_fp:
            collected.extend(root_fp.readlines())
        return collected

    def initialize_harmonic_values(self):
        """Route every line from ``self.sentences()`` to the matching creator.

        A line containing "attach" or "root" is passed to
        ``self.dep_creator.add_entry``; a line containing "continue" or
        "stop" is passed to ``self.stop_cont_creator.add_entry``. The checks
        are independent substring tests, so a line matching several keywords
        is added once per match, in the order listed in the table below.
        """
        lines = self.sentences()
        # (keyword, receiving creator), in the order the checks are applied.
        routes = (
            ("attach", self.dep_creator),
            ("continue", self.stop_cont_creator),
            ("stop", self.stop_cont_creator),
            ("root", self.dep_creator),
        )
        for line in lines:
            for keyword, creator in routes:
                if keyword in line:
                    creator.add_entry(line)

if __name__ == "__main__":
    # Read the harmonic and root-value files and populate both creators.
    initializer = HarmonicInitializer(
        "data/harmonic",
        "data/root_val_file.txt",
    )
    initializer.initialize_harmonic_values()

    # Write the collected tables out; the path is also passed as the third
    # argument to write_to_pickle.
    pickle_handler = PickleHandler("data/harmonic_values_numpy")
    prob_attach = initializer.dep_creator.prob_attach
    prob_cont = initializer.stop_cont_creator.prob_cont
    pickle_handler.write_to_pickle(
        prob_attach,
        prob_cont,
        "data/harmonic_values_numpy",
    )
            sentences += fp.readlines()
        with open(self.root_val_file_name,"r") as fp:
            sentences += fp.readlines()
        return sentences

    def create_dict(self):
        """Feed every line from ``self.sentences()`` to the creators, then estimate.

        A line containing "attach" or "root" is passed to
        ``self.dep_creator.add_entry``; a line containing "continue" or
        "stop" is passed to ``self.stop_cont_creator.add_entry``. The checks
        are independent substring tests, so a line matching several keywords
        is added once per match. Once all lines are processed, ``estimate()``
        is called on each creator's ``mult_holder`` (dependency first).
        """
        lines = self.sentences()
        # (keyword, receiving creator), in the order the checks are applied.
        routes = (
            ("attach", self.dep_creator),
            ("continue", self.stop_cont_creator),
            ("stop", self.stop_cont_creator),
            ("root", self.dep_creator),
        )
        for line in lines:
            for keyword, creator in routes:
                if keyword in line:
                    creator.add_entry(line)

        for creator in (self.dep_creator, self.stop_cont_creator):
            creator.mult_holder.estimate()

if __name__ == "__main__":
    # Build the dictionaries from the harmonic and root-value files.
    initializer = InitDict("data/harmonic", "data/root_val_file.txt")
    initializer.create_dict()

    # Write both multinomial lists to "data/harmonic_values_mult".
    pickle_handler = PickleHandler("data/harmonic_values_mult")
    dep_holder = initializer.dep_creator.mult_holder
    dep_mult_list = dep_holder.mult_list
    stop_cont_holder = initializer.stop_cont_creator.mult_holder
    stop_cont_mult_list = stop_cont_holder.mult_list
    pickle_handler.write_to_pickle(dep_mult_list, stop_cont_mult_list)