Exemplo n.º 1
0
 def compute_pmi(self, words_dict):
     """Set ``self.pmi`` to the lowest log frequency ratio over the word's splits.

     ``gen_bigram(self.text)`` supplies ``(left, right)`` pairs. Each pair is
     scored as ``log(self.freq / words_dict[left].freq / words_dict[right].freq)``
     and the minimum score is stored in ``self.pmi``. When ``gen_bigram``
     returns nothing, ``self.pmi`` is left untouched.

     :param words_dict: lookup from a sub-word to an object exposing ``freq``.
     :raises ValueError: from ``math.log`` if a ratio is not positive.
     """
     # NOTE(review): gen_bigram is defined elsewhere; it presumably returns every
     # two-part split of the text -- confirm against its definition.
     sub_part = gen_bigram(self.text)
     if len(sub_part) > 0:
         # Unpack each pair in the ``for`` clause: the former
         # ``lambda (left, right): ...`` tuple-parameter syntax was removed by
         # PEP 3113 and is a SyntaxError on Python 3.
         self.pmi = min(
             math.log(self.freq / words_dict[left].freq /
                      words_dict[right].freq)
             for left, right in sub_part)
Exemplo n.º 2
0
 def compute_pmi(self, words_dict):
     """Store in ``self.pmi`` the smallest log frequency ratio among the splits.

     Every item returned by ``gen_bigram(self.text)`` is indexed as a pair;
     its score is
     ``log(self.freq / words_dict[item[0]].freq / words_dict[item[1]].freq)``.
     ``self.pmi`` is not assigned when ``gen_bigram`` returns nothing.

     :param words_dict: lookup from a sub-word to an object exposing ``freq``.
     """
     splits = gen_bigram(self.text)
     if len(splits) == 0:
         # Nothing to score; leave self.pmi as it was.
         return
     self.pmi = min(
         math.log(self.freq / words_dict[pair[0]].freq /
                  words_dict[pair[1]].freq)
         for pair in splits)
Exemplo n.º 3
0
 def compute_pmi(self, words_dict):
     """Assign to ``self.pmi`` the minimum log frequency ratio over the splits.

     :param words_dict: per the original author's note this is the candidate
         word table ("word_cad"): key is a word, value is its word-info
         object (only ``freq`` is read here).
     """
     # Score every two-part combination of the word.
     splits = gen_bigram(self.text)
     if len(splits) == 0:
         return
     # Original author's worked example, for a three-character word "ABC":
     # take min{p(ABC) / (p(AB) * p(C)), p(ABC) / (p(A) * p(BC))};
     # the code applies math.log to each ratio before taking the minimum.
     scores = []
     for pair in splits:
         ratio = (self.freq / words_dict[pair[0]].freq /
                  words_dict[pair[1]].freq)
         scores.append(math.log(ratio))
     self.pmi = min(scores)
Exemplo n.º 4
0
 def compute_pmi(self, words_dict):
     """Store in ``self.pmi`` the minimum adjusted log ratio over the splits.

     For each pair returned by ``gen_bigram(self.text)`` the score is
     ``log(self.freq / (a * b))``, where ``a`` and ``b`` are the two parts'
     ``freq`` values minus ``self.freq``, each floored at ``0.00001``.
     ``self.pmi`` is not assigned when ``gen_bigram`` returns nothing.

     :param words_dict: lookup from a sub-word to an object exposing ``freq``.
     """
     # Lower bound for each adjusted part frequency; keeps the denominator
     # strictly positive when a part occurs no more often than the word itself.
     floor = 0.00001

     candidates = gen_bigram(self.text)
     if len(candidates) == 0:
         return

     def score(pair):
         # NOTE(review): subtracting self.freq presumably discounts the part's
         # occurrences inside this very word -- confirm with the author.
         first_rest = max(words_dict[pair[0]].freq - self.freq, floor)
         second_rest = max(words_dict[pair[1]].freq - self.freq, floor)
         return math.log(self.freq / (first_rest * second_rest))

     self.pmi = min(score(pair) for pair in candidates)