예제 #1
0
 def setUp(self):
     """Build a ``Dictionary`` from the dump DB and keep it on ``self.dictionary``.

     The dictionary is built with a ``'regexp'`` tokenizer, lowercasing
     enabled, category handling enabled and ``disambi`` turned off.
     """
     build_options = dict(
         tokenizer=get_tokenizer('regexp'),
         lowercase=True,
         # Thresholds handed to the builder.
         min_word_count=2,
         min_entity_count=1,
         min_paragraph_len=5,
         category=True,
         disambi=False,
         # NOTE(review): presumably keeps the build in a single worker with
         # no progress output -- confirm against Dictionary.build.
         pool_size=1,
         chunk_size=1,
         progressbar=False,
     )
     self.dictionary = Dictionary.build(get_dump_db(), **build_options)
예제 #2
0
 def setUp(self):
     """Build the ``Dictionary`` and ``LinkGraph`` fixtures.

     The dictionary is stored on ``self.dictionary`` and then passed to
     ``LinkGraph.build``, whose result is stored on ``self.link_graph``.
     """
     # Settings that both builders receive unchanged.
     shared_options = dict(pool_size=1, chunk_size=1, progressbar=False)

     # The second positional argument is deliberately ``None``
     # (presumably the tokenizer slot -- confirm against Dictionary.build).
     self.dictionary = Dictionary.build(
         get_dump_db(),
         None,
         lowercase=True,
         min_word_count=2,
         min_entity_count=1,
         min_paragraph_len=5,
         category=True,
         **shared_options
     )

     # The dump DB is fetched again here rather than reused from above.
     self.link_graph = LinkGraph.build(
         get_dump_db(), self.dictionary, **shared_options
     )
예제 #3
0
 def setUp(self):
     """Build a ``PhraseDictionary`` and a ``Dictionary`` that uses it.

     The phrase dictionary is stored on ``self.phrase_dic`` and handed to
     ``Dictionary.build`` through its ``phrase_dict`` keyword; the resulting
     dictionary is stored on ``self.dictionary``.
     """
     # Settings that both builders receive unchanged.
     shared_options = dict(pool_size=1, chunk_size=1, progressbar=False)

     phrase_options = dict(
         min_link_count=0,
         min_link_prob=0.1,
         lowercase=True,
         max_phrase_len=3,
     )
     phrase_options.update(shared_options)
     self.phrase_dic = PhraseDictionary.build(get_dump_db(), **phrase_options)

     dictionary_options = dict(
         lowercase=True,
         min_word_count=2,
         min_entity_count=1,
         min_paragraph_len=5,
         category=True,
     )
     dictionary_options.update(shared_options)
     self.dictionary = Dictionary.build(
         get_dump_db(),
         phrase_dict=self.phrase_dic,
         **dictionary_options
     )