def setUp(self):
    """Build the ``Dictionary`` fixture from the dump DB using a regexp tokenizer.

    The result is stored on ``self.dictionary``.
    """
    # Keyword options for Dictionary.build; the tokenizer is created first,
    # before the dump DB is fetched.
    build_options = dict(
        tokenizer=get_tokenizer('regexp'),
        lowercase=True,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        disambi=False,
        pool_size=1,
        chunk_size=1,
        progressbar=False,
    )
    self.dictionary = Dictionary.build(get_dump_db(), **build_options)
def setUp(self):
    """Build the ``Dictionary`` and ``LinkGraph`` fixtures from the dump DB.

    The dictionary is built first because the link graph build takes it as
    an argument. Results are stored on ``self.dictionary`` and
    ``self.link_graph``.
    """
    # Options passed identically to both build calls.
    shared_opts = dict(pool_size=1, chunk_size=1, progressbar=False)

    self.dictionary = Dictionary.build(
        get_dump_db(),
        None,  # second positional argument (presumably the tokenizer -- confirm)
        lowercase=True,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        **shared_opts
    )
    self.link_graph = LinkGraph.build(
        get_dump_db(),
        self.dictionary,
        **shared_opts
    )
def setUp(self):
    """Build the ``PhraseDictionary`` and ``Dictionary`` fixtures from the dump DB.

    The phrase dictionary is built first and then handed to
    ``Dictionary.build`` as ``phrase_dict``. Results are stored on
    ``self.phrase_dic`` and ``self.dictionary``.
    """
    # Options passed identically to both build calls.
    shared_opts = dict(pool_size=1, chunk_size=1, progressbar=False)

    self.phrase_dic = PhraseDictionary.build(
        get_dump_db(),
        min_link_count=0,
        min_link_prob=0.1,
        lowercase=True,
        max_phrase_len=3,
        **shared_opts
    )
    self.dictionary = Dictionary.build(
        get_dump_db(),
        phrase_dict=self.phrase_dic,
        lowercase=True,
        min_word_count=2,
        min_entity_count=1,
        min_paragraph_len=5,
        category=True,
        **shared_opts
    )