def get_vsm_model_hyperp():
    """Return the default hyperparameter mapping for the VSM model.

    The keys are the ``.value`` of members of ``mh.VSM_Model_Hyperp``.
    Every call builds a new vectorizer and a new tokenizer instance, so
    the returned dict never shares those objects with a previous call.

    Returns:
        dict: n-gram range ``(1, 1)``, a feature cap of ``400``, a
        ``TfidfVectorizer`` configured with English stop words, IDF
        weighting and IDF smoothing, and a
        ``tok.WordNetBased_LemmaTokenizer`` instance.
    """
    hyperp = mh.VSM_Model_Hyperp

    # Constructed in the same order as the entries appear in the mapping.
    vectorizer = TfidfVectorizer(stop_words='english', use_idf=True, smooth_idf=True)
    tokenizer = tok.WordNetBased_LemmaTokenizer()

    return {
        hyperp.VECTORIZER_NGRAM_RANGE.value: (1, 1),
        hyperp.VECTORIZER_MAX_FEATURES.value: 400,
        hyperp.VECTORIZER.value: vectorizer,
        hyperp.VECTORIZER_TOKENIZER.value: tokenizer,
    }
Exemplo n.º 2
0
 def set_tokenizer(self, **kwargs):
     """Set ``self.tokenizer`` from the keyword arguments.

     If ``kwargs`` contains an entry under the key
     ``BM25_Model_Hyperp.TOKENIZER.value``, that entry is stored as-is;
     otherwise a new ``tok.WordNetBased_LemmaTokenizer`` is created and
     stored. Any other keyword arguments are ignored.
     """
     tokenizer_key = BM25_Model_Hyperp.TOKENIZER.value
     if tokenizer_key in kwargs:
         self.tokenizer = kwargs[tokenizer_key]
     else:
         # Only build the default when the caller did not supply one.
         self.tokenizer = tok.WordNetBased_LemmaTokenizer()