Exemplo n.º 1
0
 def __init__(self,
              use_stopword=True,
              stop_words_file=utils.default_stopwords_file(),
              max_iter=100,
              tol=0.0001,
              window=2):
     """Store the configuration and, if requested, load the stop words.

     Args:
         use_stopword: When truthy, the stop-words file is read during
             construction; otherwise the stop-word set stays empty.
         stop_words_file: Path of the stop-words file. A falsy value
             falls back to ``utils.default_stopwords_file()``.
         max_iter: Stored on the instance unchanged.
         tol: Stored on the instance unchanged.
         window: Stored on the instance unchanged.
     """
     fallback_path = utils.default_stopwords_file()
     self.__stop_words_file = stop_words_file or fallback_path
     self.__stop_words = set()
     self.__window = window
     self.__tol = tol
     self.__max_iter = max_iter
     self.__use_stopword = use_stopword
     if not use_stopword:
         return
     # One entry per line of the file, surrounding whitespace removed.
     with open(self.__stop_words_file, 'r', encoding='utf-8') as handle:
         self.__stop_words.update(line.strip() for line in handle)
Exemplo n.º 2
0
    def __init__(self, use_stopword=True,
                 stop_words_file=None,
                 dict_path=None,
                 max_iter=100,
                 tol=0.0001):
        """Store the configuration and, if requested, load the stop words.

        Args:
            use_stopword: When truthy, the stop-words file is read during
                construction; otherwise the stop-word set stays empty.
            stop_words_file: Path of the stop-words file. A falsy value
                falls back to ``utils.default_stopwords_file()``.
            dict_path: Must be falsy; any truthy value is rejected.
            max_iter: Stored on the instance unchanged.
            tol: Stored on the instance unchanged.

        Raises:
            RuntimeError: If ``dict_path`` is truthy.
        """
        if dict_path:
            # NOTE(review): the message "True" carries no information; it
            # looks like a placeholder -- confirm the intended wording.
            raise RuntimeError("True")
        self.__use_stopword = use_stopword
        self.__dict_path = dict_path
        self.__max_iter = max_iter
        self.__tol = tol

        self.__stop_words = set()
        self.__stop_words_file = utils.default_stopwords_file()
        if stop_words_file:
            self.__stop_words_file = stop_words_file
        if use_stopword:
            # Use a context manager so the file handle is closed
            # deterministically instead of being left to garbage collection.
            with open(self.__stop_words_file, 'r', encoding='utf-8') as f:
                for word in f:
                    self.__stop_words.add(word.strip())
Exemplo n.º 3
0
 def test_default_stopwords(self):
     """Check that the default stop-words file yields at least one line."""
     path = utils.default_stopwords_file()
     # Close the handle deterministically rather than relying on garbage
     # collection, which would emit a ResourceWarning during the test run.
     with open(path, 'r', encoding='utf-8') as f:
         stopwords = [line.strip() for line in f]
     self.assertTrue(len(stopwords) > 0)