def get_list_sim(self, word):
    # Look up all accented candidates for a word, falling back through
    # progressively more aggressive normalizations.
    if word not in self.simwords:
        word = Sentence(word).remove_continue()
    if word not in self.simwords:
        # Strip digits and tone/modifier keystrokes (w, j, f, z; likely
        # Telex-style input), then drop a trailing tone key (r, s, x)
        # that follows a letter.
        word = re.sub(r'(\d+)|([wjfz]+)', '', word)
        word = re.sub(r'([a-z])(r|s|x)', r'\1', word)
    if word not in self.simwords:
        # Unknown word: fall back to the rare-word sentinel's index.
        return {self.index[Sentence().R_S]}
    return self.simwords[word]
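# Example of the fallback chain above (hypothetical vocabulary): for the input
# 'hocj', the keystroke regex strips the 'j' to yield 'hoc', whose entry in
# self.simwords might be the index set for {'hoc', 'học', 'hóc'}. If no
# normalization produces a known key, the rare-word sentinel index is returned.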
def fast_predict(self, testdata, list_of_indices=None, output_size=5):
    # Predict accented candidates for each raw input sentence, timing each one.
    if list_of_indices is None:
        list_of_indices = [10]  # avoid a mutable default argument
    print('Predicting...')
    y = []
    for sentence in testdata:
        start = time.time()
        sentence = ' '.join(word_tokenize(sentence))
        old_words = sentence.split()
        sentence = Sentence(sentence).remove_continue()
        new_words = sentence.split()
        res = self.get_result_continue(old_words, new_words,
                                       list_of_indices, output_size)
        end = time.time()
        print('Done %f (ms)' % ((end - start) * 1000))
        self.maxtime = max(self.maxtime, end - start)
        y.append(res)
    print('Done!')
    return y
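# Usage sketch (hypothetical data; `model` is a fitted instance of this class,
# and the constructor name below is assumed):
#
#     model = NgramModel()
#     model.separate_sentences(corpus_paragraphs)
#     model.fit()
#     preds = model.fast_predict(['toi di hoc o ha noi'],
#                                list_of_indices=[10, 5], output_size=3)
#
# Each element of `preds` then holds up to `output_size` accented candidates
# for the corresponding input sentence.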
def get_words_continue_prev(self, words, lim_per_index):
    # Recursively build candidate accented suffixes for `words`: score each
    # candidate for the current word against every configured n-gram order,
    # prune scores below self.eta, and keep at most lim_per_index[0] beams.
    if len(words) == 0:
        return tuple()
    if len(words) == 1:
        return tuple(tuple([w]) for w in self.get_list_sim(words[0]))
    cur_word = words[0]
    # If the word already carries accents, it is not necessary to guess.
    if cur_word != Sentence().remove_accents(cur_word):
        if cur_word in self.index:
            cur_simwords = {self.index[cur_word]}
        else:
            cur_simwords = {self.index[Sentence().R_S]}
    else:
        cur_simwords = self.get_list_sim(cur_word)
    list_next_words = self.get_words_continue_prev(words[1:], lim_per_index[1:])
    list_choices = {}
    for next_words in list_next_words:
        size_next = len(next_words)
        prop_prev = {}
        for n in self.list_ngrams:
            if n - 1 > size_next:
                continue
            check_words = next_words[:n - 1]
            for word in cur_simwords:
                tp = (word,) + check_words
                # Accumulate the smoothed n-gram score for this candidate.
                prop = self.prop[n][tp] / self.cnt[n] if tp in self.prop[n] else 0
                prop_prev[word] = prop_prev.get(word, 0) + prop
        cur_choices = [(k, prop_prev[k])
                       for k in sorted(prop_prev, key=prop_prev.get, reverse=True)]
        for choice in cur_choices[:lim_per_index[0]]:
            # Candidates are sorted, so stop once the score drops below eta.
            if choice[1] < self.eta:
                break
            prev_sentence = choice[:1] + tuple(next_words)
            list_choices[prev_sentence] = self.max_prop_indices(prev_sentence)
    res = tuple(sorted(list_choices, key=list_choices.get, reverse=True))
    return res[:lim_per_index[0]]
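# Sketch of the recursion above on a two-word input (hypothetical scores):
# for words = ['hoc', 'sinh'], the base case returns one 1-tuple per candidate
# index of 'sinh'; the outer call then pairs every candidate of 'hoc' with each
# suffix, sums the per-order n-gram scores over self.list_ngrams, drops pairs
# scoring below self.eta, and returns the lim_per_index[0] best sequences.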
def convert_to_indices(self, words):
    # Map each word to its vocabulary index via the trie; unknown words
    # fall back to the rare-word sentinel.
    lst = []
    for word in words:
        if isinstance(word, list):
            word = word[0]
        info = self.trie.searchWord(word)
        if info is False:
            info = self.trie.searchWord(Sentence().R_S)
        lst.append(info[0])
    return tuple(lst)
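# For example (hypothetical trie contents): convert_to_indices(['học', 'sinh'])
# would return a tuple such as (12, 47), taking the first field of each trie
# payload; a word missing from the trie contributes the sentinel's index instead.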
def separate_sentences(self, list_paragraphs):
    # Split raw paragraphs into normalized sentences.
    print('Separating sentences...')
    self.list_sentences = []
    for paragraph in list_paragraphs:
        for sentence in sent_tokenize(paragraph):
            self.list_sentences.append(Sentence(sentence).remove_continue())
    print(self.list_sentences[:10])
    print('Done!')
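# Usage sketch (hypothetical corpus):
#
#     model.separate_sentences(['Tôi đi học. Trời mưa to.'])
#
# leaves self.list_sentences holding two normalized sentences, ready for
# fit() to count n-grams over.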
def fit(self):
    print('Fitting model...')
    start_time = time.time()
    self.prop = {}      # n-gram counts: prop[n][tuple_of_indices] -> frequency
    self.cnt = {}       # total number of n-grams observed, per order n
    self.simwords = {}  # non-accented form -> set of accented word indices
    self.word = {}      # index -> word
    self.index = {}     # word -> index
    for i in self.list_ngrams:
        self.prop[i] = {}
        self.cnt[i] = 0
    print('Extract n-gram ', self.list_ngrams)
    cnt, full_size = 1, len(self.list_sentences)
    for sentence in self.list_sentences:
        print('Processing at %d/%d (%.2f)' % (cnt, full_size, (cnt / full_size) * 100))
        list_indices = []
        words = word_tokenize(sentence)
        for word in words:
            if word not in self.index:
                self.index[word] = len(self.word) + 1
            index = self.index[word]
            self.word[index] = word
            # Count every n-gram that ends at the current word.
            list_indices.append(index)
            for ngram in self.list_ngrams:
                if ngram > len(list_indices):
                    continue
                word_encodes = tuple(list_indices[-ngram:])
                if word_encodes not in self.prop[ngram]:
                    self.prop[ngram][word_encodes] = 1
                else:
                    self.prop[ngram][word_encodes] += 1
        # Increase the per-order totals for this sentence.
        for ngram in self.list_ngrams:
            self.cnt[ngram] += max(0, len(words) - ngram + 1)
        cnt += 1
    ################# add __object__ #####################
    R_S = Sentence().R_S
    if R_S not in self.index:
        index = len(self.index) + 1
        self.index[R_S] = index
        self.word[index] = R_S
        self.prop[1][(index,)] = 0  # unigram keys are 1-tuples, not bare ints
    ######################################################
    print('Done!')
    print('Extract similar non-accent words')
    cnt, full_size = 1, len(self.index)
    # Group accented vocabulary entries by their non-accented form.
    for word in self.index:
        print('Processing at %d/%d (%.2f)' % (cnt, full_size, (cnt / full_size) * 100))
        index = self.index[word]
        non_accent = Sentence().remove_accents(word)
        if non_accent not in self.simwords:
            self.simwords[non_accent] = {index}
        else:
            self.simwords[non_accent].add(index)
        cnt += 1
    print('Done!')
    end_time = time.time()
    print('Finished! Elapsed time: %f (ms)' % ((end_time - start_time) * 1000))
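# After fit(), the model state looks like this (hypothetical values):
#
#     self.index    == {'học': 3, ...}
#     self.word     == {3: 'học', ...}
#     self.prop[2]  == {(3, 7): 5, ...}   # bigram (index, index) -> count
#     self.cnt[2]   == 1042               # total bigrams observed
#     self.simwords == {'hoc': {3, 9}, ...}
#
# so a bigram probability is estimated as self.prop[2][key] / self.cnt[2].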