def get_stopwords(self):
    """Return the stop words for the language configured on this instance.

    The language code is looked up under the ``'lang_code'`` key of
    ``self.args`` via ``.get`` and handed unchanged to
    ``StopWords.get_stop_words``, whose result is returned as-is.
    """
    language_code = self.args.get('lang_code')
    return StopWords.get_stop_words(language_code)
# NOTE(review): the statements down to the first `return` are the tail of a
# method whose `def` line lies above this view; the nesting shown here is
# reconstructed from the data flow -- confirm against the original layout.
    # Collect trigrams first, then bigrams, over the same token sequence.
    grams = self.generate_ngrams(tokens, 3)
    grams.extend(self.generate_ngrams(tokens, 2))
    for word in grams:
        # `word` is one n-gram here; it is compared as a whole against
        # stop_tokens and, if kept, joined through space_join.
        if word not in stop_tokens:
            doc_grams.append(space_join(word))
    # Pattern over POS tags: an optional VERB, any number of ADV, then one
    # or more VERB.
    pattern = r'<VERB>?<ADV>*<VERB>+'
    doc = textacy.Doc(sentence, lang=model)
    lists = textacy.extract.pos_regex_matches(doc, pattern)
    verbs_list = []
    for l in lists:
        # Split each match's lemma text on whitespace and filter the pieces
        # individually.
        verb_tokens = l.lemma_.split()
        for verb in verb_tokens:
            if verb not in stop_tokens and self.is_valid_word(verb):
                verbs_list.append(verb)
    return doc_grams, unigrams, verbs_list

def generate_ngrams(self, tokens, n):
    """Return ``ngrams(tokens, n)`` materialised as a list.

    ``ngrams`` is imported outside this view (presumably NLTK's
    ``nltk.util.ngrams`` -- TODO confirm).
    """
    return list(ngrams(tokens, n))

# Ad-hoc demo when run as a script: normalise a sample question, load the
# 'en' stop words, and print what PhraseFinder.find_phrases returns for it.
if __name__ == "__main__":
    a = 'How does the e-monies NEFT service differ from RGTS and EFT?'
    from StopWords import StopWords
    from StringProcessor import StringProcessor
    a = StringProcessor().normalize(a, 'en')
    en = StopWords.get_stop_words('en')
    cl = PhraseFinder()
    print(cl.find_phrases(a, en))