def __iter__(self):
    """Yield one list of words per sentence found in the configured input.

    When ``self.files`` is not ``None`` it is treated as an iterable of file
    paths; every file is opened in binary mode and consumed line by line.
    Otherwise the texts in ``self.text_list`` are used directly.

    Each text is split with ``text2sentences`` (honouring
    ``self.remove_non_english_chars``) and each sentence is tokenised with
    ``sentence2words`` (honouring ``self.remove_stop_words``). Sentences
    that produce fewer than three words are skipped.

    :return: generator of word lists, one per retained sentence.
    """
    def word_lists(text):
        # Shared sentence -> words pipeline used by both input sources.
        sentences = text2sentences(
            text, remove_non_english_chars=self.remove_non_english_chars)
        for line in sentences:
            words = sentence2words(
                line, remove_stop_words=self.remove_stop_words)
            if len(words) < 3:
                continue
            yield words

    if self.files is not None:
        # Iterating over a list of file paths.
        for input_file in self.files:
            # ``open`` replaces the Python 2-only ``file`` builtin, and the
            # context manager guarantees the handle is closed.
            # NOTE(review): the original "rb" mode is kept; on Python 3 each
            # line is ``bytes`` -- confirm text2sentences accepts that.
            with open(input_file, "rb") as handle:
                for text in handle:
                    for words in word_lists(text):
                        yield words
    else:
        # Iterating over a list of text.
        for text in self.text_list:
            for words in word_lists(text):
                yield words
def text2vectors(self, text):
    """
    Lazily yield the model vector of every known word in the input text.

    The text is tokenised with ``sentence2words``; tokens for which
    ``token in self.model`` is false are dropped, and ``self.model[token]``
    is yielded for each remaining token, in their original order.

    :param text: input text
    :return: iterator over the ``self.model`` entries of the retained words
    """
    known_tokens = [
        token for token in sentence2words(text) if token in self.model
    ]
    # Looping over an empty list yields nothing, so no emptiness guard is needed.
    for token in known_tokens:
        yield self.model[token]
def text2vectors(self,text):
    """
    Produce, one at a time, the vector stored in ``self.model`` for each
    word of the input text that the model contains.

    Tokenisation is delegated to ``sentence2words``. Words that fail the
    ``word in self.model`` test are filtered out before any vector is
    yielded; the remaining words keep their original order.

    :param text: input text
    :return: iterator over ``self.model[word]`` for each retained word
    """
    tokens = sentence2words(text)
    in_vocabulary = [candidate for candidate in tokens if candidate in self.model]
    # An empty list simply produces an empty iterator.
    for candidate in in_vocabulary:
        yield self.model[candidate]
def __iter__(self):
    """Iterate over the tokenised sentences of the configured input.

    If ``self.files`` is not ``None``, each path in it is opened in binary
    mode and read line by line; otherwise every entry of ``self.text_list``
    is processed. Each text is split into sentences by ``text2sentences``
    (with ``self.remove_non_english_chars``) and each sentence is turned
    into words by ``sentence2words`` (with ``self.remove_stop_words``).
    Word lists shorter than three items are not yielded.

    :return: generator of word lists, one per retained sentence.
    """
    def tokenised_sentences(text):
        # Single copy of the pipeline that both input sources share.
        for line in text2sentences(
                text,
                remove_non_english_chars=self.remove_non_english_chars):
            words = sentence2words(
                line, remove_stop_words=self.remove_stop_words)
            if len(words) < 3:
                continue
            yield words

    if self.files is not None:
        # Iterating over a list of file paths.
        for input_file in self.files:
            # Use ``open`` (``file`` does not exist on Python 3) inside a
            # ``with`` block so the handle is always released.
            # NOTE(review): mode "rb" is unchanged; on Python 3 the lines are
            # ``bytes`` -- verify text2sentences handles them.
            with open(input_file, "rb") as source:
                for text in source:
                    for words in tokenised_sentences(text):
                        yield words
    else:
        # Iterating over a list of text.
        for text in self.text_list:
            for words in tokenised_sentences(text):
                yield words
def text2vectors(self, text):
    """Yield the vector of each word in the input text that the model knows.

    The text is tokenised with ``sentence2words``. Both the membership test
    and the lookup go through ``self.model.wv`` so the two always agree on
    which words are known.

    Parameters
    ==========
    text: input text

    returns iterator of vectors, one per word of ``text`` found in
    ``self.model.wv``, in the order the words appear.
    """
    vectors = self.model.wv
    words = [w for w in sentence2words(text) if w in vectors]
    for w in words:
        # Plain subscription instead of calling the __getitem__ dunder.
        yield vectors[w]
def __iter__(self):
    """Yield one list of words per sentence found in the configured input.

    When ``self.files`` is not ``None`` each path in it is opened in text
    mode and read line by line; otherwise the entries of
    ``self.text_list`` are used. Each text is split by ``text2sentences``
    (with ``self.remove_non_english_chars``) and each sentence is tokenised
    by ``sentence2words`` (with ``self.remove_stop_words``). Every resulting
    word list is yielded; no minimum length is applied.

    :return: generator of word lists, one per sentence.
    """
    def word_lists(text):
        # Shared sentence -> words pipeline used by both input sources.
        for line in text2sentences(
                text,
                remove_non_english_chars=self.remove_non_english_chars):
            yield sentence2words(
                line, remove_stop_words=self.remove_stop_words)

    if self.files is not None:
        # Iterating over a list of file paths.
        for input_file in self.files:
            # Stream the file inside a context manager rather than calling
            # readlines() on an unclosed handle: same lines, but the file is
            # closed and never loaded into memory whole.
            with open(input_file, "r") as handle:
                for text in handle:
                    for words in word_lists(text):
                        yield words
    else:
        # Iterating over a list of text.
        for text in self.text_list:
            for words in word_lists(text):
                yield words