def _create_char_features(self, sentences, sentence_length, word_length):
    """
    Encode each word of each sentence as a row of character ids.

    Every distinct character receives an integer id the first time it is
    encountered, starting at 3. Each word's id sequence is wrapped with a
    leading 1 and a trailing 2 before the words of a sentence are handed to
    ``pad_sentences`` with ``sentence_length=word_length``.

    Sentences with more than ``sentence_length`` words keep only their first
    ``sentence_length`` rows; all others get rows of zeros stacked on top
    until they have ``sentence_length`` rows.

    The character-to-id mapping is stored in ``self.vocabs`` under the key
    ``'char_rnn'``.

    Args:
        sentences (iterable): Sentences, each an iterable of words, each
            word an iterable of characters.
        sentence_length (int): Number of word rows per sentence in the output.
        word_length (int): Length passed to ``pad_sentences`` for each word
            and the column count of the zero-padding rows.

    Returns:
        numpy.ndarray: ``np.asarray`` of the per-sentence matrices.
    """
    char_to_id = {}
    per_sentence = []
    for sentence in sentences:
        # setdefault hands out the next free id (3, 4, 5, ...) the first
        # time a character is seen and returns the existing id afterwards.
        encoded_words = [
            [1] + [char_to_id.setdefault(ch, len(char_to_id) + 3)
                   for ch in word] + [2]
            for word in sentence
        ]
        word_matrix = pad_sentences(encoded_words, sentence_length=word_length)
        missing_rows = sentence_length - word_matrix.shape[0]
        if missing_rows >= 0:
            # NOTE: np.zeros defaults to a float dtype, so this branch (even
            # with zero missing rows) yields a float matrix after stacking.
            filler = np.zeros((missing_rows, word_length))
            word_matrix = np.vstack((filler, word_matrix))
        else:
            word_matrix = word_matrix[:sentence_length]
        per_sentence.append(word_matrix)
    # Build the array before touching self.vocabs so a failure here leaves
    # the vocabularies untouched.
    stacked = np.asarray(per_sentence)
    self.vocabs.update({'char_rnn': char_to_id})
    return stacked
def _create_char_features(self, sentences, sentence_length, word_length):
    """
    Build character-level id matrices for a collection of sentences.

    Characters are numbered in order of first appearance beginning at 3;
    1 is prepended and 2 is appended to the id list of every word. The words
    of one sentence are passed to ``pad_sentences`` using ``word_length`` as
    its ``sentence_length`` argument, and the resulting matrix is then
    either cut down to its first ``sentence_length`` rows or extended at the
    top with all-zero rows to reach ``sentence_length`` rows.

    As a side effect the character vocabulary is written to ``self.vocabs``
    under ``'char_rnn'``.

    Args:
        sentences (iterable): Iterable of sentences; a sentence is an
            iterable of words and a word an iterable of characters.
        sentence_length (int): Target number of rows (words) per sentence.
        word_length (int): Target width handed to ``pad_sentences`` and used
            for the zero-padding rows.

    Returns:
        numpy.ndarray: The per-sentence matrices combined via ``np.asarray``.
    """
    start_marker, end_marker, first_char_id = 1, 2, 3
    vocab = {}

    def encode(word):
        # Translate one word into [start, id, id, ..., end], growing the
        # vocabulary whenever an unseen character shows up.
        ids = [start_marker]
        for symbol in word:
            if symbol not in vocab:
                vocab[symbol] = first_char_id + len(vocab)
            ids.append(vocab[symbol])
        ids.append(end_marker)
        return ids

    matrices = []
    for sentence in sentences:
        rows = pad_sentences([encode(word) for word in sentence],
                             sentence_length=word_length)
        n_rows = rows.shape[0]
        if n_rows > sentence_length:
            rows = rows[:sentence_length]
        else:
            # Stacking happens even when no rows are missing, matching the
            # dtype produced for sentences that do need padding.
            top = np.zeros((sentence_length - n_rows, word_length))
            rows = np.vstack((top, rows))
        matrices.append(rows)

    char_sentences = np.asarray(matrices)
    self.vocabs.update({'char_rnn': vocab})
    return char_sentences
def pad_sentences(sentences, sentence_length=None, dtype=np.int32, pad_val=0.):
    """
    Deprecated forwarding wrapper around ``pad_sentences``.

    Logs a deprecation message at error level and then passes every
    argument through unchanged.

    NOTE(review): the inner call is unqualified. It presumably resolves to
    the module-level implementation (the log message points at
    neon.data.text_preprocessing) while this wrapper lives on the Text
    class -- confirm, because if this definition is the module-level name
    the call would recurse into itself.

    Args:
        sentences: Forwarded as the first positional argument.
        sentence_length: Forwarded as ``sentence_length``. Defaults to None.
        dtype: Forwarded as ``dtype``. Defaults to ``np.int32``.
        pad_val: Forwarded as ``pad_val``. Defaults to ``0.``.

    Returns:
        Whatever the delegated ``pad_sentences`` call returns.
    """
    # Adjacent string literals are joined with no separator, so the space
    # between the two sentences has to be spelled out explicitly.
    logger.error('pad_sentences in the Text class is deprecated. This function '
                 'is now in neon.data.text_preprocessing.')
    return pad_sentences(sentences,
                         sentence_length=sentence_length,
                         dtype=dtype,
                         pad_val=pad_val)
def vectorize_stories(self, data):
    """
    Turn word-level (story, query, answer) samples into padded arrays.

    Each story and query goes through ``self.words_to_vector`` and each
    answer through ``self.one_hot_vector``. Stories are then padded with
    ``pad_sentences`` using ``self.story_maxlen``, queries using
    ``self.query_maxlen``, and the answers are collected with ``np.array``.

    Args:
        data (iterable) : (story, query, answer) triples of word data.

    Returns:
        tuple : (stories, queries, answers) in vectorized form.
    """
    # One pass over data keeps the per-sample conversion order
    # (story, then query, then answer) and works for one-shot iterables.
    encoded = [
        (self.words_to_vector(story),
         self.words_to_vector(query),
         self.one_hot_vector(answer))
        for story, query, answer in data
    ]
    stories = pad_sentences([row[0] for row in encoded], self.story_maxlen)
    queries = pad_sentences([row[1] for row in encoded], self.query_maxlen)
    answers = np.array([row[2] for row in encoded])
    return (stories, queries, answers)
def pad_sentences(sentences, sentence_length=None, dtype=np.int32, pad_val=0.):
    """
    Deprecated pass-through to ``pad_sentences``.

    Emits a deprecation notice through ``logger.error`` and delegates with
    the arguments exactly as received.

    NOTE(review): the delegated call uses the bare name ``pad_sentences``.
    This looks like a Text-class wrapper forwarding to the implementation in
    neon.data.text_preprocessing -- verify the name resolves there and not
    back to this wrapper, which would recurse without end.

    Args:
        sentences: Passed through positionally.
        sentence_length: Passed through by keyword. Defaults to None.
        dtype: Passed through by keyword. Defaults to ``np.int32``.
        pad_val: Passed through by keyword. Defaults to ``0.``.

    Returns:
        The result of the delegated ``pad_sentences`` call.
    """
    # The message is split across two literals; implicit concatenation adds
    # nothing between them, so the first piece ends with a space.
    message = ('pad_sentences in the Text class is deprecated. This function '
               'is now in neon.data.text_preprocessing.')
    logger.error(message)
    return pad_sentences(sentences, sentence_length=sentence_length,
                         dtype=dtype, pad_val=pad_val)
def vectorize_stories(self, data):
    """
    Vectorize a collection of (story, query, answer) word samples.

    Stories and queries are converted with ``self.words_to_vector`` and
    answers with ``self.one_hot_vector``; afterwards the stories are passed
    to ``pad_sentences`` with ``self.story_maxlen``, the queries with
    ``self.query_maxlen``, and the answers are wrapped by ``np.array``.

    Args:
        data (iterable) : Samples, each unpacking to story, query, answer.

    Returns:
        tuple : Story vectors, query vectors and answer vectors, in that
            order.
    """
    story_vecs = []
    query_vecs = []
    answer_vecs = []
    for sample in data:
        story, query, answer = sample
        story_vecs.append(self.words_to_vector(story))
        query_vecs.append(self.words_to_vector(query))
        answer_vecs.append(self.one_hot_vector(answer))

    padded_stories = pad_sentences(story_vecs, self.story_maxlen)
    padded_queries = pad_sentences(query_vecs, self.query_maxlen)
    return (padded_stories, padded_queries, np.array(answer_vecs))
def vectorize(words, max_len):
    """
    Convert raw ``words`` into a padded backend array.

    The input is tokenized with ``BABI.tokenize``, mapped through
    ``babi.words_to_vector``, wrapped in a one-element list and passed to
    ``pad_sentences`` together with ``max_len``; the result is handed to
    ``be.array``.

    Args:
        words: Input passed to ``BABI.tokenize``.
        max_len: Second positional argument given to ``pad_sentences``.

    Returns:
        The value returned by ``be.array``.
    """
    tokens = BABI.tokenize(words)
    word_vector = babi.words_to_vector(tokens)
    # pad_sentences receives a batch, so the single vector is wrapped in a list.
    padded = pad_sentences([word_vector], max_len)
    return be.array(padded)
def vectorize(words, max_len):
    """
    Tokenize, index, pad and wrap ``words`` for the backend.

    Steps, in order: ``BABI.tokenize(words)``, ``babi.words_to_vector`` on
    the tokens, ``pad_sentences`` on a single-item list holding that vector
    (with ``max_len`` as the second argument), and finally ``be.array``.

    Args:
        words: Value to tokenize.
        max_len: Forwarded to ``pad_sentences`` as its second argument.

    Returns:
        Result of ``be.array`` applied to the padded batch.
    """
    single_item_batch = [babi.words_to_vector(BABI.tokenize(words))]
    padded_batch = pad_sentences(single_item_batch, max_len)
    return be.array(padded_batch)