Пример #1
0
 def _create_char_features(self, sentences, sentence_length, word_length):
     """
     Encode each word of each sentence as a row of character ids.

     Characters receive ids in order of first appearance, starting at 3.
     Every word is wrapped with a leading 1 and a trailing 2 before it is
     handed to ``pad_sentences`` with ``sentence_length=word_length``.
     The per-sentence result is then cut down to its first
     ``sentence_length`` rows, or has all-zero rows stacked on top of it
     until it has ``sentence_length`` rows.

     Args:
         sentences: Iterable of sentences, each an iterable of words, each
             an iterable of characters.
         sentence_length (int): Number of rows kept per sentence.
         word_length (int): Width used for the zero rows and passed to
             ``pad_sentences``.

     Returns:
         The per-sentence arrays combined with ``np.asarray``.

     Side effects:
         Stores the character-to-id dictionary in ``self.vocabs`` under
         the key ``'char_rnn'``.
     """
     vocab = {}
     next_id = 3  # ids 1 and 2 are used below as word start/end markers
     encoded = []
     for sentence in sentences:
         word_rows = []
         for word in sentence:
             ids = [1]
             for ch in word:
                 if ch not in vocab:
                     vocab[ch] = next_id
                     next_id += 1
                 ids.append(vocab[ch])
             ids.append(2)
             word_rows.append(ids)
         # NOTE(review): assumes pad_sentences returns a 2-D array with one
         # row per word and word_length columns -- confirm.
         matrix = pad_sentences(word_rows, sentence_length=word_length)
         missing = sentence_length - matrix.shape[0]
         if missing >= 0:
             # Too few words (or exactly enough): stack zero rows on top.
             filler = np.zeros((missing, word_length))
             matrix = np.vstack((filler, matrix))
         else:
             # Too many words: keep only the first sentence_length rows.
             matrix = matrix[:sentence_length]
         encoded.append(matrix)
     stacked = np.asarray(encoded)
     self.vocabs.update({'char_rnn': vocab})
     return stacked
Пример #2
0
 def _create_char_features(self, sentences, sentence_length, word_length):
     """
     Build character-id features for a collection of sentences.

     Each distinct character is assigned an id the first time it is seen,
     counting up from 3. A word becomes ``[1] + ids + [2]``; the words of a
     sentence are passed to ``pad_sentences`` with
     ``sentence_length=word_length``. The resulting array is truncated to
     its first ``sentence_length`` rows when it has more, otherwise zero
     rows are stacked above it to reach ``sentence_length`` rows.

     Args:
         sentences: Iterable of sentences; each sentence iterates over
             words and each word iterates over characters.
         sentence_length (int): Target number of rows per sentence.
         word_length (int): Column count of the zero rows; also forwarded
             to ``pad_sentences``.

     Returns:
         ``np.asarray`` applied to the list of per-sentence arrays.

     Side effects:
         Updates ``self.vocabs`` with ``{'char_rnn': <char-to-id dict>}``.
     """
     char_to_id = {}
     fresh_id = 3  # 1 and 2 bracket every word below
     per_sentence = []
     for sent in sentences:
         rows = []
         for token in sent:
             codes = [1]
             for symbol in token:
                 if symbol not in char_to_id:
                     char_to_id[symbol] = fresh_id
                     fresh_id += 1
                 codes.append(char_to_id[symbol])
             codes.append(2)
             rows.append(codes)
         # NOTE(review): presumably pad_sentences yields a 2-D array with
         # word_length columns -- verify against its definition.
         block = pad_sentences(rows, sentence_length=word_length)
         shortfall = sentence_length - block.shape[0]
         if shortfall >= 0:
             # Prepend zero rows so the sentence has sentence_length rows.
             zeros = np.zeros((shortfall, word_length))
             block = np.vstack((zeros, block))
         else:
             # Drop every row past the first sentence_length.
             block = block[:sentence_length]
         per_sentence.append(block)
     features = np.asarray(per_sentence)
     self.vocabs.update({'char_rnn': char_to_id})
     return features
Пример #3
0
 def pad_sentences(sentences, sentence_length=None, dtype=np.int32, pad_val=0.):
     """
     Deprecated forwarding wrapper around ``pad_sentences``.

     Logs a deprecation notice and then forwards every argument unchanged.

     Args:
         sentences: Sentences to pad; forwarded as-is.
         sentence_length (optional): Forwarded as ``sentence_length``.
             Defaults to None.
         dtype (optional): Forwarded as ``dtype``. Defaults to ``np.int32``.
         pad_val (optional): Forwarded as ``pad_val``. Defaults to ``0.``.

     Returns:
         Whatever the forwarded ``pad_sentences`` call returns.
     """
     # NOTE(review): the call below must resolve to the relocated
     # implementation (the message points at neon.data.text_preprocessing),
     # not to this wrapper itself, otherwise it would recurse -- confirm how
     # the name is bound where this definition lives.
     #
     # The message previously misspelled the function name and, because the
     # two adjacent literals were joined without a separator, read
     # "This functionis now in ...".
     logger.error('pad_sentences in the Text class is deprecated.  This function '
                  'is now in neon.data.text_preprocessing.')
     return pad_sentences(sentences,
                          sentence_length=sentence_length,
                          dtype=dtype,
                          pad_val=pad_val)
Пример #4
0
    def vectorize_stories(self, data):
        """
        Turn (story, query, answer) word data into padded vectors.

        Each story and query is converted with ``self.words_to_vector`` and
        each answer with ``self.one_hot_vector``. Stories and queries are
        then passed to ``pad_sentences`` with ``self.story_maxlen`` and
        ``self.query_maxlen`` respectively; answers are wrapped with
        ``np.array``.

        Args:
            data: Iterable of (story, query, answer) triples.

        Returns:
            tuple : (stories, queries, answers) after vectorization.
        """
        story_vecs = []
        query_vecs = []
        answer_vecs = []
        # One pass over data, so the conversions stay interleaved per triple.
        for story_words, query_words, answer_word in data:
            story_vecs.append(self.words_to_vector(story_words))
            query_vecs.append(self.words_to_vector(query_words))
            answer_vecs.append(self.one_hot_vector(answer_word))

        padded_stories = pad_sentences(story_vecs, self.story_maxlen)
        padded_queries = pad_sentences(query_vecs, self.query_maxlen)
        return (padded_stories, padded_queries, np.array(answer_vecs))
Пример #5
0
 def pad_sentences(sentences,
                   sentence_length=None,
                   dtype=np.int32,
                   pad_val=0.):
     """
     Deprecated forwarding wrapper around ``pad_sentences``.

     Emits a deprecation message through ``logger.error`` and then passes
     all arguments through unchanged.

     Args:
         sentences: Sentences to pad; forwarded as-is.
         sentence_length (optional): Forwarded as ``sentence_length``.
             Defaults to None.
         dtype (optional): Forwarded as ``dtype``. Defaults to ``np.int32``.
         pad_val (optional): Forwarded as ``pad_val``. Defaults to ``0.``.

     Returns:
         The result of the forwarded ``pad_sentences`` call.
     """
     # NOTE(review): this relies on the name pad_sentences resolving to the
     # relocated implementation (the message points at
     # neon.data.text_preprocessing) rather than to this wrapper; if it
     # resolved to this wrapper the call would recurse -- confirm.
     #
     # Message fix: the function name was misspelled and the adjacent
     # literals were joined with no space ("This functionis now in ...").
     logger.error(
         'pad_sentences in the Text class is deprecated.  This function '
         'is now in neon.data.text_preprocessing.')
     return pad_sentences(sentences,
                          sentence_length=sentence_length,
                          dtype=dtype,
                          pad_val=pad_val)
Пример #6
0
    def vectorize_stories(self, data):
        """
        Vectorize a collection of (story, query, answer) word triples.

        Stories and queries go through ``self.words_to_vector`` and are
        then handed to ``pad_sentences`` together with
        ``self.story_maxlen`` / ``self.query_maxlen``. Answers go through
        ``self.one_hot_vector`` and are collected with ``np.array``.

        Args:
            data: Iterable yielding (story, query, answer) triples.

        Returns:
            tuple : (story vectors, query vectors, answer vectors).
        """
        collected = {'story': [], 'query': [], 'answer': []}
        # A single loop keeps the per-triple conversion order intact.
        for story_item, query_item, answer_item in data:
            collected['story'].append(self.words_to_vector(story_item))
            collected['query'].append(self.words_to_vector(query_item))
            collected['answer'].append(self.one_hot_vector(answer_item))

        stories = pad_sentences(collected['story'], self.story_maxlen)
        queries = pad_sentences(collected['query'], self.query_maxlen)
        answers = np.array(collected['answer'])
        return (stories, queries, answers)
Пример #7
0
def vectorize(words, max_len):
    """
    Tokenize ``words``, map the tokens to a vector, pad it and wrap it.

    The tokens from ``BABI.tokenize`` are converted with
    ``babi.words_to_vector``, passed as a one-element list to
    ``pad_sentences`` with ``max_len``, and the result is given to
    ``be.array``.

    Args:
        words: Input handed to ``BABI.tokenize``.
        max_len: Second argument forwarded to ``pad_sentences``.

    Returns:
        The value returned by ``be.array``.
    """
    tokens = BABI.tokenize(words)
    word_vector = babi.words_to_vector(tokens)
    padded = pad_sentences([word_vector], max_len)
    return be.array(padded)
Пример #8
0
def vectorize(words, max_len):
    """
    Convert raw ``words`` into a padded backend array.

    Steps: ``BABI.tokenize`` -> ``babi.words_to_vector`` ->
    ``pad_sentences`` (on a one-element list, with ``max_len``) ->
    ``be.array``.

    Args:
        words: Input handed to ``BABI.tokenize``.
        max_len: Second argument forwarded to ``pad_sentences``.

    Returns:
        The value returned by ``be.array``.
    """
    vector = babi.words_to_vector(BABI.tokenize(words))
    batch_of_one = [vector]
    return be.array(pad_sentences(batch_of_one, max_len))