Exemplo n.º 1
0
class InputTransformer:
    """Turn raw training samples into padded integer sequences and binarized labels.

    One ``Encoder`` instance is created per transformer and reused by every
    call to :meth:`transform`.
    """

    def __init__(self):
        self.encoder = Encoder()

    def transform(self, X_train, y_train, augment=None, maxlen=600):
        """Encode, pad and binarize a training set.

        Args:
            X_train: Iterable of raw samples. Each sample is handed to
                ``preprocess_chars`` together with a per-character encoder.
            y_train: Iterable of labels for ``X_train``.
            augment: Optional callable ``augment(X, y) -> (X, y)`` applied to
                list copies of the inputs before encoding. ``None`` skips
                augmentation.
            maxlen: ``maxlen`` forwarded to ``sequence.pad_sequences``.
                Defaults to 600, the value that used to be hard-coded.

        Returns:
            A ``(X, y)`` tuple: the output of ``sequence.pad_sequences`` and
            the output of ``LabelBinarizer().fit_transform``.

        Raises:
            ValueError: If no samples are left to transform.
        """
        # Work on copies so the caller's containers are never handed to
        # ``augment`` directly.
        samples = list(X_train)
        labels = list(y_train)
        print('before augmenting', len(samples))
        if augment is not None:
            samples, labels = augment(samples, labels)

        print('after augmetning', len(samples), len(labels))

        def char_func(char):
            # Shift every code by one; presumably this keeps 0 free for the
            # padding value (pad_sequences pads with 0 by default) -- confirm
            # against Encoder.
            return self.encoder.transform(char) + 1

        encoded = [
            preprocess_chars(ingredients, char_func) for ingredients in samples
        ]

        lengths = numpy.array([len(x) for x in encoded])
        if lengths.size == 0:
            # numpy's min()/max() on an empty array raise an opaque ValueError;
            # fail with the same exception type but a message that names the
            # actual problem.
            raise ValueError('cannot transform an empty training set')
        print(lengths.min(), lengths.mean(), lengths.max(), lengths.std())

        padded = sequence.pad_sequences(encoded, maxlen=maxlen)

        print("ingredients")
        print(padded[:3])

        # NOTE: the binarizer is fitted fresh on every call and is not kept,
        # so callers cannot use it afterwards to invert the label encoding.
        label_transform = LabelBinarizer()
        binarized = label_transform.fit_transform(labels)

        return padded, binarized
Exemplo n.º 2
0
 def test_encoder(self):
     """Check the codes a fresh ``Encoder`` returns for "a", "b", then "a" again."""
     encoder = Encoder()
     # Order matters: the calls are made in sequence on the same encoder, and
     # the repeated "a" must give the same code as its first occurrence.
     expected_codes = (("a", 0), ("b", 1), ("a", 0))
     for char, code in expected_codes:
         self.assertEqual(encoder.transform(char), code)