class InputTransformer:
    """Convert raw training samples and labels into model-ready arrays.

    Samples are encoded through a shared ``Encoder`` and padded to a fixed
    length; labels are binarized with a ``LabelBinarizer``.

    Attributes:
        encoder: The ``Encoder`` used to map characters to integer ids.
        label_transform: The ``LabelBinarizer`` fitted by the most recent
            ``transform`` call, or ``None`` if ``transform`` has not run yet.
    """

    def __init__(self):
        self.encoder = Encoder()
        # Kept so callers can map binarized outputs back to the original
        # labels after ``transform`` has fitted it.
        self.label_transform = None

    def transform(self, X_train, y_train, augment=None, maxlen=600):
        """Encode and pad the samples, and binarize the labels.

        Progress and length statistics are printed to stdout along the way.

        Args:
            X_train: Iterable of raw samples; each one is handed to
                ``preprocess_chars``.
            y_train: Iterable of labels, one per sample.
            augment: Optional callable ``augment(X, y) -> (X, y)`` applied to
                the list copies of the inputs before encoding. ``None``
                skips augmentation.
            maxlen: Value forwarded to ``sequence.pad_sequences`` as
                ``maxlen``. Defaults to 600.

        Returns:
            A ``(X, y)`` tuple: the result of ``sequence.pad_sequences`` on
            the encoded samples, and the result of
            ``LabelBinarizer.fit_transform`` on the labels.

        Raises:
            ValueError: If there are no samples left to transform.
        """
        # Work on list copies so generators are consumed once and the
        # caller's containers are never rebound.
        samples = list(X_train)
        labels = list(y_train)

        print('before augmenting', len(samples))
        if augment is not None:
            samples, labels = augment(samples, labels)
        print('after augmetning', len(samples), len(labels))

        def char_func(char):
            # Shift ids up by one; presumably this keeps 0 free for the
            # padding value used by pad_sequences -- TODO confirm.
            return self.encoder.transform(char) + 1

        encoded = [preprocess_chars(sample, char_func) for sample in samples]

        # Fail with a clear message instead of numpy's zero-size reduction
        # error from the statistics below.
        if not encoded:
            raise ValueError("InputTransformer.transform received no samples")

        lengths = numpy.array([len(seq) for seq in encoded])
        print(lengths.min(), lengths.mean(), lengths.max(), lengths.std())

        padded = sequence.pad_sequences(encoded, maxlen=maxlen)
        print("ingredients")
        print(padded[:3])

        self.label_transform = LabelBinarizer()
        binarized = self.label_transform.fit_transform(labels)
        return padded, binarized
def test_encoder(self):
    """Check the ids a fresh Encoder returns for "a", "b", then "a" again."""
    enc = Encoder()
    # (input, expected id) pairs, checked in order: "a" must map to 0 both
    # before and after "b" has been seen.
    expectations = (("a", 0), ("b", 1), ("a", 0))
    for char, expected_id in expectations:
        self.assertEqual(enc.transform(char), expected_id)