def test_io_api(tmp_dir):
    """Fit a three-input, two-output ``ak.AutoModel`` on a 20-sample slice.

    Inputs are an image array from ``mnist.load_data()``, a text array from
    ``common.imdb_raw()`` and generated structured data; outputs are a
    regression target and a 3-class one-hot classification target.

    Args:
        tmp_dir: Directory handed to ``ak.AutoModel`` as ``directory``.
    """
    sample_count = 20

    # Only the first element of each loader's first pair is used; the
    # remaining values are unpacked (to keep the same shape check) and dropped.
    (mnist_images, _), (_, _) = mnist.load_data()
    (imdb_texts, _), (_, _) = common.imdb_raw()
    image_x = mnist_images[:sample_count]
    text_x = imdb_texts[:sample_count]

    structured_data_x = common.generate_structured_data(
        num_instances=sample_count)
    classification_y = common.generate_one_hot_labels(
        num_instances=sample_count, num_classes=3)
    regression_y = common.generate_data(
        num_instances=sample_count, shape=(1, ))

    # Build the model: one input node per modality, one head per target.
    input_nodes = [ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()]
    output_heads = [
        ak.RegressionHead(metrics=['mae']),
        ak.ClassificationHead(loss='categorical_crossentropy',
                              metrics=['accuracy']),
    ]
    automodel = ak.AutoModel(
        inputs=input_nodes,
        outputs=output_heads,
        directory=tmp_dir,
        max_trials=2,
        seed=common.SEED)

    # Train; targets are listed in the same order as the heads above.
    automodel.fit(
        [image_x, text_x, structured_data_x],
        [regression_y, classification_y],
        epochs=2,
        validation_split=0.2)
def test_structured_data_input_col_type_without_name():
    """Column types without column names must be rejected.

    Building a ``node.StructuredDataInput`` with only ``column_types`` and
    fitting it on generated data is expected to raise ``ValueError`` with the
    message ``'Column names must be specified.'``.
    """
    sample_count = 500
    features = common.generate_structured_data(sample_count)

    with pytest.raises(ValueError) as exc_info:
        node.StructuredDataInput(
            column_types=common.COLUMN_TYPES_FROM_NUMPY).fit(features)

    # The exact message is part of the contract being tested.
    assert str(exc_info.value) == 'Column names must be specified.'
def test_feature_engineering(tmp_dir):
    """Run ``FeatureEngineering`` through ``run_preprocessor``.

    Column names and types are set directly on the preprocessor, and the
    result is only checked to be a ``tf.data.Dataset``.

    NOTE(review): this file defines another ``test_feature_engineering``
    further down; within a single module the later definition replaces this
    one, so this variant would not be collected -- confirm which is intended.

    Args:
        tmp_dir: Passed through to ``run_preprocessor`` as its last argument.
    """
    structured_dataset = common.generate_structured_data(dtype='dataset')

    preprocessor = preprocessor_module.FeatureEngineering()
    preprocessor.column_names = common.COLUMN_NAMES_FROM_NUMPY
    preprocessor.column_types = common.COLUMN_TYPES_FROM_NUMPY

    companion_dataset = common.generate_data(dtype='dataset')
    result = run_preprocessor(preprocessor,
                              structured_dataset,
                              companion_dataset,
                              tf.float32,
                              tmp_dir)

    assert isinstance(result, tf.data.Dataset)
def test_structured_data_assembler():
    """Assembling a structured-data input must attach a ``StructuredDataBlock``.

    Every element of the generated dataset is fed to the assembler before it
    assembles a fresh ``node.StructuredDataInput``.

    NOTE(review): this file defines another ``test_structured_data_assembler``
    further down; within a single module the later definition replaces this
    one, so this variant would not be collected -- confirm which is intended.
    """
    rows = tf.data.Dataset.from_tensor_slices(
        common.generate_structured_data())

    assembler = meta_model.StructuredDataAssembler()
    for row in rows:
        assembler.update(row)

    input_node = node.StructuredDataInput()
    assembler.assemble(input_node)

    first_block = input_node.out_blocks[0]
    assert isinstance(first_block, ak.StructuredDataBlock)
def test_feature_engineering():
    """Run ``FeatureEngineering`` with its column info held by an input node.

    The preprocessor's ``input_node`` is an ``ak.StructuredDataInput`` built
    with the column names and types from ``common``; the result of
    ``run_preprocessor`` is only checked to be a ``tf.data.Dataset``.

    NOTE(review): an earlier function in this file has the same name; within
    a single module this definition replaces it -- confirm that is intended.
    """
    structured_dataset = common.generate_structured_data(dtype='dataset')

    preprocessor = preprocessor_module.FeatureEngineering()
    preprocessor.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)

    companion_dataset = common.generate_data(dtype='dataset')
    result = run_preprocessor(preprocessor,
                              structured_dataset,
                              companion_dataset,
                              tf.float32)

    assert isinstance(result, tf.data.Dataset)
def test_structured_data_assembler():
    """The assembler must set the expected column types on the input node.

    The assembler is created with the column names from ``common``, updated
    with every element of the generated dataset, and then asked to assemble a
    fresh ``node.StructuredDataInput``; the node's ``column_types`` must equal
    ``common.COLUMN_TYPES_FROM_NUMPY``.

    NOTE(review): an earlier function in this file has the same name; within
    a single module this definition replaces it -- confirm that is intended.
    """
    rows = tf.data.Dataset.from_tensor_slices(
        common.generate_structured_data())

    assembler = meta_model.StructuredDataAssembler(
        column_names=common.COLUMN_NAMES_FROM_NUMPY)
    for row in rows:
        assembler.update(row)

    input_node = node.StructuredDataInput()
    assembler.assemble(input_node)

    assert input_node.column_types == common.COLUMN_TYPES_FROM_NUMPY
def test_structured_data_from_numpy_classifier(tmp_dir):
    """Fit ``ak.StructuredDataClassifier`` and check the prediction shape.

    500 generated rows are split 400/100; the classifier is fitted on the
    first part (which is also passed as validation data) and predictions for
    the held-out part must have shape ``(len(y_test), 3)``.

    Args:
        tmp_dir: Directory handed to the classifier as ``directory``.
    """
    total_rows = 500
    train_rows = 400

    features = common.generate_structured_data(total_rows)
    x_train = features[:train_rows]
    x_test = features[train_rows:]

    labels = common.generate_one_hot_labels(num_instances=total_rows,
                                            num_classes=3)
    y_train = labels[:train_rows]
    y_test = labels[train_rows:]

    classifier = ak.StructuredDataClassifier(directory=tmp_dir,
                                             max_trials=1,
                                             seed=common.SEED)
    classifier.fit(x_train,
                   y_train,
                   epochs=2,
                   validation_data=(x_train, y_train))

    predictions = classifier.predict(x_test)
    assert predictions.shape == (len(y_test), 3)
def test_structured_data_from_numpy_regressor(tmp_dir):
    """Fit ``ak.StructuredDataRegressor`` and check the prediction shape.

    500 generated rows are split 400/100; the regressor is fitted on the
    first part (which is also passed as validation data) and predictions for
    the held-out part must have shape ``(len(y_test), 1)``.

    Args:
        tmp_dir: Directory handed to the regressor as ``directory``.
    """
    total_rows = 500
    train_rows = 400

    features = common.generate_structured_data(total_rows)
    x_train = features[:train_rows]
    x_test = features[train_rows:]

    targets = common.generate_data(num_instances=total_rows, shape=(1, ))
    y_train = targets[:train_rows]
    y_test = targets[train_rows:]

    regressor = ak.StructuredDataRegressor(directory=tmp_dir,
                                           max_trials=1,
                                           seed=common.SEED)
    regressor.fit(x_train,
                  y_train,
                  epochs=2,
                  validation_data=(x_train, y_train))

    predictions = regressor.predict(x_test)
    assert predictions.shape == (len(y_test), 1)
def test_structured_regressor(init, fit):
    """Check that ``ak.StructuredDataRegressor`` reaches ``init`` and ``fit``.

    Args:
        init: Object whose ``called`` attribute must be truthy after the
            regressor is constructed and fitted (presumably a mock injected
            from outside this block -- confirm against the decorators).
        fit: Same as ``init`` for the fit call.
    """
    # Local import so this block stays self-contained.
    import tempfile

    num_data = 500
    train_x = common.generate_structured_data(num_data)
    # Targets use the same number of rows as the features so the data stays
    # coherent even if ``fit`` is no longer replaced by a stand-in.
    train_y = common.generate_data(num_instances=num_data, shape=(1,))

    # ``tmp_dir`` is not a parameter of this test, so a bare reference to it
    # would not give a real directory path; use a temporary directory that is
    # removed when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        regressor = ak.StructuredDataRegressor(
            column_names=common.COLUMN_NAMES_FROM_NUMPY,
            directory=work_dir,
            max_trials=1,
            seed=common.SEED)
        regressor.fit(train_x,
                      train_y,
                      epochs=2,
                      validation_data=(train_x, train_y))

    assert init.called
    assert fit.called
def test_structured_classifier(init, fit):
    """Check that ``ak.StructuredDataClassifier`` reaches ``init`` and ``fit``.

    Args:
        init: Object whose ``called`` attribute must be truthy after the
            classifier is constructed and fitted (presumably a mock injected
            from outside this block -- confirm against the decorators).
        fit: Same as ``init`` for the fit call.
    """
    # Local import so this block stays self-contained.
    import tempfile

    num_data = 500
    train_x = common.generate_structured_data(num_data)
    train_y = common.generate_one_hot_labels(num_instances=num_data,
                                             num_classes=3)

    # ``tmp_dir`` is not a parameter of this test, so a bare reference to it
    # would not give a real directory path; use a temporary directory that is
    # removed when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        classifier = ak.StructuredDataClassifier(
            column_names=common.COLUMN_NAMES_FROM_NUMPY,
            directory=work_dir,
            max_trials=1,
            seed=common.SEED)
        classifier.fit(train_x,
                       train_y,
                       epochs=2,
                       validation_data=(train_x, train_y))

    assert init.called
    assert fit.called
def test_structured_data_input(tmp_dir):
    """Train and predict with a hand-built structured-data graph.

    The graph is ``StructuredDataInput -> StructuredDataBlock ->
    ClassificationHead`` wrapped in ``ak.GraphAutoModel``. Labels are random
    integers drawn from ``[0, 3)``. The test passes if fitting and predicting
    complete without raising; no result is asserted.

    Args:
        tmp_dir: Directory handed to ``ak.GraphAutoModel`` as ``directory``.
    """
    sample_count = 500
    features = common.generate_structured_data(sample_count)
    labels = np.random.randint(0, 3, sample_count)

    # Wire the graph: input node -> structured block -> classification head.
    input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)
    block_output = ak.StructuredDataBlock()(input_node)
    head = ak.ClassificationHead(loss='categorical_crossentropy',
                                 metrics=['accuracy'])
    output_node = head(block_output)

    auto_model = ak.GraphAutoModel(input_node,
                                   output_node,
                                   directory=tmp_dir,
                                   max_trials=1)

    # The training data doubles as validation data.
    auto_model.fit(features,
                   labels,
                   epochs=1,
                   validation_data=(features, labels))
    auto_model.predict(features)