示例#1
0
def test_preprocessing():
    """Graph preprocessing runs end-to-end on a two-input functional graph."""
    input_shape = (33, )
    output_shape = (1, )

    # Two feature streams plus a target stream, zipped into one pipeline.
    features_a = common.generate_data(num_instances=100,
                                      shape=input_shape,
                                      dtype='dataset')
    features_b = common.generate_data(num_instances=100,
                                      shape=input_shape,
                                      dtype='dataset')
    targets = common.generate_data(num_instances=100,
                                   shape=output_shape,
                                   dtype='dataset')
    dataset = tf.data.Dataset.zip(((features_a, features_b), targets))

    # Branches 1 and 3 share the same normalized first input.
    input_node1 = ak.Input(shape=input_shape)
    normalized = ak.Normalization()(input_node1)
    output_node1 = ak.DenseBlock()(normalized)

    output_node3 = ak.Normalization()(normalized)
    output_node3 = ak.DenseBlock()(output_node3)

    # Branch 2 uses its own input node.
    input_node2 = ak.Input(shape=input_shape)
    output_node2 = ak.Normalization()(input_node2)
    output_node2 = ak.DenseBlock()(output_node2)

    merged = ak.Merge()([output_node1, output_node2, output_node3])
    output_node = ak.RegressionHead()(merged)

    graph = graph_module.HyperBuiltGraphHyperModel(
        [input_node1, input_node2], output_node)
    graph.preprocess(hp=kerastuner.HyperParameters(),
                     dataset=dataset,
                     validation_data=dataset,
                     fit=True)
def test_normalize():
    """The Normalization preprocessor produces a tf.data.Dataset."""
    source = common.generate_data(dtype='dataset')
    result = run_preprocessor(
        preprocessor_module.Normalization(),
        source,
        common.generate_data(dtype='dataset'),
        dtype=tf.float32)
    assert isinstance(result, tf.data.Dataset)
示例#3
0
def test_text_regressor(tmp_dir):
    """TextRegressor fits on raw IMDB text and predicts one value per sample."""
    (train_x, train_y), (test_x, test_y) = common.imdb_raw()
    # Swap the sentiment labels for continuous targets of shape (n, 1).
    train_y = common.generate_data(num_instances=train_y.shape[0], shape=(1, ))
    test_y = common.generate_data(num_instances=test_y.shape[0], shape=(1, ))

    regressor = ak.TextRegressor(directory=tmp_dir,
                                 max_trials=2,
                                 seed=common.SEED)
    regressor.fit(train_x, train_y, epochs=1, validation_data=(test_x, test_y))

    assert regressor.predict(test_x).shape == (len(test_x), 1)
示例#4
0
def test_augment():
    """The ImageAugmentation preprocessor produces a tf.data.Dataset."""
    images = common.generate_data(dtype='dataset')
    augmented = run_preprocessor(
        preprocessor.ImageAugmentation(seed=common.SEED),
        images,
        common.generate_data(dtype='dataset'),
        tf.float32)
    assert isinstance(augmented, tf.data.Dataset)
示例#5
0
def test_lgbm_regressor():
    """A LightGBMBlock wrapping a regressor produces a tf.data.Dataset."""
    features = common.generate_data(11, (32, ), dtype='dataset')
    targets = common.generate_data(11, (1, ), dtype='dataset')

    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMRegressor(
        seed=common.SEED)

    result = run_preprocessor(block, features, targets, tf.float32)
    assert isinstance(result, tf.data.Dataset)
示例#6
0
def test_image_input_with_illegal_dim():
    """ImageInput rejects 1-D (non-image) data with a descriptive error."""
    flat_data = common.generate_data(shape=(32, ))
    image_input = node.ImageInput()
    with pytest.raises(ValueError) as info:
        image_input.fit(flat_data)
        flat_data = image_input.transform(flat_data)
    assert 'Expect the data to ImageInput to have 3' in str(info.value)
示例#7
0
def test_io_api(tmp_dir):
    """AutoModel trains on mixed image/text/structured inputs with one
    regression head and one classification head."""
    num_instances = 20

    # Only the raw feature arrays are needed; labels are regenerated below.
    (image_x, _), _ = mnist.load_data()
    (text_x, _), _ = common.imdb_raw()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = common.generate_structured_data(
        num_instances=num_instances)
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Build model and train.
    automodel = ak.AutoModel(
        inputs=[ak.ImageInput(), ak.TextInput(), ak.StructuredDataInput()],
        outputs=[
            ak.RegressionHead(metrics=['mae']),
            ak.ClassificationHead(loss='categorical_crossentropy',
                                  metrics=['accuracy']),
        ],
        directory=tmp_dir,
        max_trials=2,
        seed=common.SEED)
    automodel.fit([image_x, text_x, structured_data_x],
                  [regression_y, classification_y],
                  epochs=2,
                  validation_split=0.2)
示例#8
0
def test_image_classifier(tmp_dir):
    """ImageClassifier trains, exports a Keras model and predicts 10 classes."""
    images = common.generate_data(num_instances=100, shape=(32, 32, 3))
    labels = common.generate_one_hot_labels(num_instances=100, num_classes=10)

    classifier = ak.ImageClassifier(directory=tmp_dir,
                                    max_trials=2,
                                    seed=common.SEED)
    classifier.fit(images, labels, epochs=1, validation_split=0.2)
    exported = classifier.export_model()

    assert classifier.predict(images).shape == (len(images), 10)
    assert isinstance(exported, tf.keras.Model)
示例#9
0
def test_image_input_with_three_dim():
    """ImageInput expands 2-D grayscale samples with a channel dimension."""
    samples = common.generate_data(shape=(32, 32))
    transformed = node.ImageInput().transform(samples)
    assert isinstance(transformed, tf.data.Dataset)
    # Only the first element needs checking; all samples share one shape.
    first = next(iter(transformed))
    assert first.shape == (32, 32, 1)
示例#10
0
def test_feature_engineering(tmp_dir):
    """FeatureEngineering runs with column metadata set as attributes."""
    structured = common.generate_structured_data(dtype='dataset')

    engineer = preprocessor_module.FeatureEngineering()
    engineer.column_names = common.COLUMN_NAMES_FROM_NUMPY
    engineer.column_types = common.COLUMN_TYPES_FROM_NUMPY

    processed = run_preprocessor(engineer,
                                 structured,
                                 common.generate_data(dtype='dataset'),
                                 tf.float32,
                                 tmp_dir)
    assert isinstance(processed, tf.data.Dataset)
示例#11
0
def test_lgbm_classifier():
    """A LightGBMBlock wrapping a classifier produces a tf.data.Dataset."""
    features = common.generate_data(11, (32,), dtype='dataset')
    labels = common.generate_one_hot_labels(11, dtype='dataset')

    block = preprocessor.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor.LightGBMClassifier(seed=common.SEED)

    result = run_preprocessor(block, features, labels, tf.float32)
    assert isinstance(result, tf.data.Dataset)
示例#12
0
def test_ngram():
    """TextToNgramVector converts raw sentences into a tf.data.Dataset."""
    corpus = [
        'The cat sat on the mat.',
        'The dog sat on the log.',
        'Dogs and cats living together.',
    ]
    text_dataset = tf.data.Dataset.from_tensor_slices(corpus)
    vectorized = run_preprocessor(preprocessor.TextToNgramVector(),
                                  text_dataset,
                                  common.generate_data(dtype='dataset'),
                                  tf.float32)
    assert isinstance(vectorized, tf.data.Dataset)
示例#13
0
def test_feature_engineering():
    """FeatureEngineering runs with column metadata supplied via an
    attached input node."""
    structured = common.generate_structured_data(dtype='dataset')

    engineer = preprocessor_module.FeatureEngineering()
    engineer.input_node = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_NUMPY,
        column_types=common.COLUMN_TYPES_FROM_NUMPY)

    processed = run_preprocessor(engineer,
                                 structured,
                                 common.generate_data(dtype='dataset'),
                                 tf.float32)
    assert isinstance(processed, tf.data.Dataset)
示例#14
0
def test_functional_api(tmp_dir):
    """GraphAutoModel built via the functional API trains on image, text
    and structured data with a regression and a classification head."""
    # Prepare the data.
    num_instances = 20
    (image_x, train_y), (test_x, test_y) = mnist.load_data()
    (text_x, train_y), (test_x, test_y) = common.imdb_raw()
    (structured_data_x, train_y), (test_x, test_y) = common.dataframe_numpy()

    image_x = image_x[:num_instances]
    text_x = text_x[:num_instances]
    structured_data_x = structured_data_x[:num_instances]
    classification_y = common.generate_one_hot_labels(
        num_instances=num_instances, num_classes=3)
    regression_y = common.generate_data(num_instances=num_instances,
                                        shape=(1, ))

    # Image branch: two backbones merged.
    # NOTE(review): the Normalization/ImageAugmentation chain below is not
    # consumed by either backbone (both read image_input directly); kept
    # as-is to preserve behavior — confirm whether that is intentional.
    image_input = ak.ImageInput()
    preprocessed = ak.Normalization()(image_input)
    preprocessed = ak.ImageAugmentation()(preprocessed)
    resnet_out = ak.ResNetBlock(version='next')(image_input)
    xception_out = ak.XceptionBlock()(image_input)
    image_output = ak.Merge()((resnet_out, xception_out))

    # Structured-data branch.
    structured_data_input = ak.StructuredDataInput(
        column_names=common.COLUMN_NAMES_FROM_CSV,
        column_types=common.COLUMN_TYPES_FROM_CSV)
    structured_data_output = ak.FeatureEngineering()(structured_data_input)
    structured_data_output = ak.DenseBlock()(structured_data_output)

    # Text branch: a conv path over int sequences merged with an
    # n-gram dense path.
    text_input = ak.TextInput()
    conv_path = ak.TextToIntSequence()(text_input)
    conv_path = ak.EmbeddingBlock()(conv_path)
    conv_path = ak.ConvBlock(separable=True)(conv_path)
    conv_path = ak.SpatialReduction()(conv_path)
    ngram_path = ak.TextToNgramVector()(text_input)
    ngram_path = ak.DenseBlock()(ngram_path)
    text_output = ak.Merge()((conv_path, ngram_path))

    merged_outputs = ak.Merge()(
        (structured_data_output, image_output, text_output))

    # Two heads share the merged representation.
    regression_outputs = ak.RegressionHead()(merged_outputs)
    classification_outputs = ak.ClassificationHead()(merged_outputs)
    automodel = ak.GraphAutoModel(
        inputs=[image_input, text_input, structured_data_input],
        directory=tmp_dir,
        outputs=[regression_outputs, classification_outputs],
        max_trials=2,
        seed=common.SEED)

    automodel.fit((image_x, text_x, structured_data_x),
                  (regression_y, classification_y),
                  validation_split=0.2,
                  epochs=2)
示例#15
0
def test_structured_data_from_numpy_regressor(tmp_dir):
    """StructuredDataRegressor fits numpy data and predicts one value per row."""
    num_data = 500
    num_train = 400
    features = common.generate_structured_data(num_data)
    targets = common.generate_data(num_instances=num_data, shape=(1, ))
    x_train, x_test = features[:num_train], features[num_train:]
    y_train, y_test = targets[:num_train], targets[num_train:]

    regressor = ak.StructuredDataRegressor(directory=tmp_dir,
                                           max_trials=1,
                                           seed=common.SEED)
    # Training data doubles as validation data to keep the test fast.
    regressor.fit(x_train, y_train, epochs=2,
                  validation_data=(x_train, y_train))

    assert regressor.predict(x_test).shape == (len(y_test), 1)
示例#16
0
def test_lgbm_classifier():
    """Multi-class LightGBM output carries one value per class."""
    features = common.generate_data(100, (32, ), dtype='dataset')
    labels = common.generate_one_hot_labels(100, num_classes=3,
                                            dtype='dataset')

    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMClassifier(
        seed=common.SEED)
    block.lightgbm_block.num_classes = 3

    result = run_preprocessor(block, features, labels, tf.float32)
    # Each transformed sample has one entry per class.
    for (sample, ) in result:
        assert sample.shape == (3, )
        break
    assert isinstance(result, tf.data.Dataset)
示例#17
0
def test_lgbm_classifier_two_classes():
    """Binary LightGBM classification produces a single output column."""
    features = common.generate_data(11, (32, ), dtype='dataset')
    # Random 0/1 labels shaped (11, 1).
    binary_labels = tf.data.Dataset.from_tensor_slices(
        np.random.randint(0, 2, 11).reshape(-1, 1))

    block = preprocessor_module.LightGBMBlock(seed=common.SEED)
    block.lightgbm_block = preprocessor_module.LightGBMClassifier(
        seed=common.SEED)
    # NOTE(review): num_classes is set to 11 (the instance count) rather
    # than 2 — preserved as-is to keep behavior; confirm intended value.
    block.lightgbm_block.num_classes = 11

    result = run_preprocessor(block, features, binary_labels, tf.float32)
    for (sample, ) in result:
        assert sample.shape == (1, )
        break
    assert isinstance(result, tf.data.Dataset)
示例#18
0
def test_structured_regressor(init, fit):
    """StructuredDataRegressor wires column metadata through to fitting.

    Args:
        init: mock patched over the tuner/model initializer — presumably
            injected by a @mock.patch decorator outside this view; the test
            only checks it was invoked.
        fit: mock patched over the fit call, same mechanism.
    """
    # Local import: no directory fixture is available in this signature.
    import tempfile

    num_data = 500
    train_x = common.generate_structured_data(num_data)
    # Bug fix: targets must match the number of feature rows (was 100,
    # mismatching the 500-row train_x).
    train_y = common.generate_data(num_instances=num_data, shape=(1,))

    # Bug fix: `tmp_dir` was an undefined name (neither a parameter nor a
    # local); use a self-cleaning temporary directory instead.
    with tempfile.TemporaryDirectory() as tmp_dir:
        clf = ak.StructuredDataRegressor(
            column_names=common.COLUMN_NAMES_FROM_NUMPY,
            directory=tmp_dir,
            max_trials=1,
            seed=common.SEED)
        clf.fit(train_x, train_y, epochs=2,
                validation_data=(train_x, train_y))

    assert init.called
    assert fit.called
示例#19
0
def test_sequence():
    """TextToIntSequence pads every sentence to the longest length (6 tokens)."""
    corpus = [
        'The cat sat on the mat.',
        'The dog sat on the log.',
        'Dogs and cats living together.',
    ]
    text_dataset = tf.data.Dataset.from_tensor_slices(corpus)
    sequences = run_preprocessor(
        preprocessor_module.TextToIntSequence(),
        text_dataset,
        common.generate_data(dtype='dataset'),
        tf.int64)
    assert isinstance(sequences, tf.data.Dataset)
    # All sequences share one padded length, so checking the first suffices.
    (encoded,) = next(iter(sequences))
    assert encoded.shape == (6,)
示例#20
0
def test_add_early_stopping(_2, get_trials, _1, _, run_trial, tmp_dir):
    """The tuner injects EarlyStopping during search but not in the final fit.

    The underscore parameters and `get_trials`/`run_trial` are mocks —
    presumably injected by @mock.patch decorators outside this view.
    """
    # Make the mocked tuner report a single finished trial.
    trial = kerastuner.engine.trial.Trial()
    trial.hyperparameters = kerastuner.HyperParameters()
    get_trials.return_value = [trial]

    input_shape = (32,)
    num_instances = 100
    num_classes = 10
    features = common.generate_data(num_instances=num_instances,
                                    shape=input_shape,
                                    dtype='dataset')
    labels = common.generate_one_hot_labels(num_instances=num_instances,
                                            num_classes=num_classes,
                                            dtype='dataset')

    # Minimal Input -> DenseBlock -> ClassificationHead graph.
    input_node = ak.Input(shape=input_shape)
    head = ak.ClassificationHead(num_classes=num_classes,
                                 output_shape=(num_classes,))
    output_node = head(ak.DenseBlock()(input_node))
    hypermodel = ak.hypermodel.graph.HyperBuiltGraphHyperModel(input_node,
                                                               output_node)

    tuner = ak.tuner.RandomSearch(
        hypermodel=hypermodel,
        objective='val_loss',
        max_trials=1,
        directory=tmp_dir,
        seed=common.SEED)
    tuner.search(x=tf.data.Dataset.zip((features, labels)),
                 validation_data=(features, labels),
                 epochs=20,
                 callbacks=[])

    # First run_trial call (the search) receives an EarlyStopping callback...
    _, kwargs = run_trial.call_args_list[0]
    search_callbacks = kwargs['callbacks']
    assert len(search_callbacks) == 1
    assert isinstance(search_callbacks[0], tf.keras.callbacks.EarlyStopping)

    # ...while the second call keeps the user's empty callback list.
    _, kwargs = run_trial.call_args_list[1]
    final_callbacks = kwargs['callbacks']
    assert len(final_callbacks) == 0
示例#21
0
def test_image_regressor(tmp_dir):
    """ImageRegressor trains on random images and predicts one value each."""
    images = common.generate_data(num_instances=100, shape=(32, 32, 3))
    targets = common.generate_data(num_instances=100, shape=(1, ))

    regressor = ak.ImageRegressor(directory=tmp_dir,
                                  max_trials=2,
                                  seed=common.SEED)
    regressor.fit(images, targets, epochs=1, validation_split=0.2)

    assert regressor.predict(images).shape == (len(images), 1)
示例#22
0
def test_text_input_with_illegal_dim():
    """TextInput rejects multi-dimensional data with a descriptive error."""
    bad_data = common.generate_data(shape=(32,))
    text_input = node.TextInput()
    with pytest.raises(ValueError) as info:
        bad_data = text_input.transform(bad_data)
    assert 'Expect the data to TextInput to have 1' in str(info.value)
示例#23
0
def test_image_input():
    """ImageInput transforms default-shaped data into a tf.data.Dataset."""
    images = common.generate_data()
    image_node = node.ImageInput()
    image_node.fit(images)
    transformed = image_node.transform(images)
    assert isinstance(transformed, tf.data.Dataset)