Example #1
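# Context assumed from the surrounding module (not shown in this excerpt):
#   from absl import logging        # or the stdlib logging module
#   from tensorflow import keras
#   data_lib, build_model           # project-local data pipeline and model builder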
def build_and_train_model(opts,
                          data_config_train,
                          data_config_eval,
                          output_dir,
                          num_epochs,
                          fake_training=False):
    """Compile and fit a Keras Criteo model."""
    model = build_model(opts)
    logging.info('Compiling model...')
    model.compile(
        keras.optimizers.Adam(opts.learning_rate),
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=[keras.metrics.BinaryAccuracy(),
                 keras.metrics.AUC()])

    logging.info('Building datasets...')
    bs = opts.batch_size
    # In fake-training mode, run just two steps per epoch as a smoke test.
    steps_per_epoch = 2 if fake_training else data_lib.NUM_TRAIN_EXAMPLES // bs
    dataset_eval = data_lib.build_dataset(data_config_eval, batch_size=bs)
    dataset_train = data_lib.build_dataset(data_config_train,
                                           batch_size=bs,
                                           is_training=True)
    tensorboard_cb = keras.callbacks.TensorBoard(update_freq=int(1e5),
                                                 log_dir=output_dir,
                                                 write_graph=False)

    logging.info('Starting training...')
    model.fit(dataset_train,
              validation_data=dataset_eval,
              epochs=1 if fake_training else num_epochs,
              validation_steps=100,
              callbacks=[tensorboard_cb],
              steps_per_epoch=steps_per_epoch)
    return model
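
A minimal sketch of how build_and_train_model might be invoked. Only the opts
fields the function actually reads (learning_rate, batch_size) are set; the
eval split name and output path are illustrative assumptions, and fake data
keeps the run self-contained.

import argparse

opts = argparse.Namespace(learning_rate=1e-3, batch_size=512)
train_config = data_lib.DataConfig(split='train', fake_data=True)
eval_config = data_lib.DataConfig(split='test', fake_data=True)  # split name assumed
model = build_and_train_model(opts,
                              data_config_train=train_config,
                              data_config_eval=eval_config,
                              output_dir='/tmp/criteo_model',
                              num_epochs=5,
                              fake_training=True)  # 1 epoch x 2 steps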
Example #2
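# Context assumed from the surrounding module (not shown in this excerpt):
#   import tensorflow as tf
#   from absl import logging
#   data_lib, models_lib, array_utils  # project-local helpers
#   _BATCH_SIZE                        # module-level batch-size constant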
def run(model_dir,
        dataset_name,
        predictions_per_example,
        max_examples,
        output_dir,
        fake_data=False):
    """Runs predictions on the given dataset using the specified model."""
    tf.io.gfile.makedirs(output_dir)
    data_config = data_lib.DataConfig.from_name(dataset_name,
                                                fake_data=fake_data)
    dataset = data_lib.build_dataset(data_config, batch_size=_BATCH_SIZE)
    if max_examples:
        dataset = dataset.take(max_examples // _BATCH_SIZE)

    model = models_lib.load_trained_model(model_dir)

    logging.info('Starting predictions.')
    predictions = models_lib.make_predictions(model, dataset,
                                              predictions_per_example)

    array_utils.write_npz(output_dir, 'predictions_%s.npz' % dataset_name,
                          predictions)
    # Also write a compact copy without the per-example probability samples.
    del predictions['probs_samples']
    array_utils.write_npz(output_dir,
                          'predictions_small_%s.npz' % dataset_name,
                          predictions)
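
A sketch of a possible invocation. The model directory, dataset name, and
output path are illustrative assumptions; fake_data=True avoids needing the
real Criteo data on disk.

run(model_dir='/tmp/criteo_model',
    dataset_name='test',  # assumed dataset/split name
    predictions_per_example=4,
    max_examples=1024,
    output_dir='/tmp/criteo_predictions',
    fake_data=True)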
Example #3
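    # Context assumed: import six; import tensorflow as tf; the data_lib helper
    # module; and an enclosing tf.test.TestCase subclass (see the harness
    # sketch after this example).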
    def test_build_dataset(self):
        config = data_lib.DataConfig(split='train', fake_data=True)
        dataset = data_lib.build_dataset(config,
                                         batch_size=8,
                                         is_training=False,
                                         fake_training=False)

        # Check output_shapes.
        features_shapes, label_shape = tf.compat.v1.data.get_output_shapes(
            dataset)
        self.assertEqual([None], label_shape.as_list())
        expected_keys = [
            data_lib.feature_name(i)
            for i in range(1, data_lib.NUM_TOTAL_FEATURES + 1)
        ]
        self.assertSameElements(expected_keys, list(features_shapes.keys()))
        for key, shape in six.iteritems(features_shapes):
            self.assertEqual([None], shape.as_list(),
                             'Unexpected shape at key=' + key)

        # Check output_types.
        features_types, label_type = tf.compat.v1.data.get_output_types(
            dataset)
        self.assertEqual(tf.float32, label_type)
        for idx in data_lib.INT_FEATURE_INDICES:
            self.assertEqual(tf.float32,
                             features_types[data_lib.feature_name(idx)])
        for idx in data_lib.CAT_FEATURE_INDICES:
            self.assertEqual(tf.string,
                             features_types[data_lib.feature_name(idx)])
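
A minimal harness sketch that would run the test method above. The class name
is hypothetical; tf.test.TestCase, which is absltest-based, provides the
assertEqual and assertSameElements helpers the method uses.

import six
import tensorflow as tf


class DataLibTest(tf.test.TestCase):  # class name is an assumption
    # test_build_dataset (defined above) goes in this class body.
    pass


if __name__ == '__main__':
    tf.test.main()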