Example #1
def get_adanet_model():
    # Estimator configuration.
    runConfig = tf.estimator.RunConfig(save_checkpoints_steps=100,
                                       save_summary_steps=100,
                                       tf_random_seed=RANDOM_SEED)
    estimator = adanet.Estimator(
        model_dir=OUTPUT_DIR,
        # adanet_loss_decay=0.99,
        head=tf.contrib.estimator.binary_classification_head(),
        subnetwork_generator=simple_dnn.Generator(
            learn_mixture_weights=True,
            dropout=CONFIG["DROPOUT"],
            feature_columns=bidding_data.get_feature_columns_for_imp_prediction(),
            optimizer=tf.train.RMSPropOptimizer(
                learning_rate=ADANET_LEARNING_RATE),
            seed=RANDOM_SEED),
        max_iteration_steps=NUM_EPOCHS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(
            input_fn=lambda: bidding_data.validation_input_fn_for_predict_imp(
                batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS),
            steps=EVAL_STEPS),
        config=runConfig)

    return estimator
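
A minimal driver sketch for the function above, assuming the same module-level constants (BATCH_SIZE, NUM_EPOCHS, EVAL_STEPS) and a hypothetical bidding_data.train_input_fn_for_predict_imp training counterpart of the validation input_fn:

# Sketch only: train_input_fn_for_predict_imp is a hypothetical training
# counterpart of the validation input_fn configured above.
estimator = get_adanet_model()
estimator.train(
    input_fn=lambda: bidding_data.train_input_fn_for_predict_imp(
        batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS),
    max_steps=NUM_EPOCHS)
metrics = estimator.evaluate(
    input_fn=lambda: bidding_data.validation_input_fn_for_predict_imp(
        batch_size=BATCH_SIZE, num_epochs=1),
    steps=EVAL_STEPS)
print(metrics)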
Example #2
def get_adanet_model():
    LEARNING_RATE = 0.003  #@param {type:"number"}
    TRAIN_STEPS = NUM_EPOCHS  #@param {type:"integer"}
    # BATCH_SIZE = 64  #@param {type:"integer"}
    ADANET_ITERATIONS = 8  #@param {type:"integer"}

    RANDOM_SEED = 42
    # Estimator configuration.
    runConfig = tf.estimator.RunConfig(save_checkpoints_steps=100,
                                       save_summary_steps=100,
                                       tf_random_seed=RANDOM_SEED)
    classifier = adanet.Estimator(
        model_dir=OUTPUT_DIR,
        adanet_loss_decay=0.99,
        head=tf.contrib.estimator.binary_classification_head(),
        subnetwork_generator=simple_dnn.Generator(
            learn_mixture_weights=True,
            dropout=0.5,
            feature_columns=bidding_data.get_feature_columns_for_wr_prediction(),
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            seed=RANDOM_SEED),
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(
            input_fn=lambda: bidding_data.validation_input_fn_for_predict_wr(
                batch_size=BATCH_SIZE, num_epochs=TRAIN_STEPS),
            steps=EVAL_STEPS),
        config=runConfig)

    return classifier
Example #3
def train_and_evaluate(x_train, y_train, x_test, y_test,
                       learning_rate,
                       train_steps,
                       batch_size,
                       learn_mixture_weights,
                       adanet_lambda, boosting_iterations):
    """Trains an adanet.Estimator` to predict housing prices."""

    estimator = adanet.Estimator(
      # Since we are predicting housing prices, we'll use a regression
      # head that optimizes for MSE.
      head=tf.contrib.estimator.regression_head(
          loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),

      # Define the generator, which defines our search space of subnetworks
      # to train as candidates to add to the final AdaNet model.
      subnetwork_generator=subnetworkGenerator.SimpleDNNGenerator(
          optimizer=tf.train.RMSPropOptimizer(learning_rate),
          learn_mixture_weights=learn_mixture_weights,
          seed=utils.RANDOM_SEED),

      # Lambda is the strength of complexity regularization. A larger
      # value will penalize more complex subnetworks.
      adanet_lambda=adanet_lambda,

      # The number of train steps per iteration.
      max_iteration_steps=train_steps // boosting_iterations,

      # The evaluator will evaluate the model on the full training set to
      # compute the overall AdaNet loss (train loss + complexity
      # regularization) to select the best candidate to include in the
      # final AdaNet model.
      evaluator=adanet.Evaluator(
          input_fn=utils.input_fn(x_train, y_train, x_test, y_test,
                                  "train", training=False, batch_size=batch_size)),

      # The report materializer will evaluate the subnetworks' metrics
      # using the full training set to generate the reports that the generator
      # can use in the next iteration to modify its search space.
      report_materializer=adanet.ReportMaterializer(
          input_fn=utils.input_fn(x_train, y_train, x_test, y_test, "train", training=False, batch_size=batch_size)),

      # Configuration for Estimators.
      config=tf.estimator.RunConfig(
          save_checkpoints_steps=50000,
          save_summary_steps=50000,
          tf_random_seed=utils.RANDOM_SEED))

    # Train and evaluate using the tf.estimator tooling.
    train_spec = tf.estimator.TrainSpec(
        input_fn=utils.input_fn(x_train, y_train, x_test, y_test, "train", training=True, batch_size=batch_size),
        max_steps=train_steps)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=utils.input_fn(x_train, y_train, x_test, y_test, "test", training=False, batch_size=batch_size),
        steps=None)

    return tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
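
An illustrative call to the function above, assuming housing-price NumPy arrays are already loaded; the hyperparameter values are placeholders, not taken from the original example:

# Placeholder hyperparameters for illustration only.
results = train_and_evaluate(
    x_train, y_train, x_test, y_test,
    learning_rate=0.001,
    train_steps=60000,
    batch_size=32,
    learn_mixture_weights=False,
    adanet_lambda=0.0,
    boosting_iterations=3)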
Example #4
def cnn_ada():
    print("==============================================")
    start = datetime.datetime.now()
    print("Start Train Adanet with [CNN Model] on Mnist at %s" %
          time_str(start))
    print("- - - - - - - - - - - - - - - - - - - - - - - -")

    LEARNING_RATE = 0.05  # @param {type:"number"}
    TRAIN_STEPS = 5000  # @param {type:"integer"}
    BATCH_SIZE = 64  # @param {type:"integer"}
    ADANET_ITERATIONS = 2  # @param {type:"integer"}

    model_dir = os.path.join(LOG_DIR, "cnn_%s" % time_str(start))

    config = tf.estimator.RunConfig(save_checkpoints_steps=50000,
                                    save_summary_steps=50000,
                                    tf_random_seed=RANDOM_SEED,
                                    model_dir=model_dir)

    max_iteration_steps = TRAIN_STEPS // ADANET_ITERATIONS
    estimator = adanet.Estimator(
        head=head,
        subnetwork_generator=SimpleCNNGenerator(
            learning_rate=LEARNING_RATE,
            max_iteration_steps=max_iteration_steps,
            seed=RANDOM_SEED),
        max_iteration_steps=max_iteration_steps,
        evaluator=adanet.Evaluator(input_fn=input_fn("train",
                                                     training=False,
                                                     batch_size=BATCH_SIZE),
                                   steps=None),
        report_materializer=adanet.ReportMaterializer(input_fn=input_fn(
            "train", training=False, batch_size=BATCH_SIZE),
                                                      steps=None),
        adanet_loss_decay=.99,
        config=config)

    results, _ = tf.estimator.train_and_evaluate(
        estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn(
            "train", training=True, batch_size=BATCH_SIZE),
                                          max_steps=TRAIN_STEPS),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn(
            "test", training=False, batch_size=BATCH_SIZE),
                                        steps=None))

    print("Accuracy:", results["accuracy"])
    print("Loss:", results["average_loss"])

    end = datetime.datetime.now()
    print("Training end at %s" % time_str(end))
    print("Time Spend %s" % str(end - start))

    print("==============================================")
Example #5
def train_and_evaluate(experiment_name,
                       learn_mixture_weights=LEARN_MIXTURE_WEIGHTS,
                       adanet_lambda=ADANET_LAMBDA):
    """Trains an `adanet.Estimator` to predict housing prices."""

    model_dir = os.path.join(LOG_DIR, experiment_name)

    estimator = adanet.Estimator(
        # Since we are predicting housing prices, we'll use a regression
        # head that optimizes for MSE.
        head=tf.contrib.estimator.regression_head(
            loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE),

        # Define the generator, which defines our search space of subnetworks
        # to train as candidates to add to the final AdaNet model
        subnetwork_generator=SimpleDNNGenerator(
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            learn_mixture_weights=learn_mixture_weights,
            seed=RANDOM_SEED),

        # Lambda is the strength of complexity regularization. A larger
        # value will penalize more complex subnetworks.
        adanet_lambda=adanet_lambda,

        # The number of train steps per iteration.
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,

        # The evaluator will evaluate the model on the full training set to
        # compute the overall AdaNet loss (train loss + complexity
        # regularization) to select the best candidate to include in the
        # final AdaNet model.
        evaluator=adanet.Evaluator(
            input_fn=input_fn("train", training=False, batch_size=BATCH_SIZE)),

        # Configuration for Estimators
        config=tf.estimator.RunConfig(save_summary_steps=5000,
                                      save_checkpoints_steps=5000,
                                      tf_random_seed=RANDOM_SEED,
                                      model_dir=model_dir))

    # Train and evaluate using the tf.estimator tooling.
    train_spec = tf.estimator.TrainSpec(input_fn=input_fn(
        "train", training=True, batch_size=BATCH_SIZE),
                                        max_steps=TRAIN_STEPS)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=input_fn("test", training=False, batch_size=BATCH_SIZE),
        steps=None,
        start_delay_secs=1,
        throttle_secs=30,
    )
    return tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
Example #6
 def create_estimator(
     self,
     subnetwork_generator,
     adanet_iterations: int,
     evaluator: Evaluator,
     config: tf.estimator.RunConfig,
 ) -> tf.estimator.Estimator:
     return adanet.Estimator(
         head=self.head,
         subnetwork_generator=subnetwork_generator,
         max_iteration_steps=self.train_steps // adanet_iterations,
         evaluator=evaluator,
         config=config,
     )
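
A sketch of how this helper might be wired up; `trainer` (an instance of the enclosing class with `head` and `train_steps` set), `feature_columns`, and `train_input_fn` are assumptions:

# Hypothetical call site; every name outside create_estimator is an assumption.
import adanet
import tensorflow as tf
from adanet.examples import simple_dnn

estimator = trainer.create_estimator(
    subnetwork_generator=simple_dnn.Generator(
        feature_columns=feature_columns,
        optimizer=tf.train.RMSPropOptimizer(learning_rate=0.003),
        seed=42),
    adanet_iterations=3,
    evaluator=adanet.Evaluator(input_fn=train_input_fn, steps=None),
    config=tf.estimator.RunConfig(tf_random_seed=42))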
Example #7
def dnn_ada():
    print("==============================================")
    start = datetime.datetime.now()
    print("Start Train Adanet with [DNN Model] on Mnist at %s" %
          time_str(start))
    print("- - - - - - - - - - - - - - - - - - - - - - - -")

    LEARNING_RATE = 0.003
    TRAIN_STEPS = 5000
    BATCH_SIZE = 64
    ADANET_ITERATIONS = 2

    model_dir = os.path.join(LOG_DIR, "dnn_%s" % time_str(start))

    config = tf.estimator.RunConfig(save_checkpoints_steps=50000,
                                    save_summary_steps=50000,
                                    tf_random_seed=RANDOM_SEED,
                                    model_dir=model_dir)

    estimator = adanet.Estimator(
        head=head,
        subnetwork_generator=simple_dnn.Generator(
            feature_columns=feature_columns,
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            seed=RANDOM_SEED),
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(input_fn=input_fn("train",
                                                     training=False,
                                                     batch_size=BATCH_SIZE),
                                   steps=None),
        config=config)

    results, _ = tf.estimator.train_and_evaluate(
        estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn(
            "train", training=True, batch_size=BATCH_SIZE),
                                          max_steps=TRAIN_STEPS),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn(
            "test", training=False, batch_size=BATCH_SIZE),
                                        steps=None))

    print("Accuracy:", results["accuracy"])
    print("Loss:", results["average_loss"])

    end = datetime.datetime.now()
    print("Training end at %s" % time_str(end))
    print("Time Spend %s" % str(end - start))
    print("==============================================")
Example #8
    def get_estimator(self):
        estimator = adanet.Estimator(
            head=self.head,
            subnetwork_generator=simple_dnn.Generator(
                feature_columns=self.feature_columns,
                optimizer=tf.train.RMSPropOptimizer(
                    learning_rate=self.LEARNING_RATE),
                seed=self.RANDOM_SEED),
            max_iteration_steps=self.TRAIN_STEPS // self.ADANET_ITERATIONS,
            evaluator=adanet.Evaluator(input_fn=self.input_fn(
                "train",
                training=False,
                batch_size=self.BATCH_SIZE,
                RANDOM_SEED=self.RANDOM_SEED),
                                       steps=None),
            config=self.config)

        return estimator
Example #9
    def train_and_evaluate(output_neurons, LEARNING_RATE,
                           learn_mixture_weights, adanet_lambda,
                           max_iteration_steps, TRAIN_STEPS, BATCH_SIZE,
                           RANDOM_SEED, train_test, model_dir):
        AdaNetEstimator = adanet.Estimator(
            # head instance computes loss and evaluation metrics for every candidate
            head=ah.head_classify(output_neurons),
            # defines candidate subnetworks to train and evaluate at every AdaNet iteration
            subnetwork_generator=SimpleDNNGenerator(
                optimizer=tf.train.AdamOptimizer(
                    learning_rate=LEARNING_RATE),  # adagrad
                layer_size=ah.hidden_layer_neurons(2, train_test),
                learn_mixture_weights=learn_mixture_weights,
                seed=RANDOM_SEED),
            adanet_lambda=adanet_lambda,
            max_iteration_steps=max_iteration_steps,

            # Reports are made available to subnetwork_generator in the next iteration:
            # report_materializer=adanet.ReportMaterializer(
            #     input_fn=ah.input_fn("train", training=False, batch_size=BATCH_SIZE,
            #                          train_test=train_test)),
            config=tf.estimator.RunConfig(
                # save_checkpoints_steps=50000,
                # save_summary_steps=50000,
                tf_random_seed=RANDOM_SEED,
                model_dir=model_dir))

        # Determines input data for training
        train_spec = tf.estimator.TrainSpec(input_fn=ah.input_fn(
            "train",
            training=True,
            batch_size=BATCH_SIZE,
            train_test=train_test),
                                            max_steps=TRAIN_STEPS)

        # Combines evaluation metrics of trained models
        eval_spec = tf.estimator.EvalSpec(input_fn=ah.input_fn(
            "test",
            training=False,
            batch_size=BATCH_SIZE,
            train_test=train_test),
                                          steps=None)

        return tf.estimator.train_and_evaluate(AdaNetEstimator, train_spec,
                                               eval_spec)
Example #10
def get_adanet_model():
    # Estimator configuration.
    # distribution_strategy = tf.contrib.distribute.MirroredStrategy()
    session_config = tf.ConfigProto(log_device_placement=True)
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.8

    runConfig = tf.estimator.RunConfig(
        # train_distribute=distribution_strategy,
        # eval_distribute=distribution_strategy,
        session_config=session_config,
        save_checkpoints_steps=100,
        save_summary_steps=100,
        tf_random_seed=RANDOM_SEED)
    estimator = adanet.Estimator(
        model_dir=OUTPUT_DIR,
        # metric_fn=custom_metrics,
        # adanet_loss_decay=0.99,
        head=tf.contrib.estimator.multi_label_head(
            name="name",
            n_classes=len(CONFIG['LABELS']),
            # classes_for_class_based_metrics= [5,6]
        ),
        subnetwork_generator=simple_dnn.Generator(
            learn_mixture_weights=True,
            dropout=CONFIG["DROPOUT"],
            feature_columns=data.get_feature_columns(),
            optimizer=tf.train.AdamOptimizer(
                learning_rate=ADANET_LEARNING_RATE),
            seed=RANDOM_SEED),
        max_iteration_steps=NUM_EPOCHS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(input_fn=lambda: data.validation_input_fn(
            batch_size=BATCH_SIZE, num_epochs=NUM_EPOCHS),
                                   steps=EVAL_STEPS),
        config=runConfig)

    return estimator
Example #11
    print("Training Done.")
    print("==" * 20)
    print("Accuracy: ", results["accuracy"])
    print("    Loss: ", results["loss"])
    print("==" * 20)

    tf.compat.v1.logging.info("Start to evaluate model with test data.")
    tf.compat.v1.logging.info("Restore model.")

    network_generator_cls = train.MODEL_MAPPER[args.experiment_name]
    network_generator = network_generator_cls(hparams.learning_rate)
    estimator = adanet.Estimator(
        head=AdanetTrainer(1000).head,
        max_iteration_steps=500,
        subnetwork_generator=network_generator.build_subnetwork_generator(
            hparams.unflatten),
        model_dir=args.model_dir,
    )

    test_dataset = MNISTDataset(tf.estimator.ModeKeys.PREDICT, args.dataset_id)

    tf.compat.v1.logging.info("Prepare test data.")
    test_predictions = estimator.predict(
        test_dataset.get_input_fn(hparams.batch_size))
    test_predictions = [pred for pred in test_predictions]

    class_ids = np.array([r["class_ids"].item() for r in test_predictions])
    answers = test_dataset.data[1]

    test_accuracy = (class_ids == answers).sum() / len(answers)
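
For reference, the same test accuracy can be computed in a single NumPy expression (illustrative only):

# Equivalent accuracy computation with NumPy.
test_accuracy = np.mean(class_ids == answers)
print("Test accuracy: {:.4f}".format(test_accuracy))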
Example #12
#@title Parameters
LEARNING_RATE = 0.05  #@param {type:"number"}
TRAIN_STEPS = 8000  #@param {type:"integer"}
BATCH_SIZE = 3600 #@param {type:"integer"}
ADANET_ITERATIONS = 3  #@param {type:"integer"}

max_iteration_steps = TRAIN_STEPS // ADANET_ITERATIONS
estimator = adanet.Estimator(
    head=head,
    subnetwork_generator=SimpleCNNGenerator(
        learning_rate=LEARNING_RATE,
        max_iteration_steps=max_iteration_steps,
        seed=RANDOM_SEED),
    max_iteration_steps=max_iteration_steps,
    evaluator=adanet.Evaluator(
        input_fn=input_fn("train", training=False, batch_size=BATCH_SIZE),
        steps=None),
    report_materializer=adanet.ReportMaterializer(
        input_fn=input_fn("train", training=False, batch_size=BATCH_SIZE),
        steps=None),
    adanet_loss_decay=.99,
    config=config)

results, _ = tf.estimator.train_and_evaluate(
    estimator,
    train_spec=tf.estimator.TrainSpec(
        input_fn=input_fn("train", training=True, batch_size=BATCH_SIZE),
        max_steps=TRAIN_STEPS),
    eval_spec=tf.estimator.EvalSpec(
        input_fn=input_fn("test", training=False, batch_size=BATCH_SIZE),
Example #13
def run_adanet():

    EPOCHS = 10
    BATCH_SIZE = 32

    #x_train, y_train,x_test = load_images()
    x_train, y_train, x_test = load_dog_breed()

    y_train = pd.get_dummies(y_train, sparse=True)

    y_train = np.asarray(y_train)

    x_train = x_train / 255  # map values between 0 and 1
    x_test = x_test / 255  # map values between 0 and 1

    #x_train = x_train.astype(np.float32) # cast values to float32
    #x_test = x_test.astype(np.float32)   # cast values to float32

    #y_train = y_train.astype(np.int32) # cast values to int32

    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": x_train},
                                                        y=y_train,
                                                        batch_size=BATCH_SIZE,
                                                        num_epochs=EPOCHS,
                                                        shuffle=False)

    adanet_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": x_train},
                                                         y=y_train,
                                                         batch_size=BATCH_SIZE,
                                                         num_epochs=1,
                                                         shuffle=False)

    test_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": x_test},
                                                       batch_size=BATCH_SIZE,
                                                       num_epochs=1,
                                                       shuffle=False)

    head = tf.contrib.estimator.multi_class_head(120)
    estimator = adanet.Estimator(head=head,
                                 subnetwork_generator=CNNGenerator(),
                                 max_iteration_steps=200,
                                 evaluator=adanet.Evaluator(
                                     input_fn=adanet_input_fn, steps=None),
                                 adanet_loss_decay=.99)

    results, _ = tf.estimator.train_and_evaluate(
        estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=train_input_fn,
                                          max_steps=200),
        eval_spec=tf.estimator.EvalSpec(input_fn=train_input_fn, steps=None))

    predictions = estimator.predict(input_fn=test_input_fn)

    preds = list()
    for i, val in enumerate(predictions):
        predicted_class = val['class_ids'][0]
        print(val['probabilities'])
        preds.append(predicted_class)
        prediction_confidence = val['probabilities'][predicted_class] * 100

    print("Accuracy:", results["accuracy"])
    print("Loss:", results["average_loss"])

    id_test = pd.read_csv("datasets/aerial-cactus/sample_submission.csv")

    submission = pd.DataFrame({"id": id_test.id.values, "has_cactus": preds})

    submission.to_csv("invasive_adanet_submission.csv", index=False)
Example #14
def map_fun(args, ctx):
    from datetime import datetime
    import tensorflow as tf
    import os
    import time
    import json

    import adanet
    from adanet.examples import simple_dnn

    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index
    message = "worker_num: {0}, job_name: {1}, task_index: {2}".format(
        worker_num, job_name, task_index)
    print(message)
    input_dim = int(args.input_dim)
    batch_size = args.batch_size
    # Fix Random Seed
    RANDOM_SEED = 42

    FEATURES_KEY = "features"

    NUM_CLASSES = 2

    loss_reduction = tf.losses.Reduction.SUM_OVER_BATCH_SIZE

    # head = tf.contrib.estimator.multi_class_head(NUM_CLASSES, loss_reduction=loss_reduction)
    head = tf.contrib.estimator.binary_classification_head(
        loss_reduction=loss_reduction)

    # numeric_column does not support SparseTensor
    feature_columns = [
        tf.feature_column.numeric_column(key=FEATURES_KEY, shape=[input_dim])
    ]

    log_dir = ctx.absolute_path(args.log_dir)
    export_dir = ctx.absolute_path(args.export_dir)
    pred_dir = ctx.absolute_path(args.prediction_dir)
    print("tensorflow log path: {0}".format(log_dir))
    print("tensorflow export path: {0}".format(export_dir))
    print("tensorflow prediction path: {0}".format(pred_dir))

    def generator(ln):
        splits = tf.string_split([ln], delimiter=" ")
        label = splits.values[0]
        label = tf.string_to_number(label, tf.float64)
        label = tf.cond(
            label >= 1.0,
            lambda: tf.constant(1, shape=[1], dtype=tf.float32),
            lambda: tf.constant(0, shape=[1], dtype=tf.float32),
        )

        # SparseTensor output
        col_val = tf.string_split(splits.values[1::], delimiter=":")
        col = tf.string_to_number(col_val.values[0::2], tf.int64) - 1

        vals = col_val.values[1::2]
        vals = tf.string_to_number(vals, tf.float32)

        # Keep only features whose column index is below input_dim
        vals = tf.boolean_mask(vals, col < input_dim)
        col = tf.boolean_mask(col, col < input_dim)

        row = tf.cast(tf.fill(tf.shape(col), 0), tf.int64, name="row_cast")
        row_col = tf.transpose(tf.stack([row, col]), name="row_col_transpose")

        sparse = tf.SparseTensor(row_col, vals, (1, input_dim))

        # Convert to dense (191106: the conversion is required)
        features = {FEATURES_KEY: tf.sparse_tensor_to_dense(sparse)}

        return features, label

    def new_input_fn(partition, training):
        def _input_fn():
            # path is ok
            parse_fn = generator

            if partition == "train":
                data_dir = ctx.absolute_path(args.data_dir)
                file_pattern = os.path.join(data_dir, "part-*")
                ds = tf.data.Dataset.list_files(file_pattern, shuffle=False)

                ds = ds.apply(
                    tf.contrib.data.parallel_interleave(
                        tf.data.TextLineDataset, cycle_length=10))
                ds = ds.map(parse_fn, num_parallel_calls=5)
                if training:
                    ds = ds.shuffle(batch_size * 5).repeat()
            else:
                data_dir = ctx.absolute_path(args.test_dir)
                file_pattern = os.path.join(data_dir, "part-*")
                ds = tf.data.Dataset.list_files(file_pattern, shuffle=False)

                ds = ds.apply(
                    tf.contrib.data.parallel_interleave(
                        tf.data.TextLineDataset, cycle_length=10))
                ds = ds.map(parse_fn, num_parallel_calls=5)

            iterator = ds.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

            # ds = ds.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
            # return ds.batch(batch_size)

        return _input_fn

    print("========= Start Training")
    LEARNING_RATE = 0.01
    TRAIN_STEPS = 3000
    ADANET_ITERATIONS = 3  # AKA Boosting Iteration
    # Controls the strength of model complexity regularization
    ADANET_LAMBDA = 0.1
    LEARN_MIXTURE_WEIGHTS = False

    #strategy = adanet.distributed.RoundRobinStrategy()

    # 191125: TF_CONFIG must be set here
    tfc = json.dumps({
        "cluster": ctx.cluster_spec,
        "task": {
            "type": job_name,
            "index": task_index
        }
    })
    os.environ["TF_CONFIG"] = tfc

    # 191127: try without device_filters; with a placement strategy it is set to /job:ps automatically, so there is no need to set it manually
    config = tf.estimator.RunConfig(
        save_checkpoints_steps=5000,
        tf_random_seed=RANDOM_SEED,
        model_dir=log_dir,
    )

    # config = tf.estimator.RunConfig(
    #     save_checkpoints_steps=5000,
    #     tf_random_seed=RANDOM_SEED,
    #     model_dir=logdir,
    #     session_config=tf.ConfigProto(
    #         log_device_placement=False, device_filters=["/job:ps"]
    #     ),
    # )

    # BaseLine Linear
    # estimator = tf.estimator.LinearClassifier(
    #     feature_columns=feature_columns,
    #     n_classes=NUM_CLASSES,
    #     optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
    #     loss_reduction=loss_reduction,
    #     config=config
    # )

    # DNN TEST - ADANET
    estimator = adanet.Estimator(
        head=head,
        force_grow=True,
        subnetwork_generator=simple_dnn.Generator(
            layer_size=128,
            initial_num_layers=2,
            dropout=0.2,
            feature_columns=feature_columns,
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            learn_mixture_weights=LEARN_MIXTURE_WEIGHTS,
            seed=RANDOM_SEED,
        ),
        adanet_lambda=ADANET_LAMBDA,
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
        #evaluator=adanet.Evaluator(input_fn=new_input_fn("test", False)),
        evaluator=adanet.Evaluator(input_fn=new_input_fn("test", False),
                                   steps=1000),
        config=config,
        #experimental_placement_strategy=strategy,
        # Record reports; not very useful in practice
        #     report_materializer=adanet.ReportMaterializer(
        #         input_fn=new_input_fn("train", False),
        #     ),
    )

    # Try not returning anything; just run the computation
    tf.estimator.train_and_evaluate(
        estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=new_input_fn("train", True),
                                          max_steps=TRAIN_STEPS),
        # In the distributed setting this Eval has no real effect
        eval_spec=tf.estimator.EvalSpec(
            input_fn=new_input_fn("test", False),
            steps=None,
            start_delay_secs=1,
            throttle_secs=30,
        ),
    )

    # The final round only trains; model parameters are saved to model.ckpt and nothing is prepared for another round

    # See https://github.com/tensorflow/adanet/blob/master/adanet/core/estimator_test.py
    # line 2362 def test_export_saved_model_always_uses_replication_placement(self):
    def serving_input_receiver_fn():
        serialized_sample = tf.compat.v1.placeholder(dtype=tf.float32,
                                                     shape=[None, input_dim],
                                                     name='features')
        tensor_features = {'features': serialized_sample}
        return tf.estimator.export.ServingInputReceiver(
            features=tensor_features, receiver_tensors=serialized_sample)

    # Cannot be run under RoundRobinStrategy
    if ctx.job_name == "chief":
        # Run predictions on the test and train sets
        print('export test result')
        predictions = estimator.predict(new_input_fn("test", False))
        print('Writing Predictions to {}'.format(pred_dir))
        tf.gfile.MakeDirs(pred_dir)
        with tf.gfile.GFile("{}/test".format(pred_dir), 'w') as f:
            for pred in predictions:
                f.write(str(pred))
                f.write('\n')
        print('export train result')
        predictions = estimator.predict(new_input_fn("train", False))
        print('Writing Predictions to {}'.format(pred_dir))
        tf.gfile.MakeDirs(pred_dir)
        with tf.gfile.GFile("{}/train".format(pred_dir), 'w') as f:
            for pred in predictions:
                f.write(str(pred))
                f.write('\n')
        # Export the model
        estimator.export_saved_model(
            export_dir,
            serving_input_receiver_fn,
            experimental_mode=tf.estimator.ModeKeys.PREDICT)
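
Once exported, the SavedModel could be loaded for offline scoring, for example with TF 1.x's tf.contrib.predictor; in this sketch the saved_model_path (the timestamped subdirectory under export_dir) and the "input" feed key are assumptions to verify, e.g. with saved_model_cli:

# Sketch: load the exported SavedModel and score one dummy row.
import numpy as np
import tensorflow as tf

# saved_model_path: timestamped subdirectory created under export_dir above.
# The feed key ("input" is the default for a single unnamed receiver tensor)
# should be confirmed with `saved_model_cli show --dir <path> --all`.
predict_fn = tf.contrib.predictor.from_saved_model(saved_model_path)
dummy_row = np.zeros((1, input_dim), dtype=np.float32)
print(predict_fn({"input": dummy_row}))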
Example #15
def build_adanet_estimator(subnetwork_generator):
    '''Build an AdaNet estimator.

    Input :
        *   subnetwork_generator : subnetwork generator that produces the candidate
            subnetworks handed to the AdaNet weak-learner algorithm.

    Note : nn_type (used to choose the mixture weight type: scalar, vector or
        matrix) and feature_shape are read from the configuration file rather
        than passed as arguments.

    Output :
        *   adanet_estimator : AdaNet estimator, an instance of the adanet.Estimator class.
        *   output_dir_log : log directory returned by make_config.
    '''
    
    output_dir = './tmp/adanet'
    
    #---------------------------------------------------------------------------
    # Get parameters from configuration file.
    #---------------------------------------------------------------------------
    dict_adanet_config = p8_util_config.dict_adanet_config
    feature_shape = dict_adanet_config['adanet_feature_shape']
    
    dict_nn_layer_config = dict_adanet_config['adanet_nn_layer_config']
    nn_type = dict_nn_layer_config['nn_type']
    
    #---------------------------------------------------------------------------
    # Fix the mixture weight type
    #---------------------------------------------------------------------------
    if 'RNN' == nn_type :
        mixture_weight_type=MixtureWeightType.VECTOR
    else :
        mixture_weight_type=MixtureWeightType.MATRIX

    ensembler = ComplexityRegularizedEnsembler(mixture_weight_type=mixture_weight_type\
                                            , adanet_lambda=p8_util_config.dict_adanet_config['adanet_lambda'])
    dataset_type = p8_util_config.DATASET_TYPE
    input_fn_param={'num_epochs':p8_util_config.NUM_EPOCHS,\
                    'batch_size':p8_util_config.BATCH_SIZE,\
                    'feature_shape': feature_shape,\
                    'dataset_type': dataset_type
                   }

    train_input_fn=input_fn_2("train", input_fn_param)
    
    model_name = build_model_name(nn_type)
    
    adanet_estimator_config, output_dir_log= make_config(model_name\
                                        , output_dir=output_dir\
                                        , is_restored=False)
                                        
    nb_class = p8_util_config.dict_adanet_config['adanet_nn_layer_config']['nn_logit_dimension']
        
    feature_columns, loss_reduction, tf_head \
        = get_tf_head(feature_shape, nb_class, nn_type=nn_type, feature_shape=feature_shape)
    
    p8_util_config.dict_adanet_config['adanet_feature_columns'] = feature_columns    
    p8_util_config.dict_adanet_config['adanet_feature_shape'] = feature_shape
    
    adanet_estimator = adanet.Estimator(
        head=p8_util_config.dict_adanet_config['adanet_tf_head'],
        subnetwork_generator=subnetwork_generator,
        ensemblers=[ensembler],

        max_iteration_steps=p8_util_config.ADANET_MAX_ITERATION_STEPS,

        evaluator=adanet.Evaluator(
            input_fn=train_input_fn,
            steps=None),
        config=adanet_estimator_config)
    return adanet_estimator, output_dir_log
Example #16
def map_fun(args, ctx):
    from datetime import datetime
    import tensorflow as tf
    import os
    import time
    import json

    import adanet
    from adanet.examples import simple_dnn

    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index
    message = "worker_num: {0}, job_name: {1}, task_index: {2}".format(
        worker_num, job_name, task_index)
    print(message)
    input_dim = int(args.input_dim)
    batch_size = args.batch_size
    # Fix Random Seed
    RANDOM_SEED = 42

    FEATURES_KEY = "features"

    loss_reduction = tf.losses.Reduction.SUM_OVER_BATCH_SIZE

    def weighted_cross_entropy_with_logits(labels, logits):
        return tf.nn.weighted_cross_entropy_with_logits(targets=labels,
                                                        logits=logits,
                                                        pos_weight=4)

    head = tf.contrib.estimator.binary_classification_head(
        loss_reduction=loss_reduction,
        loss_fn=weighted_cross_entropy_with_logits)

    # numeric_column does not support SparseTensor
    feature_columns = [
        tf.feature_column.numeric_column(key=FEATURES_KEY, shape=[input_dim])
    ]

    log_dir = ctx.absolute_path(args.log_dir)
    export_dir = ctx.absolute_path(args.export_dir)
    pred_dir = ctx.absolute_path(args.prediction_dir)
    print("tensorflow log path: {0}".format(log_dir))
    print("tensorflow export path: {0}".format(export_dir))
    print("tensorflow prediction path: {0}".format(pred_dir))

    def generator(ln):
        splits = tf.string_split([ln], delimiter=" ")
        label = splits.values[0]
        label = tf.string_to_number(label, tf.float64)
        label = tf.cond(
            label >= 1.0,
            lambda: tf.constant(1, shape=[1], dtype=tf.float32),
            lambda: tf.constant(0, shape=[1], dtype=tf.float32),
        )

        # SparseTensor output
        col_val = tf.string_split(splits.values[1::], delimiter=":")
        col = tf.string_to_number(col_val.values[0::2], tf.int64) - 1

        vals = col_val.values[1::2]
        vals = tf.string_to_number(vals, tf.float32)

        # Keep only features whose column index is below input_dim
        vals = tf.boolean_mask(vals, col < input_dim)
        col = tf.boolean_mask(col, col < input_dim)

        row = tf.cast(tf.fill(tf.shape(col), 0), tf.int64, name="row_cast")
        row_col = tf.transpose(tf.stack([row, col]), name="row_col_transpose")

        sparse = tf.SparseTensor(row_col, vals, (1, input_dim))

        # Convert to dense (191106: the conversion is required)
        features = {FEATURES_KEY: tf.sparse_tensor_to_dense(sparse)}

        return features, label

    def new_input_fn(partition, training):
        def _input_fn():
            # path is ok
            parse_fn = generator

            if partition == "train":
                data_dir = ctx.absolute_path(args.data_dir)
                file_pattern = os.path.join(data_dir, "part-*")
                ds = tf.data.Dataset.list_files(file_pattern, shuffle=False)

                ds = ds.apply(
                    tf.contrib.data.parallel_interleave(
                        tf.data.TextLineDataset, cycle_length=10))
                ds = ds.map(parse_fn, num_parallel_calls=5)
                if training:
                    ds = ds.shuffle(batch_size * 5).repeat()
            else:
                data_dir = ctx.absolute_path(args.test_dir)
                file_pattern = os.path.join(data_dir, "part-*")
                ds = tf.data.Dataset.list_files(file_pattern, shuffle=False)

                ds = ds.apply(
                    tf.contrib.data.parallel_interleave(
                        tf.data.TextLineDataset, cycle_length=10))
                ds = ds.map(parse_fn, num_parallel_calls=5)

            iterator = ds.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

            # ds = ds.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
            # return ds.batch(batch_size)

        return _input_fn

    print("========= Start Training")
    LEARNING_RATE = 0.01
    TRAIN_STEPS = 1000
    ADANET_ITERATIONS = 4  # AKA Boosting Iteration
    # Controls the strength of model complexity regularization
    ADANET_LAMBDA = 0.1
    LEARN_MIXTURE_WEIGHTS = False

    #strategy = adanet.distributed.RoundRobinStrategy()

    # 191125: TF_CONFIG must be set here
    tfc = json.dumps({
        "cluster": ctx.cluster_spec,
        "task": {
            "type": job_name,
            "index": task_index
        }
    })
    os.environ["TF_CONFIG"] = tfc

    # 191127: try without device_filters; with a placement strategy it is set to /job:ps automatically, so there is no need to set it manually
    config = tf.estimator.RunConfig(
        save_checkpoints_steps=5000,
        tf_random_seed=RANDOM_SEED,
        model_dir=log_dir,
    )

    # estimator = tf.estimator.LinearEstimator(
    #     head=head,
    #     feature_columns=feature_columns,
    #     config=config
    #
    # )

    # config = tf.estimator.RunConfig(
    #     save_checkpoints_steps=5000,
    #     tf_random_seed=RANDOM_SEED,
    #     model_dir=logdir,
    #     session_config=tf.ConfigProto(
    #         log_device_placement=False, device_filters=["/job:ps"]
    #     ),
    # )

    # DNN TEST - ADANET
    estimator = adanet.Estimator(
        head=head,
        force_grow=False,
        subnetwork_generator=simple_dnn.Generator(
            layer_size=128,
            initial_num_layers=1,
            dropout=0.2,
            feature_columns=feature_columns,
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            learn_mixture_weights=LEARN_MIXTURE_WEIGHTS,
            seed=RANDOM_SEED,
        ),
        adanet_lambda=ADANET_LAMBDA,
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(input_fn=new_input_fn("test", False),
                                   steps=1000),
        config=config,
    )

    # ensemble_estimator = adanet.AutoEnsembleEstimator(
    #     head=head,
    #     candidate_pool= lambda config: {
    #         "linear1":
    #             tf.estimator.LinearEstimator(
    #                 head=head,
    #                 feature_columns=feature_columns,
    #                 optimizer=tf.train.RMSPropOptimizer(learning_rate=0.1),
    #                 config=config,
    #             ),
    #         "dnn1":
    #             tf.estimator.DNNEstimator(
    #                 head=head,
    #                 feature_columns=feature_columns,
    #                 optimizer=tf.train.RMSPropOptimizer(learning_rate=0.001),
    #                 hidden_units=[512, 256, 128],
    #                 config=config,
    #             ),
    #         "dnn2":
    #             tf.estimator.DNNEstimator(
    #                 head=head,
    #                 feature_columns=feature_columns,
    #                 optimizer=tf.train.RMSPropOptimizer(learning_rate=0.01),
    #                 hidden_units=[256, 128],
    #                 config=config,
    #             ),
    #         "dnn_linear":
    #             tf.estimator.DNNLinearCombinedEstimator(
    #                 head=head,
    #                 dnn_feature_columns=feature_columns,
    #                 linear_feature_columns=feature_columns,
    #                 dnn_hidden_units=[512, 256, 128],
    #                 config=config,
    #             )
    #     },
    #     max_iteration_steps=100,
    # )

    cur_e = estimator

    # Try not returning anything; just run the computation
    tf.estimator.train_and_evaluate(
        cur_e,
        train_spec=tf.estimator.TrainSpec(input_fn=new_input_fn("train", True),
                                          max_steps=TRAIN_STEPS),
        # In the distributed setting this Eval has no real effect
        eval_spec=tf.estimator.EvalSpec(
            input_fn=new_input_fn("test", False),
            steps=None,
            start_delay_secs=1,
            throttle_secs=30,
        ),
    )

    # The final round only trains; model parameters are saved to model.ckpt and nothing is prepared for another round
    # This export style requires the input to be a tf.Example, which does not suit the DSP input format
    # feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    # serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)

    def serving_input_receiver_fn():
        indices = tf.placeholder(dtype=tf.int64,
                                 shape=[None, None],
                                 name='indices')
        values = tf.placeholder(dtype=tf.float32, shape=[None], name='values')
        shape = tf.placeholder(dtype=tf.int64,
                               shape=[None],
                               name='dense_shape')
        receiver_input = {
            'indices': indices,
            'values': values,
            'dense_shape': shape
        }
        # Build a SparseTensor first, then convert it to dense
        sparse = tf.SparseTensor(indices, values, shape)
        features = {FEATURES_KEY: tf.sparse_tensor_to_dense(sparse)}

        return tf.estimator.export.ServingInputReceiver(
            features, receiver_input)

    # Cannot be run under RoundRobinStrategy
    if ctx.job_name == "chief":
        # Running evaluate here is slow, so it is skipped

        # predictions = cur_e.predict(new_input_fn("test", False))
        # result = cur_e.evaluate(new_input_fn("test", False))
        # with tf.gfile.GFile("{}/evaluate".format(log_dir), 'w') as f:
        #     f.write(str(result))
        #     f.write('\n')
        # Run predictions on the test and train sets
        # print('export test result')
        # predictions = estimator.predict(new_input_fn("test", False))
        # print('Writing Predictions to {}'.format(pred_dir))
        # tf.gfile.MakeDirs(pred_dir)
        # with tf.gfile.GFile("{}/test".format(pred_dir), 'w') as f:
        #     for pred in predictions:
        #         f.write(str(pred))
        #         f.write('\n')
        # print('export train result')
        # predictions = estimator.predict(new_input_fn("train", False))
        # print('Writing Predictions to {}'.format(pred_dir))
        # tf.gfile.MakeDirs(pred_dir)
        # with tf.gfile.GFile("{}/train".format(pred_dir), 'w') as f:
        #     for pred in predictions:
        #         f.write(pred['classes'][0])
        #         f.write('\n')
        # Export the model
        # 191204: exporting this way gives no control over the serving-time outputs
        cur_e.export_saved_model(export_dir, serving_input_receiver_fn)
Example #17
def map_fun_v2(args, ctx):
    from datetime import datetime
    import tensorflow as tf
    import time

    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index

    # Parameters
    IMAGE_PIXELS = 28
    hidden_units = 128
    # Fix Random Seed
    RANDOM_SEED = 42

    (x_train, y_train), (x_test,
                         y_test) = (tf.keras.datasets.mnist.load_data())

    FEATURES_KEY = "images"

    NUM_CLASSES = 10

    loss_reduction = tf.losses.Reduction.SUM_OVER_BATCH_SIZE

    head = tf.contrib.estimator.multi_class_head(NUM_CLASSES,
                                                 loss_reduction=loss_reduction)

    feature_columns = [
        tf.feature_column.numeric_column(FEATURES_KEY, shape=[28, 28, 1])
    ]

    # Get TF cluster and server instances
    cluster, server = ctx.start_cluster_server(1, args.rdma)

    def generator(images, labels):
        """Returns a generator that returns image-label pairs."""
        def _gen():
            for image, label in zip(images, labels):
                yield image, label

        return _gen

    def preprocess_image(image, label):
        """Preprocesses an image for an `Estimator`."""
        image = image / 255.
        image = tf.reshape(image, [28, 28, 1])
        features = {FEATURES_KEY: image}
        return features, label

    def input_fn(partition, training):
        """Generate an input_fn for the Estimator."""
        def _input_fn():
            if partition == "train":
                dataset = tf.data.Dataset.from_generator(
                    generator(x_train, y_train), (tf.float32, tf.int32),
                    ((28, 28), ()))
            else:
                dataset = tf.data.Dataset.from_generator(
                    generator(x_test, y_test), (tf.float32, tf.int32),
                    ((28, 28), ()))

            if training:
                dataset = dataset.shuffle(10 * args.batch_size,
                                          seed=RANDOM_SEED).repeat()

            dataset = dataset.map(preprocess_image).batch(args.batch_size)
            iterator = dataset.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

        return _input_fn

    if job_name == "ps":
        server.join()
    elif job_name == "worker":
        # Assigns ops to the local worker by default
        # Logs printed here are not visible
        message = ""
        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % task_index,
                    cluster=cluster)):
            print("========= Start Training")
            LEARNING_RATE = 0.003
            TRAIN_STEPS = 5000
            BATCH_SIZE = 64
            ADANET_ITERATIONS = 2

            logdir = ctx.absolute_path(args.model)

            config = tf.estimator.RunConfig(save_checkpoints_steps=50000,
                                            save_summary_steps=50000,
                                            tf_random_seed=RANDOM_SEED,
                                            model_dir=logdir)

            # First try a linear model
            # estimator = tf.estimator.LinearClassifier(
            #     feature_columns=feature_columns,
            #     n_classes=NUM_CLASSES,
            #     optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            #     loss_reduction=loss_reduction,
            #     config=config
            # )

            estimator = adanet.Estimator(
                head=head,
                subnetwork_generator=simple_dnn.Generator(
                    feature_columns=feature_columns,
                    optimizer=tf.train.RMSPropOptimizer(
                        learning_rate=LEARNING_RATE),
                    seed=RANDOM_SEED),
                max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
                evaluator=adanet.Evaluator(input_fn=input_fn("train",
                                                             training=False),
                                           steps=None),
                config=config)

            results, _ = tf.estimator.train_and_evaluate(
                estimator,
                train_spec=tf.estimator.TrainSpec(input_fn=input_fn(
                    "train", training=True),
                                                  max_steps=TRAIN_STEPS),
                eval_spec=tf.estimator.EvalSpec(input_fn=input_fn(
                    "test", training=False),
                                                steps=None))

            print("Accuracy:", results["accuracy"])
            print("Loss:", results["average_loss"])
            message = "Accuracy: {}; Loss: {}".format(results["accuracy"],
                                                      results["average_loss"])
            print("==============================================")

        print("{} stopping MonitoredTrainingSession".format(
            datetime.now().isoformat()))

        # WORKAROUND FOR https://github.com/tensorflow/tensorflow/issues/21745
        # wait for all other nodes to complete (via done files)
        done_dir = "{}/{}/done".format(ctx.absolute_path(args.model),
                                       args.mode)
        print("Writing done file to: {}".format(done_dir))
        tf.gfile.MakeDirs(done_dir)
        with tf.gfile.GFile("{}/{}".format(done_dir, ctx.task_index),
                            'w') as done_file:
            done_file.write("done")
            done_file.write(message)

        for i in range(60):
            if len(tf.gfile.ListDirectory(done_dir)) < len(
                    ctx.cluster_spec['worker']):
                print("{} Waiting for other nodes {}".format(
                    datetime.now().isoformat(), i))
                time.sleep(1)
            else:
                print("{} All nodes done".format(datetime.now().isoformat()))
                break
Example #18
def map_fun(args, ctx):
    from datetime import datetime
    import tensorflow as tf
    import os
    import time

    worker_num = ctx.worker_num
    job_name = ctx.job_name
    task_index = ctx.task_index
    message = 'worker_num: {0}, job_name: {1}, task_index: {2}'.format(worker_num, job_name, task_index)

    input_dim = int(args.input_dim)
    batch_size = args.batch_size

    # Fix Random Seed
    RANDOM_SEED = 42

    FEATURES_KEY = "ctr"

    NUM_CLASSES = 2

    loss_reduction = tf.losses.Reduction.SUM_OVER_BATCH_SIZE

    # head = tf.contrib.estimator.multi_class_head(NUM_CLASSES, loss_reduction=loss_reduction)
    head = tf.contrib.estimator.binary_classification_head(loss_reduction=loss_reduction)

    # numeric_column does not support SparseTensor
    feature_columns = [
        tf.feature_column.numeric_column(FEATURES_KEY, shape=[input_dim])
    ]

    log_dir = ctx.absolute_path(args.log_dir)
    export_dir = ctx.absolute_path(args.export_dir)
    print("tensorflow log path: {0}".format(log_dir))
    print("tensorflow export path: {0}".format(export_dir))

    # Get TF cluster and server instances
    cluster, server = ctx.start_cluster_server(1, args.rdma)

    def generator(ln):
        splits = tf.string_split([ln], delimiter=' ')
        label = splits.values[0]
        label = tf.string_to_number(label, tf.float64)
        label = tf.cond(label >= 1.0,
                        lambda: tf.constant(1, shape=[1], dtype=tf.float32),
                        lambda: tf.constant(0, shape=[1], dtype=tf.float32))

        # SparseTensor output
        col_val = tf.string_split(splits.values[1::], delimiter=':')
        col = tf.string_to_number(col_val.values[0::2], tf.int64) - 1

        vals = col_val.values[1::2]
        vals = tf.string_to_number(vals, tf.float32)

        # Keep only features whose column index is below input_dim
        vals = tf.boolean_mask(vals, col < input_dim)
        col = tf.boolean_mask(col, col < input_dim)

        row = tf.cast(tf.fill(tf.shape(col), 0), tf.int64, name='row_cast')
        row_col = tf.transpose(tf.stack([row, col]), name='row_col_transpose')

        sparse = tf.SparseTensor(row_col, vals, (1, input_dim))

        # Convert to dense
        features = {FEATURES_KEY: tf.sparse_tensor_to_dense(sparse)}

        return features, label

    def input_fn(partition):
        """Generate an input_fn for the Estimator."""

        def _input_fn():
            num_workers = len(ctx.cluster_spec['worker'])

            data_dir = ctx.absolute_path(args.data_dir)
            file_pattern = os.path.join(data_dir, 'part-*')
            ds = tf.data.Dataset.list_files(file_pattern)
            ds = ds.shard(num_workers, task_index).repeat(args.epochs)

            if args.format == 'libsvm':
                ds = ds.apply(tf.contrib.data.parallel_interleave(tf.data.TextLineDataset, cycle_length=10))
                parse_fn = generator

            if partition == "train":
                ds = ds.map(parse_fn, num_parallel_calls=5).shuffle(batch_size * 5)
            else:
                ds = ds.map(parse_fn, num_parallel_calls=5)

            ds = ds.apply(tf.contrib.data.batch_and_drop_remainder(batch_size)).prefetch(100)
            iterator = ds.make_one_shot_iterator()
            features, labels = iterator.get_next()
            return features, labels

        return _input_fn

    if job_name == "ps":
        server.join()
    elif job_name == "worker":
        # Assigns ops to the local worker by default
        # Logs printed here are not visible
        message = ""
        with tf.device(tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % task_index,
                cluster=cluster)):
            print("========= Start Training")
            LEARNING_RATE = 0.003
            TRAIN_STEPS = 1000
            ADANET_ITERATIONS = 2

            # Results so far are not great; worse than the linear baseline
            logdir = ctx.absolute_path(args.log_dir)

            config = tf.estimator.RunConfig(
                save_checkpoints_steps=50000,
                save_summary_steps=50000,
                tf_random_seed=RANDOM_SEED,
                model_dir=logdir
            )

            # Baseline linear model
            # estimator = tf.estimator.LinearClassifier(
            #     feature_columns=feature_columns,
            #     n_classes=NUM_CLASSES,
            #     optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            #     loss_reduction=loss_reduction,
            #     config=config
            # )

            # DNN test - AdaNet
            estimator = adanet.Estimator(
                head=head,
                subnetwork_generator=simple_dnn.Generator(
                    layer_size=128,
                    initial_num_layers=3,
                    dropout=0.2,
                    feature_columns=feature_columns,
                    optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
                    seed=RANDOM_SEED),
                max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
                evaluator=adanet.Evaluator(
                    input_fn=input_fn("train"),
                    steps=None
                ),
                config=config
            )

            results, _ = tf.estimator.train_and_evaluate(
                estimator,
                train_spec=tf.estimator.TrainSpec(
                    input_fn=input_fn("train"),
                    max_steps=TRAIN_STEPS),
                eval_spec=tf.estimator.EvalSpec(
                    input_fn=input_fn("test"),
                    steps=None)
            )

            print("Accuracy:", results["accuracy"])
            print("Loss:", results["average_loss"])
            message = "Accuracy: {}; Loss: {}".format(results["accuracy"], results["average_loss"])
            arch = results["architecture/adanet/ensembles"]
            summary_proto = tf.summary.Summary.FromString(arch)
            arch_result = summary_proto.value[0].tensor.string_val[0]
            print("==============================================")


    print("{} stopping MonitoredTrainingSession".format(datetime.now().isoformat()))

    # WORKAROUND for https://github.com/tensorflow/tensorflow/issues/21745
    # wait for all other nodes to complete (via done files)
    done_dir = "{}/{}/done".format(ctx.absolute_path(args.log_dir), args.mode)
    print("Writing done file to: {}".format(done_dir))
    tf.gfile.MakeDirs(done_dir)
    with tf.gfile.GFile("{}/{}".format(done_dir, ctx.task_index), 'w') as done_file:
        done_file.write(message)
        done_file.write(arch_result)

    for i in range(30):
        if len(tf.gfile.ListDirectory(done_dir)) < len(ctx.cluster_spec['worker']):
            print("{} Waiting for other nodes {}".format(datetime.now().isoformat(), i))
            time.sleep(1)
        else:
            print("{} All nodes done".format(datetime.now().isoformat()))
            break
Example #19
    def estimator(self,
                  data_provider,
                  run_config,
                  hparams,
                  train_steps=None,
                  seed=None):
        """Returns an AdaNet `Estimator` for train and evaluation.

    Args:
      data_provider: Data `Provider` for dataset to model.
      run_config: `RunConfig` object to configure the runtime settings.
      hparams: `HParams` instance defining custom hyperparameters.
      train_steps: number of train steps.
      seed: An integer seed if determinism is required.

    Returns:
      Returns an `Estimator`.
    """

        max_iteration_steps = int(train_steps / hparams.boosting_iterations)

        optimizer_fn = optimizer.fn_with_name(
            hparams.optimizer,
            learning_rate_schedule=hparams.learning_rate_schedule,
            cosine_decay_steps=max_iteration_steps)
        hparams.add_hparam("total_training_steps", max_iteration_steps)

        if hparams.generator == GeneratorType.SIMPLE:
            subnetwork_generator = improve_nas.Generator(
                feature_columns=data_provider.get_feature_columns(),
                optimizer_fn=optimizer_fn,
                iteration_steps=max_iteration_steps,
                checkpoint_dir=run_config.model_dir,
                hparams=hparams,
                seed=seed)
        elif hparams.generator == GeneratorType.DYNAMIC:
            subnetwork_generator = improve_nas.DynamicGenerator(
                feature_columns=data_provider.get_feature_columns(),
                optimizer_fn=optimizer_fn,
                iteration_steps=max_iteration_steps,
                checkpoint_dir=run_config.model_dir,
                hparams=hparams,
                seed=seed)
        else:
            raise ValueError("Invalid generator: `%s`" % hparams.generator)

        evaluator = None
        if hparams.use_evaluator:
            evaluator = adanet.Evaluator(input_fn=data_provider.get_input_fn(
                partition="train",
                mode=tf.estimator.ModeKeys.EVAL,
                batch_size=hparams.evaluator_batch_size),
                                         steps=hparams.evaluator_steps)

        return adanet.Estimator(
            head=data_provider.get_head(),
            subnetwork_generator=subnetwork_generator,
            max_iteration_steps=max_iteration_steps,
            adanet_lambda=hparams.adanet_lambda,
            adanet_beta=hparams.adanet_beta,
            mixture_weight_type=hparams.mixture_weight_type,
            force_grow=hparams.force_grow,
            evaluator=evaluator,
            config=run_config,
            model_dir=run_config.model_dir)
Example #20
def dnn_ada():
    print("==============================================")
    start = datetime.datetime.now()
    print("Start Train Adanet with [DNN Model] on Criteo at %s" % time_str(start))
    print("- - - - - - - - - - - - - - - - - - - - - - - -")

    # Adjusted according to the parameters from the paper
    LEARNING_RATE = LR

    model_dir = os.path.join(LOG_DIR, "dnn_%s" % time_str(start))
    result_file = os.path.join(RESULT_DIR, "dnn_%s" % time_str(start))
    valid_file = os.path.join(RESULT_DIR, "valid_%s" % time_str(start))
    test_file = os.path.join(RESULT_DIR, "test_%s" % time_str(start))
    tpred_file = os.path.join(RESULT_DIR, "tpred_%s" % time_str(start))
    vpred_file = os.path.join(RESULT_DIR, "vpred_%s" % time_str(start))

    config = tf.estimator.RunConfig(
        save_checkpoints_steps=50000,
        save_summary_steps=50000,
        tf_random_seed=RANDOM_SEED,
        model_dir=model_dir
    )

    # layer size 125 256 512
    estimator = adanet.Estimator(
        head=head,
        subnetwork_generator=simple_dnn.Generator(
            feature_columns=feature_columns,
            layer_size=LS,
            optimizer=tf.train.RMSPropOptimizer(learning_rate=LEARNING_RATE),
            seed=RANDOM_SEED),
        max_iteration_steps=TRAIN_STEPS // ADANET_ITERATIONS,
        evaluator=adanet.Evaluator(
            input_fn=input_fn("train"),
            steps=None),
        config=config
    )

    results, _ = tf.estimator.train_and_evaluate(
        estimator,
        train_spec=tf.estimator.TrainSpec(
            input_fn=input_fn("train"),
            max_steps=TRAIN_STEPS),
        eval_spec=tf.estimator.EvalSpec(
            input_fn=input_fn("test"),
            steps=None)
    )

    print("Accuracy:", results["accuracy"])
    print("AUC", results["auc"])
    print("Loss:", results["average_loss"])

    # Re-run evaluation to collect the metrics
    train_spec = estimator.evaluate(input_fn=input_fn("train"))
    test_spec = estimator.evaluate(input_fn=input_fn("test"))

    end = datetime.datetime.now()
    print("Training end at %s" % time_str(end))
    print("Time Spend %s" % str(end - start))
    print("==============================================")
    with open('{}.txt'.format(result_file), 'w') as f:
        f.write('Train Configs:\n')
        f.write('[Layer Size] {}\n'.format(LS))
        f.write('[Learning Rate] {}\n'.format(LR))
        f.write('[BATCH SIZE] {}\n'.format(BATCH_SIZE))
        f.write('[Train Step] {}\n'.format(TRAIN_STEPS))
        f.write('[Adanet Iteration] {}\n'.format(ADANET_ITERATIONS))
        f.write('\nResults:\n')
        f.write('[Accurary] {}\n'.format(results["accuracy"]))
        f.write('[AUC] {}\n'.format(results["auc"]))
        f.write('[Loss] {}\n'.format(results["average_loss"]))
        f.write('[Time Spend] {}\n'.format(str(end - start)))
        f.write('[Train Spec] {}\n'.format(str(train_spec)))
        f.write('[Test Spec] {}\n'.format(str(test_spec)))

    # Write out the test set
    print("export test data")
    test.to_csv('{}.txt'.format(test_file))
    print("export train data")
    train.to_csv('{}.txt'.format(valid_file))

    # Run predictions on the test set
    predictions = estimator.predict(input_fn=input_fn("test"))
    # Write out the predictions
    with open('{}.txt'.format(tpred_file), 'w') as f:
        for pred in predictions:
            f.write(str(pred))
            f.write('\n')

    # Run predictions on the validation set and write them out
    predictions = estimator.predict(input_fn=input_fn("valid"))
    # Write out the predictions
    with open('{}.txt'.format(vpred_file), 'w') as f:
        for pred in predictions:
            f.write(str(pred))
            f.write('\n')