Example #1
def main(argv):
    """ Build, train and evaluate the model """
    args = parser.parse_args(argv[1:])

    (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

    train_y /= args.price_norm_factor
    test_y /= args.price_norm_factor

    # Build the training dataset.
    training_input_fn = tf.estimator.inputs.pandas_input_fn(x=train_x,
                                                            y=train_y,
                                                            batch_size=64,
                                                            shuffle=True,
                                                            num_epochs=None)

    # Build the validation dataset.
    eval_input_fn = tf.estimator.inputs.pandas_input_fn(x=test_x,
                                                        y=test_y,
                                                        batch_size=64,
                                                        shuffle=False)

    # Build the estimator with simple linear regression.
    # model = tf.estimator.LinearRegressor(feature_columns=automobile_data.features_columns(), model_dir=log_dir)

    # Build the estimator with DNN regression.
    model = tf.estimator.DNNRegressor(
        hidden_units=[50, 30, 10],
        feature_columns=automobile_data.features_columns(),
        model_dir=log_dir)

    # Train the model.
    # By default, the estimator logs output every 100 steps.
    model.train(input_fn=training_input_fn, steps=args.train_steps)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=eval_input_fn)

    # The evaluation returns a Python dictionary. The "average_loss" key holds
    # the Mean Squared Error (MSE).
    average_loss = eval_result["average_loss"]

    # Convert MSE to Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(
        args.price_norm_factor * average_loss**0.5))

    # Run the model in prediction mode.

    df = test_x[:2]
    predict_input_fn = tf.estimator.inputs.pandas_input_fn(x=df, shuffle=False)
    predict_results = model.predict(input_fn=predict_input_fn)

    # Print the prediction results.
    print("\nPrediction results:")
    for prediction in predict_results:
        print(args.price_norm_factor * prediction['predictions'])
    print()
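All eight snippets on this page assume the same module-level scaffold, which the page does not show: TensorFlow 1.x with the `tf.estimator` API, an argparse `parser`, and a helper module `automobile_data`. The sketch below is a reconstruction from the call sites, not the original source; the flag defaults, `log_dir`, and the `automobile_data` helpers are all assumptions.

# Hypothetical scaffold assumed by the snippets on this page (TensorFlow 1.x).
import argparse

import numpy as np
import tensorflow as tf

# Assumed helper module exposing load_data(), features_columns(), make_dataset().
import automobile_data

parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", default=100, type=int, help="batch size")
parser.add_argument("--train_steps", default=1000, type=int,
                    help="number of training steps")
parser.add_argument("--price_norm_factor", default=1000., type=float,
                    help="price normalization factor")

# Example #1 also reads a module-level `log_dir`; set it to a path to persist
# checkpoints and TensorBoard summaries, or leave it as None for a temp dir.
log_dir = None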
Example #2
def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Provide the training input dataset.
  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)

  # Provide the validation input dataset.
  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)

  # Use the same categorical columns as in `linear_regression_categorical`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, categorical columns must be converted from
      # sparse to dense.
      # Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style_column),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make_column, dimension=3),
  ]

  # Build a DNNRegressor with two 20-unit hidden layers, taking the feature
  # columns defined above as input.
  model = tf.estimator.DNNRegressor(
      hidden_units=[20, 20], feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=train_input_fn, steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=test_input_fn)

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss**0.5))

  print()
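`automobile_data.make_dataset` is not shown on this page. Examples #2, #3, #5, #6, and #7 call it as `make_dataset(batch_size, x, y, shuffle, buffer_size)` and expect an Estimator input function back, while Examples #4 and #8 call `make_dataset(x, y)` and expect a raw `tf.data.Dataset`. A minimal sketch of the input-function variant, under those assumptions:

def make_dataset(batch_size, x, y=None, shuffle=False, shuffle_buffer_size=1000):
    """Hypothetical reconstruction of the input-function factory used above."""
    def input_fn():
        # Build a dataset of (features, label) pairs, or features only when no
        # labels are given (as in the prediction calls of Examples #5 and #6).
        if y is not None:
            dataset = tf.data.Dataset.from_tensor_slices((dict(x), y))
        else:
            dataset = tf.data.Dataset.from_tensor_slices(dict(x))
        if shuffle:
            # Shuffle and repeat indefinitely; training duration is then
            # controlled by the `steps` argument to model.train().
            dataset = dataset.shuffle(shuffle_buffer_size).repeat()
        return dataset.batch(batch_size)
    return input_fn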
Example #3
def main(argv):
    """Builds, trains, and evaluates the model."""
    args = parser.parse_args(argv[1:])

    (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

    train_y /= args.price_norm_factor
    test_y /= args.price_norm_factor

    # Provide the training input dataset.
    train_input_fn = automobile_data.make_dataset(args.batch_size, train_x,
                                                  train_y, True, 1000)

    # Provide the validation input dataset.
    test_input_fn = automobile_data.make_dataset(args.batch_size, test_x,
                                                 test_y)

    # Use the same categorical columns as in `linear_regression_categorical`.
    body_style_vocab = [
        "hardtop", "wagon", "sedan", "hatchback", "convertible"
    ]
    body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
        key="body-style", vocabulary_list=body_style_vocab)
    make_column = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    feature_columns = [
        tf.feature_column.numeric_column(key="curb-weight"),
        tf.feature_column.numeric_column(key="highway-mpg"),
        # Since this is a DNN model, categorical columns must be converted from sparse to dense.
        # Wrap them in an `indicator_column` to create a one-hot vector from the input.
        tf.feature_column.indicator_column(body_style_column),
        # Or use an `embedding_column` to create a trainable vector for each index.
        tf.feature_column.embedding_column(make_column, dimension=3),
    ]

    # Build a DNNRegressor with two 20-unit hidden layers, taking the feature
    # columns defined above as input.
    model = tf.estimator.DNNRegressor(hidden_units=[20, 20],
                                      feature_columns=feature_columns)

    # Train the model.
    # By default, the Estimators log output every 100 steps.
    model.train(input_fn=train_input_fn, steps=args.train_steps)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=test_input_fn)

    # The evaluation returns a Python dictionary. The "average_loss" key holds the Mean Squared Error (MSE).
    average_loss = eval_result["average_loss"]

    # Convert MSE to Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(
        args.price_norm_factor * average_loss**0.5))

    print()
Example #4
def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  # The following code demonstrates two of the ways that `feature_columns` can
  # be used to build a model with categorical inputs.

  # The first way assigns a unique weight to each category. To do this, you must
  # specify the category's vocabulary (values outside this specification will
  # receive a weight of zero).
  # Alternatively, you can define the vocabulary in a file (by calling
  # `categorical_column_with_vocabulary_file`) or as a range of positive
  # integers (by calling `categorical_column_with_identity`).
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style_column = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)

  # The second way, appropriate for an unspecified vocabulary, is to create a
  # hashed column. It will create a fixed length list of weights, and
  # automatically assign each input category to a weight. Due to the
  # pseudo-randomness of the process, some weights may be shared between
  # categories, while others will remain unused.
  make_column = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      # This model uses the same two numeric features as `linear_regressor.py`.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # This model adds two categorical columns that will adjust the price based
      # on "make" and "body-style".
      body_style_column,
      make_column,
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss**0.5))

  print()
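Examples #4 and #8 also depend on a `from_dataset` helper that the page omits. Since `train` and `test` are `tf.data.Dataset` objects while `Estimator.train` expects an input function, `from_dataset` presumably adapts one into the other; a one-line sketch under that assumption:

def from_dataset(ds):
    """Hypothetical adapter: wrap a tf.data.Dataset as an Estimator input_fn."""
    return lambda: ds.make_one_shot_iterator().get_next()

In later TF 1.x releases an input function may also return the dataset itself, so `return lambda: ds` would work equally well.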
Example #5
def main(argv):
    """Builds, trains, and evaluates the model."""
    args = parser.parse_args(argv[1:])

    (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

    train_y /= args.price_norm_factor
    test_y /= args.price_norm_factor

    # Provide the training input dataset.
    train_input_fn = automobile_data.make_dataset(args.batch_size, train_x,
                                                  train_y, True, 1000)

    # Provide the validation input dataset.
    test_input_fn = automobile_data.make_dataset(args.batch_size, test_x,
                                                 test_y)

    feature_columns = [
        # "curb-weight" and "highway-mpg" are numeric columns.
        tf.feature_column.numeric_column(key="curb-weight"),
        tf.feature_column.numeric_column(key="highway-mpg"),
    ]

    # Build the Estimator.
    model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

    # Train the model.
    # By default, the Estimators log output every 100 steps.
    model.train(input_fn=train_input_fn, steps=args.train_steps)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=test_input_fn)

    # The evaluation returns a Python dictionary. The "average_loss" key holds the
    # Mean Squared Error (MSE).
    average_loss = eval_result["average_loss"]

    # Convert MSE to Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(
        args.price_norm_factor * average_loss**0.5))

    # Run the model in prediction mode.
    input_dict = {
        "curb-weight": np.array([2000, 3000]),
        "highway-mpg": np.array([30, 40])
    }

    # Provide the predict input dataset.
    predict_input_fn = automobile_data.make_dataset(1, input_dict)
    predict_results = model.predict(input_fn=predict_input_fn)

    # Print the prediction results.
    print("\nPrediction results:")
    for i, prediction in enumerate(predict_results):
        msg = ("Curb weight: {: 4d}lbs, "
               "Highway: {: 0d}mpg, "
               "Prediction: ${: 9.2f}")
        msg = msg.format(input_dict["curb-weight"][i],
                         input_dict["highway-mpg"][i],
                         args.price_norm_factor * prediction["predictions"][0])

        print("    " + msg)
    print()
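`model.predict` returns a generator; for the canned regressors each yielded element is a dict whose "predictions" entry is an array of shape (1,), which is why the loop above indexes `prediction["predictions"][0]`. Under the same assumptions, the printing loop could equally be written as a comprehension that collects the rescaled dollar values:

    # Alternative to the loop above: gather rescaled predictions in one pass.
    predicted_prices = [
        args.price_norm_factor * p["predictions"][0] for p in predict_results
    ]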
Example #6
def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Provide the training input dataset.
  train_input_fn = automobile_data.make_dataset(args.batch_size, train_x, train_y, True, 1000)

  # Provide the validation input dataset.
  test_input_fn = automobile_data.make_dataset(args.batch_size, test_x, test_y)

  feature_columns = [
      # "curb-weight" and "highway-mpg" are numeric columns.
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
  ]

  # Build the Estimator.
  model = tf.estimator.LinearRegressor(feature_columns=feature_columns)

  # Train the model.
  # By default, the Estimators log output every 100 steps.
  model.train(input_fn=train_input_fn, steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=test_input_fn)

  # The evaluation returns a Python dictionary. The "average_loss" key holds the
  # Mean Squared Error (MSE).
  average_loss = eval_result["average_loss"]

  # Convert MSE to Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * average_loss**0.5))

  # Run the model in prediction mode.
  input_dict = {
      "curb-weight": np.array([2000, 3000]),
      "highway-mpg": np.array([30, 40])
  }

  # Provide the predict input dataset.
  predict_input_fn = automobile_data.make_dataset(1, input_dict)
  predict_results = model.predict(input_fn=predict_input_fn)

  # Print the prediction results.
  print("\nPrediction results:")
  for i, prediction in enumerate(predict_results):
    msg = ("Curb weight: {: 4d}lbs, "
           "Highway: {: 0d}mpg, "
           "Prediction: ${: 9.2f}")
    msg = msg.format(input_dict["curb-weight"][i], input_dict["highway-mpg"][i],
                     args.price_norm_factor * prediction["predictions"][0])

    print("    " + msg)
  print()
Example #7
def main(argv):
    """Builds, trains, and evaluates the model."""
    args = parser.parse_args(argv[1:])

    (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

    train_y /= args.price_norm_factor
    test_y /= args.price_norm_factor

    # Provide the training input dataset.
    train_input_fn = automobile_data.make_dataset(args.batch_size, train_x,
                                                  train_y, True, 1000)

    # Build the validation dataset.
    test_input_fn = automobile_data.make_dataset(args.batch_size, test_x,
                                                 test_y)

    # The first way assigns a unique weight to each category. To do this, you must
    # specify the category's vocabulary (values outside this specification will
    # receive a weight of zero). Here we specify the vocabulary using a list of
    # options. The vocabulary can also be specified with a vocabulary file (using
    # `categorical_column_with_vocabulary_file`). For features covering a
    # range of positive integers use `categorical_column_with_identity`.
    body_style_vocab = [
        "hardtop", "wagon", "sedan", "hatchback", "convertible"
    ]
    body_style = tf.feature_column.categorical_column_with_vocabulary_list(
        key="body-style", vocabulary_list=body_style_vocab)
    make = tf.feature_column.categorical_column_with_hash_bucket(
        key="make", hash_bucket_size=50)

    feature_columns = [
        tf.feature_column.numeric_column(key="curb-weight"),
        tf.feature_column.numeric_column(key="highway-mpg"),
        # Since this is a DNN model, convert categorical columns from sparse
        # to dense.
        # Wrap them in an `indicator_column` to create a
        # one-hot vector from the input.
        tf.feature_column.indicator_column(body_style),
        # Or use an `embedding_column` to create a trainable vector for each
        # index.
        tf.feature_column.embedding_column(make, dimension=3),
    ]

    # Build a custom Estimator, using the model_fn.
    # `params` is passed through to the `model_fn`.
    model = tf.estimator.Estimator(model_fn=my_dnn_regression_fn,
                                   params={
                                       "feature_columns": feature_columns,
                                       "learning_rate": 0.001,
                                       "optimizer": tf.train.AdamOptimizer,
                                       "hidden_units": [20, 20]
                                   })

    # Train the model.
    model.train(input_fn=train_input_fn, steps=args.train_steps)

    # Evaluate how the model performs on data it has not yet seen.
    eval_result = model.evaluate(input_fn=test_input_fn)

    # Print the Root Mean Square Error (RMSE).
    print("\n" + 80 * "*")
    print("\nRMS error for the test set: ${:.0f}".format(
        args.price_norm_factor * eval_result["rmse"]))

    print()
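Examples #7 and #8 hand `my_dnn_regression_fn` to `tf.estimator.Estimator`, but its definition is not on this page. Given the `params` keys they pass and the `eval_result["rmse"]` lookup, a TF 1.x model_fn along the following lines would fit; treat it as a reconstruction sketch, not the original code:

def my_dnn_regression_fn(features, labels, mode, params):
    """Hypothetical model_fn matching the params and metrics used above."""
    # Turn the feature columns from `params` into a dense input layer.
    net = tf.feature_column.input_layer(features, params["feature_columns"])
    for units in params.get("hidden_units", [20]):
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
    # A single linear output unit for the regression target.
    predictions = tf.squeeze(tf.layers.dense(net, units=1), axis=1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode, predictions={"predictions": predictions})

    average_loss = tf.losses.mean_squared_error(labels, predictions)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = params.get("optimizer", tf.train.AdamOptimizer)(
            params.get("learning_rate", 0.001))
        train_op = optimizer.minimize(
            average_loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(
            mode, loss=average_loss, train_op=train_op)

    # EVAL mode: expose the "rmse" metric that the snippets above read.
    rmse = tf.metrics.root_mean_squared_error(labels, predictions)
    return tf.estimator.EstimatorSpec(
        mode, loss=average_loss, eval_metric_ops={"rmse": rmse})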
Example #8
def main(argv):
  """Builds, trains, and evaluates the model."""
  args = parser.parse_args(argv[1:])

  (train_x, train_y), (test_x, test_y) = automobile_data.load_data()

  train_y /= args.price_norm_factor
  test_y /= args.price_norm_factor

  # Build the training dataset.
  train = (
      automobile_data.make_dataset(train_x, train_y)
      # Shuffling with a buffer larger than the data set ensures
      # that the examples are well mixed.
      .shuffle(1000).batch(args.batch_size)
      # Repeat forever
      .repeat())

  # Build the validation dataset.
  test = automobile_data.make_dataset(test_x, test_y).batch(args.batch_size)

  # The first way assigns a unique weight to each category. To do this, you must
  # specify the category's vocabulary (values outside this specification will
  # receive a weight of zero). Here we specify the vocabulary using a list of
  # options. The vocabulary can also be specified with a vocabulary file (using
  # `categorical_column_with_vocabulary_file`). For features covering a
  # range of positive integers use `categorical_column_with_identity`.
  body_style_vocab = ["hardtop", "wagon", "sedan", "hatchback", "convertible"]
  body_style = tf.feature_column.categorical_column_with_vocabulary_list(
      key="body-style", vocabulary_list=body_style_vocab)
  make = tf.feature_column.categorical_column_with_hash_bucket(
      key="make", hash_bucket_size=50)

  feature_columns = [
      tf.feature_column.numeric_column(key="curb-weight"),
      tf.feature_column.numeric_column(key="highway-mpg"),
      # Since this is a DNN model, convert categorical columns from sparse
      # to dense.
      # Wrap them in an `indicator_column` to create a
      # one-hot vector from the input.
      tf.feature_column.indicator_column(body_style),
      # Or use an `embedding_column` to create a trainable vector for each
      # index.
      tf.feature_column.embedding_column(make, dimension=3),
  ]

  # Build a custom Estimator, using the model_fn.
  # `params` is passed through to the `model_fn`.
  model = tf.estimator.Estimator(
      model_fn=my_dnn_regression_fn,
      params={
          "feature_columns": feature_columns,
          "learning_rate": 0.001,
          "optimizer": tf.train.AdamOptimizer,
          "hidden_units": [20, 20]
      })

  # Train the model.
  model.train(input_fn=from_dataset(train), steps=args.train_steps)

  # Evaluate how the model performs on data it has not yet seen.
  eval_result = model.evaluate(input_fn=from_dataset(test))

  # Print the Root Mean Square Error (RMSE).
  print("\n" + 80 * "*")
  print("\nRMS error for the test set: ${:.0f}"
        .format(args.price_norm_factor * eval_result["rmse"]))

  print()