Example #1
    def __init__(self, evaluate_every_n_games, job_dir):
        self.score_for_winning_position_history = []
        self.evaluate_every_n_games = evaluate_every_n_games
        self.job_dir = job_dir
        self.pct_loss_vs_minimax_history = []
        self.minimax_agent = MinimaxAgent(TicTacToe)
        self.hpt = hypertune.HyperTune()
Example #2
def _train_and_evaluate(estimator, dataset_path, output_dir):
    """Runs model training and evaluation.

    Args:
        estimator: (pipeline.Pipeline), Pipeline instance that assembles the
            pre-processing steps and model training
        dataset_path: (string), path to the training data
        output_dir: (string), directory to which the trained model will be exported

    Returns:
        None
    """
    estimator.fit(dataset_path)

    loss = estimator.score(dataset_path)

    logging.info(loss)

    # Write model and eval metrics to `output_dir`
    model_output_path = os.path.join(output_dir, "model", MODEL_FILE_NAME)

    dump_object(estimator, model_output_path)

    # The default name of the metric is training/hptuning/metric.
    # We recommend that you assign a custom name
    # The only functional difference is that if you use a custom name,
    # you must set the hyperparameterMetricTag value in the
    # HyperparameterSpec object in your job request to match your chosen name.
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag="loss",
        metric_value=loss,
        global_step=1000,
    )
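The tag reported above has to line up with the hyperparameterMetricTag of the HyperparameterSpec in the training job request. As a rough sketch only (the package URI, module name, and the 'alpha' parameter below are placeholders, not taken from this example), the relevant part of an AI Platform job body could look like:

training_input = {
    'pythonModule': 'trainer.task',  # placeholder module name
    'packageUris': ['gs://your-bucket/trainer-0.1.tar.gz'],  # placeholder
    'region': 'us-central1',
    'hyperparameters': {
        'goal': 'MINIMIZE',
        'hyperparameterMetricTag': 'loss',  # must match the tag reported above
        'maxTrials': 10,
        'maxParallelTrials': 2,
        'params': [{
            'parameterName': 'alpha',  # hypothetical tunable argument
            'type': 'DOUBLE',
            'minValue': 1e-5,
            'maxValue': 1e-1,
            'scaleType': 'UNIT_LOG_SCALE',
        }],
    },
}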
Example #3
def test(args, model, device, test_loader, epoch):
  model.eval()
  test_loss = 0
  correct = 0
  with torch.no_grad():
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output = model(data)
      test_loss += F.nll_loss(
          output, target, reduction='sum').item()  # sum up batch loss
      pred = output.max(
          1, keepdim=True)[1]  # get the index of the max log-probability
      correct += pred.eq(target.view_as(pred)).sum().item()

  test_loss /= len(test_loader.dataset)
  print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
      test_loss, correct, len(test_loader.dataset),
      100. * correct / len(test_loader.dataset)))

  # Uses hypertune to report metrics for hyperparameter tuning.
  hpt = hypertune.HyperTune()
  hpt.report_hyperparameter_tuning_metric(
      hyperparameter_metric_tag='my_loss',
      metric_value=test_loss,
      global_step=epoch)
Example #4
def main(args):
    paths = {}
    if (args.cloud_type.lower() == "gcp"):
        paths = gcp_path_setup(args)
    elif (args.cloud_type.lower() in ("aws")):
        paths = aws_path_setup(args)
    elif (args.cloud_type.lower() in ("azure")):
        paths = azure_path_setup(args)

    config_params = {}
    config_params['CV_folds'] = args.cv_folds
    config_params['compute'] = args.compute_type
    config_params['dataset'] = 'airline'
    config_params['dataset_filename'] = args.data_name
    config_params['cloud_type'] = args.cloud_type
    config_params['model_type'] = args.model_type
    config_params['num_samples'] = args.num_samples
    config_params['paths'] = paths
    config_params['do_ax_hpo'] = args.do_ax_hpo
    config_params['ht_est_range'] = args.ht_est_range
    config_params['ht_depth_range'] = args.ht_depth_range
    config_params['ht_features_range'] = args.ht_features_range
    config_params['ht_experiments'] = args.ht_experiments

    if ('RandomForest' in args.model_type):
        model_params = {
            'max_depth': args.hpo_max_depth,
            'max_features': args.hpo_max_features,
            'n_bins': args.hpo_num_bins,
            'n_estimators': args.hpo_num_est,
            'seed': random.random(),
            # 'seed': 0
        }
    elif ('XGBoost' in args.model_type):
        model_params = {
            'alpha': args.hpo_alpha,
            'gamma': args.hpo_gamma,
            'lambda': args.hpo_lambda,
            'learning_rate': args.hpo_lr,
            'max_depth': args.hpo_max_depth,
            'num_boost_round': args.hpo_num_boost_round,
            'random_state': 0,
            'tree_method': ('gpu_hist' if 'GPU' in config_params['compute']
                            else 'hist'),
        }

    model, accuracy = train(model_params=model_params,
                            config_params=config_params)

    if (args.cloud_type.lower() in ("gcp", ) and args.do_hpo):
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='hpo_accuracy', metric_value=accuracy)
Example #5
def _train_and_evaluate(estimator, dataset, model_dir):
    """Runs model training and evaluation."""
    x_train, y_train, x_eval, y_eval = dataset
    estimator.fit(x_train, y_train)
    logging.info("Completed training XGBOOST model")

    bst = estimator.get_booster()
    bst_filename = 'model.bst'
    bst.save_model(bst_filename)
    model_output_path = os.path.join(model_dir, bst_filename)
    utils.upload_blob(model_output_path.split("/")[2], bst_filename,
                      "/".join(model_output_path.split("/")[3:]))
    logging.info("Successfully uploaded file to GCS at location %s",
                 model_dir)
    y_pred = estimator.predict(x_eval)

    # Binarize multiclass labels
    lb = preprocessing.LabelBinarizer()
    lb.fit(y_eval)
    y_test = lb.transform(y_eval)
    y_pred = lb.transform(y_pred)

    score = metrics.roc_auc_score(y_test, y_pred, average='macro')
    logging.info("AUC Score: %s", str(score))

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='roc_auc',
        metric_value=score,
        global_step=1000
    )
Example #6
def _train_model_report_metrics(tree_params,
                                make_validation_labels_purchase_only):
    logging.info("setting group for dataset...")
    train_data = set_group_for_dataset(_LOCAL_TRAIN_FILE, query_id_column)
    valid_data = set_group_for_dataset(_LOCAL_VALID_FILE, query_id_column)

    alpha_values = np.arange(0.0, 1.1, 0.1)
    best_eval_result = []
    for alpha in alpha_values:
        evals_result = {}
        train_data.alpha = alpha
        valid_data.alpha = alpha
        logging.info("Training model...")
        lgb.train(
            params=tree_params,
            train_set=train_data,
            valid_sets=[valid_data],
            fobj=combined_objective,
            feval=combined_eval,
            # callbacks=[lgb.print_evaluation()],
            evals_result=evals_result)
        best_eval_result.append(_get_best_eval_result(evals_result))
    df = pandas.DataFrame(zip(alpha_values, best_eval_result))
    print(df)

    eval_scores = evals_result['valid_0']['ndcg_1']

    hpt = hypertune.HyperTune()
    for idx, score in enumerate(eval_scores):
        epoch = idx + 1
        _report_metric(hpt, epoch, score)
Example #7
def tune():
    X, y = get_data()

    too = torch.optim.Adam, torch.optim.Adadelta, torch.optim.Adagrad, torch.optim.ASGD
    to = ht.CategoricalParameter('torch_optimizer', options=too)
    eta = ht.ContinuousParameter('eta', lower_bound=1e-10, upper_bound=1e-1)
    mi = ht.DiscreteParameter('max_iter', lower_bound=1e2, upper_bound=1e4)

    hl1 = ht.DiscreteParameter('', lower_bound=10, upper_bound=100)
    hl2 = ht.DiscreteParameter('', lower_bound=10, upper_bound=100)
    hls = ht.TupleParameter('hidden_layer_sizes', values=(hl1, hl2))

    tp1 = ht.CategoricalParameter('', options=(nn.Linear, ))
    tp2 = ht.CategoricalParameter('', options=(nn.Linear, ))
    tp3 = ht.CategoricalParameter('', options=(nn.Linear, ))
    top = ht.TupleParameter('topology', values=(tp1, tp2, tp3))

    hypers = [to, eta, mi, hls, top]

    tuner = ht.HyperTune(algorithm=Net,
                         parameters=hypers,
                         train_func=Net.fit,
                         objective_func=Net.mse,
                         train_func_args=(X, y),
                         objective_func_args=(X, y),
                         max_evals=100,
                         maximize=False,
                         num_replications=1)

    tuner.tune()
    print(tuner.get_results())
Example #8
def train_and_report_metrics(xs,
                             ys,
                             num_repeat,
                             extractor_class,
                             useless_var_for_hparam_search=None):
    """
    Trains the model multiple times with the same parameters and returns the average metrics
    """

    all_val_auc = []
    all_val_accuracy = []

    for i in range(num_repeat):
        single_train_metrics = extractor_class().train_single_run(xs, ys, i)

        all_val_auc.append(single_train_metrics['val_auc'])
        all_val_accuracy.append(single_train_metrics['val_accuracy'])

    metrics = {
        "mean_val_auc": np.mean(all_val_auc),
        "mean_val_accuracy": np.mean(all_val_accuracy),
        "val_auc_std": np.std(all_val_auc),
        "val_accuracy_std": np.std(all_val_accuracy)
    }

    print(metrics, flush=True)

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='mean_val_auc',
        metric_value=metrics['mean_val_auc'])

    return metrics
Example #9
def train_evaluate(job_dir, training_dataset_path, validation_dataset_path,
                   alpha, max_iter, hptune):
    """Trains the Covertype Classifier model."""

    df_train = pd.read_csv(training_dataset_path)
    df_validation = pd.read_csv(validation_dataset_path)

    if not hptune:
        df_train = pd.concat([df_train, df_validation])

    numeric_features = [
        'Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
        'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
        'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
        'Horizontal_Distance_To_Fire_Points'
    ]

    categorical_features = ['Wilderness_Area', 'Soil_Type']

    preprocessor = ColumnTransformer(transformers=[(
        'num', StandardScaler(),
        numeric_features), ('cat', OneHotEncoder(), categorical_features)])

    pipeline = Pipeline([('preprocessor', preprocessor),
                         ('classifier', SGDClassifier(loss='log'))])

    num_features_type_map = {
        feature: 'float64'
        for feature in numeric_features
    }
    df_train = df_train.astype(num_features_type_map)
    df_validation = df_validation.astype(num_features_type_map)

    print('Starting training: alpha={}, max_iter={}'.format(alpha, max_iter))
    X_train = df_train.drop('Cover_Type', axis=1)
    y_train = df_train['Cover_Type']

    pipeline.set_params(classifier__alpha=alpha, classifier__max_iter=max_iter)
    pipeline.fit(X_train, y_train)

    if hptune:
        X_validation = df_validation.drop('Cover_Type', axis=1)
        y_validation = df_validation['Cover_Type']
        accuracy = pipeline.score(X_validation, y_validation)
        print('Model accuracy: {}'.format(accuracy))
        # Log it with hypertune
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='accuracy', metric_value=accuracy)

    # Save the model
    if not hptune:
        model_filename = 'model.pkl'
        with open(model_filename, 'wb') as model_file:
            pickle.dump(pipeline, model_file)
        gcs_model_path = '{}/{}'.format(job_dir, model_filename)
        subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path],
                              stderr=sys.stdout)
        print('Saved model in: {}'.format(gcs_model_path))
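A hypothetical command-line entry point for train_evaluate above (the flag names simply mirror the function's parameters and are assumptions, not the module's actual CLI); during a tuning job each trial would pass --hptune plus its sampled --alpha and --max_iter:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--job_dir', required=True)
    parser.add_argument('--training_dataset_path', required=True)
    parser.add_argument('--validation_dataset_path', required=True)
    parser.add_argument('--alpha', type=float, default=0.0001)
    parser.add_argument('--max_iter', type=int, default=200)
    parser.add_argument('--hptune', action='store_true')
    args = parser.parse_args()
    train_evaluate(args.job_dir, args.training_dataset_path,
                   args.validation_dataset_path, args.alpha, args.max_iter,
                   args.hptune)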
Example #10
def _run(game, network_params, memory_params, explore_decay, ops):
    """Sets up and runs the gaming simulation.

    Initializes TensorFlow, the training agent, and the game environment.
    The agent plays the game from the starting state for a number of
    episodes set by the user.

    Args:
      game: Name of the Gym environment to run.
      network_params: Parameters used to construct the agent's network.
      memory_params: Parameters used to construct the agent's replay memory.
      explore_decay: Decay applied to the agent's exploration rate.
      ops: Remaining command-line options parsed by _parse_arguments
        (job_dir, episodes, print_rate, eval_rate).
    """
    # Setup TensorBoard Writer.
    trial_id = json.loads(os.environ.get('TF_CONFIG',
                                         '{}')).get('task',
                                                    {}).get('trial', '')
    output_path = ops.job_dir if not trial_id else ops.job_dir + '/' + trial_id
    hpt = hypertune.HyperTune()

    graph = tf.Graph()
    with graph.as_default():
        env = gym.make(game)
        agent = _create_agent(env, network_params, memory_params,
                              explore_decay)

        def _train_or_evaluate(print_score, training=False):
            """Runs a gaming simulation and writes results for tensorboard.

            Args:
                print_score (bool): True to print a score to the console.
                training (bool): True if the agent is training, False to eval.
            """
            reward = _play(agent, env, training)
            if print_score:
                print(
                    'Training - ' if training else 'Evaluating - ',
                    'Episode: {}'.format(episode),
                    'Total reward: {}'.format(reward),
                )

            if training:
                agent.learn()
                return

            hpt.report_hyperparameter_tuning_metric(
                hyperparameter_metric_tag='episode_reward',
                metric_value=reward,
                global_step=episode)
            return

        for episode in range(1, ops.episodes + 1):
            print_score = ops.print_rate and episode % ops.print_rate == 0
            get_summary = ops.eval_rate and episode % ops.eval_rate == 0
            _train_or_evaluate(print_score, training=True)

            if get_summary:
                _train_or_evaluate(print_score)

        _record_video(env, agent, output_path)
        agent.network.save(output_path, save_format='tf')
Example #11
    def test_parameter_scope(self):
        cca = ht.ConstantParameter('a', value='cc.a')
        ccb = ht.ConstantParameter('b', value='cc.b')
        aio = ht.ObjectParameter('O', obj=CC, parameters=(cca, ccb))
        aia = ht.ConstantParameter('a', value='ai.a')

        r = ht.HyperTune(AI, [aia, aio], fit, acc,
                         max_evals=0).tune()['params']
        exp = {'O': {'a': 'cc.a', 'b': 'cc.b'}, 'a': 'ai.a'}
        self.assertEqual(r, exp)

        r = ht.HyperTune(AI, [aia, aio], fit, acc,
                         max_evals=0).tune()['params']
        self.assertEqual(r, exp)

        aio = ht.ObjectParameter('O', obj=CC, parameters=(ccb, cca))
        r = ht.HyperTune(AI, [aia, aio], fit, acc,
                         max_evals=0).tune()['params']
        self.assertEqual(r, exp)
Example #12
def train_evaluate(job_dir, training_dataset_path, validation_dataset_path,
                   alpha, max_iter, hptune):

    df_train = pd.read_csv(training_dataset_path)
    df_validation = pd.read_csv(validation_dataset_path)

    if not hptune:
        df_train = pd.concat([df_train, df_validation])

    numeric_feature_indexes = slice(0, 10)
    categorical_feature_indexes = slice(10, 12)

    preprocessor = ColumnTransformer(
        transformers=[('num', StandardScaler(), numeric_feature_indexes
                       ), ('cat', OneHotEncoder(),
                           categorical_feature_indexes)])

    pipeline = Pipeline([('preprocessor', preprocessor),
                         ('classifier', SGDClassifier(loss='log', tol=1e-3))])

    num_features_type_map = {
        feature: 'float64'
        for feature in df_train.columns[numeric_feature_indexes]
    }
    df_train = df_train.astype(num_features_type_map)
    df_validation = df_validation.astype(num_features_type_map)

    print('Starting training: alpha={}, max_iter={}'.format(alpha, max_iter))
    X_train = df_train.drop('Cover_Type', axis=1)
    y_train = df_train['Cover_Type']

    pipeline.set_params(classifier__alpha=alpha, classifier__max_iter=max_iter)
    pipeline.fit(X_train, y_train)

    if hptune:
        X_validation = df_validation.drop('Cover_Type', axis=1)
        y_validation = df_validation['Cover_Type']
        accuracy = pipeline.score(X_validation, y_validation)
        print('Model accuracy: {}'.format(accuracy))
        # Log it with hypertune
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='accuracy', metric_value=accuracy)
    # TODO: Score the model with the validation data and capture the result
    # with the hypertune library

    # Save the model
    if not hptune:
        model_filename = 'model.pkl'
        with open(model_filename, 'wb') as model_file:
            pickle.dump(pipeline, model_file)
        gcs_model_path = "{}/{}".format(job_dir, model_filename)
        subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path],
                              stderr=sys.stdout)
        print("Saved model in: {}".format(gcs_model_path))
Example #13
def train_and_evaluate(hparams):
    batch_size = hparams['batch_size']
    eval_data_path = hparams['eval_data_path']
    nnsize = hparams['nnsize']
    nbuckets = hparams['nbuckets']
    lr = hparams['lr']
    num_evals = hparams['num_evals']
    num_examples_to_train_on = hparams['num_examples_to_train_on']
    output_dir = hparams['output_dir']
    train_data_path = hparams['train_data_path']

    if tf.io.gfile.exists(output_dir):
        tf.io.gfile.rmtree(output_dir)

    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    savedmodel_dir = os.path.join(output_dir, 'savedmodel')
    model_export_path = os.path.join(savedmodel_dir, timestamp)
    checkpoint_path = os.path.join(output_dir, 'checkpoints')
    tensorboard_path = os.path.join(output_dir, 'tensorboard')

    dnn_model = build_dnn_model(nbuckets, nnsize, lr)
    logging.info(dnn_model.summary())

    trainds = create_train_dataset(train_data_path, batch_size)
    evalds = create_eval_dataset(eval_data_path, batch_size)

    steps_per_epoch = num_examples_to_train_on // (batch_size * num_evals)

    checkpoint_cb = callbacks.ModelCheckpoint(checkpoint_path,
                                              save_weights_only=True,
                                              verbose=1)

    tensorboard_cb = callbacks.TensorBoard(tensorboard_path, histogram_freq=1)

    history = dnn_model.fit(
        trainds,
        validation_data=evalds,
        epochs=num_evals,
        steps_per_epoch=max(1, steps_per_epoch),
        verbose=2,  # 0=silent, 1=progress bar, 2=one line per epoch
        callbacks=[checkpoint_cb, tensorboard_cb])

    # Exporting the model with default serving function.
    tf.saved_model.save(dnn_model, model_export_path)

    # Use the final epoch's validation RMSE as the tuning metric
    hp_metric = history.history['val_rmse'][num_evals - 1]

    # Report the metric to the hyperparameter tuning service
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(hyperparameter_metric_tag='rmse',
                                            metric_value=hp_metric,
                                            global_step=num_evals)

    return history
Example #14
    def test_default_parameters(self):
        cea = ht.ConstantParameter('a', value='ce.a')
        cec = ht.ConstantParameter('c', value='ce.c')

        toa = (None, ) * 2
        args = CE, [cec, cea], fit, acc, *toa, 0
        print(args)

        r = ht.HyperTune(*args).tune()['params']
        exp = {'a': 'ce.a', 'c': 'ce.c'}
        self.assertEqual(r, exp)
Example #15
def _train_and_evaluate(estimator, output_dir):
    """Runs model training and evaluation.

    Args:
      estimator: (pipeline.Pipeline), Pipeline instance, in this case, model training
      output_dir: (string), directory to which the trained model will be exported

    Returns:
      None
    """
    # Disabled alternative (no oversampling):
    # X_train, y_train = utils._feature_label_split(df_train, "is_churn", "msno")
    # df_val = utils.read_from_bigquery("amiable-octane-267022.kkbox.output_val_1", "amiable-octane-267022")
    # X_val, y_val = utils._feature_label_split(df_val, "is_churn", "msno")

    df_train = utils.over_sample("amiable-octane-267022.kkbox.output_train_1",
                                 "amiable-octane-267022")
    X_train, y_train = utils._feature_label_split(df_train, "is_churn", "msno")
    df_val = utils.over_sample("amiable-octane-267022.kkbox.output_val_1",
                               "amiable-octane-267022")
    X_val, y_val = utils._feature_label_split(df_val, "is_churn", "msno")

    estimator.fit(X_train, y_train)
    f1_scorer = make_scorer(f1_score)
    accuracy_scorer = make_scorer(accuracy_score)

    if metadata.HYPERPARAMTER_TUNING:
        scores = model_selection.cross_val_score(estimator,
                                                 X_val,
                                                 y_val,
                                                 cv=3,
                                                 scoring=f1_scorer)
        #,scoring=f1_scorer

        logging.info('Score: %s', scores)

        # report the tuning metric with hypertune
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='F1_SCORE',
            metric_value=np.mean(scores),
            global_step=10000)


    # joblib.dump(estimator, 'model.joblib')

    # Write model and eval metrics to `output_dir`
    model_output_path = os.path.join(output_dir, 'model',
                                     metadata.MODEL_FILE_NAME)

    utils.dump_object(estimator, model_output_path)
Example #16
def train_evaluate(gcs_csv_path, gcs_output_path, hptune=False,
                   n_estimators=300, learning_rate=0.1, scale_pos_weight='TRUE'):
    # Load dataframe from GCS
    cover_df = pd.read_csv(gcs_csv_path)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(cover_df)
    n_pos = y_train.sum()
    n_neg = y_train.shape[0] - n_pos

    # Preprocess data
    preprocessor = fit_preprocessor(pd.concat([X_train, X_test]))
    X_train = preprocessor.transform(X_train)

    # Prepare hyperparams and train model
    hparams = {
        'n_estimators': n_estimators,
        'learning_rate': learning_rate
    }
    if scale_pos_weight in ('TRUE', 'True', 'true'):
        hparams['scale_pos_weight'] = n_neg / n_pos
    clf = train_model(X_train, y_train, **hparams)

    # Evaluate model on test set
    X_test = preprocessor.transform(X_test)
    acc, f1 = evaluate_model(clf, X_test, y_test)
    print(
        f'n_pos: {n_pos} - n_neg {n_neg}'
        f'\tAccuracy: {acc} \t F1-score: {f1}'
    )

    # Report metric to cloud hypertune
    if hptune:
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='f1-score',
            metric_value=f1
        )
    # Train model on all data and save to GCS
    else:
        clf = train_model(np.append(X_train, X_test, axis=0),
                          np.append(y_train, y_test),
                          **hparams)

        # Save clf and preprocessor
        with tempfile.TemporaryDirectory() as tmpdir:
            clf.save_model(f'{tmpdir}/xgboost.bin')
            joblib.dump(preprocessor, f'{tmpdir}/preprocessor.joblib')

            upload_file(f'{tmpdir}/xgboost.bin', f'{gcs_output_path}/xgboost.bin')
            upload_file(f'{tmpdir}/preprocessor.joblib', f'{gcs_output_path}/preprocessor.joblib')
Example #17
def _train_and_evaluate(estimator, dataset, model_dir, params):
    """Runs model training and evaluation."""
    x_train, y_train, x_eval, y_eval = dataset
    estimator.fit(x_train, y_train)

    model_path = os.path.join(model_dir, "model.joblib")
    utils.dump_object(estimator, model_path)

    scores = model_selection.cross_val_score(
        estimator, x_eval, y_eval, cv=params.cross_validations)
    metric_path = os.path.join(model_dir, "eval_metrics.joblib")
    utils.dump_object(scores, metric_path)

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag="score",
        metric_value=np.mean(scores))
Example #18
def main():

    args = get_args()

    path_data = args.pathdata

    output_bucket = args.pathoutput

    storage = args.storage

    numberestimators = args.numberestimators

    full_table_path = args.bqtable

    if storage in ['BQ', 'bq', 'bigquery', 'BigQuery']:
        dataset = utils.read_df_from_bigquery(full_table_path)
    else:
        dataset = utils.get_data_from_gcs(path_data)

    x_train, y_train, x_val, y_val = utils.data_train_test_split(dataset)

    pipeline = model.get_pipeline(numberestimators, args.minsamplesleaf)

    pipeline.fit(x_train, y_train)

    scores = model_selection.cross_val_score(pipeline, x_val, y_val, cv=3)

    model_output_path = os.path.join(output_bucket, 'model',
                                     metadata.MODEL_FILE_NAME)

    metric_output_path = os.path.join(output_bucket, 'experiment',
                                      metadata.METRIC_FILE_NAME)

    utils.dump_object(pipeline, model_output_path)
    utils.dump_object(scores, metric_output_path)

    accuracy = pipeline.score(x_val, y_val)

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='accuracy',
        metric_value=accuracy,
        global_step=1000)

    print("model score: %.3f" % pipeline.score(x_val, y_val))
    print('pipeline run done :)')
Example #19
def test(sequential_model, test_loader, criterion, epoch, report_metric=False):
    """Test / Evaluate the DNNs performance with a test / eval dataset.
     Read the data from the dataloader and calculate the loss. Lastly,
     display some statistics about the performance of the DNN during testing.

    Args:
      sequential_model: The neural network that you are testing, based on
      nn.Module
      test_loader: The test / evaluation dataset
      criterion: The loss function
      epoch: The current epoch that the training loop is on
      report_metric: Whether to report metrics for hyperparameter tuning
    """
    sequential_model.eval()
    test_loss = 0.0
    correct = 0

    with torch.no_grad():
        for _, data in enumerate(test_loader, 0):
            features = data['features']
            target = data['target']
            output = sequential_model(features)
            # sum up batch loss
            test_loss += criterion(output, target).item()
            # compute accuracy for a binary classifier
            #    Values > 0.5 = 1
            #    Values <= 0.5 = 0
            correct += ((output > 0.5) == (target > 0.5)).sum().item()

    # get the average loss for the test set.
    test_loss /= (len(test_loader.sampler) / test_loader.batch_size)

    if report_metric:
        # Uses hypertune to report metrics for hyperparameter tuning.
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='test_loss',
            metric_value=test_loss,
            global_step=epoch)

    # print statistics
    print('\nTest set:\n\tAverage loss: {:.4f}'.format(test_loss))
    print('\tAccuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(test_loader.sampler),
        100. * correct / len(test_loader.sampler)))
Example #20
    def test_import(self):
        print('go')

        a = ht.ContinuousParameter('a', lower_bound=0, upper_bound=1)
        hypers = [a]

        gs = ht.optimizers.GridSearch(depth=1, resolution=0.1)
        tuner = ht.HyperTune(algorithm=A,
                             parameters=hypers,
                             optimizer=gs,
                             train_func=A.fit,
                             objective_func=A.acc,
                             max_evals=100,
                             maximize=False,
                             num_replications=1)

        results = tuner.tune()
        print(results)
Example #21
    def __init__(self,
                 path,
                 train_config,
                 update_freq='epoch',
                 metric='epoch_acc/val',
                 hparams=None):
        # Parse params
        self.path = path
        self.log_stage = self._parse_stage(update_freq)
        self.log_freq = self._parse_freq(update_freq)
        self.train_config = train_config
        self.metric = metric
        self.hpt = hypertune.HyperTune()
        self.hparams = hparams
        # Initialise summary writer
        if path.startswith('gs://'):
            gsutil.gcloud_auth()
        log_path = os.path.join(path, 'logs')
        super().__init__(log_path)
Example #22
def train_and_evaluate(args):
    model = build_wide_deep_model(args["nnsize"], args["nembeds"])
    print("Here is our Wide-and-Deep architecture so far:\n")
    print(model.summary())

    trainds = load_dataset(args["train_data_path"], args["batch_size"],
                           'train')

    evalds = load_dataset(args["eval_data_path"], 1000, 'eval')
    if args["eval_steps"]:
        evalds = evalds.take(count=args["eval_steps"])

    num_batches = args["batch_size"] * args["num_epochs"]
    steps_per_epoch = args["train_examples"] // num_batches

    checkpoint_path = os.path.join(args["output_dir"],
                                   "checkpoints/babyweight")
    cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                     verbose=1,
                                                     save_weights_only=True)

    history = model.fit(
        trainds,
        validation_data=evalds,
        epochs=args["num_epochs"],
        steps_per_epoch=steps_per_epoch,
        verbose=2,  # 0=silent, 1=progress bar, 2=one line per epoch
        callbacks=[cp_callback])

    EXPORT_PATH = os.path.join(
        args["output_dir"],
        datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
    tf.saved_model.save(
        obj=model, export_dir=EXPORT_PATH)  # with default serving function

    hp_metric = history.history['val_rmse'][-1]

    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(hyperparameter_metric_tag='rmse',
                                            metric_value=hp_metric,
                                            global_step=args['num_epochs'])

    print("Exported trained model to {}".format(EXPORT_PATH))
Example #23
def train_model(args):
    """Load the data, train the model, test the model, export / save the model
    """
    torch.manual_seed(args.seed)

    # Open our dataset
    train_loader, test_loader = data_utils.load_data(args.test_split,
                                                     args.seed,
                                                     args.batch_size)

    # Create the model
    net = model.SonarDNN().double()
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          nesterov=False)

    # Train / Test the model
    latest_accuracy = 0.0
    for epoch in range(1, args.epochs + 1):
        train(net, train_loader, optimizer)
        latest_accuracy = test(net, test_loader)

    # The default name of the metric is training/hptuning/metric.
    # We recommend that you assign a custom name. The only functional
    # difference is that if you use a custom name, you must set the
    # hyperparameterMetricTag value in the HyperparameterSpec object in your
    # job request to match your chosen name.
    # https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#HyperparameterSpec
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='my_accuracy_tag',
        metric_value=latest_accuracy,
        global_step=args.epochs)

    # Export the trained model
    torch.save(net.state_dict(), args.model_name)

    if args.job_dir:
        # Save the model to GCS
        data_utils.save_model(args.job_dir, args.model_name)
    else:
        print('Accuracy: {:.0f}%'.format(latest_accuracy))
Example #24
def _train_and_evaluate(estimator, dataset, output_dir):
    """Runs model training and evaluation.

    Args:
      estimator: (pipeline.Pipeline), Pipeline instance, assemble pre-processing
        steps and model training
      dataset: (pandas.DataFrame), DataFrame containing training data
      output_dir: (string), directory to which the trained model will be exported

    Returns:
      None
    """
    x_train, y_train, x_val, y_val = utils.data_train_test_split(dataset)
    estimator.fit(x_train, y_train)

    # Write model and eval metrics to `output_dir`
    model_output_path = os.path.join(output_dir, 'model',
                                     metadata.MODEL_FILE_NAME)

    utils.dump_object(estimator, model_output_path)

    if metadata.METRIC_FILE_NAME is not None:
        # Note: for now, use `cross_val_score` defaults (i.e. 3-fold)
        scores = model_selection.cross_val_score(estimator, x_val, y_val, cv=3)

        logging.info('Scores: %s', scores)

        metric_output_path = os.path.join(output_dir, 'experiment',
                                          metadata.METRIC_FILE_NAME)

        utils.dump_object(scores, metric_output_path)

        # The default name of the metric is training/hptuning/metric.
        # We recommend that you assign a custom name
        # The only functional difference is that if you use a custom name,
        # you must set the hyperparameterMetricTag value in the
        # HyperparameterSpec object in the job request to match your chosen name
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='my_metric_tag',
            metric_value=np.mean(scores),
            global_step=1000)
Example #25
    def __call__(self, trainer):
        log_report = self._log_report
        if isinstance(log_report, str):
            log_report = trainer.get_extension(log_report)
        elif isinstance(log_report, log_report_module.LogReport):
            log_report(trainer)  # update the log report
        else:
            raise TypeError('log report has a wrong type %s' %
                            type(log_report))

        log = log_report.log
        log_len = self._log_len
        hpt = hypertune.HyperTune()

        while len(log) > log_len:
            target_log = log[log_len]
            hpt.report_hyperparameter_tuning_metric(
                hyperparameter_metric_tag=self._hp_metric_tag,
                metric_value=target_log[self._hp_metric_val],
                global_step=target_log[self._hp_global_step])
            log_len += 1
        self._log_len = log_len
Example #26
def run_job(opts):
    def input_and_label(rec):
        return rec['ref'], rec['ref']
    ds = (read_dataset(opts['input'])
          .map(input_and_label)
          .batch(opts['batch_size'])
          .repeat())
    
    checkpoint = tf.keras.callbacks.ModelCheckpoint(os.path.join(opts['job_dir'], 'checkpoints'))
    
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        autoencoder = create_model(opts['num_layers'], opts['pool_size'])
        print(autoencoder)
        history = autoencoder.fit(ds, steps_per_epoch=opts['num_steps']//opts['num_checkpoints'],
                              epochs=opts['num_checkpoints'], shuffle=True, callbacks=[checkpoint])
    
        autoencoder.save(os.path.join(opts['job_dir'], 'savedmodel'))
        
        # report final metric to hyperparameter tuner
        hpt = hypertune.HyperTune()
        hpt.report_hyperparameter_tuning_metric(
            hyperparameter_metric_tag='final_loss',
            metric_value=history.history['loss'][-1],
            global_step=1
        )
Example #27
def train(job_dir, data_path, n_components, alpha):

    # Load data from GCS
    df_train = pd.read_csv(data_path)

    y = df_train.octane
    X = df_train.drop('octane', axis=1)

    # Configure a training pipeline
    pipeline = Pipeline([('scale', StandardScaler()),
                         ('reduce_dim', PCA(n_components=n_components)),
                         ('regress', Ridge(alpha=alpha))])

    # Calculate the performance metric
    scores = cross_val_score(pipeline,
                             X,
                             y,
                             cv=10,
                             scoring='neg_mean_squared_error')

    # Log it with hypertune
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='neg_mean_squared_error',
        metric_value=scores.mean())

    # Fit the model on a full dataset
    pipeline.fit(X, y)

    # Save the model
    model_filename = 'model.joblib'
    joblib.dump(value=pipeline, filename=model_filename)
    gcs_model_path = "{}/{}".format(job_dir, model_filename)
    subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path],
                          stderr=sys.stdout)
    logging.info("Saved model in: {}".format(gcs_model_path))
Example #28
    def test_parameter_scope2(self):
        cca = ht.ConstantParameter('a', value='cc.a')
        ccb = ht.ConstantParameter('b', value='cc.b')
        aio = ht.ObjectParameter('O', obj=CC, parameters=(ccb, cca))
        aia = ht.ConstantParameter('a', value='ai.a')

        cdo = ht.ObjectParameter('O', obj=AI, parameters=(aio, aia))
        cda = ht.ConstantParameter('a', value='cd.a')
        cdb = ht.ConstantParameter('b', value='cd.b')

        r = ht.HyperTune(CD, [cda, cdb, cdo], fit, acc,
                         max_evals=0).tune()['params']
        exp = {
            'a': 'cd.a',
            'b': 'cd.b',
            'O': {
                'O': {
                    'a': 'cc.a',
                    'b': 'cc.b'
                },
                'a': 'ai.a'
            }
        }
        self.assertEqual(r, exp)
Example #29
def train_and_evaluate(
    model,
    num_epochs,
    steps_per_epoch,
    train_data,
    validation_steps,
    eval_data,
    output_dir,
    n_steps_history,
    FLAGS,
    decay_type,
    learning_rate=3e-5,
    s=1,
    n_batch_decay=1,
    metric_accuracy='metric',
):
    """
    Compiles keras model and loads data into it for training.
    """
    logging.info('training the model ...')
    model_callbacks = []

    # create meta data dictionary
    dict_model = {}
    dict_data = {}
    dict_parameter = {}
    dict_hardware = {}
    dict_results = {}
    dict_type_job = {}
    dict_software = {}

    # for debugging only
    activate_tensorboard = True
    activate_hp_tensorboard = False  # True
    activate_lr = False
    save_checkpoints = False  # True
    save_history_per_step = False  # True
    save_metadata = False  # True
    activate_timing = False  # True
    # drop official method that is not working
    activate_tf_summary_hp = True  # False
    # hardcoded way of doing hp
    activate_hardcoded_hp = True  # True

    # dependencies
    if activate_tf_summary_hp:
        save_history_per_step = True

    if FLAGS.is_hyperparameter_tuning:
        # get trial ID
        suffix = mu.get_trial_id()

        if suffix == '':
            logging.error('No trial ID for hyper parameter job!')
            FLAGS.is_hyperparameter_tuning = False
        else:
            # callback for hp
            logging.info('Creating a callback to store the metric!')
            if activate_tf_summary_hp:
                hp_metric = mu.HP_metric(metric_accuracy)
                model_callbacks.append(hp_metric)

    if output_dir:
        if activate_tensorboard:
            # tensorflow callback
            log_dir = os.path.join(output_dir, 'tensorboard')
            if FLAGS.is_hyperparameter_tuning:
                log_dir = os.path.join(log_dir, suffix)
            tensorboard_callback = tf.keras.callbacks.TensorBoard(
                log_dir=log_dir,
                histogram_freq=1,
                embeddings_freq=0,
                write_graph=True,
                update_freq='batch',
                profile_batch='10, 20')
            model_callbacks.append(tensorboard_callback)

        if save_checkpoints:
            # checkpoints callback
            checkpoint_dir = os.path.join(output_dir, 'checkpoint_model')
            if not FLAGS.is_hyperparameter_tuning:
                # not saving model during hyper parameter tuning
                # checkpoint_dir = os.path.join(checkpoint_dir, suffix)
                checkpoint_prefix = os.path.join(checkpoint_dir,
                                                 'ckpt_{epoch:02d}')
                checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
                    filepath=checkpoint_prefix,
                    verbose=1,
                    save_weights_only=True)
                model_callbacks.append(checkpoint_callback)

    if activate_lr:
        # decay learning rate callback

        # code snippet to make the switching between different learning rate decays possible
        if decay_type == 'exponential':
            decay_fn = mu.exponential_decay(lr0=learning_rate, s=s)
        elif decay_type == 'stepwise':
            decay_fn = mu.step_decay(lr0=learning_rate, s=s)
        elif decay_type == 'timebased':
            decay_fn = mu.time_decay(lr0=learning_rate, s=s)
        else:
            decay_fn = mu.no_decay(lr0=learning_rate)

        # exponential_decay_fn = mu.exponential_decay(lr0=learning_rate, s=s)
        # lr_scheduler = tf.keras.callbacks.LearningRateScheduler(exponential_decay_fn, verbose=1)
        # model_callbacks.append(lr_scheduler)

        # added these two lines for batch updates
        lr_decay_batch = mu.LearningRateSchedulerPerBatch(decay_fn,
                                                          n_batch_decay,
                                                          verbose=1)
        # lr_decay_batch = mu.LearningRateSchedulerPerBatch(exponential_decay_fn, n_batch_decay, verbose=0)
        # lambda step: ((learning_rate - min_learning_rate) * decay_rate ** step + min_learning_rate))
        model_callbacks.append(lr_decay_batch)

        # print_lr = mu.PrintLR()
        # model_callbacks.append(mu.PrintLR())
        # ---------------------------------------------------------------------------------------------------------------

        # callback to store all the learning rates
        # all_learning_rates = mu.LearningRateSchedulerPerBatch(model.optimizer, n_steps_history)
        # all_learning_rates = mu.LR_per_step()
        # all_learning_rates = mu.LR_per_step(model.optimizer)
        # model_callbacks.append(all_learning_rates)  # disabled

    if save_history_per_step:
        # callback to create  history per step (not per epoch)
        histories_per_step = mu.History_per_step(eval_data, n_steps_history)
        model_callbacks.append(histories_per_step)

    if activate_timing:
        # callback to time each epoch
        timing = mu.TimingCallback()
        model_callbacks.append(timing)

    # log the model's callbacks
    logging.info('model\'s callback:\n {}'.format(str(model_callbacks)))

    # train the model
    # time the function
    start_time = time.time()

    logging.info('starting model.fit')
    # verbose = 0 (silent)
    # verbose = 1 (progress bar)
    # verbose = 2 (one line per epoch)
    verbose = 1
    history = model.fit(train_data,
                        epochs=num_epochs,
                        steps_per_epoch=steps_per_epoch,
                        validation_data=eval_data,
                        validation_steps=validation_steps,
                        verbose=verbose,
                        callbacks=model_callbacks)

    # print execution time
    elapsed_time_secs = time.time() - start_time
    logging.info('\nexecution time: {}'.format(
        timedelta(seconds=round(elapsed_time_secs))))

    # check model
    logging.info('model summary ={}'.format(model.summary()))
    logging.info('model input ={}'.format(model.inputs))
    logging.info('model outputs ={}'.format(model.outputs))

    # to be removed: temporary debugging output
    logging.info('\ndebugging .... : ')
    pp.print_info_data(train_data)

    if activate_timing:
        logging.info('timing per epoch:\n{}'.format(
            list(
                map(lambda x: str(timedelta(seconds=round(x))),
                    timing.timing_epoch))))
        logging.info('timing per validation:\n{}'.format(
            list(
                map(lambda x: str(timedelta(seconds=round(x))),
                    timing.timing_valid))))
        logging.info('sum timing over all epochs:\n{}'.format(
            timedelta(seconds=round(sum(timing.timing_epoch)))))

    # for hp parameter tuning in TensorBoard
    if FLAGS.is_hyperparameter_tuning:
        logging.info('setup hyperparameter tuning!')
        # test
        #params = json.loads(os.environ.get("CLUSTER_SPEC", "{}")).get("job", {})
        #print('debug: CLUSTER_SPEC1:', params)
        #params = json.loads(os.environ.get("CLUSTER_SPEC", "{}")).get("job", {}).get("job_args", {})
        #print('debug: CLUSTER_SPEC2:', params)
        logging.info('debug: os.environ.items(): %s', os.environ.items())
        #
        if activate_hardcoded_hp:
            # trick to bypass ai platform bug
            logging.info('hardcoded hyperparameter tuning!')
            value_accuracy = histories_per_step.accuracies[-1]
            hpt = hypertune.HyperTune()
            hpt.report_hyperparameter_tuning_metric(
                hyperparameter_metric_tag=metric_accuracy,
                metric_value=value_accuracy,
                global_step=0)
        else:
            # should be extracted from /var/hypertune/output.metric
            logging.info('standard hyperparameter tuning!')
            # is this needed ?
            # value_accuracy = histories_per_step.accuracies[-1]

        # look at the content of the file
        path_metric = '/var/hypertune/output.metric'
        logging.info('checking if /var/hypertune/output.metric exist!')
        if os.path.isfile(path_metric):
            logging.info('file {} exist !'.format(path_metric))
            with open(path_metric, 'r') as f:
                logging.info('content of output.metric: {}'.format(f.read()))

        if activate_hp_tensorboard:
            logging.info('setup TensorBoard for hyperparameter tuning!')
            # CAIP
            #params = json.loads(os.environ.get("TF_CONFIG", "{}")).get("job", {}).get("hyperparameters", {}).get("params", {})
            #uCAIP
            params = json.loads(
                os.environ.get("CLUSTER_SPEC", "{}")
            )  #.get("job", {}).get("hyperparameters", {}).get("params", {})
            print('debug: CLUSTER_SPEC:', params)
            list_hp = []
            hparams = {}
            for el in params:
                hp_dict = dict(el)
                if hp_dict.get('type') == 'DOUBLE':
                    key_hp = hp.HParam(
                        hp_dict.get('parameter_name'),
                        hp.RealInterval(hp_dict.get('min_value'),
                                        hp_dict.get('max_value')))
                    list_hp.append(key_hp)
                    try:
                        hparams[key_hp] = FLAGS[hp_dict.get(
                            'parameter_name')].value
                    except KeyError:
                        logging.error(
                            'hyperparameter key {} doesn\'t exist'.format(
                                hp_dict.get('parameter_name')))

            hparams_dir = os.path.join(output_dir, 'hparams_tuning')
            with tf.summary.create_file_writer(hparams_dir).as_default():
                hp.hparams_config(
                    hparams=list_hp,
                    metrics=[
                        hp.Metric(metric_accuracy,
                                  display_name=metric_accuracy)
                    ],
                )

            hparams_dir = os.path.join(hparams_dir, suffix)
            with tf.summary.create_file_writer(hparams_dir).as_default():
                # record the values used in this trial
                hp.hparams(hparams)
                tf.summary.scalar(metric_accuracy, value_accuracy, step=1)

    if save_history_per_step:
        # save the history in a file
        search = re.search('gs://(.*?)/(.*)', output_dir)
        if search is not None:
            # temp folder locally, to be moved to GCS later
            history_dir = os.path.join('./', model.name)
            os.makedirs(history_dir, exist_ok=True)
        else:
            # locally
            history_dir = os.path.join(output_dir, model.name)
            os.makedirs(history_dir, exist_ok=True)
        logging.debug('history_dir: \n {}'.format(history_dir))
        with open(history_dir + '/history', 'wb') as file:
            model_history = mu.History_trained_model(history.history,
                                                     history.epoch,
                                                     history.params)
            pickle.dump(model_history, file, pickle.HIGHEST_PROTOCOL)
        with open(history_dir + '/history_per_step', 'wb') as file:
            model_history_per_step = mu.History_per_steps_trained_model(
                histories_per_step.steps,
                histories_per_step.losses,
                histories_per_step.accuracies,
                histories_per_step.val_steps,
                histories_per_step.val_losses,
                histories_per_step.val_accuracies,
                0,  # all_learning_rates.all_lr,
                0,  # all_learning_rates.all_lr_alternative,
                0)  # all_learning_rates.all_lr_logs)
            pickle.dump(model_history_per_step, file, pickle.HIGHEST_PROTOCOL)

    if output_dir:
        # save the model
        savemodel_path = os.path.join(output_dir, 'saved_model')

        if not FLAGS.is_hyperparameter_tuning:
            # not saving model during hyper parameter tuning
            # savemodel_path = os.path.join(savemodel_path, suffix)
            model.save(os.path.join(savemodel_path, model.name))

            model2 = tf.keras.models.load_model(
                os.path.join(savemodel_path, model.name))
            # check model
            logging.info('model2 summary ={}'.format(model2.summary()))
            logging.info('model2 input ={}'.format(model2.inputs))
            logging.info('model2 outputs ={}'.format(model2.outputs))

            logging.info('model2 signature outputs ={}'.format(
                model2.signatures['serving_default'].structured_outputs))
            logging.info('model2 inputs ={}'.format(
                model2.signatures['serving_default'].inputs[0]))

        if save_history_per_step:
            # save history
            search = re.search('gs://(.*?)/(.*)', output_dir)
            if search is not None:
                bucket_name = search.group(1)
                blob_name = search.group(2)
                output_folder = blob_name + '/history'
                if FLAGS.is_hyperparameter_tuning:
                    output_folder = os.path.join(output_folder, suffix)
                mu.copy_local_directory_to_gcs(history_dir, bucket_name,
                                               output_folder)

    if save_metadata:
        # add meta data
        dict_model['pretrained_transformer_model'] = FLAGS.pretrained_model_dir
        dict_model['num_classes'] = FLAGS.num_classes

        dict_data['train'] = FLAGS.input_train_tfrecords
        dict_data['eval'] = FLAGS.input_eval_tfrecords

        dict_parameter[
            'use_decay_learning_rate'] = FLAGS.use_decay_learning_rate
        dict_parameter['epochs'] = FLAGS.epochs
        dict_parameter['steps_per_epoch_train'] = FLAGS.steps_per_epoch_train
        dict_parameter['steps_per_epoch_eval'] = FLAGS.steps_per_epoch_eval
        dict_parameter['n_steps_history'] = FLAGS.n_steps_history
        dict_parameter['batch_size_train'] = FLAGS.batch_size_train
        dict_parameter['batch_size_eval'] = FLAGS.batch_size_eval
        dict_parameter['learning_rate'] = FLAGS.learning_rate
        dict_parameter['epsilon'] = FLAGS.epsilon

        dict_hardware['is_tpu'] = FLAGS.use_tpu

        dict_type_job[
            'is_hyperparameter_tuning'] = FLAGS.is_hyperparameter_tuning
        dict_type_job['is_tpu'] = FLAGS.use_tpu

        dict_software['tensorflow'] = tf.__version__
        dict_software['transformer'] = __version__
        dict_software['python'] = sys.version

        # aggregate dictionaries
        dict_all = {
            'model': dict_model,
            'data': dict_data,
            'parameter': dict_parameter,
            'hardware': dict_hardware,
            'results': dict_results,
            'type_job': dict_type_job,
            'software': dict_software
        }

        # save metadata
        search = re.search('gs://(.*?)/(.*)', output_dir)
        if search is not None:
            bucket_name = search.group(1)
            blob_name = search.group(2)
            output_folder = blob_name + '/metadata'

            storage_client = storage.Client()
            bucket = storage_client.bucket(bucket_name)
            blob = bucket.blob(output_folder + '/model_job_metadata.json')
            blob.upload_from_string(data=json.dumps(dict_all),
                                    content_type='application/json')
Example #30
def report_metric_to_hypertune(metric_value, step, tag='Loss'):
    """Use hypertune to report metrics for hyperparameter tuning."""
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(hyperparameter_metric_tag=tag,
                                            metric_value=metric_value,
                                            global_step=step)
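A minimal usage sketch for the helper above, with made-up placeholder losses rather than real results; each call tags the value with its epoch as the global step:

if __name__ == '__main__':
    placeholder_losses = [0.9, 0.6, 0.45, 0.4]  # illustrative values only
    for epoch, loss in enumerate(placeholder_losses, start=1):
        report_metric_to_hypertune(metric_value=loss, step=epoch, tag='Loss')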