Exemplo n.º 1
0
def test_hook_from_json_config_for_losses(tmpdir, monkeypatch, params):
    """Verify which collections contain the eval metric for a JSON-configured hook.

    The hook is created from a JSON config written into ``tmpdir`` and located
    through the ``CONFIG_FILE_PATH_ENV_STR`` environment variable. After the
    XGBoost run, the ``train-<metric>`` and ``test-<metric>`` tensors are
    checked:

    * ``rmse``: present in both the METRICS and the LOSSES collections.
    * ``auc`` / ``map``: present in METRICS, absent from LOSSES.

    Any other ``eval_metric`` value only exercises the run itself.
    """
    out_dir = tmpdir.join("test_hook_from_json_config_for_losses")
    json_path = tmpdir.join("config.json")
    json_path.write(get_json_config_for_losses(str(out_dir)))
    monkeypatch.setenv(CONFIG_FILE_PATH_ENV_STR, str(json_path))

    hook = Hook.create_from_json_file()
    # Nothing has been trained yet, so the end-of-training marker must be absent.
    assert has_training_ended(out_dir) is False

    run_xgboost_model(hook=hook, params=params)
    trial = create_trial(str(out_dir))

    eval_metric = params["eval_metric"]
    # Checked in train-then-test order.
    metric_names = (f"train-{eval_metric}", f"test-{eval_metric}")

    if eval_metric == "rmse":
        for name in metric_names:
            assert name in trial.tensor_names(
                collection=CollectionKeys.METRICS)
            assert name in trial.tensor_names(
                collection=CollectionKeys.LOSSES)
    elif eval_metric in ("auc", "map"):
        for name in metric_names:
            assert name in trial.tensor_names(
                collection=CollectionKeys.METRICS)
            assert name not in trial.tensor_names(
                collection=CollectionKeys.LOSSES)
Exemplo n.º 2
0
def test_hook_from_json_config_full(tmpdir, monkeypatch):
    """Create a hook from the full JSON config and run the XGBoost model with it.

    The test passes if hook creation and the training run complete without
    raising, and if no end-of-training marker exists before the run starts.
    """
    out_dir = tmpdir.join("test_hook_from_json_config_full")

    # Write the config to disk and expose its path via the environment;
    # presumably this is where Hook.create_from_json_file() looks it up.
    json_path = tmpdir.join("config.json")
    json_path.write(get_json_config_full(str(out_dir)))
    monkeypatch.setenv(CONFIG_FILE_PATH_ENV_STR, str(json_path))

    hook = Hook.create_from_json_file()

    ended_before_run = has_training_ended(out_dir)
    assert ended_before_run is False

    run_xgboost_model(hook=hook)
def main():
    """Train an XGBoost booster from CSV feature/label files and pickle it.

    Steps:
      1. Parse command-line arguments via ``parse_args()``.
      2. Load ``train_features.csv`` / ``train_labels.csv`` from ``args.train``
         and ``val_features.csv`` / ``val_labels.csv`` from ``args.validation``
         (all read without a header row).
      3. Build ``xgboost.DMatrix`` objects, attach them to a hook created with
         ``Hook.create_from_json_file()``, and train with that hook as callback.
      4. Pickle the trained booster to ``<SM_MODEL_DIR>/model.bin``.

    Raises:
        TypeError: if the ``SM_MODEL_DIR`` environment variable is not set
            (the model path cannot be built from ``None``).
    """
    args = parse_args()

    train_features_path = os.path.join(args.train, 'train_features.csv')
    train_labels_path = os.path.join(args.train, 'train_labels.csv')

    val_features_path = os.path.join(args.validation, 'val_features.csv')
    val_labels_path = os.path.join(args.validation, 'val_labels.csv')

    print('Loading training dataframes...')
    df_train_features = pd.read_csv(train_features_path, header=None)
    df_train_labels = pd.read_csv(train_labels_path, header=None)

    print('Loading validation dataframes...')
    df_val_features = pd.read_csv(val_features_path, header=None)
    df_val_labels = pd.read_csv(val_labels_path, header=None)

    X = df_train_features.values
    # Flatten the label frame to a 1-D vector.
    y = df_train_labels.values.reshape(-1)

    val_X = df_val_features.values
    val_y = df_val_labels.values.reshape(-1)

    print('Train features shape: {}'.format(X.shape))
    print('Train labels shape: {}'.format(y.shape))
    print('Validation features shape: {}'.format(val_X.shape))
    print('Validation labels shape: {}'.format(val_y.shape))

    dtrain = xgboost.DMatrix(X, label=y)
    dval = xgboost.DMatrix(val_X, label=val_y)

    hook = Hook.create_from_json_file()
    # Presumably lets the hook inspect the datasets during training -- confirm
    # against the Hook implementation.
    hook.train_data = dtrain
    hook.validation_data = dval

    watchlist = [(dtrain, "train"), (dval, "validation")]

    # NOTE(review): newer XGBoost releases replace `silent` with `verbosity`;
    # confirm against the installed version.
    params = {
        "max_depth": args.max_depth,
        "eta": args.eta,
        "gamma": args.gamma,
        "min_child_weight": args.min_child_weight,
        "silent": args.silent,
        "objective": args.objective,
        "eval_metric": args.eval_metric
    }

    bst = xgboost.train(params=params,
                        dtrain=dtrain,
                        evals=watchlist,
                        num_boost_round=args.num_round,
                        callbacks=[hook])

    model_dir = os.environ.get('SM_MODEL_DIR')
    # Context manager guarantees the model file is flushed and closed, even if
    # pickling raises part-way through.
    with open(model_dir + '/model.bin', 'wb') as model_file:
        pkl.dump(bst, model_file)
Exemplo n.º 4
0
def main():
    """Train an XGBoost booster from a directory of CSV files and pickle it.

    Steps:
      1. Parse command-line arguments via ``parse_args()``.
      2. Glob every ``*.*`` file under ``args.train`` and ``args.validation``,
         read each with ``pd.read_csv`` and concatenate them per split.
      3. Use the ``Target`` column as the label and all remaining columns as
         features; build ``xgboost.DMatrix`` objects from them.
      4. Attach the matrices to a hook created with
         ``Hook.create_from_json_file()`` and train with that hook as callback.
      5. Pickle the trained booster to ``<SM_MODEL_DIR>/model.bin``.

    Raises:
        ValueError: from ``pd.concat`` if a split directory matches no files.
        TypeError: if the ``SM_MODEL_DIR`` environment variable is not set
            (the model path cannot be built from ``None``).
    """
    args = parse_args()
    train_files_path, validation_files_path = args.train, args.validation

    train_files_list = glob.glob(train_files_path + '/*.*')
    print(train_files_list)

    val_files_list = glob.glob(validation_files_path + '/*.*')
    print(val_files_list)

    print('Loading training data...')
    df_train = pd.concat(map(pd.read_csv, train_files_list))
    print('Loading validation data...')
    df_val = pd.concat(map(pd.read_csv, val_files_list))
    print('Data loading completed.')

    y = df_train.Target.values
    X = df_train.drop(['Target'], axis=1).values
    val_y = df_val.Target.values
    val_X = df_val.drop(['Target'], axis=1).values

    dtrain = xgboost.DMatrix(X, label=y)
    dval = xgboost.DMatrix(val_X, label=val_y)

    # NOTE(review): newer XGBoost releases replace `silent` with `verbosity`;
    # confirm against the installed version.
    params = {
        "max_depth": args.max_depth,
        "eta": args.eta,
        "gamma": args.gamma,
        "min_child_weight": args.min_child_weight,
        "silent": args.silent,
        "objective": args.objective,
        "num_class": args.num_class
    }

    hook = Hook.create_from_json_file()
    # Presumably lets the hook inspect the datasets during training -- confirm
    # against the Hook implementation.
    hook.train_data = dtrain
    hook.validation_data = dval

    watchlist = [(dtrain, "train"), (dval, "validation")]

    bst = xgboost.train(params=params,
                        dtrain=dtrain,
                        evals=watchlist,
                        num_boost_round=args.num_round,
                        callbacks=[hook])

    model_dir = os.environ.get('SM_MODEL_DIR')
    # Context manager guarantees the model file is flushed and closed, even if
    # pickling raises part-way through.
    with open(model_dir + '/model.bin', 'wb') as model_file:
        pkl.dump(bst, model_file)