示例#1
0
def test_verbose_int(verbose):
    """Count the lines ``cv`` and ``train`` write for the given ``verbose``.

    Each routine is run for 10 Logloss iterations inside ``LogStdout``
    wrapping a scratch file opened for writing (presumably stdout is
    captured there -- confirm against ``LogStdout``).  The number of lines
    found in that file afterwards must match the expectation for
    ``verbose``: 2 for ``5``, 0 for ``False`` and 10 for ``True``.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    lines_by_verbosity = {5: 2, False: 0, True: 10}
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    capture_path = 'test_data_dumps'

    # cv runs first, then train; both are held to the same expectation.
    for runner in (cv, train):
        with LogStdout(open(capture_path, 'w')):
            # A fresh parameter dict is built for every call.
            runner(
                pool,
                {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
                verbose=verbose
            )
        with open(capture_path, 'r') as captured:
            emitted = len(captured.readlines())
            assert emitted == lines_by_verbosity[verbose]

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#2
0
def test_cv_logging():
    """Run a 5-iteration Logloss ``cv`` that logs to ``JSON_LOG_PATH``.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
        "json_log": JSON_LOG_PATH,
    }
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    cv(train_pool, cv_params)

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#3
0
def test_cv_with_not_binarized_target():
    """Run a 5-iteration Logloss ``cv`` on the ``adult_not_binarized`` data.

    The pool is built from the ``train_small`` file and the ``train.cd``
    column description of that data set, and the run logs to
    ``JSON_LOG_PATH``.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    dataset_name = 'adult_not_binarized'
    learn_path = data_file(dataset_name, 'train_small')
    cd_path = data_file(dataset_name, 'train.cd')
    learn_pool = Pool(learn_path, column_description=cd_path)

    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
        "json_log": JSON_LOG_PATH,
    }
    cv(learn_pool, cv_params)

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#4
0
def test_verbose_int(verbose):
    """Verify the number of lines ``cv`` and ``train`` emit for ``verbose``.

    Both routines run 10 Logloss iterations inside ``LogStdout`` wrapping a
    dump file opened for writing (presumably capturing stdout -- confirm
    against ``LogStdout``).  After each run the dump file must contain 3
    lines for ``verbose=5``, none for ``False`` and 10 for ``True``.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    wanted_lines = {5: 3, False: 0, True: 10}
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    dump_name = 'test_data_dumps'

    def count_dumped_lines():
        # Re-open the dump for reading and tally its lines.
        with open(dump_name, 'r') as dump:
            return sum(1 for _ in dump)

    with LogStdout(open(dump_name, 'w')):
        cv(
            pool,
            {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
            verbose=verbose
        )
    assert count_dumped_lines() == wanted_lines[verbose]

    with LogStdout(open(dump_name, 'w')):
        train(
            pool,
            {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
            verbose=verbose
        )
    assert count_dumped_lines() == wanted_lines[verbose]

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#5
0
def test_eval_set():
    """Fit a small RMSE model twice, passing ``eval_set`` in two forms.

    The first fit receives a single ``(data, labels)`` tuple as
    ``eval_set``; the second receives a one-element list holding that same
    tuple.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    # Four rows that differ only in the first column (1..4).
    rows = [(first, 2, 3, 4) for first in (1, 2, 3, 4)]
    targets = [1, 2, 3, 4]
    train_pool = Pool(rows, targets, cat_features=[0, 3, 2])

    model_params = {
        'learning_rate': 1,
        'loss_function': 'RMSE',
        'iterations': 2,
        'random_seed': 0,
    }
    model = CatBoost(model_params)

    eval_pair = ([(5, 6, 6, 6), (6, 6, 6, 6)], [5, 6])

    # Bare tuple first, then the same tuple wrapped in a list.
    for eval_arg in (eval_pair, [eval_pair]):
        model.fit(train_pool, eval_set=eval_arg)

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#6
0
def test_eval_set():
    """Fit a small RMSE model twice with JSON logging to ``JSON_LOG_PATH``.

    ``eval_set`` is supplied first as a single ``(data, labels)`` tuple and
    then as a one-element list containing that tuple.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    features = [(1, 2, 3, 4), (2, 2, 3, 4), (3, 2, 3, 4), (4, 2, 3, 4)]
    targets = [1, 2, 3, 4]
    train_pool = Pool(features, targets, cat_features=[0, 3, 2])

    settings = {
        'learning_rate': 1,
        'loss_function': 'RMSE',
        'iterations': 2,
        'random_seed': 0,
        "json_log": JSON_LOG_PATH,
    }
    model = CatBoost(settings)

    holdout_features = [(5, 6, 6, 6), (6, 6, 6, 6)]
    holdout_targets = [5, 6]
    holdout = (holdout_features, holdout_targets)

    # First fit: eval_set given as a bare (data, labels) tuple.
    model.fit(train_pool, eval_set=holdout)

    # Second fit: the same tuple wrapped in a list.
    model.fit(train_pool, eval_set=[holdout])

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#7
0
def test_verbose_int():
    """Check the output line counts of ``cv`` and ``train`` per ``verbose``.

    ``cv`` and then ``train`` are each run for 10 Logloss iterations with
    ``verbose`` set to ``5``, ``False`` and ``True`` in turn, inside
    ``LogStdout`` wrapping a scratch file opened for writing (presumably
    capturing stdout -- confirm against ``LogStdout``).  The scratch file
    must then hold 2, 0 and 10 lines respectively.  Every ``train`` run
    writes its JSON log to its own numbered file.

    Returns a list with the result of ``local_canonical_file`` applied to
    ``remove_time_from_json`` of each of the three ``train`` log files.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    capture_path = 'test_data_dumps'

    # (verbose value, number of lines expected in the scratch file)
    scenarios = ((5, 2), (False, 0), (True, 10))

    def captured_line_count():
        with open(capture_path, 'r') as captured:
            return len(captured.readlines())

    for verbosity, wanted in scenarios:
        with LogStdout(open(capture_path, 'w')):
            cv(
                pool,
                {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
                verbose=verbosity
            )
        assert captured_line_count() == wanted

    # Insert the run index just before the last five characters of
    # JSON_LOG_PATH (presumably its ".json" suffix -- confirm).
    stem, tail = JSON_LOG_PATH[:-5], JSON_LOG_PATH[-5:]
    log_files = [stem + str(index) + tail for index in range(3)]

    for (verbosity, wanted), json_log in zip(scenarios, log_files):
        with LogStdout(open(capture_path, 'w')):
            train(
                pool,
                {
                    "iterations": 10,
                    "random_seed": 0,
                    "loss_function": "Logloss",
                    "json_log": json_log,
                },
                verbose=verbosity
            )
        assert captured_line_count() == wanted

    return [
        local_canonical_file(remove_time_from_json(json_log))
        for json_log in log_files
    ]
示例#8
0
def test_cv_with_not_binarized_target():
    """Run a 5-iteration Logloss ``cv`` on the ``adult_not_binarized`` data.

    The pool is built from the ``train_small`` file and the ``train.cd``
    column description of that data set.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    dataset_name = 'adult_not_binarized'
    learn_path = data_file(dataset_name, 'train_small')
    cd_path = data_file(dataset_name, 'train.cd')
    learn_pool = Pool(learn_path, column_description=cd_path)

    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
    }
    cv(learn_pool, cv_params)

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)
示例#9
0
def test_cv_logging():
    """Run a 5-iteration Logloss ``cv`` on the default training pool.

    Returns the result of ``local_canonical_file`` applied to
    ``remove_time_from_json(JSON_LOG_PATH)``.
    """
    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
    }
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    cv(train_pool, cv_params)

    processed_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(processed_log)