def test_verbose_int(verbose):
    """Check the number of output lines `cv` and `train` produce for `verbose`.

    Each routine is run for 10 Logloss iterations inside a ``LogStdout``
    context wrapping a scratch file; the scratch file is then read back and
    its line count compared with the expectation for the given ``verbose``
    value (5 -> 2 lines, False -> 0 lines, True -> 10 lines).

    ``verbose`` must be one of the keys of the expectation table, otherwise
    the lookup raises ``KeyError``.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed ``JSON_LOG_PATH``.
    """
    expected_line_count = {5: 2, False: 0, True: 10}
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    tmpfile = 'test_data_dumps'

    # Cross-validation first, then plain training: both use the same
    # parameters and are held to the same expected line count.
    for run in (cv, train):
        with LogStdout(open(tmpfile, 'w')):
            run(
                pool,
                {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
                verbose=verbose,
            )
        with open(tmpfile, 'r') as captured:
            line_count = len(captured.readlines())
        assert line_count == expected_line_count[verbose]

    # NOTE(review): neither call above passes "json_log", so this relies on
    # JSON_LOG_PATH being where the log is written by default -- confirm.
    return local_canonical_file(remove_time_from_json(JSON_LOG_PATH))
def test_cv_logging():
    """Run a 5-iteration Logloss cross-validation that logs to JSON_LOG_PATH.

    The pool is built from ``TRAIN_FILE`` with ``CD_FILE`` as its column
    description, and ``cv`` is told to write its JSON log to
    ``JSON_LOG_PATH``.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed log path.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
        "json_log": JSON_LOG_PATH,
    }
    cv(pool, cv_params)

    # Presumably strips timing data so the log is reproducible -- TODO confirm.
    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)
def test_cv_with_not_binarized_target():
    """Cross-validate with Logloss on the 'adult_not_binarized' data set.

    The pool is loaded from the ``train_small`` file with ``train.cd`` as its
    column description; ``cv`` runs for 5 iterations and is told to write
    its JSON log to ``JSON_LOG_PATH``.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed log path.
    """
    dataset_name = 'adult_not_binarized'
    train_file = data_file(dataset_name, 'train_small')
    cd = data_file(dataset_name, 'train.cd')
    pool = Pool(train_file, column_description=cd)

    cv_params = {
        "iterations": 5,
        "random_seed": 0,
        "loss_function": "Logloss",
        "json_log": JSON_LOG_PATH,
    }
    cv(pool, cv_params)

    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)
def test_verbose_int(verbose):
    """Check the number of output lines `cv` and `train` produce for `verbose`.

    Both routines are run for 10 Logloss iterations inside a ``LogStdout``
    context wrapping a scratch file. The number of lines found in that file
    afterwards must match the expectation for ``verbose``
    (5 -> 3 lines, False -> 0 lines, True -> 10 lines).

    ``verbose`` must be one of the keys of the expectation table, otherwise
    the lookup raises ``KeyError``.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed ``JSON_LOG_PATH``.
    """
    expected_line_count = {5: 3, False: 0, True: 10}
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    tmpfile = 'test_data_dumps'

    def captured_line_count(runner):
        # Run `runner` under LogStdout, then count the lines left in tmpfile.
        with LogStdout(open(tmpfile, 'w')):
            runner(
                pool,
                {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
                verbose=verbose,
            )
        with open(tmpfile, 'r') as captured:
            return len(list(captured))

    # The runs stay outside the assert statements so they still execute
    # when assertions are disabled.
    cv_lines = captured_line_count(cv)
    assert cv_lines == expected_line_count[verbose]

    train_lines = captured_line_count(train)
    assert train_lines == expected_line_count[verbose]

    # NOTE(review): neither call above passes "json_log", so this relies on
    # JSON_LOG_PATH being where the log is written by default -- confirm.
    return local_canonical_file(remove_time_from_json(JSON_LOG_PATH))
def test_eval_set():
    """Fit a small RMSE model with ``eval_set`` given in two different forms.

    A 4-row training pool (columns 0, 3 and 2 declared categorical) is fitted
    twice by the same 2-iteration model: first with ``eval_set`` as a single
    ``(data, labels)`` tuple, then with ``eval_set`` as a one-element list
    holding that tuple.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed ``JSON_LOG_PATH``.
    """
    train_rows = [(1, 2, 3, 4), (2, 2, 3, 4), (3, 2, 3, 4), (4, 2, 3, 4)]
    train_targets = [1, 2, 3, 4]
    train_pool = Pool(train_rows, train_targets, cat_features=[0, 3, 2])

    model_params = {
        'learning_rate': 1,
        'loss_function': 'RMSE',
        'iterations': 2,
        'random_seed': 0,
    }
    model = CatBoost(model_params)

    eval_rows = [(5, 6, 6, 6), (6, 6, 6, 6)]
    eval_targets = [5, 6]
    eval_pair = (eval_rows, eval_targets)

    # Form 1: a bare (data, labels) tuple.
    model.fit(train_pool, eval_set=eval_pair)
    # Form 2: a list of such tuples.
    model.fit(train_pool, eval_set=[eval_pair])

    # NOTE(review): the model is not given "json_log", so this relies on
    # JSON_LOG_PATH being where the log is written by default -- confirm.
    return local_canonical_file(remove_time_from_json(JSON_LOG_PATH))
def test_eval_set():
    """Fit a small RMSE model with ``eval_set`` given in two different forms.

    A 4-row training pool (columns 0, 3 and 2 declared categorical) is fitted
    twice by the same 2-iteration model, which is configured to write its
    JSON log to ``JSON_LOG_PATH``: first with ``eval_set`` as a single
    ``(data, labels)`` tuple, then with ``eval_set`` as a one-element list
    holding that tuple.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed log path.
    """
    train_rows = [(1, 2, 3, 4), (2, 2, 3, 4), (3, 2, 3, 4), (4, 2, 3, 4)]
    train_targets = [1, 2, 3, 4]
    train_pool = Pool(train_rows, train_targets, cat_features=[0, 3, 2])

    model_params = {
        'learning_rate': 1,
        'loss_function': 'RMSE',
        'iterations': 2,
        'random_seed': 0,
        'json_log': JSON_LOG_PATH,
    }
    model = CatBoost(model_params)

    eval_rows = [(5, 6, 6, 6), (6, 6, 6, 6)]
    eval_targets = [5, 6]
    eval_pair = (eval_rows, eval_targets)

    # Form 1: a bare (data, labels) tuple.
    model.fit(train_pool, eval_set=eval_pair)
    # Form 2: a list of such tuples.
    model.fit(train_pool, eval_set=[eval_pair])

    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)
def test_verbose_int():
    """Check the output line counts of `cv` and `train` for three verbose values.

    For ``verbose`` equal to 5, False and True (in that order) a 10-iteration
    Logloss run is executed inside a ``LogStdout`` context wrapping a scratch
    file, and the file must afterwards contain 2, 0 and 10 lines
    respectively. All three ``cv`` runs happen first, then the three
    ``train`` runs.

    Each ``train`` run writes its JSON log to its own file, whose name is
    ``JSON_LOG_PATH`` with the run index (0, 1, 2) inserted before the last
    five characters.

    Returns a list with one ``local_canonical_file`` result per train log,
    each built from the ``remove_time_from_json``-processed log file.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    tmpfile = 'test_data_dumps'

    # (verbose value, number of lines expected in the scratch file)
    cases = ((5, 2), (False, 0), (True, 10))

    def scratch_line_count():
        with open(tmpfile, 'r') as captured:
            return len(captured.readlines())

    for verbosity, expected_lines in cases:
        with LogStdout(open(tmpfile, 'w')):
            cv(
                pool,
                {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"},
                verbose=verbosity,
            )
        seen_lines = scratch_line_count()
        assert seen_lines == expected_lines

    # Splitting at -5 presumably targets a ".json" suffix -- TODO confirm
    # against the definition of JSON_LOG_PATH.
    stem, suffix = JSON_LOG_PATH[:-5], JSON_LOG_PATH[-5:]
    log_files = [stem + str(index) + suffix for index in range(3)]

    for (verbosity, expected_lines), log_file in zip(cases, log_files):
        with LogStdout(open(tmpfile, 'w')):
            train(
                pool,
                {
                    "iterations": 10,
                    "random_seed": 0,
                    "loss_function": "Logloss",
                    "json_log": log_file,
                },
                verbose=verbosity,
            )
        seen_lines = scratch_line_count()
        assert seen_lines == expected_lines

    return [
        local_canonical_file(remove_time_from_json(log_file))
        for log_file in log_files
    ]
def test_cv_with_not_binarized_target():
    """Cross-validate with Logloss on the 'adult_not_binarized' data set.

    The pool is loaded from the ``train_small`` file with ``train.cd`` as its
    column description, and ``cv`` runs for 5 iterations.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed ``JSON_LOG_PATH``.
    """
    dataset_name = 'adult_not_binarized'
    train_file = data_file(dataset_name, 'train_small')
    cd = data_file(dataset_name, 'train.cd')
    pool = Pool(train_file, column_description=cd)

    cv_params = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(pool, cv_params)

    # NOTE(review): cv is not given "json_log", so this relies on
    # JSON_LOG_PATH being where the log is written by default -- confirm.
    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)
def test_cv_logging():
    """Run a 5-iteration Logloss cross-validation and return its canonical log.

    The pool is built from ``TRAIN_FILE`` with ``CD_FILE`` as its column
    description.

    Returns the result of ``local_canonical_file`` applied to the
    ``remove_time_from_json``-processed ``JSON_LOG_PATH``.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    cv_params = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(pool, cv_params)

    # NOTE(review): cv is not given "json_log", so this relies on
    # JSON_LOG_PATH being where the log is written by default -- confirm.
    cleaned_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(cleaned_log)