Python local_canonical_file示例，catboost_pytest_lib.local_canonical_file Python示例

示例#1

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_shap():
    """Check that SHAP values for each object sum to the model's prediction.

    A one-iteration regressor is first fitted on a four-point pool and asked
    for ``ShapValues`` (the result is not inspected). A ten-iteration
    ``CatBoost`` model is then fitted on a small numeric dataset, and for every
    test object the sum of its SHAP row is compared to the prediction with a
    tolerance of 1e-9.

    Returns:
        The result of ``local_canonical_file(FIMP_PATH)`` after the SHAP values
        have been written there as text.
    """
    # Local import so this block is self-contained with respect to numpy.
    import numpy as np

    # First model: only exercises the ShapValues call; its output is unused.
    train_pool = Pool([[0, 0], [0, 1], [1, 0], [1, 1]], [0, 1, 5, 8], cat_features=[])
    test_pool = Pool([[0, 0], [0, 1], [1, 0], [1, 1]])
    model = CatBoostRegressor(iterations=1, random_seed=0, max_ctr_complexity=1, depth=2)
    model.fit(train_pool)
    model.get_feature_importance(test_pool, fstr_type='ShapValues')

    dataset = [(0.5, 1.2), (1.6, 0.5), (1.8, 1.0), (0.4, 0.6), (0.3, 1.6), (1.5, 0.2)]
    labels = [1.1, 1.85, 2.3, 0.7, 1.1, 1.6]
    train_pool = Pool(dataset, labels, cat_features=[])

    model = CatBoost({'iterations': 10, 'random_seed': 0, 'max_ctr_complexity': 1})
    model.fit(train_pool)

    testset = [(0.6, 1.2), (1.4, 0.3), (1.5, 0.8), (1.4, 0.6)]
    predictions = model.predict(testset)
    shap_values = model.get_feature_importance(Pool(testset), fstr_type='ShapValues')

    assert len(predictions) == len(shap_values)
    for prediction, shap_row in zip(predictions, shap_values):
        assert abs(sum(shap_row) - prediction) < 1e-9

    # A text-mode file's write() only accepts str, so the values are
    # serialised with np.savetxt instead of being passed to write() directly.
    np.savetxt(FIMP_PATH, shap_values)

    # Return the canonical-file object instead of discarding it.
    return local_canonical_file(FIMP_PATH)

示例#2

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_predict_class():
    """Fit a two-iteration classifier and save its ``Class`` predictions.

    Returns:
        The result of ``local_canonical_file(PREDS_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)
    holdout = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=2, random_seed=0)
    classifier.fit(learn)

    class_labels = classifier.predict(holdout, prediction_type="Class")
    np.save(PREDS_PATH, np.array(class_labels))
    return local_canonical_file(PREDS_PATH)

示例#3

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_ntree_limit():
    """Fit 100 iterations, then save probabilities predicted with ``ntree_end=10``.

    Returns:
        The result of ``local_canonical_file(PREDS_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)
    holdout = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=100, random_seed=0)
    classifier.fit(learn)

    probabilities = classifier.predict_proba(holdout, ntree_end=10)
    np.save(PREDS_PATH, np.array(probabilities))
    return local_canonical_file(PREDS_PATH)

示例#4

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_multiclass():
    """Fit a MultiClass model, round-trip it through ``save_model``/``load_model``,
    and save the reloaded model's probabilities on the training pool.

    Returns:
        The result of ``local_canonical_file(PREDS_PATH)``.
    """
    pool = Pool(CLOUDNESS_TRAIN_FILE, column_description=CLOUDNESS_CD_FILE)

    trained = CatBoostClassifier(
        iterations=2,
        random_seed=0,
        loss_function='MultiClass',
        thread_count=8,
    )
    trained.fit(pool)
    trained.save_model(OUTPUT_MODEL_PATH)

    # Predictions come from a fresh classifier loaded from the saved file.
    restored = CatBoostClassifier()
    restored.load_model(OUTPUT_MODEL_PATH)

    probabilities = restored.predict_proba(pool)
    np.save(PREDS_PATH, np.array(probabilities))
    return local_canonical_file(PREDS_PATH)

示例#5

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_staged_predict():
    """Collect every item yielded by ``staged_predict`` and save them as one array.

    Returns:
        The result of ``local_canonical_file(PREDS_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)
    holdout = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(learn)

    # Materialise the staged predictions in the order they are produced.
    staged = list(classifier.staged_predict(holdout))
    np.save(PREDS_PATH, np.array(staged))
    return local_canonical_file(PREDS_PATH)

示例#6

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_object_importances():
    """Fit an RMSE model and save the scores from ``get_object_importance``.

    Only the scores (second element of the returned pair) are written; the
    indices are ignored.

    Returns:
        The result of ``local_canonical_file(OIMP_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)
    target = Pool(TEST_FILE, column_description=CD_FILE)

    params = {'loss_function': 'RMSE', 'iterations': 10, 'random_seed': 0}
    booster = CatBoost(params)
    booster.fit(learn)

    _, scores = booster.get_object_importance(target, learn, top_size=10)
    np.savetxt(OIMP_PATH, scores)

    return local_canonical_file(OIMP_PATH)

示例#7

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_coreml_import_export():
    """Save a QueryRMSE model in ``coreml`` format, load it back, and require
    element-wise equal predictions on the test pool.

    Returns:
        The result of ``local_canonical_file(OUTPUT_COREML_MODEL_PATH)``.
    """
    learn = Pool(QUERYWISE_TRAIN_FILE, column_description=QUERYWISE_CD_FILE)
    holdout = Pool(QUERYWISE_TEST_FILE, column_description=QUERYWISE_CD_FILE)

    booster = CatBoost(params={
        'loss_function': 'QueryRMSE',
        'random_seed': 0,
        'iterations': 20,
        'thread_count': 8,
    })
    booster.fit(learn)
    booster.save_model(OUTPUT_COREML_MODEL_PATH, format="coreml")
    expected = booster.predict(holdout)

    reloaded = CatBoostRegressor()
    reloaded.load_model(OUTPUT_COREML_MODEL_PATH, format="coreml")
    actual = reloaded.predict(holdout)

    # Exact equality, not a tolerance comparison.
    assert all(expected == actual)
    return local_canonical_file(OUTPUT_COREML_MODEL_PATH)

示例#8

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_verbose_int(verbose):
    """Count the stdout lines printed by ``cv`` and ``train`` for a ``verbose`` value.

    Args:
        verbose: Value forwarded as ``verbose=``; must be a key of the
            expected-count table below (5, False or True).

    Returns:
        The result of ``local_canonical_file(remove_time_from_json(JSON_LOG_PATH))``.
    """
    expected_line_count = {5: 3, False: 0, True: 10}
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    tmpfile = 'test_data_dumps'

    # The same capture-and-count check is applied to cv first, then train.
    for runner in (cv, train):
        with LogStdout(open(tmpfile, 'w')):
            runner(pool, {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"}, verbose=verbose)
        with open(tmpfile, 'r') as captured:
            line_total = sum(1 for _ in captured)
            assert line_total == expected_line_count[verbose]

    return local_canonical_file(remove_time_from_json(JSON_LOG_PATH))

示例#9

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_eval_set():
    """Fit twice with ``eval_set`` given first as a ``(data, labels)`` tuple and
    then as a one-element list holding that tuple.

    Returns:
        The result of ``local_canonical_file(remove_time_from_json(JSON_LOG_PATH))``.
    """
    features = [(1, 2, 3, 4), (2, 2, 3, 4), (3, 2, 3, 4), (4, 2, 3, 4)]
    targets = [1, 2, 3, 4]
    train_pool = Pool(features, targets, cat_features=[0, 3, 2])

    params = {'learning_rate': 1, 'loss_function': 'RMSE', 'iterations': 2, 'random_seed': 0}
    model = CatBoost(params)

    eval_pair = ([(5, 6, 6, 6), (6, 6, 6, 6)], [5, 6])

    # Bare tuple first, then the same tuple wrapped in a list.
    for eval_set in (eval_pair, [eval_pair]):
        model.fit(train_pool, eval_set=eval_set)

    return local_canonical_file(remove_time_from_json(JSON_LOG_PATH))

示例#10

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_weights_without_bootstrap(boosting_type):
    """Fit on the ``adult_weight`` data with ``--bootstrap-type No`` and apply the model.

    Args:
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output, called with ``diff_tool=diff_tool()``.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')
    column_desc = data_file('adult_weight', 'train.cd')
    test_data = data_file('adult_weight', 'test_weight')
    train_data = data_file('adult_weight', 'train_weight')

    fit_args = {
        '--use-best-model': 'false',
        '--loss-function': 'Logloss',
        '-f': train_data,
        '-t': test_data,
        '--column-description': column_desc,
        '--boosting-type': boosting_type,
        '-i': '10',
        '-w': '0.03',
        '-T': '4',
        '-r': '0',
        '--bootstrap-type': 'No',
        '-m': model_out,
    }
    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out, diff_tool=diff_tool())
    return [canonical]

示例#11

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_logloss_with_not_binarized_target(boosting_type):
    """Fit Logloss on the ``adult_not_binarized`` data and apply the model.

    Args:
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')
    column_desc = data_file('adult_not_binarized', 'train.cd')
    test_data = data_file('adult_not_binarized', 'test_small')
    train_data = data_file('adult_not_binarized', 'train_small')

    fit_args = {
        '--use-best-model': 'false',
        '--loss-function': 'Logloss',
        '-f': train_data,
        '-t': test_data,
        '--column-description': column_desc,
        '--boosting-type': boosting_type,
        '-i': '10',
        '-w': '0.03',
        '-T': '4',
        '-r': '0',
        '-m': model_out,
    }
    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#12

0

显示文件

def test_ctr_type(ctr_type, boosting_type):
    """Fit RMSE on the ``adult_crossentropy`` data with a given ``--ctr`` and apply the model.

    Args:
        ctr_type: Value passed for ``--ctr``.
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    column_desc = data_file('adult_crossentropy', 'train.cd')
    test_data = data_file('adult_crossentropy', 'test_proba')
    train_data = data_file('adult_crossentropy', 'train_proba')

    # Flat tuple of alternating flag / value entries.
    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'RMSE',
        '-f', train_data,
        '-t', test_data,
        '--column-description', column_desc,
        '--boosting-type', boosting_type,
        '-i', '3',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--ctr', ctr_type,
    )
    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#13

0

显示文件

def test_has_time(boosting_type):
    """Fit Logloss on the ``adult`` data with the ``--has-time`` flag and apply the model.

    Args:
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')
    column_desc = data_file('adult', 'train.cd')
    test_data = data_file('adult', 'test_small')
    train_data = data_file('adult', 'train_small')

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '-t', test_data,
        '--column-description', column_desc,
        '--boosting-type', boosting_type,
        '-i', '10',
        '-w', '0.03',
        '-T', '4',
        '-r', '0',
        '--has-time',  # value-less switch
        '-m', model_out,
    )
    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#14

0

显示文件

文件： test.py 项目： PashaSmirnov/catboost

def test_verbose_int():
    """Count stdout lines from ``cv`` and ``train`` for ``verbose`` in 5, False, True.

    Each ``train`` run writes to its own JSON log, whose path is
    ``JSON_LOG_PATH`` with the run index inserted before its last five
    characters.

    Returns:
        A list with one ``local_canonical_file(remove_time_from_json(...))``
        result per JSON log, in run order.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    tmpfile = 'test_data_dumps'

    # (verbose value, expected number of captured stdout lines)
    cases = ((5, 2), (False, 0), (True, 10))

    for verbosity, expected_lines in cases:
        with LogStdout(open(tmpfile, 'w')):
            cv(pool, {"iterations": 10, "random_seed": 0, "loss_function": "Logloss"}, verbose=verbosity)
        with open(tmpfile, 'r') as captured:
            assert sum(1 for _ in captured) == expected_lines

    stem, tail = JSON_LOG_PATH[:-5], JSON_LOG_PATH[-5:]
    log_files = [stem + str(index) + tail for index in range(3)]

    for (verbosity, expected_lines), json_log in zip(cases, log_files):
        with LogStdout(open(tmpfile, 'w')):
            train(pool, {"iterations": 10, "random_seed": 0, "loss_function": "Logloss", "json_log": json_log}, verbose=verbosity)
        with open(tmpfile, 'r') as captured:
            assert sum(1 for _ in captured) == expected_lines

    return [local_canonical_file(remove_time_from_json(path)) for path in log_files]

示例#15

0

显示文件

def test_feature_id_fstr():
    """Run ``fit`` and then ``fstr`` through the CatBoost binary on the ``adult`` data.

    The ``fstr`` step uses a different column description
    (``adult_with_id.cd``) from the one used for fitting.

    Returns:
        The result of ``local_canonical_file`` for the fstr output file.
    """
    model_path = yatest.common.test_output_path('adult_model.bin')
    output_fstr_path = yatest.common.test_output_path('fstr.tsv')
    train_data = data_file('adult', 'train_small')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '--column-description', data_file('adult', 'train.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_path,
    )
    yatest.common.execute(fit_cmd)

    fstr_cmd = (
        CATBOOST_PATH, 'fstr',
        '--input-path', train_data,
        '--column-description', data_file('adult_with_id.cd'),
        '-m', model_path,
        '-o', output_fstr_path,
    )
    yatest.common.execute(fstr_cmd)

    return local_canonical_file(output_fstr_path)

示例#16

0

显示文件

文件： test.py 项目： supr1921/catboost

def test_class_weight_with_lost_class():
    """Fit MultiClass on ``cloudness_lost_class`` with three classes and explicit class weights.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    dataset = 'cloudness_lost_class'
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'MultiClass',
        '-f', data_file(dataset, 'train_small'),
        '-t', data_file(dataset, 'test_small'),
        '--column-description', data_file(dataset, 'train.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
        '--classes-count', '3',
        '--class-weights', '0.5,2,2',
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#17

0

显示文件

文件： test.py 项目： supr1921/catboost

def test_meta():
    """Fit RMSE on the ``no_split`` data with ``--name 'test experiment'``.

    The file passed on is ``meta.tsv`` given as a relative path, not the eval
    output.

    Returns:
        A one-element list with the result of ``local_canonical_file('meta.tsv')``.
    """
    dataset = 'no_split'
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')
    meta_file = 'meta.tsv'

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'RMSE',
        '-f', data_file(dataset, 'train_full3'),
        '-t', data_file(dataset, 'test3'),
        '--column-description', data_file(dataset, 'train_full3.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
        '--name', 'test experiment',
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(meta_file)
    return [canonical]

示例#18

0

显示文件

文件： test.py 项目： supr1921/catboost

def test_multi_leaf_estimation_method(leaf_estimation_method):
    """Fit MultiClass on ``cloudness_small`` with a given leaf estimation method.

    Args:
        leaf_estimation_method: Value passed for ``--leaf-estimation-method``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    dataset = 'cloudness_small'
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'MultiClass',
        '-f', data_file(dataset, 'train_small'),
        '-t', data_file(dataset, 'test_small'),
        '--column-description', data_file(dataset, 'train.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
        '--leaf-estimation-method', leaf_estimation_method,
        '--gradient-iterations', '2',
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#19

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_nan_mode(nan_mode, boosting_type):
    """Fit on the ``adult_nan`` data with a given ``--nan-mode`` and apply the model.

    Args:
        nan_mode: Value passed for ``--nan-mode``.
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')
    test_data = data_file('adult_nan', 'test_small')
    column_desc = data_file('adult_nan', 'train.cd')
    train_data = data_file('adult_nan', 'train_small')

    fit_args = {
        '--use-best-model': 'false',
        '-f': train_data,
        '-t': test_data,
        '--column-description': column_desc,
        '--boosting-type': boosting_type,
        '-i': '20',
        '-T': '4',
        '-r': '0',
        '-m': model_out,
        '--nan-mode': nan_mode,
    }

    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#20

0

显示文件

文件： test_gpu.py 项目： chunziwang/catboost

def test_custom_priors(boosting_type):
    """Fit Logloss on the ``adult`` data with explicit ``--ctr`` and
    ``--per-feature-ctr`` prior specifications, then apply the model.

    Args:
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    test_data = data_file('adult', 'test_small')
    column_desc = data_file('adult', 'train.cd')
    train_data = data_file('adult', 'train_small')

    # Each spec is a single argument assembled from adjacent string literals.
    ctr_spec = (
        'Borders:Prior=-2:Prior=0:Prior=8/3:Prior=1:Prior=-1:Prior=3,'
        'FeatureFreq:Prior=0'
    )
    per_feature_ctr_spec = (
        '4:Borders:Prior=0.444,FeatureFreq:Prior=0.444;'
        '6:Borders:Prior=0.666,FeatureFreq:Prior=0.666;'
        '8:Borders:Prior=-0.888:Prior=2/3,FeatureFreq:Prior=-0.888:Prior=0.888'
    )

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '-t', test_data,
        '--column-description', column_desc,
        '--boosting-type', boosting_type,
        '-i', '10',
        '-w', '0.03',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--ctr', ctr_spec,
        '--per-feature-ctr', per_feature_ctr_spec,
    )

    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#21

0

显示文件

文件： test_gpu.py 项目： erikalien5595/catboost

def test_fold_len_mult():
    """Fit an Ordered-boosting Logloss model with ``--fold-len-multiplier 1.2``
    on the ``adult_not_binarized`` data, then apply the model.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    output_model_path = yatest.common.test_output_path('model.bin')
    output_eval_path = yatest.common.test_output_path('test.eval')
    cd_file = data_file('adult_not_binarized', 'train.cd')
    test_file = data_file('adult_not_binarized', 'test_small')
    params = {
        '--use-best-model': 'false',
        '--loss-function': 'Logloss',
        '-f': data_file('adult_not_binarized', 'train_small'),
        '-t': test_file,
        '--column-description': cd_file,
        '--boosting-type': 'Ordered',
        '-i': '10',
        '-T': '4',
        '-r': '0',
        # Given as a string like every other command-line value, so the test
        # does not depend on the fit helper converting a float for it.
        '--fold-len-multiplier': '1.2',
        '-m': output_model_path,
    }
    fit_catboost_gpu(params)
    apply_catboost(output_model_path, test_file, cd_file, output_eval_path)

    return [local_canonical_file(output_eval_path)]

示例#22

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_crossentropy(boosting_type):
    """Fit CrossEntropy on the ``adult_crossentropy`` data and apply the model.

    Args:
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    column_desc = data_file('adult_crossentropy', 'train.cd')
    test_data = data_file('adult_crossentropy', 'test_proba')
    train_data = data_file('adult_crossentropy', 'train_proba')

    fit_args = {
        '--loss-function': 'CrossEntropy',
        '-f': train_data,
        '-t': test_data,
        '--column-description': column_desc,
        '--boosting-type': boosting_type,
        '-i': '10',
        '-w': '0.03',
        '-T': '4',
        '-r': '0',
        '-m': model_out,
    }

    fit_catboost_gpu(fit_args)
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#23

0

显示文件

文件： test.py 项目： ntopi/catboost

def test_pairlogit_approx_on_full_history():
    """Fit PairLogit on the ``zen`` data with learn/test pairs and
    ``--approx-on-full-history``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    dataset = 'zen'
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'PairLogit',
        '-f', data_file(dataset, 'learn_small.tsv'),
        '-t', data_file(dataset, 'test_small.tsv'),
        '--column-description', data_file(dataset, 'zen.cd'),
        '--learn-pairs', data_file(dataset, 'learn_pairs.tsv'),
        '--test-pairs', data_file(dataset, 'test_pairs.tsv'),
        '--approx-on-full-history',  # value-less switch
        '-i', '20',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#24

0

显示文件

文件： test.py 项目： zhouyonglong/catboost

def test_custom_priors():
    """Fit Logloss on the ``adult`` data with the ``--priors``, ``--ctr-priors``,
    ``--feature-priors`` and ``--feature-ctr-priors`` options.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    dataset = 'adult'
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', data_file(dataset, 'train_small'),
        '-t', data_file(dataset, 'test_small'),
        '--column-description', data_file(dataset, 'train.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--priors', '-2:0:8:1:-1:3',
        '--ctr-priors', '0:0.111,1:0.222',
        '--feature-priors', '4:0.444,6:0.666,8:-0.888:0.888',
        '--feature-ctr-priors', '4:0:0.4040,8:1:0.8181',
        '--eval-file', eval_out,
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#25

0

显示文件

def test_shap():
    """Check that SHAP values for each object sum to the model's prediction.

    A one-iteration regressor is first asked for ``EFstrType.ShapValues`` on a
    four-point pool (result not inspected). A ten-iteration model is then
    fitted, and each SHAP row sum is compared to its prediction within 1e-9.

    Returns:
        The result of ``local_canonical_file(FIMP_TXT_PATH)`` after the SHAP
        values have been written with ``np.savetxt``.
    """
    corner_points = [[0, 0], [0, 1], [1, 0], [1, 1]]
    train_pool = Pool(corner_points, [0, 1, 5, 8], cat_features=[])
    test_pool = Pool([[0, 0], [0, 1], [1, 0], [1, 1]])
    regressor = CatBoostRegressor(iterations=1, random_seed=0, max_ctr_complexity=1, depth=2)
    regressor.fit(train_pool)
    shap_values = regressor.get_feature_importance(fstr_type=EFstrType.ShapValues, data=test_pool)

    features = [(0.5, 1.2), (1.6, 0.5), (1.8, 1.0), (0.4, 0.6), (0.3, 1.6), (1.5, 0.2)]
    targets = [1.1, 1.85, 2.3, 0.7, 1.1, 1.6]
    train_pool = Pool(features, targets, cat_features=[])

    booster = CatBoost({'iterations': 10, 'random_seed': 0, 'max_ctr_complexity': 1})
    booster.fit(train_pool)

    testset = [(0.6, 1.2), (1.4, 0.3), (1.5, 0.8), (1.4, 0.6)]
    predictions = booster.predict(testset)
    shap_values = booster.get_feature_importance(fstr_type=EFstrType.ShapValues, data=Pool(testset))

    object_count = len(predictions)
    assert object_count == len(shap_values)
    for idx in range(object_count):
        deviation = abs(sum(shap_values[idx]) - predictions[idx])
        assert deviation < 1e-9

    np.savetxt(FIMP_TXT_PATH, shap_values)
    return local_canonical_file(FIMP_TXT_PATH)

示例#26

0

显示文件

文件： test.py 项目： tikyau/catboost

def test_only_categorical_features():
    """Fit Logloss on the ``adult`` data using the ``adult_all_categorical.cd``
    column description.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', data_file('adult', 'train_small'),
        '-t', data_file('adult', 'test_small'),
        '--column-description', data_file('adult_all_categorical.cd'),
        '-i', '100',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
        '-x', '1',
        '-n', '8',
        '-w', '0.1',
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#27

0

显示文件

def test_eval_set():
    """Fit the same model twice: with ``eval_set`` as a ``(data, labels)`` tuple,
    then as a list containing that tuple.

    Returns:
        The result of ``local_canonical_file(remove_time_from_json(JSON_LOG_PATH))``.
    """
    rows = [(1, 2, 3, 4), (2, 2, 3, 4), (3, 2, 3, 4), (4, 2, 3, 4)]
    train_pool = Pool(rows, [1, 2, 3, 4], cat_features=[0, 3, 2])

    model = CatBoost({'learning_rate': 1, 'loss_function': 'RMSE', 'iterations': 2, 'random_seed': 0})

    holdout_rows = [(5, 6, 6, 6), (6, 6, 6, 6)]
    holdout_labels = [5, 6]
    single_eval = (holdout_rows, holdout_labels)

    # Tuple form.
    model.fit(train_pool, eval_set=single_eval)

    # List-of-tuples form.
    model.fit(train_pool, eval_set=[single_eval])

    json_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(json_log)

示例#28

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_fstr(fstr_type, boosting_type):
    """Fit with ``fit_catboost_gpu`` and then compute feature strength with
    ``fstr_catboost_cpu`` on the ``adult`` data.

    Args:
        fstr_type: Value passed for ``--fstr-type``; when it equals
            ``'ShapValues'`` the fit also gets ``--max-ctr-complexity 1``.
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        The result of ``local_canonical_file`` for the fstr output file.
    """
    model_path = yatest.common.test_output_path('adult_model.bin')
    output_fstr_path = yatest.common.test_output_path('fstr.tsv')

    train_data = data_file('adult', 'train_small')
    column_desc = data_file('adult', 'train.cd')

    fit_params = (
        '--use-best-model', 'false',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '--column-description', column_desc,
        '--boosting-type', boosting_type,
        '-i', '10',
        '-w', '0.03',
        '-T', '4',
        '-r', '0',
        '--one-hot-max-size', '10',
        '-m', model_path,
    )
    if fstr_type == 'ShapValues':
        fit_params = fit_params + ('--max-ctr-complexity', '1')

    fit_catboost_gpu(fit_params)

    fstr_params = (
        '--input-path', train_data,
        '--column-description', column_desc,
        '-m', model_path,
        '-o', output_fstr_path,
        '--fstr-type', fstr_type,
    )
    fstr_catboost_cpu(fstr_params)

    return local_canonical_file(output_fstr_path)

示例#29

0

显示文件

文件： large_dist_test.py 项目： zgj-gutou/catboost

def test_dist_train_many_trees(dev_score_calc_obj_block_size):
    """Run a 1000-iteration Logloss fit on the ``higgs`` data through
    ``execute_dist_train``.

    Args:
        dev_score_calc_obj_block_size: Value passed for
            ``--dev-score-calc-obj-block-size``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    train_data = data_file('higgs', 'train_small')
    test_data = data_file('higgs', 'test_small')
    column_desc = data_file('higgs', 'train.cd')

    base_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '-t', test_data,
        '--column-description', column_desc,
        '-i', '1000',
        '-w', '0.03',
        '-T', '4',
        '--random-strength', '0',
        '--has-time',
        '--bootstrap-type', 'No',
        '--dev-score-calc-obj-block-size', dev_score_calc_obj_block_size,
    )

    eval_path = yatest.common.test_output_path('test.eval')
    # The eval-file option is appended to the base command for this run.
    full_cmd = base_cmd + ('--eval-file', eval_path)
    execute_dist_train(full_cmd)

    canonical = local_canonical_file(eval_path)
    return [canonical]

示例#30

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_quantized_pool(loss_function, boosting_type):
    """Fit on ``quantized://``-prefixed ``quantized_adult`` pools, then apply the
    model to the ``test_small.tsv`` file with ``pool.cd``.

    Args:
        loss_function: Value passed for ``--loss-function``.
        boosting_type: Value passed for ``--boosting-type``.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    scheme = 'quantized://'
    quantized_train = scheme + data_file('quantized_adult', 'train.qbin')
    quantized_test = scheme + data_file('quantized_adult', 'test.qbin')

    fit_args = (
        '--use-best-model', 'false',
        '--loss-function', loss_function,
        '-f', quantized_train,
        '-t', quantized_test,
        '--boosting-type', boosting_type,
        '-i', '10',
        '-w', '0.03',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
    )
    fit_catboost_gpu(fit_args)

    column_desc = data_file('quantized_adult', 'pool.cd')
    test_data = data_file('quantized_adult', 'test_small.tsv')
    apply_catboost(model_out, test_data, column_desc, eval_out)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#31

0

显示文件

文件： test.py 项目： tikyau/catboost

def test_custom_priors():
    """Fit Logloss on the ``adult`` data with explicit ``--ctr`` and
    ``--per-feature-ctr`` prior specifications.

    Returns:
        A one-element list with the result of ``local_canonical_file`` for the
        eval output.
    """
    model_out = yatest.common.test_output_path('model.bin')
    eval_out = yatest.common.test_output_path('test.eval')

    train_data = data_file('adult', 'train_small')
    test_data = data_file('adult', 'test_small')
    column_desc = data_file('adult', 'train.cd')

    # Each spec is a single argument assembled from adjacent string literals.
    ctr_spec = (
        'Borders:Prior=-2:Prior=0:Prior=8:Prior=1:Prior=-1:Prior=3,'
        'Counter:Prior=0'
    )
    per_feature_ctr_spec = (
        '4:Borders:Prior=0.444,Counter:Prior=0.444;'
        '6:Borders:Prior=0.666,Counter:Prior=0.666;'
        '8:Borders:Prior=-0.888:Prior=0.888,Counter:Prior=-0.888:Prior=0.888'
    )

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', train_data,
        '-t', test_data,
        '--column-description', column_desc,
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--ctr', ctr_spec,
        '--per-feature-ctr', per_feature_ctr_spec,
        '--eval-file', eval_out,
    )
    yatest.common.execute(fit_cmd)

    canonical = local_canonical_file(eval_out)
    return [canonical]

示例#32

0

显示文件

文件： test.py 项目： tikyau/catboost

def test_weight_sampling_per_tree():
    """Fit Logloss on the ``adult`` data with ``--weight-sampling-frequency PerTree``.

    Learn and test error logs are requested on the command line, but only the
    eval output is passed on.

    Returns:
        The result of ``local_canonical_file`` for the eval output (not wrapped
        in a list).
    """
    out = yatest.common.test_output_path
    model_out = out('model.bin')
    eval_out = out('test.eval')
    learn_err_log = out('learn_error.tsv')
    test_err_log = out('test_error.tsv')

    fit_cmd = (
        CATBOOST_PATH, 'fit',
        '--loss-function', 'Logloss',
        '-f', data_file('adult', 'train_small'),
        '-t', data_file('adult', 'test_small'),
        '--column-description', data_file('adult', 'train.cd'),
        '-i', '10',
        '-T', '4',
        '-r', '0',
        '-m', model_out,
        '--eval-file', eval_out,
        '--learn-err-log', learn_err_log,
        '--test-err-log', test_err_log,
        '--weight-sampling-frequency', 'PerTree',
    )
    yatest.common.execute(fit_cmd)
    return local_canonical_file(eval_out)

示例#33

0

显示文件

文件： test.py 项目： iamnik13/catboost

def compare_canonical_models(*args, **kwargs):
    """Call ``local_canonical_file`` with ``diff_tool=model_diff_tool``.

    All positional and keyword arguments are forwarded unchanged.
    """
    canonical = local_canonical_file(*args, diff_tool=model_diff_tool, **kwargs)
    return canonical

示例#34

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_python_export_no_cat_features():
    """Fit a two-iteration RMSE model on the querywise data and save it with
    ``format="python"``.

    Returns:
        The result of ``local_canonical_file(OUTPUT_PYTHON_MODEL_PATH)``.
    """
    learn = Pool(QUERYWISE_TRAIN_FILE, column_description=QUERYWISE_CD_FILE)

    params = {'iterations': 2, 'random_seed': 0, 'loss_function': 'RMSE'}
    booster = CatBoost(params)
    booster.fit(learn)

    booster.save_model(OUTPUT_PYTHON_MODEL_PATH, format="python")
    return local_canonical_file(OUTPUT_PYTHON_MODEL_PATH)

示例#35

0

显示文件

文件： test.py 项目： shodanium/catboost

def compare_canonical_models(*args, **kwargs):
    """Forward every argument to ``local_canonical_file``, adding
    ``diff_tool=model_diff_tool``.
    """
    result = local_canonical_file(*args, diff_tool=model_diff_tool, **kwargs)
    return result

示例#36

0

显示文件

文件： test.py 项目： shodanium/catboost

def test_feature_importance():
    """Fit a five-iteration classifier and save its ``feature_importances_``.

    Returns:
        The result of ``local_canonical_file(FIMP_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(learn)

    importances = np.array(classifier.feature_importances_)
    np.save(FIMP_PATH, importances)
    return local_canonical_file(FIMP_PATH)

示例#37

0

显示文件

def test_cv_with_not_binarized_target():
    """Run five-iteration Logloss ``cv`` on the ``adult_not_binarized`` training data.

    Returns:
        The result of ``local_canonical_file(remove_time_from_json(JSON_LOG_PATH))``.
    """
    dataset = 'adult_not_binarized'
    learn_path = data_file(dataset, 'train_small')
    column_desc = data_file(dataset, 'train.cd')

    pool = Pool(learn_path, column_description=column_desc)
    cv_params = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(pool, cv_params)

    json_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(json_log)

示例#38

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_python_export_with_cat_features():
    """Fit a 20-iteration model on ``TRAIN_FILE`` and save it with ``format="python"``.

    Returns:
        The result of ``local_canonical_file(OUTPUT_PYTHON_MODEL_PATH)``.
    """
    learn = Pool(TRAIN_FILE, column_description=CD_FILE)

    params = {'iterations': 20, 'random_seed': 0}
    booster = CatBoost(params)
    booster.fit(learn)

    booster.save_model(OUTPUT_PYTHON_MODEL_PATH, format="python")
    return local_canonical_file(OUTPUT_PYTHON_MODEL_PATH)

示例#39

0

显示文件

文件： test.py 项目： kevin3ma/catboost

def test_one_doc_feature_importance():
    """Save the ``'Doc'`` feature importance computed for an all-ones vector.

    The vector has ``pool.num_col()`` integer entries and is passed with the
    pool's categorical feature indices.

    Returns:
        The result of ``local_canonical_file(FIMP_PATH)``.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(pool)

    ones_document = np.ones(pool.num_col(), dtype=int)
    doc_importance = classifier.get_feature_importance(
        ones_document,
        0,
        cat_features=pool.get_cat_feature_indices(),
        fstr_type='Doc',
    )
    np.save(FIMP_PATH, np.array(doc_importance))
    return local_canonical_file(FIMP_PATH)

示例#40

0

显示文件

def test_cv_logging():
    """Run five-iteration Logloss ``cv`` on ``TRAIN_FILE``.

    Returns:
        The result of ``local_canonical_file(remove_time_from_json(JSON_LOG_PATH))``.
    """
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)

    cv_params = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(pool, cv_params)

    json_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(json_log)

示例#41

0

显示文件

文件： test_gpu.py 项目： tanmaypandey7/catboost

def test_train_on_binarized_equal_train_on_float(boosting_type, qwise_loss):
    """Check that a fit driven by saved borders predicts like the original fit.

    The first GPU fit on the 'querywise' data is given an
    ``--output-borders-file``; the second fit receives that file as
    ``--input-borders-file``. Both models are applied to the learn and test
    files and the prediction files must compare equal with ``filecmp.cmp``.

    Args:
        boosting_type: value passed as ``--boosting-type``.
        qwise_loss: value passed as ``--loss-function``.

    Returns:
        A list of canonical-file descriptors for the learn/test error logs,
        the first model's test and learn predictions, and the borders file
        written by the second fit.
    """
    out_path = yatest.common.test_output_path

    model_float = out_path('model.bin')
    model_binarized = out_path('model_binarized.bin')
    test_err_log = out_path('test_error.tsv')
    learn_err_log = out_path('learn_error.tsv')

    borders = out_path('borders.tsv')
    borders_first_fit = borders + '.out'
    learn_preds = out_path('predictions_learn.tsv')
    learn_preds_binarized = out_path('predictions_learn_binarized.tsv')
    test_preds = out_path('predictions_test.tsv')
    test_preds_binarized = out_path('predictions_test_binarized.tsv')

    learn_file = data_file('querywise', 'train')
    cd_file = data_file('querywise', 'train.cd')
    test_file = data_file('querywise', 'test')

    params = {
        "--loss-function": qwise_loss,
        "-f": learn_file,
        "-t": test_file,
        '--column-description': cd_file,
        '--boosting-type': boosting_type,
        '-i': '100',
        '-T': '4',
        '-r': '0',
        '-m': model_float,
        '--learn-err-log': learn_err_log,
        '--test-err-log': test_err_log,
        '--use-best-model': 'false',
        '--output-borders-file': borders_first_fit,
    }

    # Second run: same options, but read the borders produced by the first run,
    # write borders to the final path and save the model under another name.
    params_binarized = dict(params)
    params_binarized.update([
        ('--input-borders-file', borders_first_fit),
        ('--output-borders-file', borders),
        ('-m', model_binarized),
    ])

    fit_catboost_gpu(params)
    for dataset, preds in ((learn_file, learn_preds), (test_file, test_preds)):
        apply_catboost(model_float, dataset, cd_file, preds)

    # The error logs already exist after the first fit_catboost_gpu() call and
    # would be automatically marked as input_data for the YT operation. Input
    # files are read-only there, which would make the second fit fail, so they
    # are explicitly dropped from input_data (and thereby implicitly treated
    # as output_data).
    rewritable_logs = {
        learn_err_log: None,
        test_err_log: None
    }
    fit_catboost_gpu(params_binarized, input_data=rewritable_logs)

    for dataset, preds in ((learn_file, learn_preds_binarized),
                           (test_file, test_preds_binarized)):
        apply_catboost(model_binarized, dataset, cd_file, preds)

    assert filecmp.cmp(learn_preds, learn_preds_binarized)
    assert filecmp.cmp(test_preds, test_preds_binarized)

    canonized_paths = (
        learn_err_log,
        test_err_log,
        test_preds,
        learn_preds,
        borders,
    )
    return [
        local_canonical_file(path, diff_tool=diff_tool())
        for path in canonized_paths
    ]

示例#42

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_shap_feature_importance():
    """Canonize the 'ShapValues' feature importance of a small classifier.

    The classifier (5 iterations, seed 0, ``max_ctr_complexity=1``) is trained
    on the pool built from ``TRAIN_FILE`` / ``CD_FILE`` and the values are
    computed on that same pool, then saved with ``np.save`` at ``FIMP_PATH``.
    """
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=5, random_seed=0, max_ctr_complexity=1)
    classifier.fit(train_pool)

    shap_values = classifier.get_feature_importance(train_pool, fstr_type='ShapValues')
    np.save(FIMP_PATH, np.array(shap_values))
    return local_canonical_file(FIMP_PATH)

示例#43

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_cv_logging():
    """Cross-validate with Logloss for 5 iterations and canonize the JSON log.

    The path given to ``local_canonical_file`` is whatever
    ``remove_time_from_json(JSON_LOG_PATH)`` returns.
    """
    dataset = Pool(TRAIN_FILE, column_description=CD_FILE)
    settings = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(dataset, settings)

    stable_log = remove_time_from_json(JSON_LOG_PATH)
    canonical = local_canonical_file(stable_log)
    return canonical

示例#44

0

显示文件

文件： test.py 项目： Xiaodingdangguaiguai/catboost

def test_cv_with_not_binarized_target():
    """Cross-validate with Logloss on 'adult_not_binarized' and canonize the log.

    Uses the 'train_small' file and 'train.cd' column description of that data
    set; the log at ``JSON_LOG_PATH`` goes through ``remove_time_from_json``
    before canonization.
    """
    folder = 'adult_not_binarized'
    train_small_path = data_file(folder, 'train_small')
    column_description_path = data_file(folder, 'train.cd')
    dataset = Pool(train_small_path, column_description=column_description_path)

    settings = {"iterations": 5, "random_seed": 0, "loss_function": "Logloss"}
    cv(dataset, settings)

    stable_log = remove_time_from_json(JSON_LOG_PATH)
    return local_canonical_file(stable_log)

示例#45

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_interaction_feature_importance():
    """Canonize the 'Interaction' feature importance of a small classifier.

    The classifier (5 iterations, seed 0) is trained on the pool built from
    ``TRAIN_FILE`` / ``CD_FILE``; the importance is computed on the same pool
    and written with ``np.save`` to ``FIMP_PATH``.
    """
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    classifier = CatBoostClassifier(iterations=5, random_seed=0)
    classifier.fit(train_pool)

    interactions = classifier.get_feature_importance(train_pool, fstr_type='Interaction')
    np.save(FIMP_PATH, np.array(interactions))
    return local_canonical_file(FIMP_PATH)

示例#46

0

显示文件

文件： test.py 项目： iamnik13/catboost

def test_one_doc_feature_importance():
    """Canonize the 'Doc' feature importance of a single all-ones row.

    Trains a 5-iteration classifier (seed 0) on the pool built from
    ``TRAIN_FILE`` / ``CD_FILE`` and saves the importance array with
    ``np.save`` at ``FIMP_PATH``.
    """
    dataset = Pool(TRAIN_FILE, column_description=CD_FILE)
    clf = CatBoostClassifier(iterations=5, random_seed=0)
    clf.fit(dataset)

    # Row of integer ones with pool.num_col() entries.
    ones_row = np.ones(dataset.num_col(), dtype=int)
    categorical = dataset.get_cat_feature_indices()
    # NOTE(review): the literal 0 is passed positionally right after the row;
    # presumably it is the label -- verify against the CatBoost API in use.
    importance = clf.get_feature_importance(
        ones_row, 0, cat_features=categorical, fstr_type='Doc')
    as_array = np.array(importance)

    np.save(FIMP_PATH, as_array)
    return local_canonical_file(FIMP_PATH)

示例#47

0

显示文件

def test_python_export_with_cat_features():
    """Save a fitted model with ``format="python"`` and canonize the output.

    The ``CatBoost`` model uses 20 iterations and seed 0 and is trained on the
    pool built from ``TRAIN_FILE`` / ``CD_FILE``; the exported file lives at
    ``OUTPUT_PYTHON_MODEL_PATH``.
    """
    dataset = Pool(TRAIN_FILE, column_description=CD_FILE)
    options = {'iterations': 20, 'random_seed': 0}
    trained = CatBoost(options)
    trained.fit(dataset)

    target_path = OUTPUT_PYTHON_MODEL_PATH
    trained.save_model(target_path, format="python")
    return local_canonical_file(target_path)