Exemplo n.º 1
0
def test_fit_data():
    """Fit a MultiClass classifier from raw data with weights and a baseline.

    A first classifier is trained on the training pool; its
    ``RawFormulaVal`` predictions on the training and evaluation pools are
    then used as baselines for a second, identically configured classifier.
    The second classifier is fitted from the features and labels extracted
    from the training pool (rather than from the pool object itself), with
    per-row sample weights ``1..num_row`` and ``use_best_model=True``
    against the evaluation pool.

    Returns:
        The result of ``local_canonical_file(OUTPUT_MODEL_PATH)`` for the
        saved second model (presumably a canonical-file descriptor consumed
        by the test harness -- verify against the helper's definition).
    """
    # Both classifiers share exactly the same hyper-parameters.
    shared_params = {
        "iterations": 2,
        "random_seed": 0,
        "loss_function": "MultiClass",
    }

    train_pool = Pool(CLOUDNESS_TRAIN_FILE,
                      column_description=CLOUDNESS_CD_FILE)
    test_pool = Pool(CLOUDNESS_TEST_FILE,
                     column_description=CLOUDNESS_CD_FILE)

    # Stage 1: train a reference model and collect its raw predictions.
    first_stage = CatBoostClassifier(**shared_params)
    first_stage.fit(train_pool)
    train_raw = np.array(
        first_stage.predict(train_pool, prediction_type='RawFormulaVal'))
    test_raw = np.array(
        first_stage.predict(test_pool, prediction_type='RawFormulaVal'))

    # The evaluation pool carries its baseline itself; the training baseline
    # is handed to fit() explicitly below.
    test_pool._set_baseline(test_raw)

    # Stage 2: fit from raw features/labels instead of the Pool object.
    second_stage = CatBoostClassifier(**shared_params)
    features = map_cat_features(train_pool.get_features(),
                                train_pool.get_cat_feature_indices())
    # Row i (0-based) gets weight i + 1.
    row_weights = np.arange(1, train_pool.num_row() + 1)
    second_stage.fit(
        features,
        train_pool.get_label(),
        train_pool.get_cat_feature_indices(),
        sample_weight=row_weights,
        baseline=train_raw,
        use_best_model=True,
        eval_set=test_pool,
    )

    second_stage.save_model(OUTPUT_MODEL_PATH)
    return local_canonical_file(OUTPUT_MODEL_PATH)