def test_staged_predict():
    """Canonize the per-stage predictions of a small classifier.

    Trains a 10-iteration CatBoostClassifier on the training pool, collects
    the prediction produced after each boosting stage on the test pool,
    saves them as one numpy array, and returns the canonical-file handle
    used by the test harness for comparison.
    """
    pool_train = Pool(TRAIN_FILE, column_description=CD_FILE)
    pool_test = Pool(TEST_FILE, column_description=CD_FILE)

    classifier = CatBoostClassifier(iterations=10, random_seed=0)
    classifier.fit(pool_train)

    # staged_predict yields one prediction per boosting stage; materialize
    # the whole sequence so it can be stacked into a single array.
    staged = list(classifier.staged_predict(pool_test))

    np.save(PREDS_PATH, np.array(staged))
    return local_canonical_file(PREDS_PATH)
# Fit model model.fit(train_pool) # Only prediction_type='RawFormulaVal' is allowed with custom `loss_function` preds_raw = model.predict(X_test, prediction_type='RawFormulaVal') # ### 3.7 Staged Predict # CatBoost model has `staged_predict` method. It allows you to iteratively get predictions for a given range of trees. # In[29]: model = CatBoostClassifier(iterations=10, random_seed=42, logging_level='Silent').fit(train_pool) ntree_start, ntree_end, eval_period = 3, 9, 2 predictions_iterator = model.staged_predict(validate_pool, 'Probability', ntree_start, ntree_end, eval_period) for preds, tree_count in zip(predictions_iterator, range(ntree_start, ntree_end, eval_period)): print('First class probabilities using the first {} trees: {}'.format( tree_count, preds[:5, 1])) # ### 3.8 Feature Importances # Sometimes it is very important to understand which feature made the greatest contribution to the final result. To do this, the CatBoost model has a `get_feature_importance` method. # In[30]: model = CatBoostClassifier(iterations=50, random_seed=42, logging_level='Silent').fit(train_pool) feature_importances = model.get_feature_importance(train_pool)