def train_preprocessor(path='.', train='train.csv'):
    print('start train trash preprocessor...')
    df = pd.read_csv(os.path.join(path, train))
    train_data = df[:-100]
    validation_data = df[-100:-50]
    vectorizer = CountVectorizer()
    x_train_counts = vectorizer.fit_transform(train_data.text)
    x_validation_counts = vectorizer.transform(validation_data.text)
    model = CatBoostClassifier(iterations=250,
                               train_dir=path,
                               logging_level='Silent',
                               allow_writing_files=False)
    model.fit(X=x_train_counts.toarray(),
              y=train_data.status,
              eval_set=(x_validation_counts.toarray(), validation_data.status),
              use_best_model=True)
    model.save_model(os.path.join(path, 'trash_model'))
    joblib.dump(vectorizer, os.path.join(path, 'trash_vectorizer'))
    print('end train trash preprocessor...')

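# A minimal companion sketch (not from the original source) showing how the
# artifacts saved by train_preprocessor above could be loaded for inference.
# The function names and the `texts` argument are hypothetical; the paths
# mirror the ones used when saving.
def load_trash_preprocessor(path='.'):
    vectorizer = joblib.load(os.path.join(path, 'trash_vectorizer'))
    model = CatBoostClassifier()
    model.load_model(os.path.join(path, 'trash_model'))
    return vectorizer, model


def predict_trash(texts, path='.'):
    # Transform raw texts with the fitted vectorizer, then score with the model.
    vectorizer, model = load_trash_preprocessor(path)
    counts = vectorizer.transform(texts)
    return model.predict(counts.toarray())
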
def test_full_history():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    test_pool = Pool(TEST_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(od_type='Iter', od_wait=20, random_seed=42,
                               approx_on_full_history=True)
    model.fit(train_pool, eval_set=test_pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_zero_baseline():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    baseline = np.zeros(pool.num_row())
    pool.set_baseline(baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_predict_sklearn_class():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=2, random_seed=0,
                               loss_function='Logloss:border=0.5')
    model.fit(train_pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_classification_ctr():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=5, random_seed=0,
                               ctr_description=['Borders', 'Counter'])
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_zero_baseline():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    baseline = np.zeros((pool.num_row(), 2))
    pool.set_baseline(baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_non_ones_weight():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    weight = np.arange(1, pool.num_row() + 1)
    pool.set_weight(weight)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_class_weights():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=5, random_seed=0, class_weights=[1, 2])
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_zero_baseline():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    baseline = np.zeros((pool.num_row(), 2))
    pool = Pool(pool.get_features(), pool.get_label(), baseline=baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return local_canonical_file(OUTPUT_MODEL_PATH)

def test_ones_weight():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    pool2 = Pool(pool.get_features(), pool.get_label(), weight=np.ones(pool.num_row()))
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool2)
    model.save_model(OUTPUT_MODEL_PATH)
    return local_canonical_file(OUTPUT_MODEL_PATH)

def save_catboost_model(catboost_model: CatBoostClassifier, model_name: str, pool_data: Pool) -> None:
    """Save `catboost_model` to `PATH_MODELS` under the name passed in `model_name`.

    `pool_data` is the `Pool` object with the features, labels and categorical
    features that were used to fit the model.
    """
    catboost_model.save_model(str(PATH_MODELS / model_name), pool=pool_data)

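# Hypothetical usage of save_catboost_model (not from the original source).
# Assumes PATH_MODELS is a pathlib.Path defined at module level and that
# `X`, `y` and `cat_features` describe the training data.
def fit_and_save_example(X, y, cat_features):
    pool = Pool(X, y, cat_features=cat_features)
    clf = CatBoostClassifier(iterations=100, verbose=False)
    clf.fit(pool)
    # Passing the pool lets CatBoost pick up feature names/categorical info,
    # which some export formats need.
    save_catboost_model(clf, 'example_model.cbm', pool)
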
def test_priors():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=5, random_seed=0, has_time=True,
                               priors=[0, 0.6, 1, 5])
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_multiclass():
    pool = Pool(CLOUDNESS_TRAIN_FILE, column_description=CLOUDNESS_CD_FILE)
    classifier = CatBoostClassifier(iterations=2, random_seed=0,
                                    loss_function='MultiClass', thread_count=8)
    classifier.fit(pool)
    classifier.save_model(OUTPUT_MODEL_PATH)
    new_classifier = CatBoostClassifier()
    new_classifier.load_model(OUTPUT_MODEL_PATH)
    pred = new_classifier.predict_proba(pool)
    np.save(PREDS_PATH, np.array(pred))
    return local_canonical_file(PREDS_PATH)

def test_serialization_of_numpy_objects_save_model():
    train_pool = Pool(*random_xy(10, 5))
    model = CatBoostClassifier(
        iterations=np.int64(2),
        random_seed=np.int32(0),
        loss_function='Logloss'
    )
    model.fit(train_pool)
    model.save_model(OUTPUT_MODEL_PATH, format='coreml',
                     export_parameters=get_values_that_json_dumps_breaks_on())

def test_non_zero_baseline():
    pool = Pool(CLOUDNESS_TRAIN_FILE, column_description=CLOUDNESS_CD_FILE)
    base_model = CatBoostClassifier(iterations=2, random_seed=0, loss_function="MultiClass")
    base_model.fit(pool)
    baseline = np.array(base_model.predict(pool))
    pool2 = Pool(pool.get_features(), pool.get_label(), baseline=baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool2)
    model.save_model(OUTPUT_MODEL_PATH)
    return local_canonical_file(OUTPUT_MODEL_PATH)

def fit_chain():
    train_file = '.././chains.df'
    cd_file = '.././chains.cd'
    train_pool = Pool(train_file, column_description=cd_file)
    model = CatBoostClassifier(depth=3, iterations=100, eval_metric='F1',
                               task_type='CPU')
    model.fit(train_pool)
    model.save_model('chain.model')

def run_ex_gal(exp_name, data_type):
    tes_m = feather.read_dataframe('../others/tes_m.feather')
    le = load_pickle('../others/label_encoder.pkl')
    y = le.transform(np.load('../others/train_target.npy'))
    distmod_mask = np.load('../others/distmod_mask.npy')
    W = np.load('../others/W.npy')
    pseudo_idx = np.load('../others/pseudo_idx.npy')
    class_names = [99, 95, 92, 90, 88, 67, 65, 64, 62, 53, 52, 42, 16, 15, 6]
    obj_class = 90

    W_tr = np.zeros(14)
    W_tr[le.transform(class_names[1:])] = W[1:]
    real_weight = get_real_weight(y, W_tr)

    ex_gal_labels = np.where(np.bincount(y[distmod_mask]) != 0)[0]
    ex_gal_label_map = np.zeros(np.max(ex_gal_labels) + 1, dtype=np.int32)
    ex_gal_label_map[ex_gal_labels] = np.arange(ex_gal_labels.shape[0])

    X, X_pseudo = load_data_ex_gal(exp_name, data_type, pseudo_idx)
    y_pseudo = np.full(pseudo_idx.sum(), ex_gal_label_map[le.transform([obj_class])][0])

    params = {
        'iterations': 10000,
        'learning_rate': 0.1,
        'depth': 3,
        'loss_function': 'MultiClass',
        'colsample_bylevel': 0.7,
        'random_seed': 0,
        'class_weights': real_weight[ex_gal_labels] / real_weight[ex_gal_labels].sum()
    }

    iterations = load_pickle('../fi/' + exp_name + '_rounds.pkl')
    iteration = iterations[data_type]
    params['iterations'] = iteration
    print('iteration: ' + str(params['iterations']))

    orig_size = np.bincount(ex_gal_label_map[y[distmod_mask]])[ex_gal_label_map[le.transform([obj_class])][0]]
    whole_data = np.concatenate((X[distmod_mask], X_pseudo), axis=0)
    whole_labels = np.concatenate((ex_gal_label_map[y[distmod_mask]], y_pseudo), axis=0)
    after_size = np.bincount(whole_labels)[ex_gal_label_map[le.transform([obj_class])][0]]

    sample_weight = np.ones(whole_labels.shape[0])
    sample_weight[whole_labels == ex_gal_label_map[le.transform([obj_class])][0]] = orig_size / after_size

    model = CatBoostClassifier(**params)
    model.fit(whole_data, whole_labels, sample_weight=sample_weight)
    model.save_model('../models/' + exp_name + '_' + data_type + '.cbm')

def train(odir, trname, tsname, split_col, rs, train_size):
    """Train gradient boosting model."""
    # To get consistent categories, we concatenate train and test
    train = pd.read_csv(trname, index_col="PassengerId")
    test = pd.read_csv(tsname, index_col="PassengerId")
    fts_cols = train.columns.drop("Survived")

    # Creating training/validation split
    cv = StratifiedShuffleSplit(n_splits=1, train_size=train_size, random_state=rs)
    tridx, cvidx = list(cv.split(train, train[split_col]))[0]

    # Fill missing values
    train.fillna(train.iloc[tridx].mean()[["Age", "Fare"]], inplace=True)
    test.fillna(train.iloc[tridx].mean()[["Age", "Fare"]], inplace=True)

    # Creating the model
    model = CatBoostClassifier(iterations=500, depth=4, rsm=0.75,
                               learning_rate=0.001, early_stopping_rounds=250,
                               random_state=rs, use_best_model=True)
    model.fit(train.iloc[tridx][fts_cols], train.iloc[tridx]["Survived"],
              eval_set=(train.iloc[cvidx][fts_cols], train.iloc[cvidx]["Survived"]),
              verbose=50)

    # Measuring performance
    cv_predictions = model.predict_proba(train.iloc[cvidx][fts_cols])[:, 1]
    auc = roc_auc_score(train.iloc[cvidx, 0], cv_predictions)
    acc = accuracy_score(train.iloc[cvidx, 0], cv_predictions > 0.5)

    # Saving the model, metrics file and submission
    odir = pathlib.Path(odir)
    model.save_model(odir.joinpath("cb-model.cbm").as_posix())
    with open(odir.joinpath("cb-metrics.json"), "w") as metrics_file:
        metrics_file.write(json.dumps({"AUC": auc, "Accuracy": acc}))
    # np.int is removed in recent NumPy versions; use the built-in int dtype
    submission = pd.Series(model.predict(test.values), name="Survived",
                           index=test.index, dtype=int)
    submission.to_csv(odir.joinpath("cb-submission.csv"), header=True)

def train_catboost(self):
    catboost_pool = self.to_ml_input(self.train_pool.pool, "train")
    self.logger.info("train_catboost iterations count={}".format(self.args.iter_count))
    model = CatBoostClassifier(iterations=self.args.iter_count,
                               depth=4,
                               logging_level="Debug",
                               loss_function='MultiClass',
                               # verbose=True
                               )
    model.fit(catboost_pool)
    model.save_model(self.args.model_path)

def test_od():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    test_pool = Pool(TEST_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=1000, learning_rate=0.03,
                               od_type='Iter', od_wait=20, random_seed=42)
    model.fit(train_pool, eval_set=test_pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_ignored_features():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    test_pool = Pool(TEST_FILE, column_description=CD_FILE)
    model1 = CatBoostClassifier(iterations=5, random_seed=0, ignored_features=[1, 2, 3])
    model2 = CatBoostClassifier(iterations=5, random_seed=0)
    model1.fit(train_pool)
    model2.fit(train_pool)
    predictions1 = model1.predict(test_pool)
    predictions2 = model2.predict(test_pool)
    assert not _check_data(predictions1, predictions2)
    model1.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_priors():
    pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=5, random_seed=0, has_time=True,
                               ctr_description=[
                                   "Borders:Prior=0:Prior=0.6:Prior=1:Prior=5",
                                   "Counter:Prior=0:Prior=0.6:Prior=1:Prior=5"
                               ])
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def main() -> None:
    gameid_column = 'game_id'
    all_csv_file_pathes = glob.glob('../data/*/*.csv')
    all_frames = []
    for csv_filepath in all_csv_file_pathes:
        game_frame = pd.read_csv(csv_filepath, index_col=False, sep=';')
        game_frame.insert(0, 'game_id', csv_filepath)
        all_frames.append(game_frame)
    all_games = pd.concat(all_frames, axis=0, ignore_index=True)

    frame = all_games.copy()
    cols = frame.columns[frame.dtypes.eq('object')]
    for col in cols:
        frame[col] = frame[col].astype('category')
        if col != gameid_column:  # compare values, not identity
            column_mapping = dict(enumerate(frame[col].cat.categories))
            save_column_mapping(col, column_mapping)
        frame[col] = frame[col].cat.codes

    defects = frame.loc[frame["CT-Win"] == -1]
    frame.drop(defects.index, inplace=True)
    frame.dropna(axis='rows', inplace=True)  # drop in place, otherwise the result is discarded

    y = frame["CT-Win"].astype(int)
    X = frame.drop(["CT-Win"], axis=1)
    X = X.drop(['game_id'], axis=1)

    all_column_map = {}
    for col_idx, col_name in enumerate(X.columns):
        all_column_map[col_idx] = col_name
    save_column_mapping('order.json', all_column_map)

    model = CatBoostClassifier(
        loss_function='Logloss',
        eval_metric="AUC",
        border_count=CTB_MODEL_PARAMETERS['border_count'],
        thread_count=CTB_MODEL_PARAMETERS['thread_count'],
        random_seed=CTB_MODEL_PARAMETERS['random_seed'],
        depth=CTB_MODEL_PARAMETERS['depth'],
        od_wait=CTB_MODEL_PARAMETERS['od_wait'],
        l2_leaf_reg=CTB_MODEL_PARAMETERS['l2_leaf_reg'],
        iterations=CTB_MODEL_PARAMETERS['iterations'],
        learning_rate=CTB_MODEL_PARAMETERS['learning_rate'],
        od_type='Iter')
    ctb_data = Pool(X, y)
    model.fit(ctb_data, verbose=False)

    os.makedirs('./' + MODEL_FOLDER, exist_ok=True)
    model.save_model('./' + MODEL_FOLDER + '/' + MODEL_NAME)

def test_fit_data():
    pool = Pool(CLOUDNESS_TRAIN_FILE, column_description=CLOUDNESS_CD_FILE)
    eval_pool = Pool(CLOUDNESS_TEST_FILE, column_description=CLOUDNESS_CD_FILE)
    base_model = CatBoostClassifier(iterations=2, random_seed=0, loss_function="MultiClass")
    base_model.fit(pool)
    baseline = np.array(base_model.predict(pool, prediction_type='RawFormulaVal'))
    eval_baseline = np.array(base_model.predict(eval_pool, prediction_type='RawFormulaVal'))
    eval_pool.set_baseline(eval_baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0, loss_function="MultiClass")
    data = map_cat_features(pool.get_features(), pool.get_cat_feature_indices())
    model.fit(data, pool.get_label(), pool.get_cat_feature_indices(),
              sample_weight=np.arange(1, pool.num_row() + 1),
              baseline=baseline, use_best_model=True, eval_set=eval_pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def test_non_zero_baseline():
    pool = Pool(CLOUDNESS_TRAIN_FILE, column_description=CLOUDNESS_CD_FILE)
    base_model = CatBoostClassifier(iterations=2, random_seed=0, loss_function="MultiClass")
    base_model.fit(pool)
    baseline = np.array(base_model.predict(pool, prediction_type='RawFormulaVal'))
    pool.set_baseline(baseline)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)

class CatboostEnsemble(Ensemble):
    def __init__(self, params: dict, dataset: Dataset = None):
        super().__init__(params, dataset, name='CatboostEnsemble')
        self.clf = CatBoostClassifier(**params)
        self.tmp_json_path = '/tmp/catboost.model.json'

    def fit(self, dataset: Dataset):
        self.set_dataset(dataset)
        loss_function = 'MultiClass' if self.dataset.num_classes() > 2 else 'Logloss'
        self.clf.set_params(loss_function=loss_function, verbose=False)
        self.clf.fit(self.dataset.X, self.dataset.y)
        self.clf.save_model(self.tmp_json_path, format='json')
        with open(self.tmp_json_path, 'r') as fp:
            model = json.load(fp)
        self.trees = [
            CatboostTree.parse(tree, self.dataset)
            for tree in model['oblivious_trees']
        ]

    def predict_proba(self, dataset: Dataset) -> np.ndarray:
        if len(self.trees) == 0:
            raise ValueError('There are no trees available')
        encoded_dataset = self.encode_dataset(dataset)
        n_classes = len(self.clf.classes_)  # pylint: disable=no-member
        # TODO: For single tree this is just [tree.predict(...)]
        preds = np.array([tree.predict(encoded_dataset.X) for tree in self.trees])
        preds = np.sum(preds, axis=0)
        if n_classes > 2:
            # https://catboost.ai/docs/concepts/loss-functions-multiclassification.html
            # Link above suggests different equation for this
            # results_proba = softmax(preds, axis=1)
            raise NotImplementedError('Only binary problems are implemented.')
        else:
            results_proba = np.array([[1 - v, v] for v in expit(preds)])
        return results_proba

    def predict(self, dataset: Dataset) -> np.ndarray:
        results_proba = self.predict_proba(dataset)
        results_cls = np.argmax(results_proba, axis=1)
        return results_cls

def _test_influence_vs_tf_derivative(leaf_method):
    base_dir = 'data/adult/'
    train_documents, train_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'train_data_catboost_format.tsv'
    )
    train_documents = train_documents[:100]
    train_targets = train_targets[:100]
    train_targets = np.argmax(train_targets, axis=1)

    test_documents, test_targets = read_train_documents_and_one_hot_targets(
        base_dir + 'test_data_catboost_format.tsv'
    )
    test_targets = np.argmax(test_targets, axis=1)

    train_dir = base_dir + 'ut_tmp/'
    if not isdir(train_dir):
        mkdir(train_dir)
    cbc_params = read_json_params(base_dir + 'catboost_params.json')
    cbc_params['iterations'] = 2
    cbc_params['leaf_estimation_method'] = leaf_method
    cbc_params['random_seed'] = 10
    cbc_params['train_dir'] = train_dir
    cbc = CatBoostClassifier(**cbc_params)
    cbc.set_params(boosting_type='Plain')
    cbc.fit(train_documents, train_targets)
    cbc.save_model(train_dir + 'model.bin', format='cbm')
    export_catboost_to_json(train_dir + 'model.bin', train_dir + 'model.json')

    full_model = CBLeafInfluenceEnsemble(train_dir + 'model.json', train_documents, train_targets,
                                         leaf_method=leaf_method,
                                         learning_rate=cbc_params['learning_rate'],
                                         loss_function=BinaryCrossEntropyLoss(),
                                         update_set='AllPoints')
    retrained_model_our = deepcopy(full_model)
    tf_checker = TFGBApplier(full_model, train_documents, train_targets, leaf_method)
    for remove_idx in np.random.randint(len(train_targets), size=30):
        full_model.fit(remove_idx, retrained_model_our)
        pred_ours = full_model(train_documents)
        pred_theirs = tf_checker.get_predicts()
        pred_cbc = cbc.predict(train_documents, prediction_type='RawFormulaVal')
        assert np.allclose(pred_ours, pred_theirs, rtol=1e-3) \
            and np.allclose(pred_ours, pred_cbc, rtol=1e-3), (pred_ours, pred_theirs)

        der_ours = [t.leaf_values for t in retrained_model_our.influence_trees]
        der_theirs = tf_checker.get_derivs(remove_idx)
        assert all(np.allclose(o, t, rtol=1e-2)
                   for o, t in zip(der_ours, der_theirs)), (der_ours, der_theirs)

        random_train_idx = np.random.randint(len(train_targets))
        der_pred_ours = retrained_model_our.loss_derivative(train_documents[[random_train_idx]],
                                                            train_targets[[random_train_idx]])[0]
        der_pred_theirs = tf_checker.get_train_prediction_deriv(remove_idx, random_train_idx)
        assert np.isclose(der_pred_ours, der_pred_theirs, rtol=1e-2), (der_pred_ours, der_pred_theirs)

def train_gbm(n_epochs=100):
    df = pd.read_csv('./data/df_super.csv')
    x, y = create_gbm_dataset(df)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, train_size=0.9)
    model = CatBoostClassifier(
        iterations=n_epochs,
        learning_rate=0.01
    )
    model.fit(xtrain, ytrain, eval_set=(xtest, ytest))
    model.save_model('./models/gbm.cbm')

def train_call_models():
    for i, name in enumerate(names):
        print(i, name)
        y_bot = y[y['private_bot_name'] == name]
        X_game = X.loc[y_bot.index]
        y_game = y_bot['private_bot_action'].replace({'FOLD': 0, 'CALL': 1, 'RAISE': 1})
        X_train, X_test, y_train, y_test = train_test_split(X_game, y_game,
                                                            test_size=0.1, random_state=1234)
        model = CatBoostClassifier(iterations=200, learning_rate=0.1, depth=8,
                                   thread_count=4, verbose=True, use_best_model=True)
        model.fit(X_train, y_train, eval_set=(X_test, y_test))
        model.save_model(model_dir + 'pool/call/' + str(i) + '.model')

def generate_ensemble_classification(dataset_name, params, alg="sgb", num_models=10):
    # load and prepare data
    data_dir = os.path.join('datasets', dataset_name)
    full_train_file = os.path.join(data_dir, 'full_train')
    test_file = os.path.join(data_dir, 'test')
    cd_file = os.path.join(data_dir, 'pool.cd')
    full_train_pool = Pool(data=full_train_file, column_description=cd_file)
    test_pool = Pool(data=test_file, column_description=cd_file)

    # parameters
    depth = params['depth']
    lr = params['lr']
    sample = params['sample']

    seed = 0
    for i in range(num_models):
        if alg == 'sgb' or alg == 'sgb-fixed':
            model = CatBoostClassifier(loss_function='Logloss', verbose=False,
                                       learning_rate=lr, depth=depth,
                                       subsample=sample, bootstrap_type='Bernoulli',
                                       custom_metric='ZeroOneLoss', random_seed=seed)
        if alg == 'sglb' or alg == 'sglb-fixed':
            model = CatBoostClassifier(loss_function='Logloss', verbose=False,
                                       learning_rate=lr, depth=depth,
                                       subsample=sample, bootstrap_type='Bernoulli',
                                       posterior_sampling=True,
                                       custom_metric='ZeroOneLoss', random_seed=seed)
        seed += 1  # new seed for each ensemble element
        model.fit(full_train_pool, eval_set=test_pool,
                  use_best_model=False)  # do not use test pool for choosing best iteration
        model.save_model("results/models/" + dataset_name + "_" + alg + "_" + str(i),
                         format="cbm")

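# A hedged sketch (not part of the original code) of how the ensemble members
# saved by generate_ensemble_classification might be read back and combined.
# Assumes the same "results/models/" naming scheme and a Pool to score.
def load_ensemble_and_average(dataset_name, alg, num_models, pool):
    probas = []
    for i in range(num_models):
        member = CatBoostClassifier()
        member.load_model("results/models/" + dataset_name + "_" + alg + "_" + str(i),
                          format="cbm")
        probas.append(member.predict_proba(pool))
    # Simple average of class probabilities over the ensemble members.
    return np.mean(probas, axis=0)
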
def test_metadata():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(
        iterations=2,
        random_seed=0,
        loss_function='Logloss:border=0.5',
        metadata={"type": "AAA", "postprocess": "BBB"}
    )
    model.fit(train_pool)
    model.save_model(OUTPUT_MODEL_PATH)

    model2 = CatBoost(model_file=OUTPUT_MODEL_PATH)
    assert 'type' in model2.metadata_
    assert model2.metadata_['type'] == 'AAA'
    assert 'postprocess' in model2.metadata_
    assert model2.metadata_['postprocess'] == 'BBB'
    return compare_canonical_models(OUTPUT_MODEL_PATH)

def main(args):
    X, y = get_gbm_database(
        args.telemetry_path,
        args.maint_path,
        args.machines_path,
        args.errors_path,
        args.failures_path,
        seq_len=args.out_seq_len,
        machine_id=args.machine_id,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.9)
    model = CatBoostClassifier(iterations=args.n_iterations,
                               learning_rate=args.gbm_learning_rate)
    model.fit(X_train, y_train, eval_set=(X_test, y_test))
    model.save_model(args.checkpoint_path)

def test_predict_sklearn_class():
    train_pool = Pool(TRAIN_FILE, column_description=CD_FILE)
    model = CatBoostClassifier(iterations=2, random_seed=0)
    model.fit(train_pool)
    model.save_model(OUTPUT_MODEL_PATH)
    return compare_canonical_models(OUTPUT_MODEL_PATH)