Exemplo n.º 1
0
# Write the column-description file 'train.cd' into dataset_dir.
# Column 0 is declared as the label; every index from 1 up to
# train_df.shape[1] - 1 is declared as a categorical feature.
cd_cat_columns = list(range(1, train_df.shape[1]))
cd_output_path = os.path.join(dataset_dir, 'train.cd')
create_cd(
    label=0,
    cat_features=cd_cat_columns,
    feature_names=features,
    output_path=cd_output_path,
)

# Build a Pool directly from the files on disk: the comma-delimited
# 'train.csv' (which has a header row) plus its column-description file.
# NOTE: tinker with Pool some more.
train_csv_path = os.path.join(dataset_dir, 'train.csv')
train_cd_path = os.path.join(dataset_dir, 'train.cd')
pool_b = Pool(
    data=train_csv_path,
    column_description=train_cd_path,
    delimiter=',',
    has_header=True,
)

# Inspect what was loaded: first the shape, then the feature names
dataset_shape = pool_b.shape
print('Dataset shape: {}\n'.format(dataset_shape))
column_names = pool_b.get_feature_names()
print('Column names: {}'.format(column_names))

# Fit three throwaway classifiers, each limited to 3 iterations, one per
# way of supplying the training data. The fitted models are not kept.
demo_params = {'iterations': 3}

# X and y passed separately, together with cat_features
CatBoostClassifier(**demo_params).fit(X, y, cat_features=cat_features)
# pool_a (defined outside this excerpt)
CatBoostClassifier(**demo_params).fit(pool_a)
# pool_b
CatBoostClassifier(**demo_params).fit(pool_b)

# Split X and y into train and validation parts, with test_size=0.2
# reserving the validation share and random_seed passed as random_state.
# NOTE(review): train_test_split is presumably scikit-learn's; confirm
# against the file's imports.
data = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_seed,
)
(X_train, X_validation, y_train, y_validation) = data

# Wrap each part in its own Pool; both use the same categorical features
train_pool = Pool(
    data=X_train,
    label=y_train,
    cat_features=cat_features,
)

validation_pool = Pool(
    data=X_validation,
    label=y_validation,
    cat_features=cat_features,
)