# Copy the outlet identifier column from the test frame into the submission
# frame, then write the submission to disk.
sub['Outlet_Identifier'] = test['Outlet_Identifier']
# `index` must be the boolean False: the string 'False' is a non-empty
# (truthy) value, so the row index would still be written as an extra
# leading column in the CSV.
sub.to_csv('pseudo-labelling.csv', index=False)

# Estimators to compare under cross-validation, in evaluation order:
#   1. a plain XGBRegressor used as the baseline;
#   2. a PseudoLabeler wrapping an identically configured XGBRegressor,
#      given the test frame plus the feature and target names.
model_factory = []
model_factory.append(XGBRegressor(nthread=1))
model_factory.append(
    PseudoLabeler(XGBRegressor(nthread=1),
                  test,
                  features,
                  target,
                  sample_rate=0.2))  # alternative value noted by the author: 0.3

# Cross-validate every estimator in `model_factory` and print one summary
# line per estimator: class name, fold count, and mean RMSE with a
# +/- 2 * std spread across folds.
num_folds = 2  # 8 -- loop-invariant, so set once before the loop

for model in model_factory:
    # NOTE(review): this sets a `seed` attribute on the estimator instance;
    # whether it survives the cloning done inside cross_val_score depends on
    # `seed` being a constructor parameter of the estimator -- TODO confirm.
    model.seed = 42

    scores = cross_val_score(model,
                             X_train,
                             y_train,
                             cv=num_folds,
                             scoring='neg_mean_squared_error')  #n_jobs=8

    # The scorer yields negated MSE per fold. Convert each fold to RMSE first
    # so that the reported mean and the +/- spread are in the same units
    # (previously the label said "MSE" while the value was a root of the mean
    # MSE and the spread was computed on the raw squared-error scores).
    rmse_per_fold = np.sqrt(-scores)
    score_description = "RMSE: %0.4f (+/- %0.4f)" % (
        rmse_per_fold.mean(), rmse_per_fold.std() * 2)

    print('{model:25} CV-{num_folds} {score_cv}'.format(
        model=model.__class__.__name__,
        num_folds=num_folds,
        score_cv=score_description))
'''