示例#1
0
def test_save_model_joblib():
    """Saving with ``method='jb'`` should leave ``randomforest.jbl`` in ``tests/``."""
    classifier = RandomForestClassifier()
    save_model(model=classifier, name='tests/randomforest', method='jb')
    saved_files = os.listdir('tests/')
    assert 'randomforest.jbl' in saved_files
    # Remove the artefact so the tests directory is left as it was found.
    os.remove('tests/randomforest.jbl')
示例#2
0
def test_get_model():
    """Round-trip a classifier through ``save_model`` and ``get_model``.

    The assertion targets the object returned by ``get_model`` so the test
    fails when loading yields something that is not an estimator.
    """
    rf = RandomForestClassifier()
    save_model(rf,
               name='tests/sampletest/outputs/models/rf_model',
               method='jb')
    output = get_model(path='tests/sampletest/outputs/models/rf_model.jbl')
    # Inspect the loaded object, not the in-memory original, otherwise the
    # check passes regardless of what get_model returned.
    assert hasattr(output, 'fit')
示例#3
0
def test_get_output():
    """Round-trip a DataFrame through ``save_model`` and ``get_output``.

    The assertion targets the object returned by ``get_output`` so the test
    fails when loading yields something that is not DataFrame-like.
    """
    temp = pd.DataFrame([1, 2, 3, 4, 5, 6])
    save_model(temp, name='tests/sampletest/outputs/submit', method='jb')
    output = get_output(path='tests/sampletest/outputs/submit.jbl')
    # Inspect the loaded object, not the in-memory original, otherwise the
    # check passes regardless of what get_output returned.
    assert hasattr(output, 'sample')
示例#4
0
# +
import datasist.project as dp
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Retrieve the data from the processed folder.
data = dp.get_data("train_proc.csv", method='csv')
label = dp.get_data("train_labels.csv", method='csv')

# Base model: random forest scored with 5-fold cross-validation.
rf = RandomForestRegressor(n_estimators=10, random_state=2)
fold_scores = cross_val_score(estimator=rf,
                              X=data,
                              y=label.Rating,
                              cv=5,
                              scoring="neg_mean_squared_error",
                              n_jobs=-1)
# The scorer yields the negated MSE of each fold. Flip the sign and take the
# square root per fold so the reported figure really is an RMSE, then average.
score = np.mean(np.sqrt(-1 * fold_scores))
print("RMSE is {}".format(score))

# cross_val_score fits clones of the estimator and leaves `rf` itself
# unfitted, so train it on the full data before persisting it.
rf.fit(data, label.Rating)

# Save the model.
dp.save_model(rf, name='rf_model_n10')

# Save the result.
result = {"rmse_rf_model_n10": score}
dp.save_outputs(result, name='rmse_rf_model_n10')

# -