Exemplo n.º 1
0
def getting_single_predictions_classification(model_name=None):
    """Check single-row predictions from a saved and reloaded Titanic classifier.

    Trains a classifier, saves it, loads it back, and then verifies three
    things using one-dictionary-at-a-time predictions:

    1. the Brier score loss falls inside the expected range,
    2. 1000 single predictions finish within the timing bounds,
    3. a second scoring pass over the same dictionaries still falls inside
       the expected range (predicting must not corrupt the input rows).

    Args:
        model_name: Forwarded unchanged to ``train`` as ``model_names``.
            ``'DeepLearningClassifier'`` relaxes the lower score bound.

    Raises:
        AssertionError: If a score or the timing is outside its bounds.
    """
    np.random.seed(0)

    df_titanic_train, df_titanic_test = (
        utils.get_titanic_binary_classification_dataset())

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train, model_names=model_name)

    # Round-trip the trained pipeline through disk under a random file name.
    file_name = ml_predictor.save(str(random.random()))
    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        # Presumably only deep learning models leave this companion file
        # behind, so a missing file is expected and not an error.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        pass

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same
    # Index 1 of each predict_proba result is used as the score input.
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                   predictions)
    print('first_score')
    print(first_score)

    # Make sure our score is good, but not unreasonably good
    lower_bound = -0.215
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.245

    assert lower_bound < first_score < -0.17

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        # Cycle through the test rows when there are fewer than 1000.
        saved_ml_pipeline.predict(
            df_titanic_test_dictionaries[idx % data_length])
    duration = datetime.datetime.now() - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the
    # same after running a few experiments as it is the first time)
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                    predictions)
    print('second_score')
    print(second_score)

    # Make sure our score is good, but not unreasonably good
    assert lower_bound < second_score < -0.17
Exemplo n.º 2
0
    #         writer.writerow(row)

else:
    # load short dataset
    with open('numerai_datasets_early_aug/numerai_short.csv', 'rU') as input_file:
        training_rows = csv.DictReader(input_file)

        training_data = []
        testing_data = []

        for row in training_rows:
            if random.random() > 0.8:
                testing_data.append(row)
            else:
                training_data.append(row)


# Classifier whose only described column is 'target', marked as the output.
ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions={'target': 'output'})

# Split out our output column so we have a proper X, y dataset
X_test, y_test = utils.split_output(testing_data, 'target')
# Convert every label to int in place.
# NOTE(review): presumably the labels are strings because the rows come from
# csv.DictReader -- confirm against the data-loading code.
for idx, pred in enumerate(y_test):
    y_test[idx] = int(pred)

# ml_predictor.train(training_data, optimize_entire_pipeline=True, optimize_final_model=True)
# Train on the training rows, also handing the held-out split to train().
ml_predictor.train(training_data, X_test=X_test, y_test=y_test)


# ml_predictor.predict_proba(X_test)
# Print the score of the trained predictor on the held-out split.
print(ml_predictor.score(X_test, y_test))
Exemplo n.º 3
0
class deepl(object):
    """Wrapper around one training DataFrame, a ``Predictor`` and a ``Sequential`` model.

    The training data is read from a fixed CSV path at construction time.
    The class offers two independent workflows over that frame:

    * ``learn_model`` / ``sav_model`` train and save the ``Predictor``;
    * ``handle_non_numerical_data`` / ``create_dl_model`` /
      ``train_dl_model`` / ``sav_dl_model`` build, fit and save the
      ``Sequential`` network (presumably Keras -- confirm against imports).

    The column named ``fraud`` is treated as the target.
    """

    def __init__(self):
        """Load the training CSV and create the untrained models.

        Relies on a module-level ``column_descriptions`` being defined.
        """
        self.train_df = pd.read_csv("..\\mlweb\\input\\bs140513_032310.csv")
        self.ml_predictor = Predictor(type_of_estimator='regressor',
                                      column_descriptions=column_descriptions)
        self.model = Sequential()

    def handle_non_numerical_data(self):
        """Replace each non-int64/non-float64 column with integer codes, in place.

        Codes are assigned per column in order of first appearance (0, 1,
        2, ...), so the encoding is reproducible across runs. Iterating a
        ``set`` of the values, as was done before, yields an order that
        varies with string hash randomization.
        """
        for column in self.train_df.columns.values:
            dtype = self.train_df[column].dtype
            if dtype != np.int64 and dtype != np.float64:
                codes = {}
                for value in self.train_df[column].values.tolist():
                    if value not in codes:
                        codes[value] = len(codes)

                self.train_df[column] = [
                    codes[value] for value in self.train_df[column]
                ]

    def getX_Y(self):
        """Return a shuffled 80/20 ``(train, test)`` split of the whole frame."""
        return train_test_split(self.train_df, test_size=0.2, shuffle=True)

    def get_train_X_Y(self):
        """Return ``(features, target)`` and print the head of each.

        Returns:
            tuple: the frame without the ``fraud`` column, and a
            single-column DataFrame holding ``fraud``.
        """
        train_X = self.train_df.drop(columns=['fraud'])
        train_Y = self.train_df[['fraud']]
        print(train_X.head())
        print(train_Y.head())
        return train_X, train_Y

    def create_dl_model(self):
        """Add the layers to ``self.model`` and compile it.

        Two 10-unit ReLU layers feed a single output unit; the model is
        compiled with the Adam optimizer and mean squared error loss.
        """
        train_X, _ = self.get_train_X_Y()
        # The input width is the number of feature columns.
        n_cols = train_X.shape[1]
        self.model.add(Dense(10, activation='relu', input_shape=(n_cols, )))
        self.model.add(Dense(10, activation='relu'))
        self.model.add(Dense(1))
        self.model.compile(optimizer='adam', loss='mean_squared_error')

    def get_dl_model(self):
        """Return the ``Sequential`` model instance."""
        return self.model

    def train_dl_model(self):
        """Fit ``self.model`` on the features/target for up to 30 epochs.

        A 20% validation split is used together with an ``EarlyStopping``
        callback (patience 3) so training stops once it no longer improves.
        """
        train_X, train_Y = self.get_train_X_Y()
        early_stopping_monitor = EarlyStopping(patience=3)
        self.model.fit(train_X,
                       train_Y,
                       validation_split=0.2,
                       epochs=30,
                       callbacks=[early_stopping_monitor])

    def sav_dl_model(self):
        """Save the network weights to the fixed ``deep_learning.h5`` path."""
        self.model.save_weights(
            '..\\mlweb\\trained_pipeline\\deepl\\deep_learning.h5')

    def get_features(self):
        """Return the column names of the training frame as a list."""
        return list(self.train_df)

    def send_tojson(self, list):
        """Write the feature names to ``jsonpath`` as ``{"feature<i>": name}``.

        Args:
            list: Ignored. The names are always taken from
                ``get_features()``; the parameter is kept only so existing
                callers keep working.

        Relies on a module-level ``jsonpath`` being defined.
        """
        feature_names = self.get_features()
        features = {}
        for index, name in enumerate(feature_names):
            features["feature" + str(index)] = name
        with open(jsonpath, 'w') as outfile:
            json.dump(features, outfile)

    def learn_model(self):
        """Train the ``Predictor`` on a fresh split and score it on the test part.

        The test split is also passed as ``fl_data`` for feature learning.
        """
        df_train, df_test = self.getX_Y()
        self.ml_predictor.train(df_train,
                                feature_learning=True,
                                fl_data=df_test,
                                model_names='DeepLearningRegressor')
        self.ml_predictor.score(df_test, df_test.fraud)

    def sav_model(self):
        """Save the trained ``Predictor`` using its default file name."""
        self.ml_predictor.save()


# dmodel = deepl()
# dmodel.send_tojson(dmodel.get_features())
# # dmodel.getX_Y()
# dmodel.learn_model()
# dmodel.sav_model()

#2
# dmodel = deepl()
# dmodel.handle_non_numerical_data()
# dmodel.create_dl_model()
# dmodel.train_dl_model()
# dmodel.sav_dl_model()

# print(dmodel.get_features())

# SITE_ROOT = os.path.realpath(os.path.dirname(__file__))
# pipline_path = os.path.join(SITE_ROOT, "trained_pipeline/deepl", "deepLearning.h5")
# print(pipline_path)
    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    model_names = [
        'LGBMRegressor',
        "AdaBoostRegressor",
        "XGBRegressor",
        "ExtraTreesRegressor",
        "RANSACRegressor",
    ]
    model_names = [
        "GradientBoostingRegressor", "DeepLearningRegressor",
        "RandomForestRegressor", "SGDRegressor", "PassiveAggressiveRegressor"
    ]
    #     ml_predictor.train(df_train , model_names = model_names )
    ml_predictor.train(df_train)

    # Score the model on test data
    test_score = ml_predictor.score(df_test, df_test.y)

    # auto_ml is specifically tuned for running in production
    # It can get predictions on an individual row (passed in as a dictionary)
    # A single prediction like this takes ~1 millisecond
    # Here we will demonstrate saving the trained model, and loading it again
    file_name = ml_predictor.save(file_name="d:/automl.saved")
    test_score = ml_predictor.score(df_test, df_test.y)
    print(test_score)

    trained_model = load_ml_model(file_name)

    # .predict and .predict_proba take in either:
Exemplo n.º 5
0
    df_test_middle = df_test_middle.dropna()

    df_test = df_test_middle.drop(columns='daysOnMarket')
    df_test_label = df_test_middle['daysOnMarket']

    value_list = []
    for i in range(len(df_train.columns)):
        value_list.append('categorical')

    column_description1 = {
        key: value
        for key in df_train.columns for value in value_list
        if df_train[key].dtype == 'object'
    }
    column_description2 = {
        'daysOnMarket': 'output',
        'buildingTypeId': 'categorical',
    }

    print(column_description1)
    column_descriptions = dict(column_description1, **column_description2)

    ml_predictor = Predictor(type_of_estimator='Regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_train, model_names='DeepLearningRegressor')

    # ml_predictor.score(df_test)
    x = ml_predictor.predict(df_test)
    print(mean_absolute_error(df_test_label, x))
Exemplo n.º 6
0
# Train one regressor per target column and collect its predictions.
predictions = pd.DataFrame()
for classname in target:
    print('****** start to train ')
    # Every target column except the one being trained now.
    b = set(target) - set([classname])
    # Training columns: all columns minus the other targets.
    cols = list(set(ml_train.columns.values) - b)
    # Prediction columns: all columns minus every target.
    test_cols = list(set(ml_train.columns.values) - set(target))
    print(classname)
    column_descriptions = {
        classname: 'output',
    }

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)
    # NOTE(review): columns are taken from `ml_train` but rows from
    # `train_ml`; confirm both names are defined and meant to differ.
    ml_predictor.train(train_ml[cols],
                       model_names=['LGBMRegressor'],
                       feature_learning=True,
                       fl_data=sam,
                       verbose=False)
    from auto_ml.utils_models import load_ml_model
    # Save the trained model and reload it before predicting.
    file_name = ml_predictor.save()
    trained_model = load_ml_model(file_name)
    predictions[classname] = trained_model.predict(ml_test[test_cols])
    # NOTE(review): `score` is not created in this section; it must already
    # exist and support item assignment. Same naming question for
    # `ml_test` versus `test_ml`.
    score[classname] = trained_model.predict(test_ml[test_cols])
    print('****** over to train ')

# In[31]:
mm = []
for class_name in target:
    print(
        np.mean(
            np.power(
Exemplo n.º 7
0
def test_predict_intervals_takes_in_custom_intervals():
    """Check that ``predict_intervals`` honours custom interval settings.

    A regressor trained with ``predict_intervals=[0.4, 0.6]`` must:

    * return a list for a DataFrame when ``return_type='list'``,
    * return, for a single row, a dict with exactly the keys
      ``prediction``, ``interval_0.4`` and ``interval_0.6``, or a
      three-element list when ``return_type='list'``,
    * return a DataFrame with one row per input row for ``return_type='df'``.

    The custom bounds are then compared with those of a second regressor
    trained with ``predict_intervals=True``. The custom lower bound is
    expected to be higher and the custom upper bound lower; fewer than 18%
    of the rows may violate this.

    Raises:
        AssertionError: If any of the checks above fails.
    """
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, predict_intervals=[0.4, 0.6])

    custom_intervals = ml_predictor.predict_intervals(df_boston_test,
                                                      return_type='list')

    assert isinstance(custom_intervals, list)

    singles = df_boston_test.head().to_dict('records')

    acceptable_keys = {'prediction', 'interval_0.4', 'interval_0.6'}
    for row in singles:
        result = ml_predictor.predict_intervals(row)
        assert isinstance(result, dict)
        assert 'prediction' in result
        assert 'interval_0.4' in result
        assert 'interval_0.6' in result
        # No keys beyond the three expected ones may be present.
        for key in result:
            assert key in acceptable_keys

    for row in singles:
        result = ml_predictor.predict_intervals(row, return_type='list')
        assert isinstance(result, list)
        assert len(result) == 3

    df_intervals = ml_predictor.predict_intervals(df_boston_test,
                                                  return_type='df')
    assert df_intervals.shape[0] == df_boston_test.shape[0]
    assert isinstance(df_intervals, pd.DataFrame)

    # Now make sure that the interval values are actually different
    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, predict_intervals=True)

    default_intervals = ml_predictor.predict_intervals(df_boston_test,
                                                       return_type='list')

    # This is a super flaky test, because we've got such a small datasize, and we're trying to get distributions from it
    len_intervals = len(custom_intervals)
    num_failures = 0
    for idx, custom_row in enumerate(custom_intervals):
        default_row = default_intervals[idx]

        # Index 1 is compared as the lower bound and index 2 as the upper
        # bound; values are truncated to int before comparing.
        if int(custom_row[1]) <= int(default_row[1]):
            num_failures += 1
            print('{} should be higher than {}'.format(custom_row[1],
                                                       default_row[1]))
        if int(custom_row[2]) >= int(default_row[2]):
            # Report the upper-bound values that were actually compared.
            print('{} should be lower than {}'.format(custom_row[2],
                                                      default_row[2]))
            num_failures += 1

    assert num_failures < 0.18 * len_intervals
Exemplo n.º 8
0
# Show the size and the first rows of both splits.
print("Training set: (" + str(len(df_train.index)) + ")")
print(df_train.head())

print("Testing set:  (" + str(len(df_test.index)) + ")")
print(df_test.head())

# Reuse cached predictions when the CSV can be read; otherwise train a
# regressor, predict on the test set and write the cache.
try:
    last_output = pd.read_csv('cache_of_regression_output.csv')
    df_test = last_output
except:
    # NOTE(review): the bare except treats *any* failure while reading the
    # cache (not only a missing file) as "no cache" and retrains.
    column_descriptions = {'lmp': 'output', 'time_utc': 'ignore'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)
    # ml_predictor.train(df_train, model_names=['DeepLearningRegressor'])
    ml_predictor.train(
        df_train)  # just use gradient-boosted regressor instead of tensorflow

    ml_predictor.score(df_test, df_test.lmp)

    predictions = ml_predictor.predict(df_test)
    df_test['PredictedLMP'] = predictions

    # Only these three columns are written to the cache file.
    df_test.to_csv('cache_of_regression_output.csv',
                   columns=['time_utc', 'lmp', 'PredictedLMP'])

# trying to follow this here: https://www.dataquest.io/blog/tutorial-time-series-analysis-with-pandas/
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn style defaults and set the default figure size

# sns.set(rc={'figure.figsize':(11, 4)})
Exemplo n.º 9
0
        'price': 'output',
        'buildingTypeId': 'categorical',
        "tradeTypeId": 'categorical',
        # 'bedrooms': 'categorical',
        # 'year': 'categorical',
        # 'month': 'categorical',

    }

    print(column_description1)
    # 合并两个字典
    column_descriptions = dict(column_description1, **column_description2)
    # 定义预测
    ml_predictor = Predictor(type_of_estimator='Regressor', column_descriptions=column_descriptions)
    # 训练,model_name指定算法
    ml_predictor.train(df_train,model_names='XGBRegressor') #  XGBRegressor DeepLearningRegressor
    # 保存模型
    ml_predictor.save('auto_ml_new.h5')


    # 预测预测数据
    x = ml_predictor.predict(df_test)
    x_dataframe = pd.DataFrame(x,columns=['predictions'])
    merge_data = pd.concat((origin_data,x_dataframe),axis=1)
    merge_data_df = pd.DataFrame(merge_data)
    merge_data_df.to_csv('./merge_data_bak/merge_data_predictions_auto_ml.csv',index=False)
    print(x_dataframe.describe())
    print(df_test_label.describe())

    print(mean_absolute_error(df_test_label,x))
    compute_ratio(merge_data_df)
Exemplo n.º 10
0
        'daysOnMarket': 'output',
        'buildingTypeId': 'categorical',
        "tradeTypeId": 'categorical',
        # 'bedrooms': 'categorical',
        # 'year': 'categorical',
        # 'month': 'categorical',
    }

    print(column_description1)
    # 合并两个字典
    column_descriptions = dict(column_description1, **column_description2)

    ml_predictor = Predictor(type_of_estimator='Regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_train, model_names='XGBRegressor')

    # ml_predictor.score(df_test)
    x = ml_predictor.predict(df_test)

    # log还原
    # x = np.expm1(x)

    x_dataframe = pd.DataFrame(x, columns=['predictions'])
    merge_data = pd.concat((origin_data, x_dataframe), axis=1)
    merge_data_df = pd.DataFrame(merge_data)
    merge_data_df.to_csv(
        './merge_data_bak/merge_data_auto_ml_test_listing_17.csv', index=False)
    print(x_dataframe.describe())
    print(df_test_label.describe())
Exemplo n.º 11
0
# print type(pd)

# print "Testing with dict objects"

# ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions={'sentence':'text','target': 'output'})

# # X_test, y_test = utils.split_output_dataframe(testing_data, output_column_name='target')

# # ml_predictor.train(training_data, optimize_entire_pipeline=True, optimize_final_model=True)
# ml_predictor.train(training_data)

# # ml_predictor.predict_proba(X_test)
# print(ml_predictor.score(X_test, y_test))


# NOTE(review): the next line is a Python 2 print statement; it is a syntax
# error under Python 3.
print "Testing with dataframes"
# Convert the training and testing data into DataFrames.
training_data=pd.DataFrame.from_dict(training_data)
testing_data=pd.DataFrame.from_dict(testing_data)

# 'sentence' is described as a text column and 'target' as the output.
ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions={'sentence':'text','target': 'output'})

# X_test, y_test = utils.split_output_dataframe(testing_data, output_column_name='target')

# ml_predictor.train(training_data, optimize_entire_pipeline=True, optimize_final_model=True)
ml_predictor.train(training_data)

# ml_predictor.predict_proba(X_test)
# Score on the testing frame; it still contains the 'target' column.
y_test = testing_data['target']
print(ml_predictor.score(testing_data, y_test))

def _version_tuple(version):
    """Return the leading digits of each dot-separated piece as a tuple of ints."""
    numbers = []
    for piece in str(version).split('.'):
        digits = ''
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        # A piece without leading digits (e.g. 'dev') counts as 0.
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


def train_old_model():
    """Train and save the 2.1.6 reference Titanic model, then sanity-check it.

    Refuses to run when ``auto_ml_version`` is newer than 2.1.6. Versions
    are compared numerically, component by component; a plain string
    comparison would treat e.g. '10.0.0' as older than '2.1.6'.

    The trained model is saved as ``trained_ml_model_v_2_1_6.dill`` (the
    file is intentionally left on disk), reloaded, and checked for score,
    speed and score stability using single-row predictions.

    Raises:
        TypeError: If ``auto_ml_version`` is newer than 2.1.6.
        AssertionError: If a score or the timing is outside its bounds.
    """
    print('auto_ml_version')
    print(auto_ml_version)
    if _version_tuple(auto_ml_version) > _version_tuple('2.1.6'):
        raise TypeError(
            'train_old_model requires auto_ml <= 2.1.6, found {}'.format(
                auto_ml_version))

    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train)

    file_name = ml_predictor.save('trained_ml_model_v_2_1_6.dill')

    saved_ml_pipeline = load_ml_model(file_name)

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)

    # Make sure our score is good, but not unreasonably good
    lower_bound = -0.16

    assert lower_bound < first_score < -0.135

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        # Cycle through the test rows when there are fewer than 1000.
        saved_ml_pipeline.predict(
            df_titanic_test_dictionaries[idx % data_length])
    duration = datetime.datetime.now() - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the
    # same after running a few experiments as it is the first time)
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)

    # Make sure our score is good, but not unreasonably good
    assert lower_bound < second_score < -0.135
Exemplo n.º 13
0
def test_ignores_new_invalid_features():
    """Check that unknown, unprocessable features are ignored at prediction time.

    One of the great unintentional features of auto_ml is that you can pass
    in new features at prediction time that weren't present at training
    time, and they're silently ignored. One edge case is new features that
    are strange objects (lists, datetimes, callables, classes, nan/inf).
    Roughly one row in ten gets such extra keys added before prediction.

    The reloaded model is then checked for score, speed, and score stability
    on the (now mutated) row dictionaries.

    Raises:
        AssertionError: If a score or the timing is outside its bounds.
    """
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train)

    # Round-trip the trained pipeline through disk under a random file name.
    file_name = ml_predictor.save(str(random.random()))
    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        # Presumably only deep learning models leave this companion file
        # behind, so a missing file is expected and not an error.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        pass

    df_boston_test_dictionaries = df_boston_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = []
    for row in df_boston_test_dictionaries:
        if random.random() > 0.9:
            # The extra keys are added to the row dict in place, so they
            # are still present during the speed and stability passes.
            row['totally_new_feature'] = datetime.datetime.now()
            row['really_strange_feature'] = random.random
            row['we_should_really_ignore_this'] = Predictor
            row['pretty_vanilla_ignored_field'] = 8
            row['potentially_confusing_things_here'] = float('nan')
            row['potentially_confusing_things_again'] = float('inf')
            row['this_is_a_list'] = [1, 2, 3, 4, 5]
        predictions.append(saved_ml_pipeline.predict(row))

    print('predictions')
    print(predictions)
    print('predictions[0]')
    print(predictions[0])
    print('type(predictions)')
    print(type(predictions))
    first_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('first_score')
    print(first_score)

    # Make sure our score is good, but not unreasonably good
    lower_bound = -3.0
    assert lower_bound < first_score < -2.7

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_boston_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        # Cycle through the test rows when there are fewer than 1000.
        saved_ml_pipeline.predict(
            df_boston_test_dictionaries[idx % data_length])
    duration = datetime.datetime.now() - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.1 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the
    # same after running a few experiments as it is the first time)
    predictions = [
        saved_ml_pipeline.predict(row) for row in df_boston_test_dictionaries
    ]

    second_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('second_score')
    print(second_score)

    # Make sure our score is good, but not unreasonably good
    assert lower_bound < second_score < -2.7
def test_user_input_func_classification():
    """Check that a ``user_input_func`` survives saving and single-row prediction.

    A classifier is trained with ``age_bucketing`` as its
    ``user_input_func``; the function derives a categorical ``age_bucket``
    feature from ``age``. The model is saved, reloaded, and then checked for
    score, speed, and score stability using one-dictionary-at-a-time
    predictions.

    Raises:
        AssertionError: If a score or the timing is outside its bounds.
    """
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    def age_bucketing(data):
        """Add an ``age_bucket`` entry to a dict row or a DataFrame; return it."""

        def define_buckets(age):
            # Anything that fails all three comparisons (ages above 60, and
            # also NaN) ends up in 'over_60'.
            if age <= 17:
                return 'youth'
            elif age <= 40:
                return 'adult'
            elif age <= 60:
                return 'adult2'
            else:
                return 'over_60'

        if isinstance(data, dict):
            data['age_bucket'] = define_buckets(data['age'])
        else:
            data['age_bucket'] = data.age.apply(define_buckets)

        return data

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
        'age_bucket': 'categorical',
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, user_input_func=age_bucketing)

    # Round-trip the trained pipeline through disk under a random file name.
    file_name = ml_predictor.save(str(random.random()))
    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        # Presumably only deep learning models leave this companion file
        # behind, so a missing file is expected and not an error.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        pass

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('first_score')
    print(first_score)

    # Make sure our score is good, but not unreasonably good
    lower_bound = -0.16

    assert lower_bound < first_score < -0.135

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        # Cycle through the test rows when there are fewer than 1000.
        saved_ml_pipeline.predict(
            df_titanic_test_dictionaries[idx % data_length])
    duration = datetime.datetime.now() - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the
    # same after running a few experiments as it is the first time)
    predictions = [
        saved_ml_pipeline.predict_proba(row)[1]
        for row in df_titanic_test_dictionaries
    ]

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('second_score')
    print(second_score)

    # Make sure our score is good, but not unreasonably good
    assert lower_bound < second_score < -0.135
Exemplo n.º 15
0
def getting_single_predictions_regressor_test():
    """Check single-row predictions from a saved and reloaded ensemble regressor.

    Trains on the Boston data with an ``ensemble_config`` naming
    ``LGBMRegressor`` and ``RandomForestRegressor``, checks the score of the
    in-memory model, then saves and reloads it and checks score, speed, and
    score stability using one-dictionary-at-a-time predictions.

    Raises:
        AssertionError: If a score or the timing is outside its bounds.
    """
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical',
    }

    ensemble_config = [
        {'model_name': 'LGBMRegressor'},
        {'model_name': 'RandomForestRegressor'},
    ]

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, ensemble_config=ensemble_config)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.5 < test_score < -2.8

    # Round-trip the trained pipeline through disk under a random file name.
    file_name = ml_predictor.save(str(random.random()))
    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        # Presumably only deep learning models leave this companion file
        # behind, so a missing file is expected and not an error.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        pass

    df_boston_test_dictionaries = df_boston_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = [
        saved_ml_pipeline.predict(row) for row in df_boston_test_dictionaries
    ]

    first_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('first_score')
    print(first_score)

    # Make sure our score is good, but not unreasonably good
    lower_bound = -3.5

    assert lower_bound < first_score < -2.8

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_boston_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        # Cycle through the test rows when there are fewer than 1000.
        saved_ml_pipeline.predict(
            df_boston_test_dictionaries[idx % data_length])
    duration = datetime.datetime.now() - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # The upper bound here is 60 seconds, looser than in the sibling tests.
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 60

    # 3. make sure we're not modifying the dictionaries (the score is the
    # same after running a few experiments as it is the first time)
    predictions = [
        saved_ml_pipeline.predict(row) for row in df_boston_test_dictionaries
    ]

    second_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('second_score')
    print(second_score)

    # Make sure our score is good, but not unreasonably good
    assert lower_bound < second_score < -2.8
    def test_getting_single_predictions_nlp_date_multilabel_classification():
        """Check single-row predictions for a multilabel classifier with nlp and date columns.

        Trains on the Twitter airline sentiment data with categorical,
        ``nlp`` and ``date`` column descriptions, saves and reloads the
        model, and then checks accuracy, speed, and accuracy stability using
        one-dictionary-at-a-time predictions.

        Raises:
            AssertionError: If an accuracy or the timing is outside its bounds.
        """
        np.random.seed(0)

        df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset()

        column_descriptions = {
            'airline_sentiment': 'output',
            'airline': 'categorical',
            'text': 'nlp',
            'tweet_location': 'categorical',
            'user_timezone': 'categorical',
            'tweet_created': 'date',
        }

        ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
        ml_predictor.train(df_twitter_train)

        # Round-trip the trained pipeline through disk under a random name.
        file_name = ml_predictor.save(str(random.random()))
        saved_ml_pipeline = load_ml_model(file_name)

        os.remove(file_name)
        try:
            # Presumably only deep learning models leave this companion
            # file behind, so a missing file is expected and not an error.
            keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
            os.remove(keras_file_name)
        except OSError:
            pass

        df_twitter_test_dictionaries = df_twitter_test.to_dict('records')

        # 1. make sure the accuracy is the same
        predictions = [
            saved_ml_pipeline.predict(row)
            for row in df_twitter_test_dictionaries
        ]

        print('predictions')
        print(predictions)

        first_score = accuracy_score(df_twitter_test.airline_sentiment, predictions)
        print('first_score')
        print(first_score)

        # Make sure our score is good, but not unreasonably good
        lower_bound = 0.73
        assert lower_bound < first_score < 0.79

        # 2. make sure the speed is reasonable (do it a few extra times)
        data_length = len(df_twitter_test_dictionaries)
        start_time = datetime.datetime.now()
        for idx in range(1000):
            # Cycle through the test rows when there are fewer than 1000.
            saved_ml_pipeline.predict(
                df_twitter_test_dictionaries[idx % data_length])
        duration = datetime.datetime.now() - start_time

        print('duration.total_seconds()')
        print(duration.total_seconds())

        # It's very difficult to set a benchmark for speed that will work across all machines.
        # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
        # That's about 1 millisecond per prediction
        # Assuming we might be running on a test box that's pretty weak, multiply by 3
        # Also make sure we're not running unreasonably quickly
        assert 0.2 < duration.total_seconds() < 15

        # 3. make sure we're not modifying the dictionaries (the score is
        # the same after running a few experiments as it is the first time)
        predictions = [
            saved_ml_pipeline.predict(row)
            for row in df_twitter_test_dictionaries
        ]

        print('predictions')
        print(predictions)
        print('df_twitter_test_dictionaries')
        print(df_twitter_test_dictionaries)
        second_score = accuracy_score(df_twitter_test.airline_sentiment, predictions)
        print('second_score')
        print(second_score)

        # Make sure our score is good, but not unreasonably good
        assert lower_bound < second_score < 0.79
Exemplo n.º 17
0
class prediction(model):
    """Model wrapper whose backend is selected by the ``estimator`` property.

    ``'TPOTRegressor'`` and ``'TPOTClassifier'`` are routed through the
    ``regressor`` / ``classifier`` factories and ``train_model``; any other
    estimator value is handed to ``Predictor``.
    """

    @property
    def estimator(self):
        """Estimator identifier used to pick the backend.

        Raises:
            NotImplementedError: always in this class; presumably concrete
                subclasses override the property.
        """
        raise NotImplementedError()

    def build(self, meta_data, verbose, max_time_mins, max_eval_time_mins,
              config_dict, warm_start, scoring):
        """Create (or reuse) the underlying model and record the label.

        Args:
            meta_data: passed to ``Predictor`` as ``column_descriptions`` and
                to ``self.meta_data_key`` to look up the ``"output"`` entry.
            verbose, max_time_mins, max_eval_time_mins, config_dict, scoring:
                forwarded positionally to ``regressor`` / ``classifier``;
                unused for non-TPOT estimators.
            warm_start: forwarded to the TPOT factory; together with
                ``globals_file.run_experiment_warm_start`` it decides whether
                a fresh TPOT model is built or the stored one is reused.
        """
        kind = self.estimator

        if kind in ('TPOTRegressor', 'TPOTClassifier'):
            # A fresh model is built as soon as either warm-start flag
            # compares equal to False; otherwise the model kept in
            # globals_file is reused.
            start_fresh = (globals_file.run_experiment_warm_start == False
                           or warm_start == False)
            if start_fresh:
                make_model = regressor if kind == 'TPOTRegressor' else classifier
                self.__model = make_model(verbose, max_time_mins,
                                          max_eval_time_mins, config_dict,
                                          warm_start, scoring)
            else:
                self.__model = globals_file.run_experiment_model
        else:
            self.__model = Predictor(type_of_estimator=kind,
                                     column_descriptions=meta_data)

        self.__label = self.meta_data_key(meta_data, "output")

    def train_and_score(self, data, labels, verbose, interpret, warm_start,
                        export_pipeline):
        """Train the model built by ``build`` and, for non-TPOT models, score it.

        Args:
            data: dataset given to ``train_model`` (TPOT) or split 80/20 via
                ``train_test_split`` (everything else).
            labels: not used by this method.
            verbose: when it compares equal to ``False``, training output is
                captured and discarded and scoring runs with ``verbose=0``.
            interpret, warm_start, export_pipeline: forwarded to
                ``train_model``; unused for non-TPOT estimators.
        """
        kind = self.estimator

        # TPOT estimators: training is delegated entirely to train_model.
        if kind in ('TPOTRegressor', 'TPOTClassifier'):
            self.__model = train_model(data, self.__label, self.__model,
                                       kind, interpret, warm_start,
                                       export_pipeline)
            return

        # Everything else: hold out 20% of the data for scoring.
        training_data, test_data = train_test_split(data, test_size=0.2)
        quiet = verbose == False

        # NOTE: installs a process-wide 'ignore' filter that is never restored.
        warnings.filterwarnings('ignore')

        if quiet:
            # Swallow whatever the training run prints to stdout.
            with redirect_stdout(io.StringIO()):
                self.__model.train(training_data,
                                   verbose=False,
                                   ml_for_analytics=False)
        else:
            self.__model.train(training_data,
                               verbose=True,
                               ml_for_analytics=False)

        self.__model.score(test_data,
                           test_data[self.__label],
                           verbose=0 if quiet else 1)

    def interpret(self):
        """Placeholder; does nothing and returns ``None``."""
        pass

    def python_object(self):
        """Return the underlying model object held by this wrapper."""
        return self.__model