Exemplo n.º 1
0
def run_fractional_stratification_model(
        estimator=None,
        data_path=None,
        config_path=None,
        num_iter=1,
        seed=None):
    '''
    Fractional Stratification Model Analysis

    Reads and preprocesses the input, then for each of num_iter iterations
    splits the data into training / validation / testing parts with
    fractional_stratification(), fits the estimator through training() and
    measures the RMSE on the validation and testing parts.

    Only iterations whose training RMSE is strictly lower than the validation
    RMSE are kept as candidates; among those, the one with the lowest
    validation RMSE is printed and returned.

    Returns the dict of properties of the best candidate (estimator,
    config_map, data splits, scalers, model and the three RMSE values), or
    None when no iteration qualified.

    Raises ValueError when estimator, data_path or config_path is None.
    '''
    required_arguments = (estimator, data_path, config_path)
    if any(argument is None for argument in required_arguments):
        raise ValueError('Need Estimator, Data path and Config Path as arguments !')

    raw_data, config_map = read_input(data_path, config_path)
    data = data_preprocessor(raw_data, config_map, 5, 'string')

    # Candidates are keyed by validation RMSE, so a later iteration with the
    # exact same validation RMSE replaces the earlier one.
    candidates = {}
    for _ in range(num_iter):
        train_part, validation_part, test_part = fractional_stratification(
            data, data.columns, 4, [0.6, 0.2, 0.2], config_map, seed)
        X_train, y_train = split_data(train_part, config_map)
        X_validation, y_validation = split_data(validation_part, config_map)
        X_test, y_test = split_data(test_part, config_map)

        X_scaler, y_scaler, model, training_rmse = training(
            estimator, X_train, y_train, config_map)
        validation_rmse = calculate_rmse(
            X_validation, y_validation, X_scaler, y_scaler, model, config_map)
        testing_rmse = calculate_rmse(
            X_test, y_test, X_scaler, y_scaler, model, config_map)

        # Discard iterations where the training error is not strictly below
        # the validation error.
        if not training_rmse < validation_rmse:
            continue

        candidates[validation_rmse] = {
            'estimator': estimator,
            'config_map': config_map,
            'X_train': X_train,
            'y_train': y_train,
            'X_validation': X_validation,
            'y_validation': y_validation,
            'X_test': X_test,
            'y_test': y_test,
            'X_scaler': X_scaler,
            'y_scaler': y_scaler,
            'model': model,
            'training_rmse': training_rmse,
            'validation_rmse': validation_rmse,
            'testing_rmse': testing_rmse,
        }

    if not candidates:
        return None

    winner = candidates[min(candidates)]
    summary = 'Best Model train error: {} | Best Model validation error: {} | Best Model test error: {}'
    print(summary.format(
        round(winner['training_rmse'], 7),
        round(winner['validation_rmse'], 7),
        round(winner['testing_rmse'], 7)))
    return winner
Exemplo n.º 2
0
def run_model_tuning(model_properties=None, alphas=None):
    '''
    Tunes the Model based on Training and Validation error

    For every candidate alpha a Ridge(alpha=alpha) estimator is handed to
    training() together with the training data, and calculate_rmse() is
    evaluated on the validation and testing data. The candidate with the
    lowest validation RMSE is printed and returned.

    Arguments:
        model_properties: dict providing the keys 'config_map', 'X_train',
            'y_train', 'X_validation', 'y_validation', 'X_test' and 'y_test'.
        alphas: optional iterable of Ridge alpha values to try. When omitted,
            np.logspace(-10, 1, 400) is used.

    Returns:
        dict with the keys 'estimator', 'config_map', 'X_scaler', 'y_scaler',
        'model', 'training_rmse', 'validation_rmse', 'testing_rmse' plus the
        data splits taken from model_properties, or None when alphas yields
        no candidate.

    Raises:
        ValueError: if model_properties is None.
        KeyError: if model_properties lacks one of the required keys.
    '''
    if model_properties is None:
        raise ValueError('Need Model Properties as argument !')
    if alphas is None:
        alphas = np.logspace(-10, 1, 400)

    config_map = model_properties['config_map']
    X_train = model_properties['X_train']
    y_train = model_properties['y_train']
    X_validation = model_properties['X_validation']
    y_validation = model_properties['y_validation']
    X_test = model_properties['X_test']
    y_test = model_properties['y_test']

    # Keep only the best candidate seen so far rather than every fitted model
    # and scaler; the losing candidates are never used again.
    best_model_properties = None
    for alpha in alphas:
        estimator = Ridge(alpha=alpha)
        X_scaler, y_scaler, model, training_rmse = training(
            estimator, X_train, y_train, config_map)
        validation_rmse = calculate_rmse(
            X_validation, y_validation, X_scaler, y_scaler, model, config_map)
        testing_rmse = calculate_rmse(
            X_test, y_test, X_scaler, y_scaler, model, config_map)

        # `<=` (not `<`): on an exact tie the later alpha replaces the
        # earlier one.
        if (best_model_properties is None
                or validation_rmse <= best_model_properties['validation_rmse']):
            best_model_properties = {
                'estimator': estimator,
                'config_map': config_map,
                'X_scaler': X_scaler,
                'y_scaler': y_scaler,
                'model': model,
                'training_rmse': training_rmse,
                'validation_rmse': validation_rmse,
                'testing_rmse': testing_rmse,
            }

    if best_model_properties is None:
        # Only reachable when the caller supplied an empty alpha grid.
        return None

    # Carry the data splits along so the result has the same shape as the
    # model_properties dict that was passed in.
    best_model_properties['X_train'] = X_train
    best_model_properties['y_train'] = y_train
    best_model_properties['X_validation'] = X_validation
    best_model_properties['y_validation'] = y_validation
    best_model_properties['X_test'] = X_test
    best_model_properties['y_test'] = y_test
    print('Best Model train error: {} | Best Model validation error: {} | Best Model test error: {}'.format(
        round(best_model_properties['training_rmse'], 7),
        round(best_model_properties['validation_rmse'], 7),
        round(best_model_properties['testing_rmse'], 7)))
    return best_model_properties
Exemplo n.º 3
0
def run_train_test_model(
        estimator=None,
        data_path=None,
        config_path=None,
        num_iter=1,
        seed=None):
    '''
    Train/test Model Analysis

    Reads and preprocesses the input, then for each of num_iter iterations
    splits the data with split_train_test_data(), fits the estimator through
    training() and measures the RMSE on the testing part.

    Only iterations whose training RMSE is strictly lower than the testing
    RMSE are kept as candidates; among those, the one with the lowest testing
    RMSE is printed and returned.

    Returns the dict of properties of the best candidate (estimator,
    config_map, data splits, scalers, model and both RMSE values), or None
    when no iteration qualified.

    Raises ValueError when estimator, data_path or config_path is None.
    '''
    required_arguments = (estimator, data_path, config_path)
    if any(argument is None for argument in required_arguments):
        raise ValueError('Need Estimator, Data path and Config Path as arguments !')

    raw_data, config_map = read_input(data_path, config_path)
    data = data_preprocessor(raw_data, config_map, 5, 'string')

    # Candidates are keyed by testing RMSE, so a later iteration with the
    # exact same testing RMSE replaces the earlier one.
    candidates = {}
    for _ in range(num_iter):
        X_train, y_train, X_test, y_test = split_train_test_data(
            data, config_map, 0.3, seed)
        X_scaler, y_scaler, model, training_rmse = training(
            estimator, X_train, y_train, config_map)
        testing_rmse = calculate_rmse(
            X_test, y_test, X_scaler, y_scaler, model, config_map)

        # Discard iterations where the training error is not strictly below
        # the testing error.
        if not training_rmse < testing_rmse:
            continue

        candidates[testing_rmse] = {
            'estimator': estimator,
            'config_map': config_map,
            'X_train': X_train,
            'y_train': y_train,
            'X_test': X_test,
            'y_test': y_test,
            'X_scaler': X_scaler,
            'y_scaler': y_scaler,
            'model': model,
            'training_rmse': training_rmse,
            'testing_rmse': testing_rmse,
        }

    if not candidates:
        return None

    winner = candidates[min(candidates)]
    summary = 'Best Model train error: {} | Best Model test error: {}'
    print(summary.format(
        round(winner['training_rmse'], 7),
        round(winner['testing_rmse'], 7)))
    return winner
Exemplo n.º 4
0
 def test_testing_error(self):
     # Regression check: the RMSE of the fixture model on the fixture test
     # split must match the recorded reference value (assertAlmostEqual's
     # default tolerance applies).
     expected_rmse = 0.0007581966925813912
     observed_rmse = calculate_rmse(
         self.X_test,
         self.y_test,
         self.X_scaler,
         self.y_scaler,
         self.model,
         self.config_map)
     self.assertAlmostEqual(observed_rmse, expected_rmse)
Exemplo n.º 5
0
 def test_validation_error(self):
     # Regression check: the RMSE of the fixture model on the fixture
     # validation split must match the recorded reference value
     # (assertAlmostEqual's default tolerance applies).
     expected_rmse = 0.0007615906548161986
     observed_rmse = calculate_rmse(
         self.X_validation,
         self.y_validation,
         self.X_scaler,
         self.y_scaler,
         self.model,
         self.config_map)
     self.assertAlmostEqual(observed_rmse, expected_rmse)