Exemplo n.º 1
0
def en():
    """Run the elastic-net weight-path experiment on ``friedman1``.

    Builds an estimator on a level-2 ``sg.GridType_ModLinear`` grid with
    ``sg.RegularizationType_ElasticNet`` and an l1 ratio of 0.3, passes it to
    ``calculate_weight_path`` with 50 lambdas, and writes the returned object
    to ``path-f1-l2-en.csv`` via its ``to_csv`` method.
    """
    features, targets = get_xy(get_dataset('friedman1'))

    regularization = sg.RegularizationType_ElasticNet
    config = model.GridConfig(type=sg.GridType_ModLinear, T=0.0, level=2)

    # get_estimator returns four values; the third and fourth are not needed
    # for the weight path and are deliberately ignored here.
    learner, _, lambda_max, _ = get_estimator(config, features, targets, 0.3,
                                              regularization)

    weight_path = calculate_weight_path(
        learner, features, targets, lambda_max, num_lambdas=50, verbose=1)
    weight_path.to_csv('path-f1-l2-en.csv', index=True)
Exemplo n.º 2
0
def grp_heat():
    """Run the group-lasso weight-path experiment on ``friedman1``.

    Builds an estimator on a level-3 ``sg.GridType_ModLinear`` grid with
    ``sg.RegularizationType_GroupLasso`` and an l1 ratio of 1.0, passes it to
    ``calculate_weight_path`` with 10 lambdas, and writes the returned object
    to ``path-f1-l3-grp.csv`` via its ``to_csv`` method.
    """
    features, targets = get_xy(get_dataset('friedman1'))

    regularization = sg.RegularizationType_GroupLasso
    config = model.GridConfig(type=sg.GridType_ModLinear, T=0.0, level=3)

    # get_estimator returns four values; the third and fourth are not needed
    # for the weight path and are deliberately ignored here.
    learner, _, lambda_max, _ = get_estimator(config, features, targets, 1.0,
                                              regularization)

    weight_path = calculate_weight_path(
        learner, features, targets, lambda_max, num_lambdas=10, verbose=1)
    weight_path.to_csv('path-f1-l3-grp.csv', index=True)
Exemplo n.º 3
0
def main(level, num, T, dataset, _log):
    """Grid-search the regularization strength and store the results.

    Loads train/test data for ``dataset``, cross-validates an
    ``SGClassificationLearner`` (optdigits datasets) or ``SGRegressionLearner``
    (everything else) for every value in the lambda grid, and adds one
    ``model.Result`` per grid-search score to the SQL session.  The first
    score in descending sorted order is additionally refit on the whole
    training set and scored on the training and test sets.

    Args:
        level: Level passed to ``model.GridConfig``.
        num: Size of the logarithmic lambda grid.  Currently unused because
            the grid is fixed to ``[0.1]`` (see the commented-out line below).
        T: ``T`` value passed to ``model.GridConfig``.
        dataset: Dataset name.  Names containing ``'optdigits'`` load the
            predefined train/test files (the ``sub`` variants when the name
            also contains ``'sub'``); any other name is loaded with
            ``get_dataset`` and divided with ``split``.
        _log: Logger used for progress messages.
    """
    sg.omp_set_num_threads(4)
    is_classification = 'optdigits' in dataset
    if is_classification:
        if 'sub' in dataset:
            df_train = get_dataset('optdigits_sub_train')
            df_test = get_dataset('optdigits_sub_test')
        else:
            df_train = get_dataset('optdigits_train')
            df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")

    session = model.make_session()
    _log.debug("Created SQL session.")

    grid_config = model.GridConfig(type=6, level=level, T=T)
    # NOTE: 'treshold' (sic) is the keyword the model class is called with
    # throughout this code; it is kept as-is.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0,
                                               no_points=3,
                                               treshold=0.0,
                                               percent=0.0)
    # The np.float alias was removed in NumPy 1.24; it was always just the
    # builtin float, so this yields the same double-precision epsilon.
    epsilon = np.sqrt(np.finfo(float).eps)
    solver_type = sg.SLESolverType_CG
    solver_config = model.SolverConfig(type=solver_type,
                                       max_iterations=70,
                                       epsilon=epsilon,
                                       threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type,
                                             max_iterations=150,
                                             epsilon=epsilon,
                                             threshold=10e-6)

    # Alternative solver setup:
    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-8)
    regularization_type = sg.RegularizationType_Identity
    #regularization_type = sg.RegularizationType_ElasticNet
    regularization_config = model.RegularizationConfig(
        type=regularization_type, l1_ratio=0.05, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)

    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)

    _log.debug("Created configurations.")

    interactions = None
    #interactions = mnist_interactions(l2_distance, 2*np.sqrt(2), level)
    grid_config.interactions = str(interactions)
    learner_cls = (SGClassificationLearner if is_classification
                   else SGRegressionLearner)
    estimator = learner_cls(grid_config, regularization_config,
                            solver_config, final_solver_config,
                            adaptivity_config, interactions)
    cv = get_cv(dataset, y_train)
    experiment.cv = str(cv)
    #lambda_grid = np.logspace(-6, -1, num=num)
    lambda_grid = [0.1]
    parameters = {'regularization_config__lambda_reg': lambda_grid}
    grid_search = GridSearch(estimator, parameters, cv)
    _log.info("Start learning.")
    grid_search.fit(X_train, y_train)
    _log.info("Finished learning.")

    # Scores of the regression learner are sign-flipped before being stored
    # as "mse"; classification scores are stored unchanged.
    # NOTE(review): presumably the regression scorer returns negative MSE --
    # confirm against the learner implementation.
    error_mult = 1 if is_classification else -1

    first = True
    for score in sorted(grid_search.grid_scores_,
                        key=itemgetter(1),
                        reverse=True):
        validation_mse = error_mult * score.mean_validation_score
        validation_std = np.std(np.abs(score.cv_validation_scores))
        validation_grid_sizes = score.cv_grid_sizes
        # Start from the estimator's current parameters and overlay the ones
        # this grid-search entry was evaluated with.
        params = estimator.get_params()
        params.update(score.parameters)
        regularization_config = model.RegularizationConfig(
            type=params['regularization_config__type'],
            lambda_reg=params['regularization_config__lambda_reg'],
            exponent_base=params['regularization_config__exponent_base'],
            l1_ratio=params['regularization_config__l1_ratio'])
        session.add(regularization_config)
        result = model.Result(
            validation_mse=validation_mse,
            validation_std=validation_std,
            grid_config=grid_config,
            adaptivity_config=adaptivity_config,
            solver_config=solver_config,
            final_solver_config=final_solver_config,
            regularization_config=regularization_config,
            experiment=experiment,
            validation_grid_points_mean=np.mean(validation_grid_sizes),
            validation_grid_points_stdev=np.std(validation_grid_sizes))
        # Retrain best learner and validate test set:
        if first:
            first = False
            estimator.set_params(**params)
            estimator.fit(X_train, y_train)
            result.train_grid_points = estimator.get_grid_size()
            result.train_mse = error_mult * estimator.score(X_train, y_train)
            result.test_mse = error_mult * estimator.score(X_test, y_test)
            if not is_classification:
                result.train_r2 = get_r_squared(estimator, X_train, y_train)
                result.test_r2 = get_r_squared(estimator, X_test, y_test)

        session.add(result)

    _log.debug("Pushing to database.")
    session.commit()

    _log.info("Finished experiment.")
Exemplo n.º 4
0
def main(level, num, num_init, T, dataset, _log):
    """Tune the regularization strength with Bayesian optimisation and store the result.

    Loads train/test data for ``dataset``, lets ``BayesOptReg`` search
    ``regularization_config__lambda_reg`` in ``[0.0, 0.3]`` using
    cross-validation, refits an ``SGRegressionLearner`` with the best
    parameters on the whole training set, and commits a single
    ``model.Result`` (with train/test MSE and R^2) to the SQL session.

    Args:
        level: Level passed to ``model.GridConfig``.
        num: Passed positionally to ``BayesOptReg`` after the search space.
            NOTE(review): presumably the number of optimisation iterations --
            confirm against ``BayesOptReg``.
        num_init: Passed to ``BayesOptReg`` as ``n_init_samples``.
        T: ``T`` value passed to ``model.GridConfig``.
        dataset: Dataset name.  Names containing ``'optdigits'`` load the
            predefined optdigits train/test files; any other name is loaded
            with ``get_dataset`` and divided with ``split``.
        _log: Logger used for progress messages.
    """
    sg.omp_set_num_threads(4)
    if 'optdigits' in dataset:
        df_train = get_dataset('optdigits_train')
        df_test = get_dataset('optdigits_test')
        X_train, y_train = get_xy(df_train)
        X_test, y_test = get_xy(df_test)
    else:
        df = get_dataset(dataset)
        train, test = split(df)
        X_train, y_train = get_xy(train)
        X_test, y_test = get_xy(test)
    _log.debug("Read file.")

    session = model.make_session()
    _log.debug("Created SQL session.")

    grid_config = model.GridConfig(type=6, level=level, T=T)
    # NOTE: 'treshold' (sic) is the keyword the model class is called with
    # throughout this code; it is kept as-is.
    adaptivity_config = model.AdaptivityConfig(num_refinements=0, no_points=0, treshold=0.0, percent=0.0)
    # The np.float alias was removed in NumPy 1.24; it was always just the
    # builtin float, so this yields the same double-precision epsilon.
    epsilon = np.sqrt(np.finfo(float).eps)
    solver_type = sg.SLESolverType_CG
    solver_config = model.SolverConfig(type=solver_type, max_iterations=70, epsilon=epsilon, threshold=10e-5)
    final_solver_config = model.SolverConfig(type=solver_type, max_iterations=250, epsilon=epsilon, threshold=10e-6)
    # Alternative solver setup:
    # solver_type = sg.SLESolverType_FISTA
    # solver_config = model.SolverConfig(type=solver_type, max_iterations=200, epsilon=0.0, threshold=10e-5)
    # final_solver_config = model.SolverConfig(type=solver_type, max_iterations=400, epsilon=0.0, threshold=10e-6)
    regularization_type = sg.RegularizationType_Identity
    regularization_config = model.RegularizationConfig(type=regularization_type, l1_ratio=1.0, exponent_base=1.0)
    experiment = model.Experiment(dataset=dataset)

    _log.debug("Created configurations.")

    interactions = [[0, 1]] + [[i] for i in range(0, 11)]
    #interactions = None
    grid_config.interactions = str(interactions)
    estimator = SGRegressionLearner(grid_config, regularization_config, solver_config,
                                    final_solver_config, adaptivity_config, interactions)

    cv = get_cv(dataset, X_train)
    experiment.cv = str(cv)
    search_space = [Hyp_param('regularization_config__lambda_reg', 0.0, 0.3)]
                    #Hyp_param('regularization_config__exponent_base', 3.0, 7.0)]

    bayes_search = BayesOptReg(estimator, cv, X_train, y_train,
                               search_space, num, n_init_samples=num_init)

    _log.info("Start learning.")
    validation_score, best_params, cv_grid_sizes = bayes_search.optimize()
    _log.info("Finished learning.")
    _log.info("Best CV-MSE was {}. With params {}".format(validation_score, best_params))

    # Retrain estimator with best parameters
    params = estimator.get_params()
    params.update(best_params)
    estimator.set_params(**params)

    estimator.fit(X_train, y_train)

    # Build the regularization row from the tuned parameters *before* the
    # result, so the stored result references the configuration that actually
    # produced it.  Previously the result was linked to the initial
    # configuration (which carries no lambda_reg) and the tuned one was
    # stored without any result pointing at it.
    best_regularization_config = model.RegularizationConfig(
        type=params['regularization_config__type'],
        lambda_reg=params['regularization_config__lambda_reg'],
        exponent_base=params['regularization_config__exponent_base'],
        l1_ratio=params['regularization_config__l1_ratio'])

    result = model.Result(validation_mse=validation_score,
                          grid_config=grid_config,
                          adaptivity_config=adaptivity_config,
                          solver_config=solver_config,
                          final_solver_config=final_solver_config,
                          regularization_config=best_regularization_config,
                          experiment=experiment,
                          validation_grid_points_mean=np.mean(cv_grid_sizes),
                          validation_grid_points_stdev=np.std(cv_grid_sizes))

    result.train_grid_points = estimator.get_grid_size()
    # Scores are sign-flipped before being stored as "mse".
    # NOTE(review): presumably the scorer returns negative MSE -- confirm
    # against the learner implementation.
    result.train_mse = -estimator.score(X_train, y_train)
    result.train_r2 = get_r_squared(estimator, X_train, y_train)
    result.test_mse = -estimator.score(X_test, y_test)
    result.test_r2 = get_r_squared(estimator, X_test, y_test)

    session.add(grid_config)
    session.add(adaptivity_config)
    session.add(solver_config)
    session.add(final_solver_config)
    session.add(experiment)
    session.add(best_regularization_config)
    session.add(result)

    _log.debug("Pushing to database.")
    session.commit()

    _log.info("Finished experiment.")