Example #1
from skopt.space import Real


def search_space():
    # Create search space for optimization
    search_space = {
        "alpha": Real(low=0.001, high=5.0),
        "eta": Real(low=0.001, high=5.0)
    }

    return search_space
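A minimal usage sketch for this dict-style space, assuming scikit-optimize is installed: the dimensions are named from the dict keys and fed to gp_minimize with a toy objective (the objective below is an illustration, not part of the original example).

from skopt import gp_minimize
from skopt.utils import use_named_args

dimensions = []
for name, dim in search_space().items():
    dim.name = name          # use_named_args requires named dimensions
    dimensions.append(dim)

@use_named_args(dimensions)
def objective(alpha, eta):
    # Toy objective standing in for a real model evaluation
    return (alpha - 1.0) ** 2 + (eta - 0.5) ** 2

result = gp_minimize(objective, dimensions, n_calls=15, random_state=0)
print(result.x)  # best [alpha, eta] found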
Example #2
def extract_search_space(flat_search_config):
    """ Find the variable dimensions and convert them to a skopt search space.
    """
    search_space = OrderedDict()
    for k, v in flat_search_config.items():
        # Lists with more than one value are search dimensions
        if isinstance(v, list) and len(v) > 1:
            force_categorical = len(v) > 2

            # Dedupe the list, escaping specials, and sort smallest to largest
            ds = sorted({escape_special(u) for u in v})
            prior = flat_search_config.get(f'{k}__PRIOR', None)
            base = flat_search_config.get(f'{k}__BASE', 10)

            if force_categorical or isinstance(ds[0], str):
                transform = flat_search_config.get(f'{k}__TRANSFORM', 'onehot')
                dim = Categorical(ds, prior=prior, transform=transform, name=k)
            elif isinstance(ds[0], int):
                transform = flat_search_config.get(f'{k}__TRANSFORM', 'normalize')
                dim = Integer(*tuple(ds), prior=prior, transform=transform, base=base, name=k)
            elif isinstance(ds[0], float):
                transform = flat_search_config.get(f'{k}__TRANSFORM', 'normalize')
                dim = Real(*tuple(ds), prior=prior, transform=transform, base=base, name=k)
            else:
                # Guard against an unbound `dim` for unsupported value types
                raise TypeError(f'Unsupported dimension type for "{k}": {type(ds[0])}')

            search_space[k] = dim
    return search_space
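For reference, a hypothetical input that exercises each branch above (escape_special is a project-specific helper, assumed here to pass plain values through unchanged):

flat_search_config = {
    "lr": [1e-4, 1e-1],            # two floats -> Real dimension
    "lr__PRIOR": "log-uniform",    # companion keys are read via .get()
    "units": [32, 512],            # two ints -> Integer dimension
    "layers": [1, 2, 3],           # more than two values -> forced Categorical
    "optimizer": ["adam", "sgd"],  # strings -> Categorical dimension
    "seed": 42,                    # scalar -> fixed value, not a dimension
}
space = extract_search_space(flat_search_config)
# OrderedDict with Real('lr'), Integer('units'), Categorical('layers'), ...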
Example #3
def load_search_space(search_space):
    """
    Load the search space from the json file

    :param search_space: dictionary of the search space (insertable in a json file)
    :type search_space: dict
    :return: dictionary for the search space (for scikit optimize)
    :rtype: dict
    """
    from skopt.space.space import Real, Categorical, Integer

    ss = dict()
    for key in list(search_space.keys()):
        if search_space[key][0] == 'Real':
            ss[key] = Real(low=search_space[key][1][0],
                           high=search_space[key][1][1],
                           prior=search_space[key][2])
        elif search_space[key][0] == 'Integer':
            ss[key] = Integer(low=search_space[key][1][0],
                              high=search_space[key][1][1],
                              prior=search_space[key][2])
        elif search_space[key][0] == 'Categorical':
            ss[key] = Categorical(categories=search_space[key][1])

    return ss
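The expected serialized format follows from the branches above: each key maps to a [type, bounds, prior] triple, with the prior unused for Categorical. A hypothetical example:

search_space_json = {
    "learning_rate": ["Real", [1e-4, 1e-1], "log-uniform"],
    "num_layers": ["Integer", [1, 5], "uniform"],
    "activation": ["Categorical", ["relu", "tanh"]],
}
ss = load_search_space(search_space_json)
# ss["learning_rate"] is a skopt Real(low=1e-4, high=1e-1, prior='log-uniform')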
Example #4
def create_skopt_space():
    from skopt.space.space import Real

    return [
        Real(1e-10, 1, prior="log-uniform"),
        (0.1, 0.9),
    ], [
        "lr",
        "momentum",
    ]
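The two parallel lists pair each dimension with a parameter name; zipping them into a dict gives the shape that, for example, tune-sklearn's TuneSearchCV accepts (a sketch, not part of the original snippet):

dimensions, names = create_skopt_space()
param_space = dict(zip(names, dimensions))
# {'lr': Real(low=1e-10, high=1, prior='log-uniform'), 'momentum': (0.1, 0.9)}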
Example #5
    def test_warm_start_detection(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier(max_depth=2, random_state=0)
        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   n_jobs=1,
                                   max_iters=10,
                                   local_dir="./test-result")
        self.assertFalse(tune_search._can_early_stop())

        from sklearn.tree import DecisionTreeClassifier
        clf = DecisionTreeClassifier(random_state=0)
        tune_search2 = TuneSearchCV(clf,
                                    parameter_grid,
                                    n_jobs=1,
                                    max_iters=10,
                                    local_dir="./test-result")
        self.assertFalse(tune_search2._can_early_stop())

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        tune_search3 = TuneSearchCV(clf,
                                    parameter_grid,
                                    n_jobs=1,
                                    max_iters=10,
                                    local_dir="./test-result")

        self.assertTrue(tune_search3._can_early_stop())
Example #6
    def test_warm_start_detection(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import VotingClassifier, RandomForestClassifier
        clf = VotingClassifier(estimators=[(
            "rf", RandomForestClassifier(n_estimators=50, random_state=0))])
        tune_search = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        from sklearn.tree import DecisionTreeClassifier
        clf = DecisionTreeClassifier(random_state=0)
        tune_search2 = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search2.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        tune_search3 = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")

        self.assertEqual(tune_search3.early_stop_type,
                         EarlyStopping.NO_EARLY_STOP)

        tune_search4 = TuneSearchCV(
            clf,
            parameter_grid,
            early_stopping=True,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search4.early_stop_type,
                         EarlyStopping.WARM_START_ITER)

        clf = RandomForestClassifier()
        tune_search5 = TuneSearchCV(
            clf,
            parameter_grid,
            early_stopping=True,
            n_jobs=1,
            max_iters=10,
            local_dir="./test-result")
        self.assertEqual(tune_search5.early_stop_type,
                         EarlyStopping.WARM_START_ENSEMBLE)
Example #7
    def test_local_mode(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        clf = SGDClassifier()
        parameter_grid = {
            "alpha": Real(1e-4, 1e-1, 1),
            "epsilon": Real(0.01, 0.1)
        }
        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   n_jobs=1,
                                   max_iters=10,
                                   local_dir="./test-result")
        import ray
        with patch.object(ray, "init", wraps=ray.init) as wrapped_init:
            tune_search.fit(x, y)
        self.assertTrue(wrapped_init.call_args[1]["local_mode"])
Example #8
    def test_local_dir(self):
        digits = datasets.load_digits()
        x = digits.data
        y = digits.target

        clf = SGDClassifier()
        parameter_grid = {
            "alpha": Real(1e-4, 1e-1, 1),
            "epsilon": Real(0.01, 0.1)
        }

        scheduler = MedianStoppingRule(grace_period=10.0)

        tune_search = TuneSearchCV(clf,
                                   parameter_grid,
                                   early_stopping=scheduler,
                                   max_iters=10,
                                   local_dir="./test-result")
        tune_search.fit(x, y)

        self.assertTrue(len(os.listdir("./test-result")) != 0)
Example #9
    def map_dim(values):
        if isinstance(values, tuple):  # linear subspace
            low, high, n_steps, value_type = values

            if value_type == 'i':
                return Integer(low, high)
            elif value_type == 'f':
                return Real(low, high)
            else:
                raise ValueError(f'Unknown value type "{value_type}"')
        else:  # exhaustive list of options
            return Categorical(values)
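A hypothetical call site for map_dim, turning a config of tuples and lists into a skopt search space (note that the n_steps element of each tuple is unpacked but not used by map_dim):

config = {
    "units": (32, 512, 8, 'i'),   # linear subspace of integers
    "lr": (1e-4, 1e-1, 10, 'f'),  # linear subspace of floats
    "act": ["relu", "tanh"],      # exhaustive list of options
}
space = [map_dim(v) for v in config.values()]
names = list(config.keys())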
Example #10
 def test_warn_reduce_maxiters(self):
     parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
     from sklearn.ensemble import RandomForestClassifier
     clf = RandomForestClassifier(max_depth=2, random_state=0)
     with self.assertWarnsRegex(UserWarning, "max_iters is set"):
         TuneSearchCV(
             clf, parameter_grid, max_iters=10, local_dir="./test-result")
     with self.assertWarnsRegex(UserWarning, "max_iters is set"):
         TuneSearchCV(
             SGDClassifier(),
             parameter_grid,
             max_iters=10,
             local_dir="./test-result")
Example #11
def create_skopt_space():
    from skopt.space.space import Real

    return [
        Real(1e-10, 1, prior="log-uniform"),
        (0.1, 0.9),
        (0.1, 0.7),
        (32, 700),
        (32, 256),
    ], [
        "lr",
        "momentum",
        "drop_out",
        "hidden_layer1",
        "hidden_layer2",
    ]
Example #12
    def test_warm_start_error(self):
        parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
        from sklearn.ensemble import VotingClassifier, RandomForestClassifier
        clf = VotingClassifier(estimators=[(
            "rf", RandomForestClassifier(n_estimators=50, random_state=0))])
        tune_search = TuneSearchCV(
            clf,
            parameter_grid,
            n_jobs=1,
            early_stopping=False,
            max_iters=10,
            local_dir="./test-result")
        self.assertFalse(tune_search._can_early_stop())
        with self.assertRaises(ValueError):
            tune_search = TuneSearchCV(
                clf,
                parameter_grid,
                n_jobs=1,
                early_stopping=True,
                max_iters=10,
                local_dir="./test-result")

        from sklearn.linear_model import LogisticRegression
        clf = LogisticRegression()
        with self.assertRaises(ValueError):
            parameter_grid = {"max_iter": [1, 2]}
            TuneSearchCV(
                clf,
                parameter_grid,
                early_stopping=True,
                n_jobs=1,
                max_iters=10,
                local_dir="./test-result")

        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier()
        with self.assertRaises(ValueError):
            parameter_grid = {"n_estimators": [1, 2]}
            TuneSearchCV(
                clf,
                parameter_grid,
                early_stopping=True,
                n_jobs=1,
                max_iters=10,
                local_dir="./test-result")
Example #13
 def test_warm_start_error(self):
     parameter_grid = {"alpha": Real(1e-4, 1e-1, 1)}
     from sklearn.ensemble import RandomForestClassifier
     clf = RandomForestClassifier(max_depth=2, random_state=0)
     tune_search = TuneSearchCV(clf,
                                parameter_grid,
                                n_jobs=1,
                                early_stopping=False,
                                max_iters=10,
                                local_dir="./test-result")
     self.assertFalse(tune_search._can_early_stop())
     with self.assertRaises(ValueError):
         tune_search = TuneSearchCV(clf,
                                    parameter_grid,
                                    n_jobs=1,
                                    early_stopping=True,
                                    max_iters=10,
                                    local_dir="./test-result")
Example #14
def create_skopt_space():
    from skopt.space.space import Real

    return [
        Real(1e-3, 1, prior="log-uniform"),
        (0.1, 0.9),
        (0.01, 0.1),
        (0.05, 0.2),
        (32, 128),
        (64, 256),
        (128, 1024),
    ], [
        "lr",
        "momentum",
        "dropout_1",
        "dropout_2",
        "conv_1",
        "conv_1",
        "dense_1",
    ]
Example #15
def test_mixed_categoricals(initgen):

    space = Space([
        Categorical(name="x", categories=["1", "2", "3"]),
        Categorical(name="y", categories=[4, 5, 6]),
        Real(name="z", low=1.0, high=5.0)
    ])

    def objective(param_list):
        x = param_list[0]
        y = param_list[1]
        z = param_list[2]
        loss = int(x) + y * z
        return loss

    res = gp_minimize(objective,
                      space,
                      n_calls=12,
                      random_state=1,
                      initial_point_generator=initgen)
    assert res["x"] in [['1', 4, 1.0], ['2', 4, 1.0]]
Example #16
def main(arguments):
    global train_mode
    EVALS = 50
    use_mp = True
    run_all = False
    selected_exp = []
    selected_datasets = []

    if '--build_datasets' in arguments:
        print('Building all necessary datasets required for the experiments. Disregarding other arguments! '
              'You will need to run this script again without --build_datasets in order to run experiments!')
        # Make all datasets
        for d in all_datasets:
            load_URMs(d, dataset_kwargs)
        return

    if '--no_mp' in arguments:
        print('No multiprocessing requested! Falling back to serial execution of experiments!')
        use_mp = False
        arguments.remove('--no_mp')

    if '--run_all' in arguments:
        print('All datasets selected for each algorithm!')
        selected_datasets = all_datasets
        run_all = True

    # user-based training
    if '--user' in arguments:
        train_mode = 'user'

    # item-based training
    if '--item' in arguments:
        train_mode = 'item'

    for arg in arguments:
        if not run_all and arg in name_datasets:
            selected_datasets.append(all_datasets[name_datasets.index(arg)])
        if arg in all_recommenders:
            selected_exp.append(arg)


    dict_rec_classes = {}
    dict_dimensions = {}
    dict_fit_params = {}
    dict_init_configs = {}


    # Experiment parameters
    # PureSVD parameters
    puresvd_dimensions = [
        Integer(1, 250, name='num_factors', dtype=int)
    ]
    puresvd_fit_params = [d.name for d in puresvd_dimensions]


    # ALS parameters
    ials_dimensions = [
        Integer(1, 250, name='num_factors', dtype=int),
        Categorical(["linear", "log"], name='confidence_scaling'),
        Real(low=1e-3, high=50, prior='log-uniform', name='alpha', dtype=float),
        Real(low=1e-5, high=1e-2, prior='log-uniform', name='reg', dtype=float),
        Real(low=1e-3, high=10.0, prior='log-uniform', name='epsilon', dtype=float)
    ]
    ials_fit_params = [d.name for d in ials_dimensions]


    # BPR parameters, 150 epochs
    bpr_dimensions = [
        Categorical([150], name='epochs'),
        Integer(1, 250, name='num_factors', dtype=int),
        Categorical([128, 256, 512, 1024], name='batch_size'),
        Categorical(["adagrad", "adam"], name='sgd_mode'),
        Real(low=1e-12, high=1e-3, prior='log-uniform', name='positive_reg'),
        Real(low=1e-12, high=1e-3, prior='log-uniform', name='negative_reg'),
        Real(low=1e-6, high=1e-2, prior='log-uniform', name='learning_rate'),
    ]
    bpr_fit_params = [d.name for d in bpr_dimensions]


    # NMF parameters
    nmf_dimensions = [
        Integer(1, 500, name='num_factors', dtype=int),
        Real(low=1e-5, high=1, prior='log-uniform', name='l1_ratio', dtype=float),
        Categorical(['coordinate_descent', 'multiplicative_update'], name='solver'),
        Categorical(['nndsvda'], name='init_type'),
        Categorical(['frobenius', 'kullback-leibler'], name='beta_loss')
    ]
    nmf_fit_params = [d.name for d in nmf_dimensions]


    # SLIM-BPR parameters, 150 epochs
    slimbpr_dimensions = [
        Integer(low=5, high=1000, prior='uniform', name='topK', dtype=int),
        Categorical([150], name='epochs'),
        Categorical([True, False], name='symmetric'),
        Categorical(["sgd", "adagrad", "adam"], name='sgd_mode'),
        Real(low=1e-9, high=1e-3, prior='log-uniform', name='lambda_i', dtype=float),
        Real(low=1e-9, high=1e-3, prior='log-uniform', name='lambda_j', dtype=float),
        Real(low=1e-4, high=1e-1, prior='log-uniform', name='learning_rate', dtype=float)
    ]
    slimbpr_fit_names = [d.name for d in slimbpr_dimensions]


    # CFGAN parameters
    cfgan_dimensions = [
        Categorical([300], name='epochs'),
        Integer(1, 5, prior='uniform', name='d_steps', dtype=int),
        Integer(1, 5, prior='uniform', name='g_steps', dtype=int),
        Integer(1, 5, prior='uniform', name='d_layers', dtype=int),
        Integer(1, 5, prior='uniform', name='g_layers', dtype=int),
        Categorical(['linear', 'tanh', 'sigmoid'], name='d_hidden_act'),
        Categorical(['linear', 'tanh', 'sigmoid'], name='g_hidden_act'),
        Categorical(['ZR', 'PM', 'ZP'], name='scheme'),
        Categorical([64, 128, 256, 512, 1024], name='d_batch_size'),
        Categorical([64, 128, 256, 512, 1024], name='g_batch_size'),
        Real(low=0, high=1, prior='uniform', name='zr_ratio', dtype=float),
        Real(low=0, high=1, prior='uniform', name='zp_ratio', dtype=float),
        Real(low=0, high=1, prior='uniform', name='zr_coefficient', dtype=float),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='d_lr', dtype=float),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='g_lr', dtype=float),
        Real(low=1e-6, high=1e-4, prior='log-uniform', name='d_reg', dtype=float),
        Real(low=1e-6, high=1e-4, prior='log-uniform', name='g_reg', dtype=float),
    ]
    cfgan_fit_params = [d.name for d in cfgan_dimensions]


    # GANMF parameters
    ganmf_dimensions = [
        Categorical([300], name='epochs'),
        Integer(low=1, high=250, name='num_factors', dtype=int),
        Categorical([64, 128, 256, 512, 1024], name='batch_size'),
        Integer(low=1, high=10, name='m', dtype=int),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='d_lr', dtype=float),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='g_lr', dtype=float),
        Real(low=1e-6, high=1e-4, prior='log-uniform', name='d_reg', dtype=float),
        Real(low=1e-2, high=0.5, prior='uniform', name='recon_coefficient', dtype=float),
        # Integer(5, 400, name='emb_dim', dtype=int),
        # Integer(1, 10, name='d_steps', dtype=int),
        # Integer(1, 10, name='g_steps', dtype=int),
        # Real(low=1e-6, high=1e-4, prior='log-uniform', name='g_reg', dtype=float),
    ]
    ganmf_fit_params = [d.name for d in ganmf_dimensions]


    # DisGANMF parameters
    disgan_dimensions = [
        Categorical([300], name='epochs'),
        Categorical(['linear', 'tanh', 'relu', 'sigmoid'], name='d_hidden_act'),
        Integer(low=1, high=5, prior='uniform', name='d_layers', dtype=int),
        Integer(low=1, high=250, name='num_factors', dtype=int),
        Categorical([64, 128, 256, 512, 1024], name='batch_size'),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='d_lr', dtype=float),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='g_lr', dtype=float),
        Real(low=1e-6, high=1e-4, prior='log-uniform', name='d_reg', dtype=float),
        Real(low=1e-2, high=0.5, prior='uniform', name='recon_coefficient', dtype=float)
    ]
    disgan_fit_params = [d.name for d in disgan_dimensions]


    # DeepGANMF parameters
    deepganmf_dimensions = [
        Categorical([300], name='epochs'),
        Categorical(['linear', 'tanh', 'relu', 'sigmoid'], name='d_hidden_act'),
        Categorical(['linear', 'tanh', 'relu', 'sigmoid'], name='g_hidden_act'),
        Categorical(['linear', 'tanh', 'relu', 'sigmoid'], name='g_output_act'),
        Categorical([1, 3, 5], name='d_layers'),
        Categorical([1, 2, 3, 4, 5], name='g_layers'),
        Categorical([64, 128, 256, 512, 1024], name='batch_size'),
        Integer(low=1, high=10, name='m', dtype=int),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='d_lr', dtype=float),
        Real(low=1e-4, high=1e-2, prior='log-uniform', name='g_lr', dtype=float),
        Real(low=1e-6, high=1e-4, prior='log-uniform', name='d_reg', dtype=float),
        Real(low=1e-2, high=0.5, prior='uniform', name='recon_coefficient', dtype=float),
    ]
    deepganmf_fit_params = [d.name for d in deepganmf_dimensions]



    dict_rec_classes['TopPop'] = TopPop
    dict_rec_classes['Random'] = Random
    dict_rec_classes['PureSVD'] = PureSVDRecommender
    dict_rec_classes['BPR'] = MatrixFactorization_BPR_Cython
    dict_rec_classes['ALS'] = IALSRecommender
    dict_rec_classes['NMF'] = NMFRecommender
    dict_rec_classes['GANMF'] = GANMF
    dict_rec_classes['CFGAN'] = CFGAN
    dict_rec_classes['DisGANMF'] = DisGANMF
    dict_rec_classes['SLIMBPR'] = SLIM_BPR_Cython
    dict_rec_classes['DeepGANMF'] = DeepGANMF

    dict_dimensions['TopPop'] = []
    dict_dimensions['Random'] = []
    dict_dimensions['PureSVD'] = puresvd_dimensions
    dict_dimensions['BPR'] = bpr_dimensions
    dict_dimensions['ALS'] = ials_dimensions
    dict_dimensions['NMF'] = nmf_dimensions
    dict_dimensions['GANMF'] = ganmf_dimensions
    dict_dimensions['CFGAN'] = cfgan_dimensions
    dict_dimensions['DisGANMF'] = disgan_dimensions
    dict_dimensions['SLIMBPR'] = slimbpr_dimensions
    dict_dimensions['DeepGANMF'] = deepganmf_dimensions

    dict_fit_params['TopPop'] = []
    dict_fit_params['Random'] = []
    dict_fit_params['PureSVD'] = puresvd_fit_params
    dict_fit_params['BPR'] = bpr_fit_params
    dict_fit_params['ALS'] = ials_fit_params
    dict_fit_params['NMF'] = nmf_fit_params
    dict_fit_params['GANMF'] = ganmf_fit_params
    dict_fit_params['CFGAN'] = cfgan_fit_params
    dict_fit_params['DisGANMF'] = disgan_fit_params
    dict_fit_params['SLIMBPR'] = slimbpr_fit_names
    dict_fit_params['DeepGANMF'] = deepganmf_fit_params

    pool_list_experiments = []
    pool_list_dimensions = []

    for exp in selected_exp:
        for d in selected_datasets:
            new_exp = RecSysExp(dict_rec_classes[exp], dataset=d, fit_param_names=dict_fit_params[exp],
                                method='bayesian', seed=seed)
            if use_mp:
                pool_list_experiments.append(new_exp)
                pool_list_dimensions.append(dict_dimensions[exp])
            else:
                new_exp.tune(dict_dimensions[exp], evals=EVALS,
                             init_config=dict_init_configs[exp] if exp in dict_init_configs else None)

    if use_mp:
        # Need to turn off MKL's own threading mechanism in order to use MP
        # https://github.com/joblib/joblib/issues/138
        os.environ['MKL_NUM_THREADS'] = '1'
        os.environ['OMP_NUM_THREADS'] = '1'
        os.environ['MKL_DYNAMIC'] = 'FALSE'
        
        pool = mp.Pool(initializer=set_affinity_on_worker)
        pool.starmap_async(run_exp, zip(pool_list_experiments, pool_list_dimensions, [EVALS]*len(pool_list_experiments)))
        pool.close()
        pool.join()
Example #17
num_batches = total_songs // batch_size
num_batches

# In[21]:

curr_steps = np.sort(factors(num_batches))
# drop the last (largest) factor, which is not a sensible number of steps
curr_steps = curr_steps[:-1]
curr_steps = curr_steps[curr_steps >= 10]
curr_steps

# In[22]:

currStepsSpace = Categorical(curr_steps)
learningRateSpace = Real(1e-5, 1e-2, "log-uniform")
inputProbSpace = Real(0.4, 1.0, "uniform")
hiddenProbSpace = Real(0.4, 1.0, "uniform")
l2RegSpace = Real(1e-3, 1., "log-uniform")
space = [
    currStepsSpace, learningRateSpace, inputProbSpace, hiddenProbSpace,
    l2RegSpace
]

# In[23]:


def saveStatsCollection(filename, key, stats):
    # Load the existing collection (a dict stored in a 0-d numpy object
    # array) if the file exists, otherwise start a fresh one.
    statsCollection = np.load(filename)[()] if os.path.isfile(filename) else dict()
    statsCollection[key] = stats
    np.save(filename, statsCollection)
Example #18
    if plotting:
        fig_1, ax_1, fig_2, ax_2 = plotStats(stats, keys)
        plt.show()

    validAccs = stats[:, -1]
    length10percent = len(validAccs) // 10
    best10percent = np.sort(validAccs)[-length10percent:]
    # We want to maximise the MEAN validation accuracy,
    # i.e. minimise minus
    return -np.mean(best10percent)


# In[14]:

inputKeepProbSpace = Real(0.5, 1.0, "uniform")
hiddenKeepProbSpace = Real(0.5, 1.0, "uniform")
hiddenDimSpace = Integer(20, 2000)
lamda2Space = Real(1e-3, 10, "log-uniform")
space = [inputKeepProbSpace, hiddenKeepProbSpace, hiddenDimSpace, lamda2Space]

# TARGET IS 58% as the original Deep Neural Net

# In[15]:

if jupyterNotebookEnabled:
    get_ipython().magic(u'%time')

# this might crash, so you may need to run it outside the notebook as a plain Python script (File -> Save as Python)
if not os.path.isfile(res_gp_save_filename):
    if os.path.isfile(statsCollectionFilename):
Example #19
    model.add(layers.Dropout(dropout))

    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(10))

    adam = optimizers.Adam(lr, beta1, beta2)
    model.compile(optimizer=adam,
                  loss=losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    return model


if __name__ == "__main__":
    dim_lr = Real(low=1e-4, high=5e-3, prior='log-uniform', name='lr')
    dim_b1 = Real(low=0.7, high=0.99, name='beta1')
    dim_b2 = Real(low=0.9, high=0.999, name='beta2')
    dim_drop = Real(low=0.25, high=0.75, name='dropout')
    dimensions = [dim_lr, dim_b1, dim_b2, dim_drop]
    default_param = [0.0005, 0.75, 0.95, 0.4]
    best_accuracy = 0

    @use_named_args(dimensions=dimensions)
    def fitness(lr, beta1, beta2, dropout):
        model = create_model(lr, beta1, beta2, dropout)

        history = model.fit(train_data,
                            train_labels,
                            epochs=5,
                            validation_data=(test_data, test_labels))
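The excerpt ends mid-function. For orientation, a hedged sketch of how such an objective is usually closed out and handed to skopt (fitness, dimensions, and default_param come from the snippet above; the body shown is an assumption, not the original code):

#     accuracy = history.history['val_accuracy'][-1]
#     return -accuracy  # negate: gp_minimize minimizes its objective
from skopt import gp_minimize

result = gp_minimize(func=fitness,            # decorated objective from above
                     dimensions=dimensions,   # the four Real dimensions
                     x0=default_param,        # start from the default config
                     n_calls=20)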
Example #20
from tune_sklearn import TuneSearchCV
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from skopt.space.space import Real

digits = datasets.load_digits()
X = digits.data
y = digits.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

space = {
    "n_estimators": (100, 200),
    "min_weight_fraction_leaf": Real(0.0, 0.5),
    "min_samples_leaf": (1, 5)
}

tune_search = TuneSearchCV(RandomForestClassifier(),
                           space,
                           search_optimization="bayesian",
                           n_iter=3,
                           max_iters=10)
tune_search.fit(X_train, y_train)

print(tune_search.cv_results_)
print(tune_search.best_params_)
Example #21
def startExperiment(parameters):
    """
    Starts an experiment with the given parameters

    :param parameters: parameters of the experiment
    :type parameters: Dict
    """

    optimizationPath = str(
        os.path.join(parameters["path"], parameters["experimentId"]))
    json_file = str(
        os.path.join(optimizationPath, parameters["experimentId"] + ".json"))
    if os.path.isfile(json_file):
        Optimizer = importOptimizer()
        optimizer = Optimizer()
        optimizer.resume_optimization(json_file)
    else:
        # Import dataset class and initialize an instance with the chosen dataset
        dataset_class = importDataset()
        dataset = dataset_class()
        dataset_path = str(
            os.path.join(pathDataset, "preprocessed_datasets",
                         parameters["dataset"]))
        dataset.load_custom_dataset_from_folder(dataset_path)

        model_class = importModel(parameters["model"]["name"])
        model = model_class()

        model.hyperparameters.update(parameters["model"]["parameters"])
        model.partitioning(parameters["partitioning"])

        search_space = {}

        for key, value in parameters["optimization"]["search_spaces"].items():
            if "low" in value:
                if isinstance(value["low"], float) or isinstance(
                        value["high"], float):
                    search_space[key] = Real(low=value["low"],
                                             high=value["high"])
                else:
                    search_space[key] = Integer(low=value["low"],
                                                high=value["high"])
            else:
                search_space[key] = Categorical(value)

        metric_parameters = parameters["optimize_metrics"][0]["parameters"]
        for key in metric_parameters:
            if metric_parameters[key] == "use dataset texts":
                metric_parameters[key] = dataset.get_corpus()
            elif metric_parameters[key] == "use selected dataset":
                metric_parameters[key] = dataset
            elif os.path.isdir(str(metric_parameters[key])):
                metricDataset = dataset_class()
                metricDataset.load_custom_dataset_from_folder(
                    metric_parameters[key])
                metric_parameters[key] = metricDataset.get_corpus()

        metric_class = importMetric(parameters["optimize_metrics"][0]["name"])
        metric = metric_class(**metric_parameters)

        metrics_to_track = []
        for single_metric in parameters["track_metrics"]:
            metric_class = importMetric(single_metric["name"])
            single_metric_parameters = single_metric["parameters"]
            for key in single_metric_parameters:
                if single_metric_parameters[key] == "use dataset texts":
                    single_metric_parameters[key] = dataset.get_corpus()
                elif single_metric_parameters[key] == "use selected dataset":
                    single_metric_parameters[key] = dataset
            new_metric = metric_class(**single_metric_parameters)
            metrics_to_track.append(new_metric)

        vocabulary_path = str(
            os.path.join(parameters["path"], parameters["experimentId"],
                         "models"))

        Path(vocabulary_path).mkdir(parents=True, exist_ok=True)

        vocabulary_path = str(os.path.join(vocabulary_path, "vocabulary.json"))

        with open(vocabulary_path, "w") as file:
            json.dump(dict(corpora.Dictionary(dataset.get_corpus())), file)

        Optimizer = importOptimizer()
        optimizer = Optimizer()
        optimizer.optimize(
            model,
            dataset,
            metric,
            search_space,
            metrics_to_track,
            random_state=True,
            initial_point_generator="random",
            surrogate_model=parameters["optimization"]["surrogate_model"],
            model_runs=parameters["optimization"]["model_runs"],
            n_random_starts=parameters["optimization"]["n_random_starts"],
            acq_func=parameters["optimization"]["acquisition_function"],
            number_of_call=parameters["optimization"]["iterations"],
            save_models=True,
            save_name=parameters["experimentId"],
            save_path=optimizationPath)
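From the branching above, the optimization.search_spaces entry of parameters is expected to look roughly like this (a hypothetical fragment for illustration):

parameters = {
    # ... path, experimentId, dataset, model, partitioning, ... omitted ...
    "optimization": {
        "search_spaces": {
            "num_topics": {"low": 10, "high": 50},   # ints -> Integer
            "alpha": {"low": 0.01, "high": 1.0},     # floats -> Real
            "activation": ["softplus", "relu"],      # no "low" -> Categorical
        },
        "surrogate_model": "GP",
        "model_runs": 3,
        "n_random_starts": 5,
        "acquisition_function": "EI",
        "iterations": 30,
    },
}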
Example #22
# numerical pipeline
numeric_pipeline = Pipeline([('select_numeric', TypeSelector(dtype='number'))])

# processing pipeline
cat_num_featun = FeatureUnion([('categorical', categorical_pipeline),
                               ('numerical', numeric_pipeline)])

# combined pipeline
estimator_pipeline = Pipeline([('Features', feature_pipeline),
                               ('Categorical_Numeric', cat_num_featun),
                               ('Estimator', LogisticRegression(penalty="l1"))
                               ])

# search space
search_space = {
    "Estimator__C": Real(.000001, 2),
    "Estimator__class_weight": Categorical(['balanced', None]),
}

# scorer
metric = make_scorer(score_func=log_loss,
                     greater_is_better=False,
                     needs_proba=True,
                     labels=train['Category'].unique())

# cv
kfold_cv = KFold(n_splits=5, shuffle=True, random_state=42)

# bayessearch cv
bayes_tuned_pipeline = BayesSearchCV(estimator=estimator_pipeline,
                                     search_spaces=search_space,
                                     scoring=metric,
                                     cv=kfold_cv)
Example #23
    statsCollection[(state_size, num_steps, learning_rate)] = stats
    np.save(filename, statsCollection)
    
    if plotting:
        fig_1, ax_1, fig_2, ax_2 = plotStats(stats, DynStats.keys)
        plt.show()
    
    # We want to minimize the amount of epochs required to reach 23% accuracy
    return metric


# In[13]:

stateSizeSpace = Integer(15, 1000)
numStepSpace = Categorical(numLens)
learningRateSpace = Real(1e-6, 1e-1, prior="log-uniform")
space  = [stateSizeSpace, numStepSpace, learningRateSpace]


# In[14]:

if jupyterNotebookEnabled:
    get_ipython().magic(u'%time')

if not os.path.isfile(best_params_filename):
    if os.path.isfile(stats_coll_filename):
        os.remove(stats_coll_filename)
    
    res_gp = gp_minimize(
            func=objective_min_epochs, # function that we wish to minimise
            dimensions=space, #the search space for the hyper-parameters