def report_perf(self, optimizer, X, y, title, callbacks=None):
        """Fit ``optimizer`` on ``(X, y)``, time the run and summarise it.

        Parameters
        ----------
        optimizer : a sklearn or a skopt optimizer
            Must expose ``fit``, ``best_score_``, ``best_index_``,
            ``best_params_`` and ``get_cv_results_()``.
        X : the training set
        y : our target
        title : str
            Label for the experiment, shown in the start message.
        callbacks : optional
            When truthy it is forwarded to ``optimizer.fit`` as ``callback``;
            otherwise ``fit`` is called without that keyword.

        Returns
        -------
        result : dict
            Keys ``best_score``, ``best_score_std``, ``best_parmas``,
            ``params``, ``CPU_Time``, ``Time_cost``, ``all_cv_results``, ``CV``.
        """
        # Only pass the keyword when there is something to pass.
        fit_kwargs = {'callback': callbacks} if callbacks else {}

        wall_start = time.time()
        cpu_start = time.process_time()
        mprint(f'start tuning {title}...')

        optimizer.fit(X, y, **fit_kwargs)

        cpu_elapsed = time.process_time() - cpu_start
        wall_elapsed = time.time() - wall_start

        # Key order is kept as callers may rely on it when printing the dict.
        result = {
            'best_score': optimizer.best_score_,
            'best_score_std': optimizer.get_cv_results_()['std_test_score'][
                optimizer.best_index_],
            # "best_parmas" (sic) is the key name callers already read.
            'best_parmas': optimizer.best_params_["model"],
            'params': optimizer.get_cv_results_()['params'],
            'CPU_Time': round(cpu_elapsed, 0),
            'Time_cost': round(wall_elapsed, 0),
            'all_cv_results': optimizer.get_cv_results_()['mean_test_score'][:],
            'CV': optimizer.get_cv_results_(),
        }

        # Values shown in the one-line summary.
        time_cost_CPU = round(result['CPU_Time'], 0)
        time_cost = round(result['Time_cost'], 0)
        cand = len(result['all_cv_results'])
        best_cv = round(result['best_score'], 8)
        best_cv_sd = round(result['best_score_std'], 4)

        print("")
        print(
            f'took CPU Time: {time_cost_CPU}s,clock time: {time_cost}s, candidates checked:{cand} ,best CV score: {best_cv} \u00B1 {best_cv_sd}'
        )
        print("")

        return result
# Slots of the neighbour table, one per direction on the process grid.
NORTH, SOUTH, EAST, WEST = range(4)

if __name__ == "__main__":
    comm = MPI.COMM_WORLD

    # Start from floor(sqrt(size)) rows and as many full columns as fit.
    mpi_rows = int(np.floor(np.sqrt(comm.size)))
    mpi_cols = comm.size // mpi_rows
    # Shrink the grid if it would need more ranks than are available.
    if mpi_rows * mpi_cols > comm.size:
        mpi_cols -= 1
    if mpi_rows * mpi_cols > comm.size:
        mpi_rows -= 1

    banner = "=" * 78
    mprint(banner)
    mprint("Running %d parallel processes (ranks)" % (comm.size))
    mprint("Creating a %d x %d processor grid..." % (mpi_rows, mpi_cols))

    # 2d cartesian communicator, periodic along both axes.
    grid_shape = (mpi_rows, mpi_cols)
    ccomm = comm.Create_cart(grid_shape, periods=(True, True), reorder=True)

    # This rank's position on the grid.
    my_mpi_row, my_mpi_col = ccomm.Get_coords(ccomm.rank)

    # Identify our neighbours on the grid: a unit shift along axis 0 fills the
    # NORTH/SOUTH slots, a unit shift along axis 1 the EAST/WEST slots.
    neigh = [0] * 4
    for axis, (first_slot, second_slot) in enumerate(((NORTH, SOUTH),
                                                      (EAST, WEST))):
        neigh[first_slot], neigh[second_slot] = ccomm.Shift(axis, 1)
    def optimise_step(self,
                      df_train,
                      df_target,
                      npoints=1,
                      nrandom=1,
                      n_iter=50,
                      set_callbacks=True):
        """Tune a LightGBM pipeline once per configured surrogate model.

        The training data is categorically encoded (and scaled when it holds
        float columns). A ``BayesSearchCV`` is then run for every entry of
        ``self.baseEstimator`` and the best-scoring run is reported; ties on
        the score are broken by the shortest ``Time_cost``.

        Parameters
        ----------
        df_train : pandas dataframe of shape = (n_train, n_features)
            The train dataset.
        df_target : pandas series of shape = (n_train,)
            The numerical encoded target for classification tasks.
        npoints : int, default = 1
            Forwarded to ``BayesSearchCV`` as ``npoints``.
        nrandom : int, default = 1
            Forwarded to ``BayesSearchCV`` as ``nrandom``.
        n_iter : int, default = 50
            Forwarded to ``BayesSearchCV`` as ``n_iter``.
        set_callbacks : bool, default = True
            When True every search runs with ``self.on_step`` and
            ``DeadlineStopper(60 * 60)`` as callbacks.

        Returns
        -------
        best_param
            The ``best_parmas`` entry of the winning run. It is also stored on
            ``self.best_param_``.
        tuning_result : dict
            Maps each surrogate model name to the dict returned by
            ``self.report_perf`` for that run.

        Raises
        ------
        ValueError
            If ``self.baseEstimator`` is empty, so nothing could be tuned.
        """
        ce = Categorical_encoder()
        X = ce.fit_transform(df_train, df_target)

        # Scale only when the raw frame holds float columns. The scaler lives
        # in a local so the summary below never touches an unbound name.
        scal = None
        if len(df_train.dtypes[df_train.dtypes == 'float'].index) != 0:
            scal = Scaler()
            X = scal.fit_transform(X, df_target)
            self.perform_scaling = True

        # Feature selection is only searched over for wide data sets.
        need_fs = len(pd.DataFrame(X).columns) > 20
        search_space_LGB = Classifier(strategy="LightGBM").get_search_spaces(
            need_feature_selection=need_fs)
        search_space_SVC = Classifier(strategy="SVC").get_search_spaces(
            need_feature_selection=need_fs)
        search_spaces = [search_space_SVC, search_space_LGB]

        # A feature-selection step is needed as soon as any space tunes an
        # "fs__" parameter. A space may be a dict or a (dict, ...) tuple.
        space_dicts = [
            space[0] if isinstance(space, tuple) else space
            for space in search_spaces
        ]
        fs = None
        if any(p.startswith("fs__") for space in space_dicts for p in space):
            fs = feature_selector()

        # Do we need to cache transformers?
        # NOTE(review): search_spaces is a list of per-model spaces, so this
        # membership test cannot match a parameter name and caching stays
        # off. Confirm the intended condition (and that self.to_path is set)
        # before enabling it.
        cache = False
        if fs is not None and "fs__strategy" in search_spaces:
            cache = search_spaces["fs__strategy"] != "variance"

        mprint('Start turning Hyperparameters .... ')
        print("")
        print(">>> Categorical Features have encoded with :" +
              str({'strategy': ce.strategy}))
        print("")
        if scal is not None:
            print(">>> Numerical Features have encoded with :" +
                  scal.__class__.__name__)
            print("")

        # The parallel and the serial search differ only in the job count.
        n_jobs = -1 if self.parallel_strategy is True else 1

        tuning_result = {}
        for baseestimator in self.baseEstimator:
            # Pipeline creation
            lgb = Classifier(strategy="LightGBM").get_estimator()
            if fs is not None:
                steps = [('fs', fs), ('model', lgb)]
            else:
                steps = [('model', lgb)]
            if cache:
                pipe = Pipeline(steps, memory=self.to_path)
            else:
                pipe = Pipeline(steps)

            opt = BayesSearchCV(pipe,
                                search_spaces=search_spaces,
                                scoring=self.scoring,
                                cv=self.cv,
                                npoints=npoints,
                                n_jobs=n_jobs,
                                n_iter=n_iter,
                                nrandom=nrandom,
                                return_train_score=False,
                                optimizer_kwargs={
                                    'base_estimator': baseestimator,
                                    "acq_func": "EI"
                                },
                                random_state=self.random_state,
                                verbose=self.verbose,
                                refit=self.refit)

            # Surrogates given by name are used as-is; estimator instances
            # are labelled with their class name.
            if isinstance(baseestimator, str):
                sm_name = baseestimator
            else:
                sm_name = baseestimator.__class__.__name__

            if set_callbacks is True:
                mid_result = self.report_perf(
                    opt,
                    X,
                    df_target,
                    ' with Surrogate Model:' + sm_name,
                    callbacks=[self.on_step,
                               DeadlineStopper(60 * 60)])
            else:
                mid_result = self.report_perf(
                    opt,
                    X,
                    df_target,
                    ' with Surrogate Model: ' + sm_name,
                )
            tuning_result[sm_name] = mid_result

        if not tuning_result:
            raise ValueError(
                "self.baseEstimator is empty: no surrogate model was tuned")

        # Best score wins; among equally good runs prefer the fastest one.
        top_score = max(run['best_score'] for run in tuning_result.values())
        best_base_estimator = min(
            (name for name, run in tuning_result.items()
             if run['best_score'] == top_score),
            key=lambda name: tuning_result[name]['Time_cost'])
        best_param = tuning_result[best_base_estimator]['best_parmas']

        print("")
        print('######## Congratulations! Here is the Best Parameters: #######')
        print('Best Score is:',
              tuning_result[best_base_estimator]['best_score'])
        print('with Surrogate Model ' + best_base_estimator)
        pprint.pprint(best_param)

        self.best_param_ = best_param

        return best_param, tuning_result
예제 #4
0
#!/usr/bin/env python

from __future__ import division

from time import time
from mpi4py import MPI
import numpy as np

from util import mprint, num_bytes

# Message lengths (in array elements) to benchmark: 2, 4, ..., 2**23.
sizes = [1 << n for n in range(1, 24)]
# Number of timed repetitions per message length.
runs = 50

comm = MPI.COMM_WORLD

mprint("Benchmarking Reduce performance on %d parallel MPI processes..." %
       comm.size)
mprint("%15s | %12s | %12s" % ("Size (bytes)", "Time (msec)", "Bandwidth"))

for s in sizes:
    data = np.ones(s)
    res = np.empty_like(data)

    # Line all ranks up before timing so nobody starts early.
    comm.Barrier()
    t_min = np.inf
    for i in range(runs):
        t0 = time()
        comm.Reduce([data, MPI.DOUBLE], [res, MPI.DOUBLE])
        t = time() - t0
        # Keep the fastest repetition seen so far.
        if t < t_min:
            t_min = t
    comm.Barrier()
예제 #5
0
tf.global_variables_initializer().run()

#TODO Load net seems not work
origiter = saver.iter
iters = origiter

train_batch_nums = len(trainset) // batch_size
test_batch_nums = len(testset) // batch_size

# Resume from the latest checkpoint when one has been recorded.
if origiter > 0:
    util.loadNet(saver.latest, model, sess)
    popt_path = '../wts/train/' + mission_path + 'popt.npz'
    # Optimizer state is optional; restore it only when the file exists.
    if os.path.isfile(popt_path):
        util.loadAdam(popt_path, opt, model.weights, sess)
    util.mprint("Restored to iteration %d" % origiter)

# Evaluate the first test batch.
test_image_paths = testset[:batch_size]
fetches = [d.precontour, d.imgs_color, imgs_pos, d.sparse_color]
feed = d.dict(test_image_paths, gen_learning_rate_val, dis_learning_rate_val,
              False)
contour, contour_imgs, ori_imgs, sparse_colors = sess.run(fetches,
                                                          feed_dict=feed)

# Dump the raw arrays; only the first channel of the contour is written.
for out_path, array in (
        ("../results/output.npy", contour_imgs),
        ("../results/sparse.npy", sparse_colors),
        ("../results/ori.npy", ori_imgs),
        ("../results/contour.npy", contour[:, :, :, 0]),
):
    np.save(out_path, array)

for i in range(5):
    output_img = (255 * (1 + ori_imgs[i]) / 2).astype(int)
    skimage.io.imsave(os.path.join(result_path, 'ori_' + str(i) + '.jpg'),
예제 #6
0
    def fit(self, X, y_bin, n_iter=10, need_callback=True):
        """Random-search LightGBM and SVC, choosing the model for each round.

        ``X`` is categorically encoded (and scaled when it holds float
        columns). The search then runs ``int(n_iter / 10)`` rounds. In each
        round one of ``self.models["LGBMClassifier"]`` / ``self.models["SVC"]``
        is picked by comparing two Beta draws, and ``dummy_minimize``
        evaluates 10 random candidates from ``self.params[key]`` with 5-fold
        cross-validated ROC AUC.

        Parameters
        ----------
        X : training features (must expose ``dtypes``).
        y_bin : target passed to the encoder, scaler and ``cross_val_score``.
        n_iter : int, default = 10
            Total evaluation budget; every round uses 10 evaluations.
        need_callback : bool, default = True
            When True the search stops early once a score of exactly 1 is
            reached.

        Returns
        -------
        rs : dict
            Maps the number of rounds still remaining (``rounds - 1`` down to
            ``0``) to that round's result dict with keys ``best_score``,
            ``best_params``, ``params``, ``all_cv_results``,
            ``test_score_std``, ``best_score_std``, ``CPU_Time`` and
            ``Time_cost``.

        Raises
        ------
        ValueError
            If ``n_iter`` is too small for a single round of 10 evaluations.

        Notes
        -----
        Updates ``self.score_std``, ``self.cand``, ``self.clock``,
        ``self.final_best_score`` and ``self.final_best_std``.
        """
        adj_iter = int(n_iter / 10)
        if adj_iter <= 0:
            # Without a round there is nothing to report further down.
            raise ValueError(
                "n_iter must allow at least one round of 10 evaluations, "
                "got %r" % (n_iter,))

        enc = encoder.Categorical_encoder()
        X = enc.fit_transform(X, y_bin)
        if len(X.dtypes[X.dtypes == 'float'].index) != 0:
            X = Scaler().fit_transform(X, y_bin)

        np.random.seed(55)
        # Success / failure counts feeding the Beta draw of each model.
        s_lgb, f_lgb = 0, 0
        s_svc, f_svc = 0, 0

        p_lgb = np.random.beta(s_lgb + 1.0, f_lgb + 1.0)
        p_svc = np.random.beta(s_svc + 1.0, f_svc + 1.0)

        # ROC AUC scorer for model selection. The explicit needs_proba=False
        # is dropped: it is the default, and the keyword appears to be
        # deprecated in newer scikit-learn (confirm against the docs).
        auc_scorer = make_scorer(roc_auc_score, greater_is_better=True)

        def on_step(gp_round):
            # The objective is the negated score, so -1 is a perfect result.
            if np.sort(gp_round.func_vals)[0] == -1:
                print('Interrupting!')
                return True
            return False

        rs = {}
        mprint("Running RandomSearchCV...")

        while adj_iter > 0:
            key = "LGBMClassifier" if p_lgb >= p_svc else "SVC"

            start = time.time()
            start_cpu = time.process_time()
            model = self.models[key]

            param_keys, param_vecs = (
                list(part) for part in zip(*self.params[key].items()))

            # self.score_std grows across rounds (and calls); remember where
            # this round's entries start so they can be indexed correctly.
            std_offset = len(self.score_std)

            def objective(param_vec):
                model.set_params(**dict(zip(param_keys, param_vec)))
                score = cross_val_score(model,
                                        X,
                                        y_bin,
                                        cv=5,
                                        n_jobs=-1,
                                        scoring=auc_scorer)
                self.score_std.append(np.std(score))
                return -np.mean(score)

            search_kwargs = dict(func=objective,
                                 dimensions=param_vecs,
                                 n_calls=10,
                                 random_state=self.random_state,
                                 verbose=self.verbose)
            if need_callback:
                print('Running with Callback function....')
                search_kwargs['callback'] = [on_step]
            gp_round = dummy_minimize(**search_kwargs)

            model_name = model.__class__.__name__
            candidates = [
                dict({"model": model_name}, **dict(zip(param_keys, vec)))
                for vec in gp_round.x_iters
            ]
            # Lowest objective == highest score.
            best_index = int(np.argmin(gp_round.func_vals))

            rm_result = {
                'best_score': -gp_round.fun,
                'best_params': candidates[best_index],
                # NOTE(review): holds only the LAST evaluated candidate, as
                # before; kept unchanged so the return shape stays stable.
                'params': dict(zip(param_keys, gp_round.x_iters[-1])),
                'all_cv_results': -gp_round.func_vals,
            }

            clock_time = time.time() - start
            cpu_time = time.process_time() - start_cpu
            # Shared, ever-growing list (not just this round's values).
            rm_result['test_score_std'] = self.score_std
            rm_result['best_score_std'] = round(
                self.score_std[std_offset + best_index], 4)
            rm_result['CPU_Time'] = round(cpu_time, 2)
            rm_result['Time_cost'] = round(clock_time, 2)

            cand = len(rm_result['all_cv_results'])
            cpu = round(cpu_time)
            clock = round(clock_time)
            best_cv_sd = rm_result['best_score_std']
            best_score = round(-gp_round.fun, 4)

            adj_iter -= 1
            rs[adj_iter] = rm_result

            # Update the counters of the model that was actually run. The
            # optimiser's best point is a plain list of parameter values, so
            # its class name can never identify the model; use `key`.
            # NOTE(review): a round beating the overall mean is booked as a
            # *failure*, exactly as originally written - confirm that this
            # direction is intended.
            beat_mean = rm_result['best_score'] > np.mean(
                [d['all_cv_results'] for d in rs.values()])
            if key == "SVC":
                if beat_mean:
                    f_svc += 1
                else:
                    s_svc += 1
            else:
                if beat_mean:
                    f_lgb += 1
                else:
                    s_lgb += 1

            p_lgb = np.random.beta(s_lgb + 1.0, f_lgb + 1.0)
            p_svc = np.random.beta(s_svc + 1.0, f_svc + 1.0)

            self.cand += int(cand)
            self.clock += clock

            if self.final_best_score < best_score:
                self.final_best_score = best_score
                self.final_best_std = best_cv_sd

        print(
            f'Finished, took CPU Time: {cpu}s,clock time: {self.clock}s, candidates checked:{self.cand} ,best CV score: {self.final_best_score} \u00B1 {self.final_best_std}'
        )
        print("")

        # Best score wins; among equally good rounds prefer the fastest one.
        top_score = max(d["best_score"] for d in rs.values())
        winner = min((d for d in rs.values() if d['best_score'] == top_score),
                     key=lambda d: d['Time_cost'])
        best_param = winner['best_params']
        score = winner['best_score']

        print("")
        print('######## Congratulations! Here is the Best Parameters: #######')
        print('Best Score is:', score)
        print('Best Model is:')
        pprint.pprint(best_param)
        # rs["Time_cost"] = self.clock
        return rs
(options, args) = parser.parse_args()

# Parse dtype argument: keep both the numpy type and its source spelling.
if options.dtype == "float32":
    dtype_str = "np.float32"
    dtype = np.float32
elif options.dtype == "float64":
    dtype_str = "np.float64"
    dtype = np.float64
else:
    print("[FATAL] Unknown type %s" % options.dtype)
    # Stop here: with no dtype bound, the size computation below would only
    # fail later with an unrelated NameError.
    raise SystemExit(1)

# Comma-separated list of benchmark groups to run.
benches = options.benches.split(",")

comm = MPI.COMM_WORLD
mprint()
mprint("Running %d parallel MPI processes: Results display collective performance" % comm.size)
mprint()

# Calculate sizes: options.nbytes is multiplied up by 1024 * 1024 to bytes,
# then converted to a number of elements of the chosen dtype.
nbytes = options.nbytes * 1024 * 1024
size = nbytes // np.dtype(dtype).itemsize

if 'O1' in benches:
    linear_benchcodes = (
        ("x = 1 * a"             ,  1  , 2  ),
        ("x = a * a"             ,  1  , 3  ),
        ("x = a * b"             ,  1  , 3  ),
        ("x = a * b * c"         ,  2  , 6  ),
        ("x = a[::2] * b[::2]"   ,  0.5, 1.5),
        ("x = np.exp(a)"         ,  1  , 2  ),
예제 #8
0
#!/usr/bin/env python

from __future__ import division

from time import time
from mpi4py import MPI
import numpy as np

from util import mprint, num_bytes

# Message lengths (in array elements) to benchmark: 2, 4, ..., 2**23.
sizes = [1 << n for n in range(1, 24)]
# Number of broadcasts averaged per message length.
runs = 20
comm = MPI.COMM_WORLD

mprint("Benchmarking braodcast performance on %d parallel MPI processes..." %
       comm.size)
mprint("%15s | %12s | %12s" %
       ("Size (bytes)", "Time (msec)", "Bandwidth (MiBytes/s)"))

for s in sizes:
    data = np.ones(s)

    # Barriers on both sides so the timed region covers all ranks.
    comm.Barrier()
    t0 = time()
    for i in range(runs):
        comm.Bcast([data, MPI.DOUBLE], root=0)
    comm.Barrier()
    # Mean time of a single broadcast.
    t = (time() - t0) / runs

    msec = t * 1000
    mib_per_sec = data.nbytes / t / 1024 / 1024
    mprint("%15d | %12.3f | %12.3f" % (data.nbytes, msec, mib_per_sec))
예제 #9
0
rs = np.random.RandomState(0)
loss_D_val = loss_G_val = 0.

train_batch_nums = len(trainset) // batch_size
test_batch_nums = len(testset) // batch_size
iters = origiter

# Resume from the latest checkpoint when one has been recorded.
if origiter > 0:
    util.loadNet(saver.latest, model, sess)
    popt_path = '../wts/train/' + mission_path + 'popt.npz'
    # Optimizer state is optional; restore it only when the file exists.
    if os.path.isfile(popt_path):
        util.loadAdam(popt_path, opt, model.weights, sess)
    # Draw one permutation per (possibly partial) epoch up to origiter from
    # the seeded generator.
    epochs_seen = (origiter + train_batch_nums - 1) // train_batch_nums
    for k in range(epochs_seen):
        idx = rs.permutation(len(trainset))
    util.mprint("Restored to iteration %d" % origiter)

# show display categories
if args.pre == 1:
    show_train_nms = ['Loss_Recon', 'Loss_G', 'Loss_D', 'Loss_adv_G']
    show_test_nms = ['Loss_Recon_V', 'Loss_G_V', 'Loss_D_V', 'Loss_adv_G_V']
else:
    show_train_nms = ['Loss_G']
    show_test_nms = ['Loss_G_V']

print("trainset length: %d" % len(trainset))

while iters <= MAXITER:
    #TODO dont loop on epoch
    if iters % train_batch_nums == 0:
예제 #10
0
#!/usr/bin/env python

from __future__ import division

import numpy as np
from mpi4py import MPI

from util import mprint, num_bytes

#=============================================================================
# Main

comm = MPI.COMM_WORLD

rule = "-" * 78
mprint(rule)
mprint(" Running %d parallel processes..." % comm.size)
mprint(rule)

# Every rank contributes an array whose length and fill value both depend on
# its rank number.
my_N = 10 + comm.rank
my_a = np.full(my_N, float(comm.rank))

# Combined length over all ranks, then the per-rank arrays themselves.
N = comm.allreduce(my_N, op=MPI.SUM)
a = comm.gather(my_a, root=0)

mprint("Gathered array: %s" % a)