def __init__(self, low, high, prior="uniform", transform=None): """Search space dimension that can take on any real value. Parameters ---------- * `low` [float]: Lower bound (inclusive). * `high` [float]: Upper bound (exclusive). * `prior` ["uniform" or "log-uniform", default="uniform"]: Distribution to use when sampling random points for this dimension. - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between `log10(lower)` and `log10(upper)`.` * `transform` ["identity", "normalize", optional]: The following transformations are supported. - "identity", (default) the transformed space is the same as the original space. - "normalize", the transformed space is scaled to be between 0 and 1. """ self.low = low self.high = high self.prior = prior if transform is None: transform = "identity" self.transform_ = transform if self.transform_ not in ["normalize", "identity"]: raise ValueError( "transform should be 'normalize' or 'identity' got %s" % self.transform_) # Define _rvs and transformer spaces. # XXX: The _rvs is for sampling in the transformed space. # The rvs on Dimension calls inverse_transform on the points sampled # using _rvs if self.transform_ == "normalize": self._rvs = uniform(0, 1) if self.prior == "uniform": self.transformer = Pipeline([Identity(), Normalize(low, high)]) else: self.transformer = Pipeline( [Log10(), Normalize(np.log10(low), np.log10(high))]) else: if self.prior == "uniform": self._rvs = uniform(self.low, self.high - self.low) self.transformer = Identity() else: self._rvs = uniform(np.log10(self.low), np.log10(self.high) - np.log10(self.low)) self.transformer = Log10()
def __init__(self, low, high, prior="uniform"): """Search space dimension that can take on any real value. Parameters ---------- * `low` [float]: Lower bound (inclusive). * `high` [float]: Upper bound (exclusive). * `prior` ["uniform" or "log-uniform", default="uniform"]: Distribution to use when sampling random points for this dimension. - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between `log10(lower)` and `log10(upper)`.` """ self._low = low self._high = high self.prior = prior if prior == "uniform": self._rvs = uniform(self._low, self._high - self._low) self.transformer = _Identity() elif prior == "log-uniform": self._rvs = uniform(np.log10(self._low), np.log10(self._high) - np.log10(self._low)) self.transformer = _Log10() else: raise ValueError( "Prior should be either 'uniform' or 'log-uniform', " "got '%s'." % self._rvs)
def random_search(): from scipy.stats.distributions import uniform from sklearn.model_selection import ParameterSampler param_grid = { 'alpha': uniform(0.1, 1.5), # np.linspace(0.1, 1, 10), range(1, 10, 2), # 'beta': uniform(0.1, 1.5), # np.linspace(0.05, 0.25, 5),[0.05], # } param_list = list(ParameterSampler(param_grid, n_iter=20)) return [dict((k, round(v, 1)) for (k, v) in d.items()) for d in param_list]
def bm25_parameter_space(n_trials): rng = np.random.RandomState(42) return ParameterSampler( dict(tf_method=["binary", "raw", "freq", "log_norm", "double_norm"] , idf_method=["smooth", "probabilistic"], drop_stopwords=[True, False], drop_suffix=[True, False], drop_punct=[True, False], lowercase=[True, False], k1=uniform(1.2, 2.0), b=uniform(0.5, 0.8), delta=uniform(0, 2)), n_iter=n_trials, random_state=rng)
def test_uniform():
    """ Testing uniform distribution """
    np.random.seed(12)
    a = np.sqrt(3) * sigma
    data_uniform = uniform(-a, a).rvs(N)
    x = np.linspace(-vmax, vmax, 10000)
    fapprox = data_to_pdf(data_uniform, x)
    ftrue = uniform(-a, a).pdf(x)
    error = relative_L2_error(fapprox, ftrue, x)
    assert_almost_equal(error, 0, decimal=1)
def random_search(): param_grid = { 'noise_factor_cafe': uniform(3, 1), 'noise_factor_car': uniform(15, 2), 'noise_factor_white': uniform(0.05, 0.02), 'noise_file': [0, 1, 2], 'speed_factor': uniform(0.8, 0.4), } param_list = list(ParameterSampler(param_grid, n_iter=10)) return [ dict((k, round(v, 4) if not isinstance(v, int) else v) for (k, v) in d.items()) for d in param_list ]
def __init__(self, low, high, prior="uniform", transform=None): """Search space dimension that can take on any real value. Parameters ---------- * `low` [float]: Lower bound (inclusive). * `high` [float]: Upper bound (exclusive). * `prior` ["uniform" or "log-uniform", default="uniform"]: Distribution to use when sampling random points for this dimension. - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between `log10(lower)` and `log10(upper)`.` * `transform` [None or "normalize", optional]: If `transform=normalize`, calling `transform` on X scales X to [0, 1] """ self.low = low self.high = high self.prior = prior self.transform_ = transform if self.transform_ and self.transform_ != "normalize": raise ValueError("transform should be normalize, got %s" % self.transform_) # Define _rvs and transformer spaces. # XXX: The _rvs is for sampling in the transformed space. # The rvs on Dimension calls inverse_transform on the points sampled # using _rvs if self.transform_ == "normalize": self._rvs = uniform(0, 1) if self.prior == "uniform": self.transformer = Pipeline([Identity(), Normalize(low, high)]) else: self.transformer = Pipeline( [Log10(), Normalize(np.log10(low), np.log10(high))]) else: if self.prior == "uniform": self._rvs = uniform(self.low, self.high - self.low) self.transformer = Identity() else: self._rvs = uniform(np.log10(self.low), np.log10(self.high) - np.log10(self.low)) self.transformer = Log10()
def _xgboost_hyperband_model(task, numeric_features, categoric_features, learning_rate): param_space = { 'max_depth': randint(2, 11), 'min_child_weight': randint(1, 11), 'subsample': uniform(0.5, 0.5), 'colsample_bytree': uniform(0.5, 0.5), 'colsample_bylevel': uniform(0.5, 0.5), 'gamma': uniform(0, 1), 'reg_alpha': uniform(0, 1), 'reg_lambda': uniform(0, 10), 'base_score': uniform(0.1, 0.9), 'scale_pos_weight': uniform(0.1, 9.9) } model = XGBClassifier(learning_rate=learning_rate) \ if task == 'classification' else XGBRegressor(learning_rate=learning_rate) return make_pipeline( make_union( make_pipeline(ColumnsSelector(categoric_features), FillNaN('nan'), ColumnApplier(TolerantLabelEncoder())), make_pipeline(ColumnsSelector(numeric_features), Imputer(strategy='mean'), StandardScaler())), Hyperband(model, feat_space=param_space, task=task))
def sampleLHS(self):
    sampledParams = lhs(2, samples=self.n_pf)
    lb = np.array([0, 0])
    width = np.array([self.gloEnv.maxRate, self.gloEnv.maxRate])
    for i in range(2):
        sampledParams[:, i] = uniform(loc=lb[i],
                                      scale=width[i]).ppf(sampledParams[:, i])
    return sampledParams
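# A self-contained sketch of the lhs-plus-ppf pattern used above, without the
# pyDOE dependency: stratified samples on the unit interval are pushed through
# the inverse CDF (ppf) of the target uniform distribution. Bounds are
# illustrative.
import numpy as np
from scipy.stats.distributions import uniform

n = 8
# one Latin-hypercube column: exactly one point per bin [k/n, (k+1)/n)
unit = (np.random.permutation(n) + np.random.uniform(size=n)) / n
samples = uniform(loc=2.0, scale=3.0).ppf(unit)  # mapped onto [2, 5]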
def test_with_randomizedsearchcv(self): import numpy as np from scipy.stats.distributions import uniform from sklearn.metrics import accuracy_score, make_scorer from sklearn.model_selection import RandomizedSearchCV lr = LogisticRegression() ranges, cat_idx = lr.get_param_ranges() # specify parameters and distributions to sample from # the loguniform distribution needs to be taken care of properly param_dist = { "solver": ranges["solver"], "C": uniform(0.03125, np.log(32768)) } # run randomized search n_iter_search = 5 with warnings.catch_warnings(): warnings.simplefilter("ignore") random_search = RandomizedSearchCV( lr, param_distributions=param_dist, n_iter=n_iter_search, cv=5, scoring=make_scorer(accuracy_score), ) iris = load_iris() random_search.fit(iris.data, iris.target)
def __init__(self, low: int, high: int, transform=None, name=None):
    if (type(low) != int) or (type(high) != int):
        raise TypeError("low, high have to be int")
    if low % 2 != 0:
        raise ValueError("low has to be even int")
    if high % 2 != 0:
        raise ValueError("high has to be even int")
    if high <= low:
        raise ValueError("the lower bound {} has to be less than the"
                         " upper bound {}".format(low, high))
    super().__init__(low, high, transform=transform, name=name)

    if transform is None:
        transform = "identity"
    self.transform_ = transform

    if transform == "normalize":
        self._rvs = uniform(0, 1)
        self.transformer = EvenNormalize(low, high, is_int=True)
    else:
        # low and high are validated to be even ints, so floor division gives
        # the same values as "/" while keeping the bounds integers.
        self._rvs = randint(self.low // 2, self.high // 2 + 1)
        self.transformer = EvenIdentity()
def add_set_value_random_uniform(self, variable, means, scale): """ Add a 'Set Value' macro command where the value is chosen from a random uniform distribution. Parameters ---------- variable: string An AnyScript variable or a list of AnyScript variables. means: int,float, numpy.ndarray The mean value of the random number scale: The range of the random variable [ means-scale/2 , means+scale/2] Examples -------- Set variable across different macros >>> seed(1) >>> mg = MonteCarloMacroGenerator(number_of_macros=5) >>> mg.add_set_value_random_uniform('Main.Study.myvar', means = 2, scale = 0.1) >>> for line in mg.generate_macros(): pprint(line) ['classoperation Main.Study.myvar "Set Value" --value="2"'] ['classoperation Main.Study.myvar "Set Value" --value="1.99170220047"'] ['classoperation Main.Study.myvar "Set Value" --value="2.02203244934"'] ['classoperation Main.Study.myvar "Set Value" --value="1.95001143748"'] ['classoperation Main.Study.myvar "Set Value" --value="1.98023325726"'] """ dist = distributions.uniform(means - scale / 2.0, scale) self.add_set_value_random(variable, dist)
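# Hedged check of the parametrisation used above: scipy's uniform(loc, scale)
# is supported on [loc, loc + scale], so uniform(means - scale/2, scale) has
# mean `means` and range [means - scale/2, means + scale/2]. The values below
# are illustrative, not taken from the source.
from scipy.stats import distributions

means, scale = 2.0, 0.1
dist = distributions.uniform(means - scale / 2.0, scale)
assert abs(dist.mean() - means) < 1e-12
assert abs(dist.ppf(0.0) - 1.95) < 1e-12 and abs(dist.ppf(1.0) - 2.05) < 1e-12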
def _compute_thresh(this_data, ch_type, cv=10): """ Compute the rejection threshold for one channel. Parameters ---------- this_data: array (n_epochs, n_times) Data for one channel. ch_type: str 'mag', 'grad' or 'eeg'. cv : iterator Iterator for cross-validation. """ est = ChannelAutoReject() Limits = namedtuple('Limits', 'low high') limits = dict(eeg=Limits(low=20e-7, high=400e-6), grad=Limits(low=400e-13, high=20000e-13), mag=Limits(low=400e-15, high=20000e-15)) param_dist = dict( thresh=uniform(limits[ch_type].low, limits[ch_type].high)) rs = RandomizedSearchCV( est, # XXX : is random really better than grid? param_distributions=param_dist, n_iter=20, cv=cv) rs.fit(this_data) best_thresh = rs.best_estimator_.thresh return best_thresh
def test_with_randomizedsearchcv(self): from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import load_iris from sklearn.metrics import accuracy_score, make_scorer from scipy.stats.distributions import uniform import numpy as np lr = LogisticRegression() parameters = {'solver': ('liblinear', 'lbfgs'), 'penalty': ['l2']} ranges, cat_idx = lr.get_param_ranges() min_C, max_C, default_C = ranges['C'] # specify parameters and distributions to sample from #the loguniform distribution needs to be taken care of properly param_dist = { "solver": ranges['solver'], "C": uniform(min_C, np.log(max_C)) } # run randomized search n_iter_search = 5 with warnings.catch_warnings(): warnings.simplefilter("ignore") random_search = RandomizedSearchCV( lr, param_distributions=param_dist, n_iter=n_iter_search, cv=5, scoring=make_scorer(accuracy_score)) iris = load_iris() random_search.fit(iris.data, iris.target)
def LHS(n, loc, upc, dist):
    """
    Latin hypercube sampling.

    Parameters:
        n: integer; size of desired sampling
        loc: scalar; lower bound of desired distribution
        upc: scalar; upper bound of desired distribution
        dist: string; either 'uniform' or 'normal'

    Returns:
        lhs: 1D array
    """
    lower_limits = np.arange(0, n) / n
    higher_limits = np.arange(1, n + 1) / n

    points = np.random.uniform(low=lower_limits, high=higher_limits, size=n)
    np.random.shuffle(points)

    scale = upc - loc
    if dist == 'uniform':
        rv = distributions.uniform(loc=loc, scale=scale)
    elif dist == 'normal':
        rv = distributions.norm(loc=loc, scale=scale)

    lhs = rv.ppf(points)
    return lhs
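# Hedged usage sketch for the LHS helper above (assumes that function and its
# numpy / scipy.stats.distributions imports are in scope): ten stratified
# draws from U(0, 1), one per 0.1-wide bin, shuffled across bins.
import numpy as np

sample = LHS(10, 0, 1, 'uniform')
assert sample.shape == (10,)
assert np.all((sample >= 0) & (sample <= 1))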
def init_distributions(pkey, kind='dpm', nrvs=25, tb=.65, force_normal=False): """ sample random parameter sets to explore global minima (called by Optimizer method __hop_around__()) """ loc, scale = get_theta_params(pkey, kind=kind) bounds = get_bounds(kind=kind)[pkey] lower = np.min(bounds) upper = np.max(bounds) normal_params = ['a', 'tr', 'v', 'vd', 'ssv', 'sso', 'xb', 'z', 'Beta'] uniform_params = ['vi', 'BX', 'AX', 'PX', 'si'] # init and freeze dist shape if pkey in normal_params: dist = norm(loc, scale) # elif pkey in gamma_params: # dist = gamma(1.0, loc, scale) elif pkey in uniform_params: dist = uniform(loc, scale) # generate random variates rvinits = dist.rvs(nrvs) while rvinits.min() < lower: # apply lower limit ix = rvinits.argmin() rvinits[ix] = dist.rvs() while rvinits.max() > upper: # apply upper limit ix = rvinits.argmax() rvinits[ix] = dist.rvs() if pkey =='tr': rvinits = np.abs(rvinits) rvinits[rvinits<lower] = lower rvinits[rvinits>upper] = upper return rvinits
def UniformPrior(low=0., high=1.):
    """
    Constant prior over a finite range.

    low, high : min, max of range
    """
    return Prior(distributions.uniform(loc=low, scale=(high-low)))
def gen_sample(loc, scale, sample, distribution_type):
    if distribution_type == NORMAL_DISTRIBUTION_TYPE:
        return norm(loc=loc, scale=scale).ppf(sample)
    elif distribution_type == UNIFORM_DISTRIBUTION_TYPE:
        return uniform(loc=loc, scale=scale).ppf(sample)
    else:
        raise Exception(
            "Invalid distribution type: {}".format(distribution_type))
def rvs(self, size=1, random_state=None):
    uniform_values = uniform(loc=self.loc, scale=self.scale)
    exp_values = np.power(
        self.base,
        uniform_values.rvs(size=size, random_state=random_state))
    if len(exp_values) == 1:
        return exp_values[0]
    else:
        return exp_values
def test_param_sampler():
    # test basic properties of param sampler
    param_distributions = {"kernel": ["rbf", "linear"],
                           "C": distributions.uniform(0, 1)}
    sampler = ParameterSampler(param_distributions=param_distributions,
                               n_iter=10, random_state=0)
    samples = [x for x in sampler]
    assert_equal(len(samples), 10)
    for sample in samples:
        assert_true(sample["kernel"] in ["rbf", "linear"])
        assert_true(0 <= sample["C"] <= 1)
def main(): print('Programming starting ...') args= arg_parser() torch.manual_seed(args.seed) train_loader,test_loader = mnist_loader(root=args.data,train_batch_size=args.batch_size, valid_batch_size=args.batch_size, train_shuffle=True, valid_shuffle=False) args.use_cuda = args.use_cuda and torch.cuda.is_available() model =Net(num_classes=10) checkpointer=CheckPoints(model,'./data/checkpoint') checkpointer.load_checkpoint_from_filename('model-best.chkpt') if args.use_cuda: model.cuda() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum) #scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=0.1) trainer = Trainer(model,optimizer,train_loader,test_loader,args=args) #param_grid = {'lr':[0.01,0.001,0.0025,0.005,0.0075,0.0001,0.00001],'momentum':uniform(0.5,0.45),'weight_decay':[1e-3,1e-5,1e-7]} param_grid = {'lr':[1e-4,1e-5,1e-6],'momentum':uniform(0.,0.2),'weight_decay':[1e-5,1e-7,1e-8]} n_iters=500 param_list = list(ParameterSampler(param_grid, n_iter=n_iters)) train_loss_epochs=np.zeros((n_iters,)) train_acc_epochs=np.zeros((n_iters,)) test_loss_epochs=np.zeros((n_iters,)) test_acc_epochs=np.zeros((n_iters,)) i=0 for parm in param_list: args.lr=parm['lr'] args.momentum=parm['momentum'] args.weight_decay=parm['weight_decay'] trainer = Trainer(model,optimizer,train_loader,test_loader,args=args) optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,weight_decay=args.weight_decay,momentum=args.momentum) #scheduler.step() train_loss, train_acc=trainer.train(i) train_loss_epochs[i]=train_loss train_acc_epochs[i]=train_acc trainer.print_msg('train',i,train_loss,train_acc) test_loss, test_acc=trainer.validate(i) test_loss_epochs[i]=test_loss test_acc_epochs[i]=test_acc trainer.print_msg('test',i,test_loss,test_acc) is_best=checkpointer.save_checkpoint(i,train_loss,train_acc,test_loss,test_acc,save_best=True) if not is_best: checkpointer.load_checkpoint_from_filename('model-best.chkpt') i+=1 data ={'train_loss':train_loss_epochs,'train_acc':train_acc_epochs,'test_loss':test_loss_epochs,'test_acc':test_acc_epochs} torch.save(data,'./data/Template/data.pt') print('Finished...')
def _compute_thresh(this_data, thresh_range, method='bayesian_optimization', cv=10, random_state=None): """ Compute the rejection threshold for one channel. Parameters ---------- this_data: array (n_epochs, n_times) Data for one channel. thresh_range : tuple The range (low, high) of thresholds over which to optimize. method : str 'bayesian_optimization' or 'random_search' cv : iterator Iterator for cross-validation. random_state : int seed, RandomState instance, or None (default) The seed of the pseudo random number generator to use. Returns ------- rs : instance of RandomizedSearchCV The RandomizedSearchCV object. Notes ----- For method='random_search', the random_state parameter gives deterministic results only for scipy versions >= 0.16. This is why we recommend using autoreject with scipy version 0.16 or greater. """ est = _ChannelAutoReject() if method == 'random_search': param_dist = dict(thresh=uniform(thresh_range[0], thresh_range[1])) rs = RandomizedSearchCV(est, param_distributions=param_dist, n_iter=20, cv=cv, random_state=random_state) rs.fit(this_data) elif method == 'bayesian_optimization': from skopt import gp_minimize from sklearn.cross_validation import cross_val_score def objective(thresh): est.set_params(thresh=thresh) return -np.mean(cross_val_score(est, this_data, cv=cv)) space = [(thresh_range[0], thresh_range[1])] rs = gp_minimize(objective, space, n_calls=50, random_state=random_state) return rs
def _xgboost_gridsearch_model( task, numeric_features, categoric_features, learning_rate, use_dask, n_iter, scoring, ): param_space = { 'clf__max_depth': randint(2, 11), 'clf__min_child_weight': randint(1, 11), 'clf__subsample': uniform(0.5, 0.5), 'clf__colsample_bytree': uniform(0.5, 0.5), 'clf__colsample_bylevel': uniform(0.5, 0.5), 'clf__gamma': uniform(0, 1), 'clf__reg_alpha': uniform(0, 1), 'clf__reg_lambda': uniform(0, 10), 'clf__base_score': uniform(0.1, 0.9), 'clf__scale_pos_weight': uniform(0.1, 9.9), } model = (xgbsk.XGBClassifier(learning_rate=learning_rate) if task == 'classification' else xgbsk.XGBRegressor( learning_rate=learning_rate)) pipe = Pipeline([ ( 'preprocessing', simple_proc_for_tree_algoritms(numeric_features, categoric_features), ), ('clf', model), ]) if use_dask: from dask_ml.model_selection import RandomizedSearchCV return RandomizedSearchCV(pipe, param_space, n_iter=n_iter, scoring=scoring, cv=5) else: from sklearn.model_selection import RandomizedSearchCV return RandomizedSearchCV(pipe, param_space, n_iter=n_iter, scoring=scoring, cv=5)
def _make_distribution(self) -> rv_generic:
    """Build a distribution to randomly sample points within the space

    Returns
    -------
    rv_generic
        `uniform` distribution between 0 and 1 if :attr:`transform_` ==
        "normalize". Else, a `randint` distribution between :attr:`low`
        and (:attr:`high` + 1)"""
    if self.transform_ == "normalize":
        return uniform(0, 1)
    else:
        return randint(self.low, self.high + 1)
def _mlp_gridsearch_model( task, numeric_features, categoric_features, learning_rate, use_dask, n_iter, scoring, ): param_space = { 'clf__hidden_layer_sizes': [ (24, ), (12, 12), (6, 6, 6, 6), (4, 4, 4, 4, 4, 4), (12, 6, 3, 3), ], 'clf__activation': ['relu', 'logistic', 'tanh'], 'clf__batch_size': [16, 32, 64, 128, 256, 512], 'clf__alpha': uniform(0.0001, 0.9), 'clf__learning_rate': ['constant', 'adaptive'], } model = (MLPClassifier(learning_rate_init=learning_rate) if task == 'classification' else MLPRegressor( learning_rate_init=learning_rate)) pipe = Pipeline([ ( 'preprocessing', simple_proc_for_linear_algoritms(numeric_features, categoric_features), ), ('clf', model), ]) if use_dask: from dask_ml.model_selection import RandomizedSearchCV return RandomizedSearchCV(pipe, param_space, n_iter=n_iter, scoring=scoring, cv=5) else: from sklearn.model_selection import RandomizedSearchCV return RandomizedSearchCV(pipe, param_space, n_iter=n_iter, scoring=scoring, cv=5)
def _lgbm_hyperband_model(task, numeric_features, categoric_features, learning_rate=0.08): param_space = { 'num_leaves': randint(3, 99), 'max_depth': randint(2, 11), 'subsample': uniform(0.5, 0.5), 'colsample_bytree': uniform(0.5, 0.5), 'reg_alpha': uniform(0, 1), 'reg_lambda': uniform(0, 10), 'max_bin': randint(100, 400), 'min_child_weight': randint(1, 10), 'min_child_samples': randint(1, 11) } model = ContinuableLGBMClassifier(learning_rate=learning_rate) \ if task == 'classification' else ContinuableLGBMRegressor(learning_rate=learning_rate) return make_pipeline( simple_proc_for_tree_algoritms(numeric_features, categoric_features), Hyperband(model, feat_space=param_space, task=task))
def lexrank_parameter_space(n_trials): rng = np.random.RandomState(42) return ParameterSampler(dict( tf_method=["binary", "raw", "freq", "log_norm", "double_norm"], idf_method=["smooth", "probabilistic"], drop_stopwords=[True, False], drop_suffix=[True, False], drop_punct=[True, False], lowercase=[True, False], threshold=uniform(0, 1)), n_iter=n_trials, random_state=rng)
def __init__(self, low, high, prior="uniform"): """Search space dimension that can take on any real value. Parameters ---------- * `low` [float]: Lower bound (inclusive). * `high` [float]: Upper bound (exclusive). * `prior` ["uniform" or "log-uniform", default="uniform"]: Distribution to use when sampling random points for this dimension. - If `"uniform"`, points are sampled uniformly between the lower and upper bounds. - If `"log-uniform"`, points are sampled uniformly between `log10(lower)` and `log10(upper)`.` """ self._low = low self._high = high self.prior = prior if prior == "uniform": self._rvs = uniform(self._low, self._high - self._low) self.transformer = _Identity() elif prior == "log-uniform": self._rvs = uniform( np.log10(self._low), np.log10(self._high) - np.log10(self._low)) self.transformer = _Log10() else: raise ValueError( "Prior should be either 'uniform' or 'log-uniform', " "got '%s'." % self._rvs)
def add_set_value_LHS_uniform(self, variable, loc, scale):
    """ Add a 'Set Value' macro command where the values are uniformly
    chosen from the interval [loc, loc + scale] using a Latin Hyper Cube
    sampler.

    Parameters
    ----------
    variable: string
        An AnyScript variable or a list of AnyScript variables.
    loc: int, float, numpy.ndarray
        The start of the interval for uniform sampling.
    scale:
        The range of the sample interval

    Examples
    --------
    Set variable across different macros

    >>> seed(1)
    >>> mg = LatinHyperCubeMacroGenerator(number_of_macros=8)
    >>> mg.add_set_value_LHS_uniform('Main.myvar1',1,2)
    >>> mg.add_set_value_LHS_uniform('Main.myvar2',10,10)
    >>> pprint( mg.generate_macros() )
    [['classoperation Main.myvar1 "Set Value" --value="2"',
      'classoperation Main.myvar2 "Set Value" --value="15"'],
     ['classoperation Main.myvar1 "Set Value" --value="2.09919186856"',
      'classoperation Main.myvar2 "Set Value" --value="12.6154232435"'],
     ['classoperation Main.myvar1 "Set Value" --value="1.79656505284"',
      'classoperation Main.myvar2 "Set Value" --value="15.6735209175"'],
     ['classoperation Main.myvar1 "Set Value" --value="2.3547986286"',
      'classoperation Main.myvar2 "Set Value" --value="14.1819509088"'],
     ['classoperation Main.myvar1 "Set Value" --value="1.5366889727"',
      'classoperation Main.myvar2 "Set Value" --value="10.9004056168"'],
     ['classoperation Main.myvar1 "Set Value" --value="1.10425550118"',
      'classoperation Main.myvar2 "Set Value" --value="18.5976467955"'],
     ['classoperation Main.myvar1 "Set Value" --value="2.55111306243"',
      'classoperation Main.myvar2 "Set Value" --value="19.5880843877"'],
     ['classoperation Main.myvar1 "Set Value" --value="1.2500285937"',
      'classoperation Main.myvar2 "Set Value" --value="17.1065243755"']]
    """
    if isinstance(loc, list):
        loc = np.array(loc)
    if isinstance(scale, list):
        scale = np.array(scale)
    dist = distributions.uniform(loc, scale)
    self.add_set_value_LHS(variable, dist)
def add_set_value_LHS_uniform(self, variable, means, scale):
    """ Add a 'Set Value' macro command where the values are uniformly
    chosen using Latin Hyper Cube Sampling.

    Parameters
    ----------
    variable: string
        An AnyScript variable or a list of AnyScript variables.
    means: int, float, numpy.ndarray
        The mean value of the sampled space
    scale:
        The range of the variable, from [means - scale/2, means + scale/2]

    Examples
    --------
    Set variable across different macros

    >>> seed(1)
    >>> mg = LatinHyperCubeMacroGenerator(number_of_macros=8)
    >>> mg.add_set_value_LHS_uniform('Main.myvar1',1,2)
    >>> mg.add_set_value_LHS_uniform('Main.myvar2',10,10)
    >>> pprint( mg.generate_macros() )
    [['classoperation Main.myvar1 "Set Value" --value="1"',
      'classoperation Main.myvar2 "Set Value" --value="10"'],
     ['classoperation Main.myvar1 "Set Value" --value="{1.09919186856}"',
      'classoperation Main.myvar2 "Set Value" --value="{7.61542324346}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{0.796565052844}"',
      'classoperation Main.myvar2 "Set Value" --value="{10.6735209175}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{1.3547986286}"',
      'classoperation Main.myvar2 "Set Value" --value="{9.1819509088}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{0.536688972704}"',
      'classoperation Main.myvar2 "Set Value" --value="{5.9004056168}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{0.104255501176}"',
      'classoperation Main.myvar2 "Set Value" --value="{13.5976467955}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{1.55111306243}"',
      'classoperation Main.myvar2 "Set Value" --value="{14.5880843877}"'],
     ['classoperation Main.myvar1 "Set Value" --value="{0.250028593704}"',
      'classoperation Main.myvar2 "Set Value" --value="{12.1065243755}"']]
    """
    if isinstance(means, list):
        means = np.array(means)
    if isinstance(scale, list):
        scale = np.array(scale)
    dist = distributions.uniform(means - scale / 2.0, scale)
    self.add_set_value_LHS(variable, dist)
def simulate_random_ics(self): """ Randomize initial concentration parameters using latin hypercube sampling and run a time course """ if self.from_pickle and os.path.isfile(self.pickle_file): return pd.read_pickle(self.pickle_file) ics = [ i.replace('[', '').replace(']', '') for i in self.rr.getFloatingSpeciesConcentrationIds() ] original_ics = dict( zip(ics, self.rr.getFloatingSpeciesConcentrations())) sample = lhs(n=len(original_ics), samples=self.n, iterations=1, criterion=None) sample = uniform(self.lower_bound, self.upper_bound).ppf(sample) print('Simulating time series data') simulations = {} for i in range(sample.shape[0]): print('Percent Complete: {}%'.format( round(i / sample.shape[0] * 100, 2))) self.rr.reset() for j in range(sample.shape[1]): setattr(self.rr, ics[j], sample[i, j]) data = self.rr.simulate(0, self.end_time, self.num_simulation_points) df = pd.DataFrame(data) df.columns = [ i.replace('[', '').replace(']', '') for i in data.colnames ] simulations[i] = df.set_index('time') df = pd.concat(simulations) df.to_pickle(self.pickle_file) if self.subtract_ic_normalisation: df = self.normalise(df) return df
def generateLHS(hyperParamRange, nSamples): indexhash = {} # for all hyperparameter lowBounds = [] highBounds = [] index = 0 interestedHPlen = 0 for key, eachHpRng in hyperParamRange.iteritems(): if isinstance(eachHpRng, Sequence) and eachHpRng[1] > eachHpRng[0]: lowBounds.append(eachHpRng[0]) highBounds.append(eachHpRng[1]) indexhash[key] = index interestedHPlen += 1 index += 1 design = lhs(interestedHPlen, samples=nSamples) for i in xrange(interestedHPlen): design[:, i] = uniform(loc=lowBounds[i], scale=highBounds[i] - lowBounds[i]).ppf( design[:, i]) design = np.array(design) for key, eachHpRng in hyperParamRange.iteritems(): if not isinstance(eachHpRng, Sequence): design = np.concatenate( (design, np.full((nSamples, 1), eachHpRng)), axis=1) indexhash[key] = index index += 1 elif eachHpRng[1] <= eachHpRng[0]: design = np.concatenate( (design, np.full((nSamples, 1), eachHpRng[0])), axis=1) indexhash[key] = index index += 1 samples = Samples([]) for point in design: hyperParam = HyperParameter({ k: point[indexhash[k]] for k in hyperParamRange.get_param_names() }) samples.append(Sample(hyperParam, 0.0, 0.0, 0.0)) return samples
def __init__(self, low, high, transform=None, name=None): """Search space dimension that can take on integer values. Parameters ---------- * `low` [int]: Lower bound (inclusive). * `high` [int]: Upper bound (inclusive). * `transform` ["identity", "normalize", optional]: The following transformations are supported. - "identity", (default) the transformed space is the same as the original space. - "normalize", the transformed space is scaled to be between 0 and 1. * `name` [str or None]: Name associated with dimension, e.g., "number of trees". """ if high <= low: raise ValueError("the lower bound {} has to be less than the" " upper bound {}".format(low, high)) self.low = low self.high = high self.name = name if transform is None: transform = "identity" self.transform_ = transform if transform not in ["normalize", "identity"]: raise ValueError("transform should be 'normalize' or 'identity'" " got {}".format(self.transform_)) if transform == "normalize": self._rvs = uniform(0, 1) self.transformer = Normalize(low, high, is_int=True) else: self._rvs = randint(self.low, self.high + 1) self.transformer = Identity()
def _xgboost_hyperband_model(task, numeric_features, categoric_features, learning_rate): param_space = { 'max_depth': randint(2, 11), 'min_child_weight': randint(1, 11), 'subsample': uniform(0.5, 0.5), 'colsample_bytree': uniform(0.5, 0.5), 'colsample_bylevel': uniform(0.5, 0.5), 'gamma': uniform(0, 1), 'reg_alpha': uniform(0, 1), 'reg_lambda': uniform(0, 10), 'base_score': uniform(0.1, 0.9), 'scale_pos_weight': uniform(0.1, 9.9) } model = ContinuableXGBClassifier(learning_rate=learning_rate) \ if task == 'classification' else ContinuableXGBRegressor(learning_rate=learning_rate) return make_pipeline( simple_proc_for_tree_algoritms(numeric_features, categoric_features), Hyperband(model, feat_space=param_space, task=task))
def _compute_thresh(this_data, thresh_range, cv=10): """ Compute the rejection threshold for one channel. Parameters ---------- this_data: array (n_epochs, n_times) Data for one channel. cv : iterator Iterator for cross-validation. """ est = _ChannelAutoReject() param_dist = dict(thresh=uniform(thresh_range[0], thresh_range[1])) rs = RandomizedSearchCV(est, # XXX : is random really better than grid? param_distributions=param_dist, n_iter=20, cv=cv) rs.fit(this_data) best_thresh = rs.best_estimator_.thresh return best_thresh
def init_distributions(pkey, kind='dpm', mu = None, sigma = None, nrvs=25, tb=.65): """ sample random parameter sets to explore global minima (called by Optimizer method __hop_around__()) """ if mu is None: mu = {'a': .15, 'tr': .02, 'v': 1., 'ssv': -1., 'z': .1, 'xb': 1., 'sso': .15, 'vi': .35, 'vd': .5} if sigma is None: sigma = {'a': .35, 'tr': .25, 'v': .5, 'ssv': .5, 'z': .05, 'xb': .5, 'sso': .01, 'vi': .4, 'vd': .5} normal_params = ['tr', 'v', 'vd', 'ssv', 'z', 'xb', 'sso'] gamma_params = ['a', 'tr'] uniform_params = ['vd', 'vi'] if 'race' in kind: sigma['ssv'] = abs(mu['ssv']) bounds = get_bounds(kind=kind)[pkey] loc = mu[pkey] scale = sigma[pkey] # init and freeze dist shape if pkey in normal_params: dist = norm(loc, scale) elif pkey in gamma_params: dist = gamma(1.0, loc, scale) elif pkey in uniform_params: dist = uniform(loc, scale) # generate random variates rvinits = dist.rvs(nrvs) while rvinits.min() < bounds[0]: # apply lower limit ix = rvinits.argmin() rvinits[ix] = dist.rvs() while rvinits.max() > bounds[1]: # apply upper limit ix = rvinits.argmax() rvinits[ix] = dist.rvs() if pkey =='tr': rvinits = np.abs(rvinits) return rvinits
def __init__(self, low, high, transform=None): """Search space dimension that can take on integer values. Parameters ---------- * `low` [int]: Lower bound (inclusive). * `high` [int]: Upper bound (inclusive). * `transform` ["identity", "normalize", optional]: The following transformations are supported. - "identity", (default) the transformed space is the same as the original space. - "normalize", the transformed space is scaled to be between 0 and 1. """ self.low = low self.high = high if transform is None: transform = "identity" self.transform_ = transform if transform not in ["normalize", "identity"]: raise ValueError("transform should be 'normalize' or 'identity'" " got {}".format(self.transform_)) if transform == "normalize": self._rvs = uniform(0, 1) self.transformer = Normalize(low, high, is_int=True) else: self._rvs = randint(self.low, self.high + 1) self.transformer = Identity()
def __init__(self, min=0, max=1):
    self.min = min
    self.max = max
    self.default = (min + max) / 2.0
    self.D = D.uniform(min, max - min)
def uniform(min=0, max=1):
    return D.uniform(min, max - min)
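# Why the wrappers above pass (min, max - min): scipy's uniform takes loc and
# scale, and the support is [loc, loc + scale]. A quick illustrative check,
# with arbitrary bounds:
from scipy.stats import distributions as D

rv = D.uniform(2, 5 - 2)  # same support as uniform(min=2, max=5) above
assert rv.ppf(0.0) == 2.0 and rv.ppf(1.0) == 5.0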
def _compute_thresh(this_data, method='bayesian_optimization', cv=10, random_state=None): """Compute the rejection threshold for one channel. Parameters ---------- this_data: array (n_epochs, n_times) Data for one channel. method : str 'bayesian_optimization' or 'random_search' cv : iterator Iterator for cross-validation. random_state : int seed, RandomState instance, or None (default) The seed of the pseudo random number generator to use. Returns ------- best_thresh : float The best threshold. Notes ----- For method='random_search', the random_state parameter gives deterministic results only for scipy versions >= 0.16. This is why we recommend using autoreject with scipy version 0.16 or greater. """ est = _ChannelAutoReject() all_threshes = np.sort(np.ptp(this_data, axis=1)) if method == 'random_search': param_dist = dict(thresh=uniform(all_threshes[0], all_threshes[-1])) rs = RandomizedSearchCV(est, param_distributions=param_dist, n_iter=20, cv=cv, random_state=random_state) rs.fit(this_data) best_thresh = rs.best_estimator_.thresh elif method == 'bayesian_optimization': from sklearn.cross_validation import cross_val_score cache = dict() def func(thresh): idx = np.where(thresh - all_threshes >= 0)[0][-1] thresh = all_threshes[idx] if thresh not in cache: est.set_params(thresh=thresh) obj = -np.mean(cross_val_score(est, this_data, cv=cv)) cache.update({thresh: obj}) return cache[thresh] n_epochs = all_threshes.shape[0] idx = np.concatenate(( np.linspace(0, n_epochs, 40, endpoint=False, dtype=int), [n_epochs - 1])) # ensure last point is in init idx = np.unique(idx) # linspace may be non-unique if n_epochs < 40 initial_x = all_threshes[idx] best_thresh, _ = bayes_opt(func, initial_x, all_threshes, expected_improvement, max_iter=10, debug=False, random_state=random_state) return best_thresh
from scipy.stats.distributions import norm from scipy.stats.distributions import uniform from sklearn.metrics.pairwise import euclidean_distances from matplotlib.pylab import plt import numpy as np from numpy.random import choice from uuid import uuid4 from collections import defaultdict y_pos_dist = norm(300, 10) cluster_x_dists = { 'A': uniform(0, 50), 'B': uniform(30, 50), 'C': uniform(60, 50) } cluster_sizes = { 'A': 8, 'B': 10, 'C': 8 } cluster_colors = { 'A': 'r', 'B': 'b', 'C': 'g' }
def uniform(lower=0.0, upper=1.0):
    return dists.uniform(loc=lower, scale=upper - lower)
n_k = 5 # number of random variables D = 26 * 1.0e-6 # m A = ( D / 2.0 ) ** 2 * pi # set the mean and standard deviation of the two random variables la_mean, la_stdev = 0.0, 0.2 xi_mean, xi_stdev = 0.019027, 0.0022891 E_mean, E_stdev = 70.0e+9, 15.0e+9 th_mean, th_stdev = 0.0, 0.01 A_mean, A_stdev = A * 0.3, 0.7 * A print A_mean, A_mean + A_stdev # construct the normal distributions and get the methods # for the evaluation of the probability density functions g_la = uniform( loc = la_mean, scale = la_stdev ) g_xi = norm( loc = xi_mean, scale = xi_stdev ) g_E = uniform( loc = E_mean, scale = E_stdev ) g_th = uniform( loc = th_mean, scale = th_stdev ) g_A = uniform( loc = A_mean, scale = A_stdev ) # generate the grids for integration covering major part of the random domains Theta_la = linspace( la_mean + 0.5 * la_stdev / n_int, la_mean + la_stdev - 0.5 * la_stdev / n_int, n_int ) delta = ( xi_mean + ( 4 * xi_stdev ) - xi_mean + ( 4 * xi_stdev ) ) / n_int Theta_xi = linspace( xi_mean - ( 4 * xi_stdev ) + 0.5 * delta, xi_mean + ( 4 * xi_stdev ) - 0.5 * delta, n_int ) Theta_E = linspace( E_mean + 0.5 * E_stdev / n_int, E_mean + E_stdev - 0.5 * E_stdev / n_int, n_int ) Theta_th = linspace( th_mean + 0.5 * th_stdev / n_int, th_mean + th_stdev - 0.5 * th_stdev / n_int, n_int ) Theta_A = linspace( A_mean + 0.5 * A_stdev / n_int, A_mean + A_stdev - 0.5 * A_stdev / n_int, n_int ) # LHS generate the grids for integration covering major part of the random domains T_la = g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) ) T_xi = g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) )
from time import clock from scipy.interpolate import interp1d from scipy.weave import inline, converters n_int = 10 # number of discretization points n_k = 2 # number of random variables # set the mean and standard deviation of the two random variables la_mean, la_stdev = 0.0, 0.2 xi_mean, xi_stdev = 0.019027, 0.0022891 # construct the normal distributions and get the methods # for the evaluation of the probability density functions g_la = uniform( loc = la_mean, scale = la_stdev ) g_xi = weibull_min( 10., scale = 0.02 ) # generate the grids for integration covering major part of the random domains Theta_la = linspace( la_mean + 0.5 * la_stdev / n_int, la_mean + la_stdev - 0.5 * la_stdev / n_int, n_int ) delta_xi = ( xi_mean + ( 4 * xi_stdev ) - xi_mean + ( 4 * xi_stdev ) ) / n_int Theta_xi = linspace( xi_mean - ( 4 * xi_stdev ) + 0.5 * delta_xi, xi_mean + ( 4 * xi_stdev ) - 0.5 * delta_xi, n_int ) # LHS generate the grids for integration covering major part of the random domains T_la = g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) ) T_xi = g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ) ) # MC generation T_la_MC = g_la.rvs( n_int ** n_k ) T_xi_MC = g_xi.rvs( n_int ** n_k ) #T_la_MC = array( zip( *sorted( zip( random( n_int ** n_k ), g_la.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ** n_k ) ) ) ) )[1] ) #T_xi_MC = array( zip( *sorted( zip( random( n_int ** n_k ), g_xi.ppf( linspace( 0.5 / n_int, 1. - 0.5 / n_int, n_int ** n_k ) ) ) ) )[1] ) print diff( sort( g_la.cdf( g_la.rvs( 10 ) ) ) )
def _uniform_inclusive(loc=0.0, scale=1.0):
    # like scipy.stats.distributions but inclusive of `high`
    # XXX scale + 1. might not actually be a float after scale if
    # XXX scale is very large.
    return uniform(loc=loc, scale=np.nextafter(scale, scale + 1.))
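# Minimal check of the idea behind _uniform_inclusive (values are
# illustrative): np.nextafter nudges the scale to the next representable
# float above (high - low), so the upper bound itself becomes attainable.
import numpy as np
from scipy.stats.distributions import uniform

lo, hi = 0.0, 1.0
scale = np.nextafter(hi - lo, (hi - lo) + 1.0)
assert scale > (hi - lo)
rv = uniform(loc=lo, scale=scale)  # support extends just past hi
assert rv.ppf(1.0) >= hi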
epochs_gt = clean_by_interp(epochs) picks = mne.pick_types(epochs.info, meg='grad', eeg=False, stim=False, eog=False, include=include, exclude='bads') X = epochs.get_data() X_gt = epochs_gt.get_data() X = np.concatenate((X, X_gt), axis=0) np.random.seed(42) cv = KFold(X.shape[0], 10, random_state=42) low, high = 4e-13, 900e-13 best_threshes = np.zeros((len(picks), )) for idx, pick in enumerate(picks): est = ChannelAutoReject() param_dist = dict(thresh=uniform(low, high)) rs = RandomizedSearchCV(est, param_distributions=param_dist, n_iter=20, cv=cv) rs.fit(X[:, pick]) best_thresh = rs.best_estimator_.thresh best_threshes[idx] = best_thresh unit = r'fT/cm' scaling = 1e13 plt.figure(figsize=(6, 5)) plt.tick_params(axis='x', which='both', bottom='off', top='off') plt.tick_params(axis='y', which='both', left='off', right='off') counts, bins, _ = plt.hist(scaling * best_threshes, 30, color='g', alpha=0.4)
def RMSE(predicted, expected): return np.linalg.norm(predicted - expected) / np.sqrt(len(predicted)) distributions = [ norm(), t(df=5), gamma(a=2), gamma(a=4), gamma(a=8), expon(scale=1/0.5), expon(scale=1/1), expon(scale=1/2), rayleigh(), uniform(), ] errors = [] for distribution in distributions: parameters = [k + '=' + str(v) for k, v in distribution.kwds.items()] name = "{name}({parameters})".format( name=distribution.dist.name, parameters=', '.join(parameters) ) l, lm, lt, m, ut, um, u = distribution.ppf([0.05, 0.2625, 0.342, 0.5, 0.658, 0.7375, 0.95]) candidates = [ between(l, u), between(l, m, u), between(l, lt, ut, u),
def sample_pspace(model, param_list=None, bounds=None, samples=100, seed=None): """ A DataFrame where each row represents a location in the parameter space, locations distributed to exercise the full range of values that each parameter can take on. This is useful for quick and dirty application of tests to a bunch of locations in the sample space. Kind-of a fuzz-testing for the model. Uses latin hypercube sampling, with random values within the sample bins. The LHS sampler shuffles the bins each time, so a subsequent call will yield a different sample from the parameter space. When a variable has both upper and lower bounds, use a uniform sample between those bounds. When a variable has only one bound, use an exponential distribution with the scale set to be the difference between the bound and the current model value (1 if they are the same) When the variable has neither bound, use a normal distribution centered on the current model value, with scale equal to the absolute value of the model value (1 if that magnitude is 0) Parameters ---------- model: pysd.Model object param_list: None or list of strings The real names of parameters to include in the explored parameter space. If None, uses all of the constants in the model except TIME STEP, INITIAL TIME, etc. bounds: DataFrame, string filename, or None A range test matrix as used for bounds checking. If None, creates one from the model These bounds can also place artificial limits on the parameter space you want to explore, even if the theoretical bounds on the variable are infinite. samples: int How many samples to include in the iterator? Returns ------- lhs : pandas DataFrame distribution-weighted latin hypercube samples Note ---- Executes the model by 1 time-step to get the current value of parameters. 
""" if param_list is None: doc = model.doc() param_list = sorted(list(set(doc[doc['Type'] == 'constant']['Real Name']) - {'FINAL TIME', 'INITIAL TIME', 'TIME STEP', 'TIME STEP'})) if isinstance(bounds, _pd.DataFrame): bounds = bounds.set_index('Real Name') elif bounds is None: bounds = create_bounds_test_matrix(model).set_index('Real Name') elif isinstance(bounds, str): if bounds.split('.')[-1] in ['xls', 'xlsx']: bounds = _pd.read_excel(bounds, sheetname='Bounds', index_col='Real Name') elif bounds.split('.')[-1] == 'csv': bounds = _pd.read_csv(bounds, index_col='Real Name', encoding='UTF-8') elif bounds.split('.')[-1] == 'tab': bounds = _pd.read_csv(bounds, sep='\t', index_col='Real Name', encoding='UTF-8') else: raise ValueError('Unknown file type: bounds') else: raise ValueError('Unknown type: bounds') if seed is not None: _np.random.seed(seed) unit_lhs = _pd.DataFrame(_pyDOE.lhs(n=len(param_list), samples=samples), columns=param_list) # raw latin hypercube sample res = model.run(return_timestamps=[model.components.initial_time()]) lhs = _pd.DataFrame(index=unit_lhs.index) for param in param_list: lower, upper = bounds[['Min', 'Max']].loc[param] value = res[param].iloc[0] if lower == upper: lhs[param] = lower elif _np.isfinite(lower) and _np.isfinite(upper): # np.isfinite(0)==True scale = upper - lower lhs[param] = _dist.uniform(lower, scale).ppf(unit_lhs[param]) elif _np.isfinite(lower) and _np.isinf(upper): if lower == value: scale = 1 else: scale = value - lower lhs[param] = _dist.expon(lower, scale).ppf(unit_lhs[param]) elif _np.isinf(lower) and _np.isfinite(upper): # np.isinf(-np.inf)==True if upper == value: scale = 1 else: scale = upper - value lhs[param] = upper - _dist.expon(0, scale).ppf(unit_lhs[param]) elif _np.isinf(lower) and _np.isinf(upper): # np.isinf(-np.inf)==True if value == 0: scale = 1 else: scale = abs(value) lhs[param] = _dist.norm(value, scale).ppf(unit_lhs[param]) else: raise ValueError('Problem with lower: %s or upper: %s bounds' % (lower, upper)) return lhs
def design_lhs_exp(variables, maps, offsets=None, samples=int(1e4), project_linear=True): """ Design an LHS experiment """ design = lhs(len(variables), samples=samples, criterion="m", iterations=100) z_design = np.zeros_like(design) print "Computing LHS design..." if project_linear: print " using linear re-projection for log variables" else: print " using original variable coordinate" for i, v in enumerate(variables): dist, a, b = v[3] if project_linear: # Re-sample in linear space if v[0].startswith("ln"): ## 9/4/2014 ## This is an experimental correction to re-project the ## logarithmic variables into their normal coordinate ## system. It should only effect the sampling, and hopefully ## improve it by forcing it to even things out over the ## actually range we care about a = np.exp(a) b = np.exp(b) offsets[i] = np.exp(offsets[i]) elif v[0].startswith("log"): ## 10/26/2014 ## In accordance with above, but for log10 vars a = 10.0 ** a b = 10.0 ** b offsets[i] = 10.0 ** offsets[i] if offsets: ## These corrections with "offsets" re-center the interval ## so that the left endpoint is 0. I found that if arbitrary ## lower/upper limits were used, sometimes the PPF routines ## would really mess up in inverting the CDF. a, b = a - offsets[i], b - offsets[i] if dist == "uniform": design[:, i] = uniform(a, b).ppf(design[:, i]) elif dist == "normal": design[:, i] = norm(a, b).ppf(design[:, i]) elif dist == "loguniform": design[:, i] = loguni_ppf(design[:, i], a, b) else: raise ValueError("no dist defined for %s" % dist) if offsets: ## Project back in to the correct limits design[:, i] += offsets[i] a, b = a + offsets[i], b + offsets[i] if project_linear: if v[0].startswith("ln"): ## 9/4/2014 ## Second half of correction a = np.log(a) b = np.log(b) design[:, i] = np.log(design[:, i]) elif v[0].startswith("log"): ## 10/26/2014 a = np.log10(a) b = np.log10(b) design[:, i] = np.log10(design[:, i]) z_design[:, i] = maps[i](design[:, i], a, b) design = design.T # in x-coords z_design = z_design.T return design, z_design