def __init__(self, config_space, objective_func, R, num_iter=10, n_workers=1,
             eta=3, es_gap=9, rho=0.7, random_state=1, method_id="Default"):
    """Initialise the facade: resource schedule, BO components and the
    learning-curve early-stopping state.

    Args:
        config_space: Search space; it is seeded in place with
            ``random_state``.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used here to derive ``s_max``.
        num_iter: Stored as ``self.num_iter``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
        eta: Base of the logarithm used to derive ``s_max``.
        es_gap: Stored as ``self.early_stop_gap``.
        rho: Stored as ``self.es_rho``.
        random_state: Seed applied to ``config_space``.
        method_id: Forwarded to ``BaseFacade.__init__`` as ``method_name``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers,
                        need_lc=True, method_name=method_id)
    self.seed = random_state
    self.config_space = config_space
    self.config_space.seed(self.seed)

    self.R = R
    self.num_iter = num_iter
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(R) + 1e-9)
    self.inner_iteration_n = (self.s_max + 1) * (self.s_max + 1)

    types, bounds = get_types(config_space)
    self.num_config = len(bounds)
    self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    self.acquisition_func = EI(model=self.surrogate)
    # TODO: add SMAC's optimization algorithm.
    self.acq_optimizer = RandomSampling(
        self.acquisition_func, config_space,
        n_samples=max(500, 50 * self.num_config))

    self.incumbent_configs = []
    self.incumbent_obj = []

    # Learning-curve model and the data it is trained on; the training
    # arrays start out empty (None).
    self.lcnet_model = LC_ES()
    self.early_stop_gap = es_gap
    self.es_rho = rho
    self.lc_training_x = None
    self.lc_training_y = None
def obtain_cv_prediction(self, X, y):
    """Return one held-out surrogate prediction per row of ``X``.

    With fewer than 10 instances a leave-one-out scheme is used; otherwise
    the rows are split into 5 contiguous folds, the last fold absorbing the
    remainder. For every split a random forest is trained on the remaining
    rows and the predicted mean of the held-out rows is collected.

    Args:
        X: Feature matrix, indexed by row with integer lists.
        y: Targets aligned with the rows of ``X``.

    Returns:
        A list with ``len(y)`` predicted means, in row order.
    """
    types, bounds = get_types(self.config_space)
    base_model = RandomForestWithInstances(types=types, bounds=bounds)
    instance_num = len(y)
    output_pred = []

    if instance_num < 10:
        # Leave-one-out: hold out a single row at a time.
        for held_out in range(instance_num):
            train_rows = [row for row in range(instance_num)
                          if row != held_out]
            base_model.train(X[train_rows], y[train_rows])
            mu, _ = base_model.predict(X)
            output_pred.append(mu[held_out, 0])
    else:
        # Conduct 5-fold cross validation.
        K = 5
        fold_num = instance_num // K
        for fold in range(K):
            start = fold * fold_num
            # The final fold runs to the end so no row is left out.
            end = instance_num if fold == K - 1 else start + fold_num
            train_rows = [row for row in range(instance_num)
                          if not start <= row < end]
            base_model.train(X[train_rows, :], y[train_rows])
            mu, _ = base_model.predict(X)
            output_pred.extend(mu[start:end, 0].tolist())

    assert len(output_pred) == instance_num
    return output_pred
def mini_smac(learn_delta):
    """Run a small BO loop and return training data for a base model.

    Configurations are evaluated at a target sample size: ``s_mid`` when
    ``learn_delta`` is true, ``s_min`` otherwise. When ``learn_delta`` is
    true each configuration is also evaluated at ``s_min`` and the recorded
    target is the difference between the two performances; otherwise the
    recorded target is the performance itself.

    Args:
        learn_delta: Whether to record performance differences (see above).

    Returns:
        ``[X, y_delta]`` where ``X`` is the array form of the evaluated
        configurations and ``y_delta`` the float64 array of targets.
    """
    sample_num_l = s_min
    sample_num_m = s_mid if learn_delta else s_min

    config_space = create_configspace()
    types, bounds = get_types(config_space)
    num_hp = len(bounds)
    surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    acquisition_func = EI(model=surrogate)
    acq_optimizer = RandomSampling(acquisition_func, config_space,
                                   n_samples=max(500, 50 * num_hp))

    X = []
    y = []
    y_delta = []

    def _evaluate(config):
        """Evaluate ``config`` and record it in X, y and y_delta."""
        perf_t, _ = objective_function(
            (config.get_dictionary(), sample_num_m))
        X.append(config)
        y.append(perf_t)
        if learn_delta:
            # The low-fidelity run must use the very same configuration,
            # otherwise the recorded delta mixes two different configs.
            perf_l, _ = objective_function(
                (config.get_dictionary(), sample_num_l))
            y_delta.append(perf_t - perf_l)
        else:
            y_delta.append(perf_t)

    # Initial design: sample once so that, together with the
    # ``num_iter - num_init`` BO iterations below, exactly ``num_iter``
    # configurations are evaluated.
    for config in sample_configurations(config_space, num_init):
        _evaluate(config)

    # BO iterations.
    for _ in range(num_iter - num_init):
        # Update the surrogate model.
        surrogate.train(convert_configurations_to_array(X),
                        np.array(y, dtype=np.float64))

        # Use EI acq to choose next config.
        best_index = np.argmin(y)
        incumbent = {'obj': y[best_index], 'config': X[best_index]}
        acquisition_func.update(model=surrogate, eta=incumbent)
        next_config = acq_optimizer.maximize(batch_size=1)[0]
        _evaluate(next_config)

    return [convert_configurations_to_array(X),
            np.array(y_delta, dtype=np.float64)]
def __init__(self, config_space, objective_func, R, num_iter=10, eta=3,
             n_workers=1):
    """Initialise the facade and its multi-fidelity ensemble surrogate.

    Args:
        config_space: Search space used for the surrogate and optimizer.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used to derive ``s_max`` and ``B``.
        num_iter: Stored as ``self.num_iter``.
        eta: Base of the logarithm used to derive ``s_max`` and of the
            resource levels stored in ``self.iterate_r``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers)
    self.config_space = config_space

    self.R = R
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(self.R) + 1e-9)
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter

    types, bounds = get_types(config_space)
    self.num_config = len(bounds)

    # Define the multi-fidelity ensemble surrogate.
    if self.s_max > 0:
        # First entry gets weight 0, the remaining s_max share it equally.
        init_weight = [0.] + [1 / self.s_max] * self.s_max
    else:
        # Only one resource level exists; dividing by s_max would raise
        # ZeroDivisionError, so give the single surrogate full weight.
        init_weight = [1.]
    self.weighted_surrogate = WeightedRandomForestCluster(
        types, bounds, self.s_max, self.eta, init_weight, 'gpoe')
    self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
    self.weighted_acq_optimizer = RandomSampling(
        self.weighted_acquisition_func, config_space,
        n_samples=max(1000, 50 * self.num_config))

    self.incumbent_configs = []
    self.incumbent_obj = []

    self.iterate_id = 0
    self.iterate_r = []

    # Store the multi-fidelity evaluation data: D_1, ..., D_K.
    self.target_x = dict()
    self.target_y = dict()
    for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
        r = int(item)
        self.iterate_r.append(r)
        self.target_x[r] = []
        self.target_y[r] = []
def __init__(self, config_space: ConfigurationSpace, objective_func, R,
             num_iter=10000, eta=3, p=0.3, n_workers=1, random_state=1,
             method_id='Default'):
    """Initialise the facade: resource schedule and BO components.

    Args:
        config_space: Search space; it is seeded in place with
            ``random_state``.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used to derive ``s_max`` and ``B``.
        num_iter: Stored as ``self.num_iter``.
        eta: Base of the logarithm used to derive ``s_max``.
        p: Stored as ``self.p``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
        random_state: Seed applied to ``config_space``.
        method_id: Forwarded to ``BaseFacade.__init__`` as ``method_name``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers,
                        method_name=method_id)
    self.config_space = config_space
    self.seed = random_state
    self.config_space.seed(self.seed)

    self.p = p
    self.R = R
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(self.R) + 1e-9)
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter

    types, bounds = get_types(config_space)
    self.num_config = len(bounds)
    self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    self.acquisition_func = EI(model=self.surrogate)
    self.acq_optimizer = RandomSampling(
        self.acquisition_func, config_space,
        n_samples=max(500, 50 * self.num_config))

    self.incumbent_configs = []
    self.incumbent_obj = []
def __init__(self, config_space, objective_func, R, num_iter=10000, eta=3,
             p=0.5, n_workers=1, random_state=1, method_id='Default'):
    """Initialise the facade: resource schedule and learning-curve state.

    Args:
        config_space: Search space; it is seeded in place with
            ``random_state``.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used to derive ``s_max`` and ``B``.
        num_iter: Stored as ``self.num_iter``.
        eta: Base of the logarithm used to derive ``s_max``.
        p: Stored as ``self.p``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
        random_state: Seed applied to ``config_space``.
        method_id: Forwarded to ``BaseFacade.__init__`` as ``method_name``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers,
                        need_lc=True, method_name=method_id)
    self.config_space = config_space
    self.seed = random_state
    self.config_space.seed(self.seed)

    self.p = p
    self.R = R
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(self.R) + 1e-9)
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter

    # Only the number of hyperparameters is kept from the type info.
    types, bounds = get_types(config_space)
    self.num_config = len(bounds)

    self.incumbent_configs = []
    self.incumbent_obj = []

    # Learning-curve model and the data it is trained on; the training
    # arrays start out empty (None).
    self.lcnet_model = LC_ES()
    self.lc_training_x = None
    self.lc_training_y = None
def __init__(self, config_space, objective_func, R, num_iter=10, eta=3,
             p=0.5, n_workers=1, info_type='Weighted', rho_delta=0.1,
             init_weight=None, update_enable=False, random_mode=True,
             enable_rho=True, scale_method=1, init_rho=0.8):
    """Initialise the facade, its plain surrogate and, optionally, the
    weighted multi-fidelity surrogate.

    Args:
        config_space: Search space used for the surrogates and optimizers.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used to derive ``s_max`` and ``B``.
        num_iter: Stored as ``self.num_iter``.
        eta: Base of the logarithm used to derive ``s_max`` and of the
            resource levels stored in ``self.iterate_r``.
        p: Stored as ``self.p``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
        info_type: When equal to ``'Weighted'`` the weighted surrogate,
            its EI acquisition function and its optimizer are built too.
        rho_delta: Stored as ``self.rho_delta``.
        init_weight: Initial surrogate weights; defaults to a uniform
            weight over ``s_max + 1`` entries.
        update_enable: Stored as ``self.update_enable``.
        random_mode: Stored as ``self.random_mode``.
        enable_rho: Stored as ``self.enable_rho``.
        scale_method: Stored as ``self.scale_method``.
        init_rho: Stored as ``self.rho``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers)
    self.config_space = config_space

    self.p = p
    self.R = R
    self.eta = eta
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(self.R) + 1e-9)
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter

    self.info_type = info_type
    self.update_enable = update_enable
    self.random_mode = random_mode
    self.enable_rho = enable_rho
    self.rho = init_rho
    self.rho_delta = rho_delta
    self.min_rho = 0.5
    self.scale_method = scale_method
    self.weight_update_id = 0

    if init_weight is None:
        init_weight = [1. / (self.s_max + 1)] * (self.s_max + 1)
    # Wrap in a 1-tuple so a tuple-valued init_weight is formatted as one
    # value instead of being unpacked as several format arguments.
    self.logger.info("initial confidence weight %s"
                     % (init_weight[:self.s_max + 1],))

    types, bounds = get_types(config_space)
    self.num_config = len(bounds)
    self.surrogate = RandomForestWithInstances(types=types, bounds=bounds)
    self.acquisition_func = EI(model=self.surrogate)
    self.acq_optimizer = RandomSampling(
        self.acquisition_func, config_space,
        n_samples=max(500, 50 * self.num_config))

    if info_type == 'Weighted':
        self.weighted_surrogate = WeightedRandomForestCluster(
            types, bounds, self.s_max, self.eta, init_weight, 'lc')
        self.weighted_acquisition_func = EI(model=self.weighted_surrogate)
        self.weighted_acq_optimizer = RandomSampling(
            self.weighted_acquisition_func, config_space,
            n_samples=max(500, 50 * self.num_config))

    self.incumbent_configs = []
    self.incumbent_obj = []
    self.init_tradeoff = 0.5
    self.tradeoff_dec_rate = 0.8

    self.iterate_id = 0
    self.iterate_r = []
    self.hist_weights = list()

    # Per-resource evaluation data, keyed by the integer resource level.
    self.target_x = dict()
    self.target_y = dict()
    for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
        r = int(item)
        self.iterate_r.append(r)
        self.target_x[r] = []
        self.target_y[r] = []
def _tse_predict(low_fidelity_model, delta_models, weight, config_arrays,
                 add_delta):
    """Predict performances with the low-fidelity model, optionally adding
    the weighted combination of the delta models (plus a bias column)."""
    perfs, _ = low_fidelity_model.predict(config_arrays)
    perfs = perfs[:, 0]
    if not add_delta:
        return perfs

    columns = []
    for model in delta_models:
        mean, _ = model.predict(config_arrays)
        columns.append(mean[:, 0])
    # Constant column matching the bias term learned with the weights.
    columns.append(np.ones(perfs.shape[0]))
    features = np.column_stack(columns)
    return perfs + np.dot(features, np.asarray(weight).reshape(-1))


def tse(run_id, train_base_models=True):
    """Run the TSE loop and save its incumbent trace.

    Base models are either trained via ``mini_smac`` (and pickled) or
    loaded from disk. Each iteration then samples candidates, evaluates
    the most promising ones at ``s_min``, retrains the low-fidelity model,
    evaluates one configuration at ``s_max`` and, once at least three
    high-fidelity results exist, re-fits the combination weights by least
    squares. The trace of ``[elapsed_seconds, incumbent]`` pairs is saved
    to ``data/xgb/tse_<run_id>.npy`` and plotted to the matching ``.png``.

    Args:
        run_id: Identifier used in the output file names.
        train_base_models: Train and pickle the base-model data when true;
            otherwise load previously pickled data.

    Raises:
        ValueError: When the 21600-second runtime budget is exceeded, or
            when the weight fit fails with a non-singular-matrix
            ``LinAlgError``.
    """
    start_time = time.time()
    from concurrent.futures import ProcessPoolExecutor
    pool = ProcessPoolExecutor(max_workers=args.worker)
    try:
        X, y = [], []
        c = []
        inc = 1.
        X_l, y_l = [], []

        weight = np.array([1 / K] * (K + 1))
        config_evaluated = []
        config_space = create_configspace()
        # Initialize config L.
        config_L = sample_configurations(config_space, num_L_init)

        if train_base_models:
            # K delta models (learn_delta=True) plus one low-fidelity model.
            func_configs = list()
            for iter_t in range(K):
                print('Build mid fidelity model', iter_t)
                func_configs.append(True)
            func_configs.append(False)
            training_data = run_parallel_async(pool, mini_smac, func_configs)
            with open('data/xgb/base_tse_data_%d.pkl' % run_id, 'wb') as f:
                pickle.dump(training_data, f)
        else:
            # NOTE(review): the run id is hard-coded to 10 here rather than
            # using ``run_id`` - confirm this is intentional.
            # NOTE: pickle.load executes arbitrary code; only load files
            # produced by a trusted run of this script.
            with open('data/xgb/base_tse_data_%d.pkl' % 10, 'rb') as f:
                training_data = pickle.load(f)
            print('Load training data for M evaluations!')

        # Create base models.
        base_models = list()
        config_space = create_configspace()
        types, bounds = get_types(config_space)
        for iter_t in range(K + 1):
            config_x, config_y = training_data[iter_t]
            model = RandomForestWithInstances(types=types, bounds=bounds)
            model.train(config_x, config_y)
            base_models.append(model)
        low_fidelity_model = base_models[K]
        delta_models = base_models[:K]
        X_l.extend(training_data[K][0].tolist())
        y_l.extend(training_data[K][1].tolist())
        print('Base model building finished!')

        # The framework of TSE.
        for iter_t in range(iter_H):
            print('Iteration in TSE', iter_t)
            # The delta correction is only applied once more than three
            # high-fidelity results are available.
            add_delta = len(y) > 3

            # Sample a batch of configurations according to tse model.
            configs = sample_configurations(config_space, iter_L * 10)
            config_arrays = convert_configurations_to_array(configs)
            perfs = _tse_predict(low_fidelity_model, delta_models, weight,
                                 config_arrays, add_delta)
            indexes = np.argsort(perfs)[:iter_L]
            configs_candidate = [configs[index] for index in indexes]

            # Evaluate the low-fidelity configurations.
            print('=' * 10 + 'Evaluating the low-fidelity configurations')
            config_params = [(config.get_dictionary(), s_min)
                             for config in configs_candidate]
            result_perf = run_parallel_async(pool, objective_function,
                                             config_params)
            for config, item in zip(configs_candidate, result_perf):
                X_l.append(config.get_array().tolist())
                y_l.append(item[0])
            print(np.array(X_l).shape,
                  np.array(y_l, dtype=np.float64).shape)

            # Update f_L.
            print('=' * 10 + 'Retrain the f_L')
            low_fidelity_model.train(np.array(X_l),
                                     np.array(y_l, dtype=np.float64))
            config_L.extend(configs_candidate)
            configs_input = [config for config in config_L
                             if config not in config_evaluated]

            # Choose the next configuration.
            config_arrays = convert_configurations_to_array(configs_input)
            perfs = _tse_predict(low_fidelity_model, delta_models, weight,
                                 config_arrays, add_delta)
            next_config = configs_input[np.argmin(perfs)]

            # Evaluate this config with a high-fidelity setting.
            print('=' * 10 + 'Evaluate the high-fidelity configuration')
            perf, _ = objective_function(
                (next_config.get_dictionary(), s_max))
            X.append(next_config)
            y.append(perf)
            # Record the configuration so the filter above actually
            # excludes it from later high-fidelity selections.
            config_evaluated.append(next_config)
            if perf < inc:
                inc = perf
            c.append([time.time() - start_time, inc])
            print('Current inc', inc)

            if len(y) < 3:
                continue

            # Learn the weight in TSE.
            evaluated_arrays = convert_configurations_to_array(X)
            columns = []
            for model in delta_models:
                m, _ = model.predict(evaluated_arrays)
                columns.append(m[:, 0])
            columns.append(np.ones(len(y)))
            Z = np.column_stack(columns)
            f = np.asarray(y, dtype=np.float64)
            # Compute the weight.
            try:
                ZtZ_inv = np.linalg.inv(np.dot(Z.T, Z))
            except np.linalg.LinAlgError as err:
                if 'Singular matrix' in str(err):
                    print('Singular matrix encountered, and do not update the weight!')
                else:
                    raise ValueError('Unexpected error!') from err
            else:
                weight = np.dot(np.dot(ZtZ_inv, Z.T), f)
                print('The weight updated is', weight)

            # Save the result.
            np.save('data/xgb/tse_%d.npy' % run_id, np.array(c))
            plt.plot(np.array(c)[:, 0], np.array(c)[:, 1])
            plt.xlabel('time_elapsed (s)')
            plt.ylabel('validation error')
            plt.savefig("data/xgb/tse_%d.png" % run_id)
            if time.time() - start_time > 21600:
                raise ValueError('Runtime budget meets!')
    finally:
        # Release the worker processes even when an iteration raises.
        pool.shutdown(wait=True)
if __name__ == "__main__":
    # Hyperparameters of the demo search space.
    learning_rate = UniformFloatHyperparameter(
        "learning_rate", 1e-4, 5e-3, default_value=3e-4)
    n_layer1 = UniformIntegerHyperparameter(
        "n_layer1", 5, 50, default_value=32)
    n_layer2 = UniformIntegerHyperparameter(
        "n_layer2", 30, 80, default_value=64)
    batch_size = UniformIntegerHyperparameter(
        "batch_size", 10, 500, default_value=200)

    # Register them in declaration order.
    cs = ConfigurationSpace()
    for hyperparameter in (learning_rate, n_layer1, n_layer2, batch_size):
        cs.add_hyperparameter(hyperparameter)

    types, bounds = get_types(cs)

    # Forest objects and options; they are configured here but not yet
    # passed to anything in this section.
    reg = regression.binary_rss_forest()
    rf_opts = regression.forest_opts()
    rf_opts.num_trees = 10
    rf_opts.do_bootstrapping = True

    model = RandomForestWithInstances(types=types, bounds=bounds)

    # Eight sample points, one row per point and one column per
    # hyperparameter, followed by their eight target values.
    x = np.array(
        [
            [0.78105907, 0.33860037, 0.72826097, 0.02941158],
            [0.81160897, 0.63147998, 0.72826097, 0.04901943],
            [0.27800406, 0.36616871, 0.16304333, 0.24509794],
            [0.41242362, 0.37351241, 0.11956505, 0.4607843],
            [0.70162934, 0.15819312, 0.51086957, 0.10784298],
            [0.53869654, 0.86662495, 0.27173903, 0.22549009],
            [0.53665988, 0.68576624, 0.81521753, 0.06862728],
            [0.72199594, 0.18900731, 0.75000011, 0.36274504],
        ],
        dtype=np.float64,
    )
    y = np.array([0.544481, 2.34456, 0.654629, 0.576376,
                  0.603501, 0.506214, 0.416664, 0.483639])
def update_weight(self):
    """Recompute the weight of every basic surrogate and record it.

    The evaluations stored for the largest resource level
    (``self.iterate_r[-1]``) serve as test data. With at least three test
    points the weights are derived according to ``self.weight_method``:

    * ``rank_loss_softmax`` / ``rank_loss_single`` / ``rank_loss_p_norm``:
      from the number (or fraction) of order-preserving pairs between
      each surrogate's predictions and the test targets;
    * ``rank_loss_prob``: from how often each surrogate attains the
      highest order-preserving count over 100 sampled predictions;
    * ``opt_based``: by minimising the ranking loss of the fused
      prediction over the weights.

    The last surrogate in ``r_list`` is scored with 5-fold cross
    validation on the test data instead of its own predictions. With
    fewer than three test points the current weights are kept.

    The new weights are written to the weighted surrogate, appended to
    ``self.hist_weights`` and the history is saved as a ``.npy`` file.

    Raises:
        ValueError: If ``self.weight_method`` is not one of the methods
            listed above (only checked when weights are recomputed).
    """
    max_r = self.iterate_r[-1]
    incumbent_configs = self.target_x[max_r]
    test_x = convert_configurations_to_array(incumbent_configs)
    test_y = np.array(self.target_y[max_r], dtype=np.float64)

    r_list = self.weighted_surrogate.surrogate_r
    K = len(r_list)
    fold_num = 5

    def _cross_validate():
        """Return out-of-fold (mean, variance) predictions for test_x."""
        # Float buffers: an integer buffer would silently truncate the
        # predictions written into it.
        cv_mean = np.zeros(len(test_y), dtype=np.float64)
        cv_var = np.zeros(len(test_y), dtype=np.float64)
        types, bounds = get_types(self.config_space)
        for train_idx, valid_idx in KFold(n_splits=fold_num).split(test_x):
            _surrogate = RandomForestWithInstances(types=types,
                                                   bounds=bounds)
            _surrogate.train(test_x[train_idx], test_y[train_idx])
            pred, var = _surrogate.predict(test_x[valid_idx])
            cv_mean[valid_idx] = pred.reshape(-1)
            cv_var[valid_idx] = var.reshape(-1)
        return cv_mean, cv_var

    if len(test_y) >= 3:
        if self.weight_method in ['rank_loss_softmax', 'rank_loss_single',
                                  'rank_loss_p_norm']:
            preserving_order_p = list()
            preserving_order_nums = list()
            for i, r in enumerate(r_list):
                if i != K - 1:
                    mean, _ = self.weighted_surrogate.surrogate_container[
                        r].predict(test_x)
                    predicted = np.reshape(mean, -1)
                elif len(test_y) < 2 * fold_num:
                    # Too few points for cross validation: score the last
                    # surrogate as 0 in BOTH lists so they stay aligned
                    # with r_list.
                    preserving_order_p.append(0)
                    preserving_order_nums.append(0)
                    continue
                else:
                    # 5-fold cross validation.
                    predicted, _ = _cross_validate()
                preorder_num, pair_num = MFSE.calculate_preserving_order_num(
                    predicted, test_y)
                preserving_order_p.append(preorder_num / pair_num)
                preserving_order_nums.append(preorder_num)

            if self.weight_method == 'rank_loss_softmax':
                order_weight = np.array(np.sqrt(preserving_order_nums))
                # Shift by the maximum before exponentiating.
                trans_order_weight = order_weight - np.max(order_weight)
                # Softmax mapping.
                new_weights = np.exp(trans_order_weight) / sum(
                    np.exp(trans_order_weight))
            elif self.weight_method == 'rank_loss_p_norm':
                trans_order_weight = np.array(preserving_order_p)
                power_sum = np.sum(
                    np.power(trans_order_weight, self.power_num))
                new_weights = np.power(trans_order_weight,
                                       self.power_num) / power_sum
            else:
                # rank_loss_single: all weight on the best surrogate.
                _idx = np.argmax(np.array(preserving_order_nums))
                new_weights = [0.] * K
                new_weights[_idx] = 1.
        elif self.weight_method == 'rank_loss_prob':
            # For basic surrogate i=1:K-1.
            mean_list, var_list = list(), list()
            for r in r_list[:-1]:
                mean, var = self.weighted_surrogate.surrogate_container[
                    r].predict(test_x)
                mean_list.append(np.reshape(mean, -1))
                var_list.append(np.reshape(var, -1))

            sample_num = 100
            min_probability_array = [0] * K
            for _ in range(sample_num):
                order_preseving_nums = list()

                # For basic surrogate i=1:K-1.
                # NOTE(review): the second argument of np.random.normal
                # is a standard deviation, yet the value passed here is
                # named as a variance - confirm what predict() returns.
                for idx in range(K - 1):
                    sampled_y = np.random.normal(mean_list[idx],
                                                 var_list[idx])
                    _num, _ = MFSE.calculate_preserving_order_num(
                        sampled_y, test_y)
                    order_preseving_nums.append(_num)

                # For basic surrogate i=K. cv
                if len(test_y) < 2 * fold_num:
                    order_preseving_nums.append(0)
                else:
                    # 5-fold cross validation.
                    cv_mean, cv_var = _cross_validate()
                    sampled_pred = np.random.normal(cv_mean, cv_var)
                    _num, _ = MFSE.calculate_preserving_order_num(
                        sampled_pred, test_y)
                    order_preseving_nums.append(_num)

                max_id = np.argmax(order_preseving_nums)
                min_probability_array[max_id] += 1
            new_weights = np.array(min_probability_array) / sample_num
        elif self.weight_method == 'opt_based':
            mean_list, var_list = list(), list()
            for i, r in enumerate(r_list):
                if i != K - 1:
                    mean, var = self.weighted_surrogate.surrogate_container[
                        r].predict(test_x)
                    mean_list.append(np.reshape(mean, -1))
                    var_list.append(np.reshape(var, -1))
                elif len(test_y) < 8:
                    mean_list.append(np.zeros(len(test_y), dtype=np.float64))
                    var_list.append(np.zeros(len(test_y), dtype=np.float64))
                else:
                    # 5-fold cross validation.
                    cv_mean, cv_var = _cross_validate()
                    mean_list.append(cv_mean)
                    var_list.append(cv_var)
            means = np.array(mean_list)
            # Small offset keeps the reciprocals below finite.
            variances = np.array(var_list) + 1e-8

            def min_func(x):
                x = np.reshape(np.array(x), (1, len(x)))
                ensemble_vars = 1 / (x @ (1 / variances))
                ensemble_means = x @ (means / variances) * ensemble_vars
                ensemble_means = np.reshape(ensemble_means, -1)
                self.logger.info("Loss:" + str(x))
                return MFSE.calculate_ranking_loss(ensemble_means, test_y)

            # Weights sum to one and each lies in [0, 1].
            constraints = [{
                'type': 'eq',
                'fun': lambda x: np.sum(x) - 1
            }, {
                'type': 'ineq',
                'fun': lambda x: x - 0
            }, {
                'type': 'ineq',
                'fun': lambda x: 1 - x
            }]
            res = minimize(min_func, np.array([1e-8] * K),
                           constraints=constraints)
            new_weights = res.x
        else:
            raise ValueError('Invalid weight method: %s!' %
                             self.weight_method)
    else:
        # Not enough test data yet: keep the current weights.
        new_weights = [self.weighted_surrogate.surrogate_weight[r]
                       for r in r_list]

    self.logger.info(
        '[%s] %d-th Updating weights: %s' %
        (self.weight_method, self.weight_changed_cnt, str(new_weights)))

    # Assign the weight to each basic surrogate.
    for i, r in enumerate(r_list):
        self.weighted_surrogate.surrogate_weight[r] = new_weights[i]
    self.weight_changed_cnt += 1
    # Save the weight data.
    self.hist_weights.append(new_weights)
    np.save(
        'data/%s_weights_%s.npy' % (self.method_name, self.method_name),
        np.asarray(self.hist_weights))
def __init__(self, config_space: ConfigurationSpace, objective_func, R,
             num_iter=10000, eta=3, n_workers=1, random_state=1,
             init_weight=None, update_enable=True,
             weight_method='rank_loss_p_norm', fusion_method='gpoe',
             power_num=2, method_id='Default'):
    """Initialise the facade, the weighted surrogate and the BO optimizer.

    Args:
        config_space: Search space; it is seeded in place with
            ``random_state``.
        objective_func: Forwarded to ``BaseFacade.__init__``.
        R: Maximum resource; used to derive ``s_max`` and ``B``.
        num_iter: Stored as ``self.num_iter``.
        eta: Base of the logarithm used to derive ``s_max`` and of the
            resource levels stored in ``self.iterate_r``.
        n_workers: Forwarded to ``BaseFacade.__init__``.
        random_state: Seed for ``config_space`` and for the random state
            shared by the acquisition optimizers.
        init_weight: Initial surrogate weights; must hold ``s_max + 1``
            entries. Defaults to 0 for the first entry and an equal share
            for the remaining ones.
        update_enable: Stored as ``self.update_enable``; must be False
            when ``weight_method`` is ``'equal_weight'``.
        weight_method: Name of the weight learning method.
        fusion_method: Passed to ``WeightedRandomForestCluster``.
        power_num: Parameter for weight method ``rank_loss_p_norm``.
        method_id: Forwarded to ``BaseFacade.__init__`` as ``method_name``.
    """
    BaseFacade.__init__(self, objective_func, n_workers=n_workers,
                        method_name=method_id)
    self.config_space = config_space
    self.R = R
    self.eta = eta
    self.seed = random_state
    self.logeta = lambda x: log(x) / log(self.eta)
    # log(R) / log(eta) can land just below an exact integer (R=243, eta=3
    # yields 4.999999999999999), so nudge the quotient before truncating.
    self.s_max = int(self.logeta(self.R) + 1e-9)
    self.B = (self.s_max + 1) * self.R
    self.num_iter = num_iter
    self.update_enable = update_enable
    self.fusion_method = fusion_method
    # Parameter for weight method `rank_loss_p_norm`.
    self.power_num = power_num
    # Specify the weight learning method.
    self.weight_method = weight_method
    self.config_space.seed(self.seed)
    self.weight_update_id = 0
    self.weight_changed_cnt = 0

    if init_weight is None:
        if self.s_max > 0:
            init_weight = [0.] + [1. / self.s_max] * self.s_max
        else:
            # Only one resource level exists; dividing by s_max would
            # raise ZeroDivisionError, so the single surrogate gets 1.
            init_weight = [1.]
    assert len(init_weight) == (self.s_max + 1)
    if self.weight_method == 'equal_weight':
        assert self.update_enable is False
    self.logger.info('Weight method & flag: %s-%s' %
                     (self.weight_method, str(self.update_enable)))
    # Wrap in a 1-tuple so a tuple-valued init_weight is formatted as one
    # value instead of being unpacked as several format arguments.
    self.logger.info("Initial weight is: %s"
                     % (init_weight[:self.s_max + 1],))

    types, bounds = get_types(config_space)
    self.num_config = len(bounds)

    self.weighted_surrogate = WeightedRandomForestCluster(
        types, bounds, self.s_max, self.eta, init_weight,
        self.fusion_method)
    self.acquisition_function = EI(model=self.weighted_surrogate)

    self.incumbent_configs = []
    self.incumbent_perfs = []

    self.iterate_id = 0
    self.iterate_r = []
    self.hist_weights = list()

    # Saving evaluation statistics in Hyperband.
    self.target_x = dict()
    self.target_y = dict()
    for item in np.logspace(0, self.s_max, self.s_max + 1, base=self.eta):
        r = int(item)
        self.iterate_r.append(r)
        self.target_x[r] = []
        self.target_y[r] = []

    # BO optimizer settings.
    self.configs = list()
    self.history_container = HistoryContainer('mfse-container')
    self.sls_max_steps = None
    self.n_sls_iterations = 5
    self.sls_n_steps_plateau_walk = 10
    # One random state is shared by all three components below.
    rng = np.random.RandomState(seed=random_state)
    self.acq_optimizer = InterleavedLocalAndRandomSearch(
        acquisition_function=self.acquisition_function,
        config_space=self.config_space,
        rng=rng,
        max_steps=self.sls_max_steps,
        n_steps_plateau_walk=self.sls_n_steps_plateau_walk,
        n_sls_iterations=self.n_sls_iterations)
    self._random_search = RandomSearch(self.acquisition_function,
                                       self.config_space, rng=rng)
    self.random_configuration_chooser = ChooserProb(prob=0.2, rng=rng)