def _RunOptimizationImpl(self):
    # Look for the minimum max_delta that results in a successful comparison
    # for each possible edge_threshold/max_diff combination.
    for edge_threshold in range(self._args.max_edge_threshold,
                                self._args.min_edge_threshold,
                                -1 * self._args.edge_threshold_step):
        should_continue = True
        for max_diff in range(self._args.min_max_diff,
                              self._args.max_max_diff,
                              self._args.max_diff_step):
            for max_delta in range(self._args.min_delta_threshold,
                                   self._args.max_delta_threshold,
                                   self._args.delta_threshold_step):
                parameters = parameter_set.ParameterSet(
                    max_diff, max_delta, edge_threshold)
                success, _, _ = self._RunComparisonForParameters(parameters)
                if success:
                    print('Found good parameters %s' % parameters)
                    should_continue = False
                    break
                logging.info('Found bad parameters %s', parameters)
            # Increasing the max_diff for a given edge_threshold once we've
            # found a good max_delta won't give us any new information, so go
            # on to the next edge_threshold.
            if not should_continue:
                break
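
# A minimal, self-contained sketch of the innermost search pattern above:
# find the smallest max_delta that makes the comparison succeed. The
# is_success callable is a hypothetical stand-in for
# self._RunComparisonForParameters; the names and bounds are illustrative
# only, not part of the real optimizer.
def _find_min_successful_delta(min_delta, max_delta, step, is_success):
    """Return the smallest delta in range(min_delta, max_delta, step) for
    which is_success(delta) is True, or None if every candidate fails."""
    for delta in range(min_delta, max_delta, step):
        if is_success(delta):
            return delta
    return None

# Toy example: pretend the comparison passes once delta reaches 12.
assert _find_min_successful_delta(0, 30, 5, lambda d: d >= 12) == 15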
def _AdjacentParameters(self, starting_parameters):
    """Yields all ParameterSets adjacent to |starting_parameters|, with each
    parameter moved by at most one step and clamped to the configured bounds.
    The starting point itself is not yielded."""
    max_diff = starting_parameters.max_diff
    delta_threshold = starting_parameters.delta_threshold
    edge_threshold = starting_parameters.edge_threshold
    max_diff_step = self._args.max_diff_step
    delta_threshold_step = self._args.delta_threshold_step
    edge_threshold_step = self._args.edge_threshold_step
    max_diffs = [
        max(self._args.min_max_diff, max_diff - max_diff_step),
        max_diff,
        min(self._args.max_max_diff, max_diff + max_diff_step),
    ]
    delta_thresholds = [
        max(self._args.min_delta_threshold,
            delta_threshold - delta_threshold_step),
        delta_threshold,
        min(self._args.max_delta_threshold,
            delta_threshold + delta_threshold_step),
    ]
    edge_thresholds = [
        max(self._args.min_edge_threshold,
            edge_threshold - edge_threshold_step),
        edge_threshold,
        min(self._args.max_edge_threshold,
            edge_threshold + edge_threshold_step),
    ]
    for combo in itertools.product(max_diffs, delta_thresholds,
                                   edge_thresholds):
        adjacent = parameter_set.ParameterSet(combo[0], combo[1], combo[2])
        if adjacent != starting_parameters:
            yield adjacent
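
# Self-contained sketch of the neighborhood scheme above: per axis, take
# {value - step, value, value + step} clamped to [lo, hi], then yield every
# combination except the starting point. Plain tuples stand in here for
# parameter_set.ParameterSet, which presumably compares by field values.
import itertools

def adjacent(point, steps, bounds):
    axes = []
    for value, step, (lo, hi) in zip(point, steps, bounds):
        # A set dedupes candidates when clamping collapses two of them.
        axes.append(sorted({max(lo, value - step), value,
                            min(hi, value + step)}))
    for combo in itertools.product(*axes):
        if combo != point:
            yield combo

# A point sitting at a lower bound has fewer than 3**2 - 1 = 8 neighbors:
print(len(list(adjacent((0, 5), steps=(1, 2),
                        bounds=((0, 10), (0, 10))))))  # 5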
def p1b1_parameter_set():
    """Utility function to encapsulate ParameterSet definition"""
    ps = prs.ParameterSet()
    # batch_size is NumericList rather than Discrete to enforce integer
    # validation
    ps["activation"] = prs.DiscreteParameter(activation)
    ps["batch_size"] = prs.NumericListParameter(batch_size)
    ps["dense"] = prs.DiscreteParameter(dense)
    ps["drop"] = prs.NumericParameter(0.0, 0.9)
    ps["epochs"] = prs.IntegerParameter(10, 20)  # full-scale range: 100, 200
    ps["latent_dim"] = prs.NumericListParameter(latent_dim)
    ps["learning_rate"] = prs.NumericParameter(0.00001, 0.1)
    ps["model"] = prs.DiscreteParameter(model)
    ps["optimizer"] = prs.DiscreteParameter(optimizer)
    ps["residual"] = prs.DiscreteParameter(residual)
    ps["reduce_lr"] = prs.DiscreteParameter(reduce_lr)
    ps["warmup_lr"] = prs.DiscreteParameter(warmup_lr)
    return ps
def _CreateParameterSet(self, value):
    """Creates a parameter_set.ParameterSet to test.

    Args:
        value: The value to set the variable parameter to.

    Returns:
        A parameter_set.ParameterSet with the variable parameter set to
        |value| and the other parameters set to their fixed values.
    """
    if self._unlocked_parameter == self.UNLOCKED_PARAM_MAX_DIFF:
        return parameter_set.ParameterSet(value,
                                          self._args.min_delta_threshold,
                                          self._args.min_edge_threshold)
    elif self._unlocked_parameter == self.UNLOCKED_PARAM_DELTA_THRESHOLD:
        return parameter_set.ParameterSet(self._args.min_max_diff, value,
                                          self._args.min_edge_threshold)
    else:
        return parameter_set.ParameterSet(self._args.min_max_diff,
                                          self._args.min_delta_threshold,
                                          value)
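
# Hypothetical driver sketch for the sweep _CreateParameterSet supports:
# step a single unlocked parameter through its range while the other two
# stay pinned at their fixed (minimum) values. The namedtuple stands in
# for parameter_set.ParameterSet; the sweep bounds are illustrative.
from collections import namedtuple

ParameterSet = namedtuple(
    'ParameterSet', ['max_diff', 'delta_threshold', 'edge_threshold'])

def sweep_max_diff(lo, hi, step, min_delta, min_edge):
    """Yield parameter sets that vary only max_diff."""
    for value in range(lo, hi, step):
        yield ParameterSet(value, min_delta, min_edge)

for params in sweep_max_diff(0, 20, 5, min_delta=0, min_edge=10):
    print(params)  # max_diff steps through 0, 5, 10, 15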
def p3b1_parameter_set():
    """Utility function to encapsulate ParameterSet definition"""
    ps = prs.ParameterSet()
    # switching batch_size to NumericList to enforce integer validation
    #ps.add(prs.DiscreteParameter("batch_size", batch_size))
    ps.add(prs.NumericListParameter("batch_size", batch_size))
    ps.add(prs.IntegerParameter("epochs", 5, 50))
    #ps.add(prs.DiscreteParameter("activation", activation))
    #ps.add(prs.DiscreteParameter("optimizer", optimizer))
    ps.add(prs.NumericParameter("dropout", 0.0, 0.9))
    ps.add(prs.NumericParameter("learning_rate", 0.00001, 0.1))
    ps.add(prs.DiscreteParameter("shared_nnet_spec", shared_nnet_spec))
    ps.add(prs.DiscreteParameter("ind_nnet_spec", ind_nnet_spec))
    return ps
def p1b1_parameter_set():
    """Utility function to encapsulate ParameterSet definition"""
    ps = prs.ParameterSet()
    # batch_size is NumericList to enforce integer validation
    ps["activation"] = prs.DiscreteParameter(activation)
    ps["batch_size"] = prs.NumericListParameter(batch_size)
    ps["dense"] = prs.DiscreteParameter(dense)
    ps["drop"] = prs.NumericParameter(0.0, 0.9)
    # limit the maximum number of epochs for demonstration purposes
    ps["epochs"] = prs.IntegerParameter(10, 20)  # full-scale range: 100, 200
    ps["latent_dim"] = prs.NumericListParameter(latent_dim)
    ps["learning_rate"] = prs.NumericParameter(0.00001, 0.1)
    ps["model"] = prs.DiscreteParameter(model)
    ps["optimizer"] = prs.DiscreteParameter(optimizer)
    ps["residual"] = prs.DiscreteParameter(residual)
    ps["reduce_lr"] = prs.DiscreteParameter(reduce_lr)
    ps["warmup_lr"] = prs.DiscreteParameter(warmup_lr)
    return ps
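
# Minimal sketch of the two ParameterSet construction styles used above
# (item assignment in p1b1_parameter_set, ps.add(...) in p3b1_parameter_set).
# This toy class only draws random samples; the real prs module also handles
# grids and validation, so treat every detail here as an assumption.
import random

class ToyDiscreteParameter:
    def __init__(self, values, name=None):
        self.name, self.values = name, values

    def sample(self):
        return random.choice(self.values)

class ToyParameterSet(dict):
    def add(self, parameter):
        # ps.add(...) style: the parameter carries its own name.
        self[parameter.name] = parameter

    def sample(self):
        # Draw one candidate configuration as a plain dict.
        return {name: p.sample() for name, p in self.items()}

ps = ToyParameterSet()
ps["activation"] = ToyDiscreteParameter(["relu", "tanh"])
ps.add(ToyDiscreteParameter([16, 32, 64], name="batch_size"))
print(ps.sample())  # e.g. {'activation': 'relu', 'batch_size': 32}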
def __init__(self, data_df, X_columns, target, factors=None, prefix_sep="|"):
    # Avoid a mutable default argument for factors
    factors = factors if factors is not None else []

    dfc_set = set(data_df.columns)
    xcol_set = set(X_columns)
    factor_set = set(factors)

    assert target in dfc_set, "Target column must be in dataframe"
    assert xcol_set.issubset(dfc_set), "X_columns must be in dataframe's columns"
    assert factor_set.issubset(dfc_set), "Factors must be in dataframe's columns"
    #assert factor_set.issubset(xcol_set), "Factors should be listed in X_columns"

    self.data = data_df
    self.factors = factors

    xcol_set = xcol_set | factor_set  # set union
    xcol_set.discard(target)
    # pandas can't index with a set, so convert it to a list
    X = data_df[list(xcol_set)]
    y = data_df[target]

    # Create auxiliary dataframe with dummy-coded indicators
    Xd = pd.get_dummies(X, columns=factors, prefix_sep=prefix_sep) if factors else X

    continuous_columns = []
    factor_columns = defaultdict(list)
    factor_values = defaultdict(list)
    factor_objects = {}

    # Recover each factor's name and level by splitting the dummy column names
    for i, name in enumerate(Xd.columns):
        n = name.split(prefix_sep)
        n0 = n[0]
        if n0 in factors:
            factor_columns[n0].append(i)
            factor_values[n0].append(prefix_sep.join(n[1:]))
        else:
            continuous_columns.append(i)

    # Create a new parameter set covering just the factors
    ps_factor = prs.ParameterSet()
    for name, values in factor_values.items():
        ps_factor[name] = prs.DiscreteParameter(values)
        columns = factor_columns[name]
        factor_objects[name] = Factor(name, columns, values)

    self.continuous_columns = continuous_columns
    self.factor_columns = factor_columns
    self.parameter_set = ps_factor
    self.X = X
    self.Xd = Xd
    self.y = y

    # TODO: consider leaving these till later, and using hasattr to check
    self.gpr_ec = None
    self.gpr_mc = None
    self.gpr_uc = None
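
# Runnable illustration of the dummy coding and column bookkeeping above,
# using a tiny made-up dataframe. prefix_sep="|" is what lets the loop
# recover the factor name and level from each dummy column's name.
import pandas as pd

df = pd.DataFrame({"lr": [0.1, 0.01, 0.1],
                   "optimizer": ["adam", "sgd", "adam"]})
dummies = pd.get_dummies(df, columns=["optimizer"], prefix_sep="|")
print(list(dummies.columns))  # ['lr', 'optimizer|adam', 'optimizer|sgd']
for i, name in enumerate(dummies.columns):
    parts = name.split("|")
    if parts[0] == "optimizer":
        print(i, parts[0], "|".join(parts[1:]))  # column index, factor, level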
# =============================================================================
# ParameterSet generates the initial sample grid used to train the model and
# creates candidate parameter dictionaries after the model is trained.
# See parameter_set for a more complete parameter set matching R.
# =============================================================================
batch_size = [16, 32, 64, 128, 256, 512]
#activation = ["softmax", "elu", "softplus", "softsign", "relu", "tanh", "sigmoid", "hard_sigmoid", "linear"]
dense = [[500, 100, 50],
         [1000, 500, 100, 50],
         [2000, 1000, 500, 100, 50],
         [2000, 1000, 1000, 500, 100, 50],
         [2000, 1000, 1000, 1000, 500, 100, 50]]
#optimizer = ["adam", "sgd", "rmsprop", "adagrad", "adadelta", "adamax", "nadam"]
conv = [[50, 50, 50, 50, 50, 1],
        [25, 25, 25, 25, 25, 1],
        [64, 32, 16, 32, 64, 1],
        [100, 100, 100, 100, 100, 1],
        [32, 20, 16, 32, 10, 1]]

ps = prs.ParameterSet()
ps.add(prs.DiscreteParameter("batch_size", batch_size))
ps.add(prs.IntegerParameter("epochs", 5, 100))
#ps.add(prs.DiscreteParameter("activation", activation))
ps.add(prs.DiscreteParameter("dense", dense))
#ps.add(prs.DiscreteParameter("optimizer", optimizer))
ps.add(prs.NumericParameter("drop", 0.0, 0.9))
ps.add(prs.NumericParameter("learning_rate", 0.00001, 0.1))
ps.add(prs.DiscreteParameter("conv", conv))

# TODO: since dense and conv will be dummy-coded, ensure that all possible
# category values are present in the parameter set (see the sketch below)
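
# One way to address the TODO above, as a sketch: pd.get_dummies only emits
# columns for levels that actually occur, so an initial sample that never
# drew some dense/conv configuration would silently lack that indicator
# column. Passing a pd.Categorical with the full list of levels forces every
# column to exist. The level strings below are hypothetical stand-ins for
# string-encoded dense/conv specs.
import pandas as pd

levels = ["a", "b", "c"]
observed = ["a", "a", "b"]  # sample that never drew level "c"
full = pd.get_dummies(pd.Categorical(observed, categories=levels))
print(list(full.columns))   # ['a', 'b', 'c'] -- the 'c' column is all zeros

# =============================================================================
# DATA
# =============================================================================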
def _GetMostPermissiveParameters(self):
    """Returns the most permissive ParameterSet: maximum max_diff and
    delta_threshold with the minimum edge_threshold."""
    return parameter_set.ParameterSet(self._args.max_max_diff,
                                      self._args.max_delta_threshold,
                                      self._args.min_edge_threshold)