def p3b1_parameter_set():
    """Build and return the ParameterSet for the P3B1 search space.

    The candidate lists ``batch_size``, ``shared_nnet_spec`` and
    ``ind_nnet_spec`` are read from names defined outside this function.
    """
    param_set = prs.ParameterSet()

    search_space = (
        # batch_size is a NumericListParameter (not a DiscreteParameter) to
        # enforce integer validation.
        prs.NumericListParameter("batch_size", batch_size),
        prs.IntegerParameter("epochs", 5, 50),
        # "activation" and "optimizer" are currently left out of this set.
        prs.NumericParameter("dropout", 0.0, 0.9),
        prs.NumericParameter("learning_rate", 0.00001, 0.1),
        prs.DiscreteParameter("shared_nnet_spec", shared_nnet_spec),
        prs.DiscreteParameter("ind_nnet_spec", ind_nnet_spec),
    )
    for parameter in search_space:
        param_set.add(parameter)

    return param_set
def p1b1_parameter_set():
    """Build and return the ParameterSet for the P1B1 search space.

    Each parameter is registered under its name by item assignment, in the
    order listed below.  The candidate lists (``activation``, ``batch_size``,
    ``dense``, ...) are read from names defined outside this function.
    """
    param_set = prs.ParameterSet()

    # An earlier revision registered the same parameters with
    # ps.add(prs.<Kind>Parameter(name, ...)); item assignment replaces it.
    entries = [
        ("activation", prs.DiscreteParameter(activation)),
        # NumericListParameter is used to enforce integer validation
        ("batch_size", prs.NumericListParameter(batch_size)),
        ("dense", prs.DiscreteParameter(dense)),
        ("drop", prs.NumericParameter(0.0, 0.9)),
        # alternative range noted in the original: 100, 200
        ("epochs", prs.IntegerParameter(10, 20)),
        ("latent_dim", prs.NumericListParameter(latent_dim)),
        ("learning_rate", prs.NumericParameter(0.00001, 0.1)),
        ("model", prs.DiscreteParameter(model)),
        ("optimizer", prs.DiscreteParameter(optimizer)),
        ("residual", prs.DiscreteParameter(residual)),
        ("reduce_lr", prs.DiscreteParameter(reduce_lr)),
        ("warmup_lr", prs.DiscreteParameter(warmup_lr)),
    ]
    for name, parameter in entries:
        param_set[name] = parameter

    return param_set
def p1b1_parameter_set():
    """Return a ParameterSet holding the P1B1 hyperparameter definitions.

    Candidate value lists (``activation``, ``batch_size``, ``dense``, ...)
    come from names defined outside this function.
    """
    space = prs.ParameterSet()

    # Short local aliases for the parameter constructors used below.
    discrete = prs.DiscreteParameter
    numeric = prs.NumericParameter
    numeric_list = prs.NumericListParameter
    integer = prs.IntegerParameter

    space["activation"] = discrete(activation)
    # NumericList (rather than Discrete) enforces integer validation
    space["batch_size"] = numeric_list(batch_size)
    space["dense"] = discrete(dense)
    space["drop"] = numeric(0.0, 0.9)
    # Maximum number of epochs is limited for demonstration purposes
    # (alternative range: 100, 200).
    space["epochs"] = integer(10, 20)
    space["latent_dim"] = numeric_list(latent_dim)
    space["learning_rate"] = numeric(0.00001, 0.1)
    space["model"] = discrete(model)
    space["optimizer"] = discrete(optimizer)
    space["residual"] = discrete(residual)
    space["reduce_lr"] = discrete(reduce_lr)
    space["warmup_lr"] = discrete(warmup_lr)

    return space
def __init__(self, data_df, X_columns, target, factors=None, prefix_sep="|"):
    """Split a dataframe into predictors/target and index its columns.

    Args:
        data_df: pandas DataFrame containing the predictors and the target.
        X_columns: iterable of predictor column names present in ``data_df``.
        target: name of the target column in ``data_df``.
        factors: optional list of column names to dummy-code.  Factors are
            added to the predictors even when missing from ``X_columns``.
            Defaults to no factors.
        prefix_sep: separator placed between a factor name and its value
            in the dummy-coded column names.

    Raises:
        AssertionError: if ``target``, ``X_columns`` or ``factors`` name
            columns that are not in ``data_df``.
    """
    # Use a fresh list per call: self.factors keeps a reference to this
    # object, so a shared mutable default would leak between instances.
    if factors is None:
        factors = []

    x_columns = list(X_columns)
    dfc_set = set(data_df.columns)
    factor_set = set(factors)
    assert target in dfc_set, "Target column must be in dataframe"
    assert set(x_columns).issubset(dfc_set), "X_columns must be in dataframe's columns"
    assert factor_set.issubset(dfc_set), "Factors must be in dataframe's columns"

    self.data = data_df
    self.factors = factors

    # Predictors are X_columns plus any unlisted factors, minus the target.
    # Build them in first-seen order rather than by iterating a set, so the
    # column layout of X/Xd (and the positional indices recorded below) is
    # the same on every run instead of depending on string hashing.
    seen = {target}
    predictors = []
    for col in x_columns + list(factors):
        if col not in seen:
            seen.add(col)
            predictors.append(col)

    X = data_df[predictors]
    y = data_df[target]

    # Create auxiliary dataframe with dummy-coded indicators
    Xd = pd.get_dummies(X, columns=factors, prefix_sep=prefix_sep) if factors else X

    continuous_columns = []
    factor_columns = defaultdict(list)
    factor_values = defaultdict(list)
    factor_objects = {}

    # Classify each column of Xd by position: a name whose text before the
    # first separator is a factor is recorded as an indicator column for
    # that factor, with the remainder of the name as the factor value.
    for i, name in enumerate(Xd.columns):
        n0, _, value = name.partition(prefix_sep)
        if n0 in factor_set:
            factor_columns[n0].append(i)
            factor_values[n0].append(value)
        else:
            continuous_columns.append(i)

    # TODO: create a new parameter set, just for the factors
    ps_factor = prs.ParameterSet()
    for name, values in factor_values.items():
        ps_factor[name] = prs.DiscreteParameter(values)
        columns = factor_columns[name]
        # NOTE(review): factor_objects is populated but never stored on
        # self or returned -- confirm whether it should be kept.
        factor_objects[name] = Factor(name, columns, values)

    self.continuous_columns = continuous_columns
    self.factor_columns = factor_columns
    self.parameter_set = ps_factor
    self.X = X
    self.Xd = Xd
    self.y = y

    # TODO: consider leaving these till later, and using hasattr to check
    self.gpr_ec = None
    self.gpr_mc = None
    self.gpr_uc = None
# creates candidate parameter dictionaries after model is trained
# see parameter_set for a more complete parameter set matching R
# =============================================================================
batch_size = [16, 32, 64, 128, 256, 512]
# Disabled candidate list, kept for reference:
# activation = ["softmax", "elu", "softplus", "softsign", "relu", "tanh",
#               "sigmoid", "hard_sigmoid", "linear"]
dense = [
    [500, 100, 50],
    [1000, 500, 100, 50],
    [2000, 1000, 500, 100, 50],
    [2000, 1000, 1000, 500, 100, 50],
    [2000, 1000, 1000, 1000, 500, 100, 50],
]
# Disabled candidate list, kept for reference:
# optimizer = ["adam", "sgd", "rmsprop", "adagrad", "adadelta", "adamax",
#              "nadam"]
conv = [
    [50, 50, 50, 50, 50, 1],
    [25, 25, 25, 25, 25, 1],
    [64, 32, 16, 32, 64, 1],
    [100, 100, 100, 100, 100, 1],
    [32, 20, 16, 32, 10, 1],
]

# Register the active parameters on the module-level ParameterSet `ps`,
# in this order.
ps = prs.ParameterSet()
for _parameter in (
    prs.DiscreteParameter("batch_size", batch_size),
    prs.IntegerParameter("epochs", 5, 100),
    # prs.DiscreteParameter("activation", activation),  (disabled)
    prs.DiscreteParameter("dense", dense),
    # prs.DiscreteParameter("optimizer", optimizer),  (disabled)
    prs.NumericParameter("drop", 0.0, 0.9),
    prs.NumericParameter("learning_rate", 0.00001, 0.1),
    prs.DiscreteParameter("conv", conv),
):
    ps.add(_parameter)
del _parameter  # keep the loop variable out of the module namespace

# TODO: since dense and conv will be dummy-coded, ensure that all possible
# category values are present in the parameter set

# =============================================================================
# DATA
# =============================================================================
# TODO: relocate pdtypes to nt3_run_data
"softmax", "elu", "softplus", "softsign", "relu", "tanh", "sigmoid", "hard_sigmoid", "linear" ] dense = [[500, 100, 50], [1000, 500, 100, 50], [2000, 1000, 500, 100, 50], [2000, 1000, 1000, 500, 100, 50], [2000, 1000, 1000, 1000, 500, 100, 50]] optimizer = [ "adam", "sgd", "rmsprop", "adagrad", "adadelta", "adamax", "nadam" ] conv = [[50, 50, 50, 50, 50, 1], [25, 25, 25, 25, 25, 1], [64, 32, 16, 32, 64, 1], [100, 100, 100, 100, 100, 1], [32, 20, 16, 32, 10, 1]] ps = prs.ParameterSet() ps["batch_size"] = prs.DiscreteParameter(batch_size) ps["epochs"] = prs.IntegerParameter(5, 500) ps["activation"] = prs.DiscreteParameter(activation) ps["dense"] = prs.DiscreteParameter(dense) ps["optimizer"] = prs.DiscreteParameter(optimizer) ps["drop"] = prs.NumericParameter(0.0, 0.9) ps["learning_rate"] = prs.NumericParameter(0.00001, 0.1) ps["conv"] = prs.DiscreteParameter(conv) print(ps) # ============================================================================= # Add run_id and subdirectory of /save for logged output. # Ensure that all parameters are populated with default values # Any last-minute or ad hoc changes can be added here