def train_add(func=lambda a, b: a + b, results_dir=None, reg_weight=5e-2, learning_rate=1e-2, n_epochs=10001):
    """Addition of two MNIST digits with a symbolic regression network."""
    tf.reset_default_graph()

    # Symbolic regression network to combine the conv net outputs
    PRIMITIVE_FUNCS = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product()] * 2,
    ]
    sr_net = symbolic_network.SymbolicNet(2, funcs=PRIMITIVE_FUNCS, init_stddev=0.1)  # Symbolic regression network
    # Overall architecture
    sym_digit_network = SymbolicDigit(sr_net=sr_net, normalize=normalize)
    # Set up regularization term and training
    penalty = regularization.l12_smooth(sr_net.get_weights())
    penalty = reg_weight * penalty
    sym_digit_network.set_training(reg=penalty)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Take up a variable amount of memory on the GPU
    sess = tf.Session(config=config)

    batch = batch_generator(batch_size=100)

    # Train, and restart training if the loss goes to NaN
    loss_i = np.nan
    while np.isnan(loss_i):
        sess.run(tf.global_variables_initializer())
        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate)
        if np.isnan(loss_i):
            continue

    # Freezing weights
    sr_net = symbolic_network.MaskedSymbolicNet(sess, sr_net, threshold=0.01)
    sym_digit_network = SymbolicDigitMasked(sym_digit_network, sr_net, normalize=normalize)
    sym_digit_network.set_training()

    # Training with frozen weights. Regularization is 0.
    loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate / 10)

    # Print out a human-readable equation (with regularization)
    weights = sess.run(sr_net.get_weights())
    expr = pretty_print.network(weights, PRIMITIVE_FUNCS, ["z1", "z2"])
    expr = normalize(expr)
    print(expr)

    # Calculate accuracy on the test dataset
    acc_test, error_test = sym_digit_network.calc_accuracy(X_test, y_test, func, sess)
    result_str = 'Test accuracy: %g\n' % acc_test
    print(result_str)

    sym_digit_network.save_result(sess, results_dir, expr, result_str)
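# Note: regularization.l12_smooth above is the smoothed L1/2 (L0.5) penalty used
# throughout these scripts. The sketch below is a minimal NumPy illustration of
# one common piecewise-smoothed form; the quartic surrogate and the threshold `a`
# are assumptions for illustration only, not necessarily the repo's exact code.
import numpy as np

def l12_smooth_sketch(w, a=0.05):
    """Smoothed L1/2 penalty: |w|^(1/2) away from zero, a smooth surrogate near zero."""
    w = np.asarray(w, dtype=float)
    surrogate = -w ** 4 / (8 * a ** 3) + 3 * w ** 2 / (4 * a) + 3 * a / 8  # matches |w| at |w| = a
    return np.sum(np.where(np.abs(w) >= a, np.abs(w), surrogate) ** 0.5)

print(l12_smooth_sketch([0.5, -0.2, 0.01]))  # shrinking weights toward zero lowers the penalty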
def train(self, func, func_name='', trials=1, func_dir='results/test'):
    """Train the network to find a given function"""
    x_dim = len(signature(func).parameters)  # Number of input arguments to the function

    # Generate training data and test data
    x, y = generate_data(func, N_TRAIN)
    # x_val, y_val = generate_data(func, N_VAL)
    x_test, y_test = generate_data(func, N_TEST, range_min=DOMAIN_TEST[0], range_max=DOMAIN_TEST[1])

    # Setting up the symbolic regression network
    x_placeholder = tf.compat.v1.placeholder(shape=(None, x_dim), dtype=tf.float32)
    width = len(self.activation_funcs)
    n_double = functions.count_double(self.activation_funcs)
    sym = SymbolicNetL0(self.n_layers, funcs=self.activation_funcs,
                        initial_weights=[
                            tf.random.truncated_normal([x_dim, width + n_double], stddev=init_sd_first, dtype=tf.float32),
                            tf.random.truncated_normal([width, width + n_double], stddev=init_sd_middle, dtype=tf.float32),
                            tf.random.truncated_normal([width, width + n_double], stddev=init_sd_middle, dtype=tf.float32),
                            tf.random.truncated_normal([width, 1], stddev=init_sd_last, dtype=tf.float32)
                        ])
    y_hat = sym(x_placeholder)

    # Label and errors
    error = tf.keras.losses.mean_squared_error(y_true=y, y_pred=y_hat)
    error_test = tf.keras.losses.mean_squared_error(y_true=y_test, y_pred=y_hat)
    reg_loss = sym.get_loss()
    loss = error + self.reg_weight * reg_loss

    # Training
    learning_rate = tf.compat.v1.placeholder(tf.float32)
    opt = tf.compat.v1.train.RMSPropOptimizer(learning_rate=learning_rate)
    train = opt.minimize(loss)

    # Arrays to keep track of various quantities as a function of epoch
    loss_list = []        # Total loss (MSE + regularization)
    error_list = []       # MSE
    reg_list = []         # Regularization
    error_test_list = []  # Test error
    error_test_final = []
    eq_list = []

    # Only take GPU memory as needed - allows multiple jobs on a single GPU
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.compat.v1.Session(config=config) as sess:
        for trial in range(trials):
            print("Training on function " + func_name + " Trial " + str(trial + 1) + " out of " + str(trials))

            loss_val = np.nan
            # Restart training if the loss goes to NaN (which happens when gradients blow up)
            while np.all(np.isnan(loss_val)):
                sess.run(tf.compat.v1.global_variables_initializer())

                # 1st stage of training
                for i in range(self.n_epochs1):
                    feed_dict = {x_placeholder: x, learning_rate: self.learning_rate}
                    _ = sess.run(train, feed_dict=feed_dict)
                    if i % self.summary_step == 0:
                        loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                        error_test_val = sess.run(error_test, feed_dict={x_placeholder: x_test})
                        loss_val_avg = sum(loss_val) / len(loss_val)
                        error_val_avg = sum(error_val) / len(error_val)
                        error_test_val_avg = sum(error_test_val) / len(error_test_val)
                        print(error_val_avg)
                        print("Epoch: %d\tTotal training loss: %f\tTest error: %f"
                              % (i, loss_val_avg, error_test_val_avg))
                        loss_list.append(loss_val_avg)
                        error_list.append(error_val_avg)
                        reg_list.append(reg_val)
                        error_test_list.append(error_test_val_avg)
                        if np.any(np.isnan(loss_val)):  # If the loss goes to NaN, restart training
                            break

            # Print the learned expression
            weights = sess.run(sym.get_weights())
            expr = pretty_print.network(weights, self.activation_funcs, var_names[:x_dim])
            print(expr)

            # Save results
            trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
            results = {
                "weights": weights,
                "loss_list": loss_list,
                "error_list": error_list,
                "reg_list": reg_list,
                "error_test": error_test_list,
                "expr": expr
            }
            with open(trial_file, "wb+") as f:
                pickle.dump(results, f)

            error_test_final.append(error_test_list[-1])
            eq_list.append(expr)

    return eq_list, error_test_final
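# Note: SymbolicNetL0.get_loss() above is the relaxed L0 penalty of hard-concrete
# gates (Louizos et al., 2018). The following is a minimal NumPy sketch of that
# expected-L0 term; beta, gamma, and zeta are the commonly used defaults and are
# assumptions here, not necessarily the values used inside SymbolicNetL0.
import numpy as np

def expected_l0_sketch(log_alpha, beta=2.0 / 3.0, gamma=-0.1, zeta=1.1):
    """Expected number of non-zero hard-concrete gates: sum of P(gate != 0)."""
    log_alpha = np.asarray(log_alpha, dtype=float)
    # P(gate != 0) = sigmoid(log_alpha - beta * log(-gamma / zeta))
    p_nonzero = 1.0 / (1.0 + np.exp(-(log_alpha - beta * np.log(-gamma / zeta))))
    return p_nonzero.sum()

print(expected_l0_sketch([-3.0, 0.0, 3.0]))  # small log_alpha -> likely pruned, large -> likely kept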
def main(results_dir='results/sho/test', trials=1, learning_rate=1e-2, reg_weight=2e-4, timesteps=25,
         batch_size=129, n_epochs1=2001, n_epochs2=5001, n_epochs3=5001):
    # Hyperparameters
    summary_step = 500
    timesteps0 = 1

    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]

    # Import SHO data
    data = np.load('dataset/sho.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    omega2_data = data["omega2"]
    N = data["N"]

    # Prepare data
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)  # Initial conditions for the prediction y, fed into the propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)  # Shape (NG, LENGTH, 2)

    # TensorFlow placeholders for x, y0, y
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="prop_input")  # Input is (d, v)
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    length_input = tf.placeholder(dtype=tf.int32, shape=())

    # Dynamics encoder
    encoder = helpers.Encoder()
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    z_data = omega2_data[:, np.newaxis]

    # Propagating decoders
    prop_d = SymbolicNet(2, funcs=primitive_funcs)
    prop_v = SymbolicNet(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)

    # Building the recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True)
    y0_rnn = tf.concat([tf.expand_dims(y0_input, axis=1),
                        tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))], axis=1)
    prop_input = tf.concat([y0_rnn,
                            tf.keras.backend.repeat(z, length_input),
                            tf.ones((tf.shape(y0_input)[0], length_input, 1))], axis=2)
    prop_output = rnn(prop_input)

    # Regularization weight oscillates as a function of epoch
    epoch = tf.placeholder(tf.float32)
    reg_freq = np.pi / (n_epochs1 + n_epochs2) / 1.1
    reg_loss = tf.sin(reg_freq * epoch) ** 2 * regularization.l12_smooth(prop_d.get_weights()) + \
               tf.sin(reg_freq * epoch) ** 2 * regularization.l12_smooth(prop_v.get_weights())
    # reg_loss = regularization.l12_smooth(prop_d.get_weights()) + regularization.l12_smooth(prop_v.get_weights())

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    reg_weight_ph = tf.placeholder(tf.float32)
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=prop_output)
    loss = error + reg_weight_ph * reg_loss
    train = tf.group([opt.minimize(loss), encoder.bn.updates])

    batch = helpers.batch_generator([x, y_data, y0, z_data], N=N, batch_size=batch_size)

    # Training session
    with tf.Session() as sess:
        for _ in range(trials):
            loss_i = np.nan
            while np.isnan(loss_i):
                loss_list = []
                error_list = []
                reg_list = []

                sess.run(tf.global_variables_initializer())

                for i in range(n_epochs1 + n_epochs2):
                    if i < n_epochs1:
                        reg_weight_i = reg_weight / 5
                        learning_rate_i = learning_rate
                        length_i = min(i // 500 * 2 + timesteps0, timesteps)
                    else:
                        reg_weight_i = reg_weight
                        learning_rate_i = learning_rate / 5
                        length_i = timesteps

                    x_batch, y_batch, y0_batch, z_batch = next(batch)
                    feed_dict = {x_input: x_batch, y0_input: y0_batch, y_input: y_batch, epoch: i,
                                 learning_rate_ph: learning_rate_i, training: True,
                                 reg_weight_ph: reg_weight_i, length_input: length_i}
                    _ = sess.run(train, feed_dict=feed_dict)

                    if i % summary_step == 0 or i == n_epochs1 - 1:
                        feed_dict[training] = False
                        loss_i, error_i, reg_i = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                        z_arr = sess.run(z, feed_dict=feed_dict)
                        r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                        loss_list.append(loss_i)
                        error_list.append(error_i)
                        reg_list.append(reg_i)
                        print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f\tCorrelation: %f"
                              % (i, loss_i, error_i, reg_i, r))
                        if np.isnan(loss_i):
                            break

            # Setting small weights to 0 and freezing them
            prop_d_masked = MaskedSymbolicNet(sess, prop_d, threshold=0.01)
            prop_v_masked = MaskedSymbolicNet(sess, prop_v, threshold=0.01)

            # Keep track of currently existing variables. When we rebuild the RNN, it makes new variables that we
            # need to initialize. Later, we will use this to figure out what the uninitialized variables are.
            temp = set(tf.global_variables())

            # Rebuilding the decoding propagator without regularization
            rnn = tf.keras.layers.RNN(SymbolicCell(prop_d_masked, prop_v_masked), return_sequences=True)
            prop_output = rnn(prop_input)
            loss = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=prop_output)
            train = tf.group([opt.minimize(loss), encoder.bn.updates])

            weights_d = sess.run(prop_d_masked.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v_masked.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
            print(expr_v)

            print("Frozen weights. Next stage of training.")

            # Initialize only the uninitialized variables.
            sess.run(tf.variables_initializer(set(tf.global_variables()) - temp))

            for i in range(n_epochs3):
                x_batch, y_batch, y0_batch, z_batch = next(batch)
                feed_dict = {x_input: x_batch, y0_input: y0_batch, y_input: y_batch, epoch: 0,
                             learning_rate_ph: learning_rate / 10, training: True,
                             reg_weight_ph: 0, length_input: length_i}
                _ = sess.run(train, feed_dict=feed_dict)

                if i % summary_step == 0:
                    feed_dict[training] = False
                    loss_i, error_i, reg_i = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    z_arr = sess.run(z, feed_dict=feed_dict)
                    r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                    loss_list.append(loss_i)
                    error_list.append(error_i)
                    reg_list.append(reg_i)
                    print("Epoch %d\tError: %g\tCorrelation: %f" % (i, error_i, r))

            weights_d = sess.run(prop_d_masked.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v_masked.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
            print(expr_v)

            # Save results
            results = {
                "summary_step": summary_step,
                "learning_rate": learning_rate,
                "n_epochs1": n_epochs1,
                "n_epochs2": n_epochs2,
                "reg_weight": reg_weight,
                "timesteps": timesteps,
                "timesteps0": timesteps0,
                "weights_d": weights_d,
                "weights_v": weights_v,
                "loss_plot": loss_list,
                "error_plot": error_list,
                "reg_plot": reg_list,
                "expr_d": expr_d,
                "expr_v": expr_v
            }
            trial_dir = helpers.get_trial_path(results_dir)  # Directory in which to save the trial results
            tf.saved_model.simple_save(sess, trial_dir,
                                       inputs={"x": x_input, "y0": y0_input, "training": training},
                                       outputs={"z": z, "y": prop_output})

            # Save a summary of the parameters and results
            with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
                pickle.dump(results, f)
            with open(os.path.join(results_dir, 'eq_summary.txt'), 'a') as f:
                f.write(str(expr_d) + "\n")
                f.write(str(expr_v) + "\n")
                f.write("Error: %f\n\n" % error_list[-1])
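# Note: helpers.batch_generator is used above as an infinite iterator over aligned
# mini-batches. A minimal NumPy sketch of that behavior; the reshuffle-each-pass
# strategy is an assumption for illustration, not necessarily the repo's code.
import numpy as np

def batch_generator_sketch(arrays, N, batch_size=128):
    """Yield aligned random mini-batches from a list of arrays, forever."""
    while True:
        idx = np.random.permutation(N)
        for start in range(0, N - batch_size + 1, batch_size):
            batch_idx = idx[start:start + batch_size]
            yield [a[batch_idx] for a in arrays]

# Usage mirroring the training loop above:
# batch = batch_generator_sketch([x, y_data, y0, z_data], N=N, batch_size=batch_size)
# x_batch, y_batch, y0_batch, z_batch = next(batch)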
def train(self, func, func_name='', trials=1, func_dir='results/test'):
    """Train the network to find a given function"""
    x, y = generate_data(func, N_TRAIN)
    # x_val, y_val = generate_data(func, N_VAL)
    x_test, y_test = generate_data(func, N_TEST, range_min=DOMAIN_TEST[0], range_max=DOMAIN_TEST[1])

    # Setting up the symbolic regression network
    x_dim = len(signature(func).parameters)  # Number of input arguments to the function
    # x_placeholder = tf.placeholder(shape=(None, x_dim), dtype=tf.float32)
    width = len(self.activation_funcs)
    n_double = functions.count_double(self.activation_funcs)

    # Arrays to keep track of various quantities as a function of epoch
    loss_list = []        # Total loss (MSE + regularization)
    error_list = []       # MSE
    reg_list = []         # Regularization
    error_test_list = []  # Test error
    error_test_final = []
    eq_list = []

    for trial in range(trials):
        print("Training on function " + func_name + " Trial " + str(trial + 1) + " out of " + str(trials))

        # Reinitialize the network for each trial
        net = SymbolicNet(self.n_layers,
                          funcs=self.activation_funcs,
                          initial_weights=[
                              # Kind of a hack for a truncated normal
                              torch.fmod(torch.normal(0, init_sd_first, size=(x_dim, width + n_double)), 2),
                              torch.fmod(torch.normal(0, init_sd_middle, size=(width, width + n_double)), 2),
                              torch.fmod(torch.normal(0, init_sd_middle, size=(width, width + n_double)), 2),
                              torch.fmod(torch.normal(0, init_sd_last, size=(width, 1)), 2)
                          ])

        criterion = nn.MSELoss()
        optimizer = optim.RMSprop(
            net.parameters(),
            lr=self.learning_rate * 10,
            momentum=0.0,
            # weight_decay=7
        )

        # Adaptive learning rate
        lmbda = lambda epoch: 0.1 * epoch
        scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)

        for param_group in optimizer.param_groups:
            print("Learning rate: %f" % param_group['lr'])

        loss_val = np.nan
        # Restart training if the loss goes to NaN (which happens when gradients blow up)
        while np.isnan(loss_val):
            t0 = time.time()
            # First stage of training, preceded by a 0th warmup stage
            for epoch in range(self.n_epochs1 + 2000):
                inputs, labels = x, y

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward + backward + optimize
                outputs = net(inputs)
                # TODO
                regularization = L12Smooth()
                mse_loss = criterion(outputs, labels)
                reg_loss = regularization(net.get_weights_tensor())
                loss = mse_loss + self.reg_weight * reg_loss
                loss.backward()
                optimizer.step()

                if epoch % self.summary_step == 0:
                    error_val = mse_loss.item()
                    reg_val = reg_loss.item()
                    loss_val = error_val + self.reg_weight * reg_val
                    print("Epoch: %d\tTotal training loss: %f\tReg loss: %f" % (epoch, loss_val, reg_val))
                    error_list.append(error_val)
                    reg_list.append(reg_val)
                    loss_list.append(loss_val)
                    # TODO: test error
                    # loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    # error_test_val = sess.run(error_test, feed_dict={x_placeholder: x_test})
                    # print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (i, loss_val, error_test_val))
                    # error_list.append(error_val)
                    # error_test_list.append(error_test_val)
                    if np.isnan(loss_val):  # If the loss goes to NaN, restart training
                        break

                if epoch == 2000:
                    scheduler.step()  # lr /= 10
                    for param_group in optimizer.param_groups:
                        print(param_group['lr'])
                    # scheduler.step()  # lr /= 10 again
                    for param_group in optimizer.param_groups:
                        print("Learning rate: %f" % param_group['lr'])

            t1 = time.time()

        tot_time = t1 - t0
        print(tot_time)

        # Print the expressions
        with torch.no_grad():
            weights = net.get_weights()
            expr = pretty_print.network(weights, self.activation_funcs, var_names[:x_dim])
            print(expr)

        # Save results
        trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
        results = {
            "weights": weights,
            "loss_list": loss_list,
            "error_list": error_list,
            "reg_list": reg_list,
            "error_test": error_test_list,
            "expr": expr,
            "runtime": tot_time
        }
        with open(trial_file, "wb+") as f:
            pickle.dump(results, f)

        # error_test_final.append(error_test_list[-1])
        eq_list.append(expr)

    return eq_list, error_test_final
def main(results_dir='results/kinematics/test', learning_rate=1e-2, reg_weight=1e-3,
         n_epochs1=5001, n_epochs2=5001, timesteps=5):
    # Hyperparameters
    summary_step = 500
    timesteps0 = 1

    # Import kinematics data
    data = np.load('dataset/kinematic.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    a_data = np.asarray(data["g"])

    # Prepare data
    # The first few time steps are reserved for the symbolic regression propagator
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)  # Input into the symbolic propagator
    label_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)  # Shape (NG, timesteps, 2)

    # Encoder
    encoder = helpers.Encoder()  # Layers should end with 1, which is the output
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    # z = np.array(a_data)[:, np.newaxis]   # Uncomment to ignore the autoencoder

    # Propagating decoder
    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]
    prop_d = SymbolicNet(2, funcs=primitive_funcs)
    prop_v = SymbolicNet(2, funcs=primitive_funcs)
    prop_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="prop_input")  # Input is (d, v)

    def rec_sr(y0_input, enc_output, length, prop1=prop_d, prop2=prop_v):
        rec_input = [y0_input]
        for i in range(length):
            full_input = tf.concat([rec_input[i], enc_output, tf.ones_like(enc_output)],
                                   axis=1, name="full_input")  # d, v, z
            rec_input.append(tf.concat([prop1(full_input), prop2(full_input)], axis=1, name="c_prop_input"))
        output = tf.stack(rec_input[1:], axis=1)  # Ignore initial conditions
        return output

    y_hat_start = rec_sr(prop_input, z, timesteps0, prop_d, prop_v)
    y_hat_full = rec_sr(prop_input, z, timesteps, prop_d, prop_v)

    # Label and errors
    epoch = tf.placeholder(tf.float32)
    reg_weight_ph = tf.placeholder(tf.float32)
    reg_loss = regularization.l12_smooth(prop_d.get_weights()) + regularization.l12_smooth(prop_v.get_weights())

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)

    def define_loss(prop_output, length):
        error = tf.losses.mean_squared_error(labels=y_input[:, :length, :], predictions=prop_output[:, :length, :])
        loss = error + reg_weight_ph * reg_loss
        train = opt.minimize(loss)
        train = tf.group([train, encoder.bn.updates])
        return error, loss, train

    error_start, loss_start, train_start = define_loss(y_hat_start, timesteps0)
    error_full, loss_full, train_full = define_loss(y_hat_full, timesteps)

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Take up a variable amount of memory on the GPU
    with tf.Session(config=config) as sess:
        loss_i = np.nan
        while np.isnan(loss_i):
            loss_list = []
            error_list = []
            reg_list = []

            error, loss, train = error_start, loss_start, train_start
            sess.run(tf.global_variables_initializer())

            for i in range(n_epochs1):
                feed_dict = {x_input: x, prop_input: y0, y_input: label_data, epoch: 0,
                             learning_rate_ph: learning_rate, training: True, reg_weight_ph: reg_weight}
                _ = sess.run(train, feed_dict=feed_dict)
                if i % summary_step == 0:
                    feed_dict[training] = False
                    print_loss, print_error, print_l12 = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    loss_list.append(print_loss)
                    error_list.append(print_error)
                    reg_list.append(print_l12)
                    print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f"
                          % (i, print_loss, print_error, print_l12))
                    loss_i = print_loss
                if i > 2000:
                    error, loss, train = error_full, loss_full, train_full
                if np.isnan(loss_i):
                    break

        # Setting small weights to 0 and freezing them
        prop_d_masked = MaskedSymbolicNet(sess, prop_d, threshold=0.1)
        prop_v_masked = MaskedSymbolicNet(sess, prop_v, threshold=0.1)

        # Rebuilding the decoding propagator
        prop_output_masked = rec_sr(prop_input, z, timesteps, prop_d_masked, prop_v_masked)
        error, loss, train = define_loss(prop_output_masked, timesteps)

        weights_d = sess.run(prop_d_masked.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v_masked.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
        print(expr_v)

        print("Frozen weights. Next stage of training.")

        for i in range(n_epochs2):
            feed_dict = {x_input: x, prop_input: y0, y_input: label_data, epoch: 0,
                         learning_rate_ph: learning_rate / 10, training: True, reg_weight_ph: 0}
            _ = sess.run(train, feed_dict=feed_dict)
            if i % summary_step == 0:
                feed_dict[training] = False
                print_loss, print_error, print_l12 = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                loss_list.append(print_loss)
                error_list.append(print_error)
                reg_list.append(print_l12)
                print("Epoch %d\tError: %g" % (i, print_error))

        weights_d = sess.run(prop_d_masked.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v_masked.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
        print(expr_v)

        # Save results
        results = {
            "timesteps": timesteps,
            "summary_step": summary_step,
            "learning_rate": learning_rate,
            "n_epochs1": n_epochs1,
            "n_epochs2": n_epochs2,
            "reg_weight_ph": reg_weight,
            "weights_d": weights_d,
            "weights_v": weights_v,
            "loss_plot": loss_list,
            "error_plot": error_list,
            "l12_plot": reg_list,
            "expr_d": expr_d,
            "expr_v": expr_v
        }
        trial_dir = helpers.get_trial_path(results_dir)  # Directory in which to save the trial results
        tf.saved_model.simple_save(sess, trial_dir,
                                   inputs={"x": x_input, "y0": prop_input, "training": training},
                                   outputs={"z": z, "y": y_hat_full})

        # Save a summary of the parameters and results
        with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
            pickle.dump(results, f)
def main(results_dir='results/kinematics/test', learning_rate=1e-2, reg_weight=1e-3, n_epochs=10001, timesteps=5):
    tf.reset_default_graph()

    # Hyperparameters
    summary_step = 1000
    # tf.set_random_seed(0)

    # Import kinematics data
    data = np.load('dataset/kinematic.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    a_data = np.asarray(data["g"])

    # Prepare data
    # The first few time steps are reserved for the symbolic regression propagator
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)  # Input into the symbolic propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)  # Shape (NG, LENGTH, 2)

    # Encoder
    encoder = helpers.Encoder()  # Layers should end with 1, which is the output
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="y_input")  # Input is (d, v)
    length_input = tf.placeholder(dtype=tf.int32, shape=())
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    # enc_output = np.array(g_data)[:, np.newaxis]   # Uncomment to ignore the autoencoder

    # Build the EQL network for the propagating decoder
    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]
    prop_d = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_v = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)

    # Build the recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True)
    y0_rnn = tf.concat([tf.expand_dims(y0_input, axis=1),
                        tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))], axis=1)
    prop_input = tf.concat([y0_rnn,
                            tf.keras.backend.repeat(z, length_input),
                            tf.ones((tf.shape(y0_input)[0], length_input, 1))], axis=2)
    y_hat = rnn(prop_input)

    # Label and errors
    reg_loss = prop_d.get_loss() + prop_v.get_loss()

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=y_hat)
    loss = error + reg_weight * reg_loss
    train = opt.minimize(loss)
    train = tf.group([train, encoder.bn.updates])

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Take up a variable amount of memory on the GPU
    with tf.Session(config=config) as sess:
        loss_i = np.nan
        while np.isnan(loss_i):
            loss_list = []
            error_list = []
            reg_list = []

            sess.run(tf.global_variables_initializer())

            length_i = 1
            for i in range(n_epochs):
                lr_i = learning_rate
                feed_dict = {x_input: x, y0_input: y0, y_input: y_data,
                             learning_rate_ph: lr_i, training: True, length_input: length_i}
                _ = sess.run(train, feed_dict=feed_dict)
                if i % summary_step == 0:
                    feed_dict[training] = False
                    loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    loss_list.append(loss_val)
                    error_list.append(error_val)
                    reg_list.append(reg_val)
                    print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f" % (i, loss_val, error_val, reg_val))
                    loss_i = loss_val
                if i > 3000:
                    length_i = timesteps
                if np.isnan(loss_i):
                    break

        weights_d = sess.run(prop_d.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
        print(expr_v)

        # z_arr = sess.run(enc_output, feed_dict=feed_dict)

        # Save results
        results = {
            "timesteps": timesteps,
            "summary_step": summary_step,
            "learning_rate": learning_rate,
            "N_EPOCHS": n_epochs,
            "reg_weight": reg_weight,
            "weights_d": weights_d,
            "weights_v": weights_v,
            "loss_plot": loss_list,
            "error_plot": error_list,
            "l12_plot": reg_list,
            "expr_d": expr_d,
            "expr_v": expr_v
        }
        trial_dir = helpers.get_trial_path(results_dir)  # Directory in which to save the trial results
        tf.saved_model.simple_save(sess, trial_dir,
                                   inputs={"x": x_input, "y0": y0_input, "training": training},
                                   outputs={"z": z, "y": y_hat})

        # Save a summary of the parameters and results
        with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
            pickle.dump(results, f)
def train_add_test(func=lambda a, b: a + b, results_dir=None, reg_weight=5e-2, learning_rate=1e-2, n_epochs=10001):
    """Addition of two MNIST digits with a symbolic regression network. Withhold sums > 15 for test data."""
    tf.reset_default_graph()

    # Symbolic regression network to combine the conv net outputs
    PRIMITIVE_FUNCS = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        # *[functions.Product()] * 2,
    ]
    sr_net = symbolic_network.SymbolicNet(2, funcs=PRIMITIVE_FUNCS)  # Symbolic regression network
    # Overall architecture
    sym_digit_network = SymbolicDigit(sr_net=sr_net, normalize=normalize)
    # Set up regularization term and training
    penalty = regularization.l12_smooth(sr_net.get_weights())
    epoch = tf.placeholder_with_default(0.0, [])
    penalty = tf.sin(np.pi / n_epochs / 1.1 * epoch) ** 2 * regularization.l12_smooth(sr_net.get_weights())
    penalty = reg_weight * penalty
    sym_digit_network.set_training(reg=penalty)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Take up a variable amount of memory on the GPU
    sess = tf.Session(config=config)

    batch = batch_generator(batch_size=100)

    def train_fun(y):
        return y < 15

    def test_fun(y):
        return np.logical_not(train_fun(y))

    # Train, and restart training if the loss goes to NaN
    loss_i = np.nan
    while np.isnan(loss_i):
        sess.run(tf.global_variables_initializer())
        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, epoch, lr_val=learning_rate, train_fun=train_fun)
        if np.isnan(loss_i):
            continue

    # Freezing weights
    sr_net_masked = symbolic_network.MaskedSymbolicNet(sess, sr_net, threshold=0.01)
    sym_digit_network = SymbolicDigitMasked(sym_digit_network, sr_net_masked, normalize=normalize)
    sym_digit_network.set_training()

    loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate / 10, train_fun=train_fun)

    # Print out a human-readable equation (with regularization)
    weights = sess.run(sr_net.get_weights())
    expr = pretty_print.network(weights, PRIMITIVE_FUNCS, ["z1", "z2"])
    expr = normalize(expr)
    print(expr)

    # Calculate accuracy on the train and test datasets, split by withheld sums
    acc_train, error_train = sym_digit_network.calc_accuracy(X_train, y_train, func, sess)
    acc_train1, error_train1 = sym_digit_network.calc_accuracy(X_train, y_train, func, sess, filter_fun=train_fun)
    acc_train2, error_train2 = sym_digit_network.calc_accuracy(X_train, y_train, func, sess, filter_fun=test_fun)
    acc_test, error_test = sym_digit_network.calc_accuracy(X_test, y_test, func, sess)
    acc_test1, error_test1 = sym_digit_network.calc_accuracy(X_test, y_test, func, sess, filter_fun=train_fun)
    acc_test2, error_test2 = sym_digit_network.calc_accuracy(X_test, y_test, func, sess, filter_fun=test_fun)
    result_str = "Train digits overall accuracy: %.3f\ttrain sum accuracy: %.3f\t test sum accuracy: %.3f\n" \
                 "Train digits overall error: %.3f\ttrain sum error: %.3f\t test sum error: %.3f\n" \
                 "Test digits overall accuracy: %.3f\ttrain sum accuracy: %.3f\t test sum accuracy: %.3f\n" \
                 "Test digits overall error: %.3f\ttrain sum error: %.3f\t test sum error: %.3f\n" % \
                 (acc_train, acc_train1, acc_train2, error_train, error_train1, error_train2,
                  acc_test, acc_test1, acc_test2, error_test, error_test1, error_test2)
    print(result_str)

    sym_digit_network.save_result(sess, results_dir, expr, result_str)
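# Note: the regularization weight above follows sin(pi * epoch / (1.1 * n_epochs))^2,
# so it starts at 0, peaks near epoch = 0.55 * n_epochs, and decays to roughly 8% of
# its peak by the final epoch. A quick check of that schedule:
import numpy as np

n_epochs = 10001
for epoch in (0, 2500, 5500, 10000):
    w = np.sin(np.pi / n_epochs / 1.1 * epoch) ** 2
    print(epoch, round(float(w), 3))
# 0 -> 0.0, 2500 -> ~0.43, 5500 -> ~1.0, 10000 -> ~0.08 (all scaled by reg_weight)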
def meta_learn(self, func_names, trials, val_func_names=None):
    """Meta-train the EQL network on data generated by the given functions.

    Arguments:
        func_names: list of strings that describes the functions
        trials: number of trials to train from scratch. Will save the results for each trial.
    """
    opt = optim.Adam(self.net.parameters(), self.outer_learning_rate)

    equations = dict()
    train_losses = dict()
    val_eq = dict()
    val_losses = dict()
    for func_name in func_names:
        equations[func_name] = []
        train_losses[func_name] = []
    if val_func_names is not None:
        for val_func_name in val_func_names:
            val_eq[val_func_name] = []
            val_losses[val_func_name] = []

    if self.train_mode == "maml":
        # ------------- each iteration is one MAML outer loop
        for counter in range(self.n_epochs1):
            verbose = (counter + 1) % 250 == 0
            opt.zero_grad()
            eval_loss = 0
            for func_name in func_names:
                func = self.equation_dict[func_name]
                assert self.x_dim == len(signature(func).parameters)
                # Adapt to func
                eql_for_func = self.adapt(func, func_name, verbose, equations, train_losses)
                # Evaluate task performance
                x, y = generate_data(func, N_QUERY)
                inputs, labels = x, y
                eval_loss += self.get_loss(eql_for_func, inputs, labels)
            eval_loss.backward()
            # Average the accumulated gradients and optimize
            for p in self.net.parameters():
                p.grad.data.mul_(1.0 / len(func_names))
            opt.step()

            if val_func_names is not None:
                # Validation step
                for val_func_name in val_func_names:
                    func = self.equation_dict[val_func_name]
                    eql_for_func = self.adapt(func, val_func_name, verbose)
                    x, y = generate_data(func, N_QUERY)
                    inputs, labels = x, y
                    eval_loss += self.get_loss(eql_for_func, inputs, labels)
                    val_losses[val_func_name].append(eval_loss.item())

    if self.train_mode == "joint":
        # -------------------- joint training
        for counter in range(self.n_epochs1):
            verbose = (counter + 1) % 250 == 0
            for func_name in func_names:
                # Get the function, do a forward pass, and compute the loss
                func = self.equation_dict[func_name]
                inputs, labels = generate_data(func, N_SUPPORT + N_QUERY)
                loss = self.get_loss(self.net, inputs, labels)
                # Backward pass
                opt.zero_grad()
                loss.backward()
                opt.step()
                if verbose:
                    with torch.no_grad():
                        weights = self.net.get_weights()
                        expr = pretty_print.network(weights, self.activation_funcs, var_names[:self.x_dim])
                        print(expr)
                    equations[func_name].append(expr)
                    train_losses[func_name].append(loss)

            # Validate
            if val_func_names is not None:
                # Validation step
                for val_func_name in val_func_names:
                    # Deep copy self.net so that we don't see the validation functions during training
                    model = copy.deepcopy(self.net)
                    func = self.equation_dict[val_func_name]
                    inputs, labels = generate_data(func, N_SUPPORT)
                    # Adapt
                    loss = self.get_loss(model, inputs, labels)
                    # Backward pass
                    opt.zero_grad()
                    loss.backward()
                    opt.step()
                    # Evaluate
                    inputs, labels = generate_data(func, N_QUERY)
                    eql_val = self.get_loss(model, inputs, labels)
                    val_losses[val_func_name].append(eql_val.item())

    # ----------------------------- write results to disk
    for func_name in func_names:
        fi = open(os.path.join(self.results_dir, 'eq_summary_{}.txt'.format(func_name)), 'w')
        fi.write("\n{}\n".format(func_name))
        for expr in equations[func_name]:
            fi.write("%s\n" % (str(expr)))
        fi.close()
        np.save(os.path.join(self.results_dir, 'train_curve_{}'.format(func_name)), train_losses[func_name])

    if val_func_names is not None:
        for val_func_name in val_func_names:
            np.save(os.path.join(self.results_dir, 'val_curve_{}'.format(val_func_name)), val_losses[val_func_name])
def adapt(self, func, func_name='', verbose=False, equations=None, train_losses=None):
    if verbose:
        print("****adapting to function {}****".format(func_name))

    # These should probably be command-line arguments
    first_order = False
    allow_unused = False
    allow_nograd = False
    second_order = True

    x, y = generate_data(func, N_SUPPORT)
    inputs, labels = x, y

    # Clone the module and specify the adaptation parameters
    learner = clone_module(self.net)
    diff_params = [p for p in learner.parameters() if p.requires_grad]

    # --------------------------------- begin learn2learn excerpt to compute gradients
    for _ in range(0, self.inner_steps):
        loss = self.get_loss(learner, inputs, labels)
        if allow_nograd:
            # Compute relevant gradients
            diff_params = [p for p in learner.parameters() if p.requires_grad]
            grad_params = grad(loss, diff_params,
                               retain_graph=second_order,
                               create_graph=second_order,
                               allow_unused=allow_unused)
            gradients = []
            grad_counter = 0
            # Handles gradients for non-differentiable parameters
            for param in learner.parameters():
                if param.requires_grad:
                    gradient = grad_params[grad_counter]
                    grad_counter += 1
                else:
                    gradient = None
                gradients.append(gradient)
        else:
            try:
                gradients = grad(loss, learner.parameters(),
                                 retain_graph=second_order,
                                 create_graph=second_order,
                                 allow_unused=allow_unused)
            except RuntimeError:
                traceback.print_exc()
                print('learn2learn: Maybe try with allow_nograd=True and/or allow_unused=True ?')

        # Update the module
        learner = self.maml_update(learner, self.inner_learning_rate, gradients)
    adapted_learner = learner
    # --------------------------------- end learn2learn excerpt

    if verbose:
        with torch.no_grad():
            weights = learner.get_weights()
            expr = pretty_print.network(weights, self.activation_funcs, var_names[:self.x_dim])
            print(expr)
        if equations is not None:
            equations[func_name].append(expr)
        if train_losses is not None:
            train_losses[func_name].append(loss)

    return adapted_learner
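# Note: self.maml_update above plays the role of learn2learn's maml_update: a
# differentiable inner-loop step theta' = theta - lr * grad. The sketch below is a
# simplified stand-in that operates on a flat list of tensors instead of rewriting
# the cloned nn.Module in place, which is what the real implementation does.
import torch

def maml_update_sketch(params, lr, gradients):
    """One differentiable inner-loop step: p' = p - lr * g (p kept if g is None)."""
    return [p if g is None else p - lr * g for p, g in zip(params, gradients)]

# Usage sketch: create_graph=True keeps the inner step differentiable so the
# outer loop can backpropagate through it.
w = torch.randn(3, requires_grad=True)
inner_loss = (w ** 2).sum()
(g,) = torch.autograd.grad(inner_loss, [w], create_graph=True)
(w_adapted,) = maml_update_sketch([w], 0.1, [g])
outer_loss = (w_adapted ** 2).sum()
outer_loss.backward()  # gradients flow through the inner update back to w
print(w.grad)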
def main(results_dir='results/sho/test', trials=20, learning_rate=1e-3, reg_weight=1e-3, timesteps=25,
         batch_size=128, n_epochs1=10001, n_epochs2=10001):
    # Hyperparameters
    summary_step = 1000

    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]

    # Import SHO data
    data = np.load('dataset/sho.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    omega2_data = data["omega2"]
    N = data["N"]

    # Prepare data
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)  # Initial conditions for the prediction y, fed into the propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)  # Shape (NG, timesteps, 2)
    z_data = omega2_data[:, np.newaxis]

    # TensorFlow placeholders for x, y0, y
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="prop_input")  # Input is (d, v)
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    length_input = tf.placeholder(dtype=tf.int32, shape=())

    # Dynamics encoder
    encoder = helpers.Encoder(n_filters=[16, 16, 16, 16])
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)

    # Propagating decoders
    prop_d = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_v = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)

    # Building the recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True)
    y0_rnn = tf.concat([tf.expand_dims(y0_input, axis=1),
                        tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))], axis=1)
    prop_input = tf.concat([y0_rnn,
                            tf.keras.backend.repeat(z, length_input),
                            tf.ones((tf.shape(y0_input)[0], length_input, 1))], axis=2)
    y_hat = rnn(prop_input)

    length_list = [1, 2, 3, 4, 5, 7, 10, 15, 25]  # Slowly increase the length of propagation

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    reg_weight_ph = tf.placeholder(tf.float32)
    reg_loss = prop_d.get_loss() + prop_v.get_loss()
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=y_hat)
    loss = error + reg_weight_ph * reg_loss
    train = tf.group([opt.minimize(loss), encoder.bn.updates])

    batch = helpers.batch_generator([x, y_data, y0, z_data], N=N, batch_size=batch_size)

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        for _ in range(trials):
            loss_i = np.nan
            while np.isnan(loss_i):
                loss_list = []
                error_list = []
                reg_list = []

                sess.run(tf.global_variables_initializer())

                length_i = 1
                for i in range(n_epochs1 + n_epochs2):
                    if i < n_epochs1:
                        lr_i = learning_rate
                    else:
                        lr_i = learning_rate / 10

                    x_batch, y_batch, y0_batch, z_batch = next(batch)
                    feed_dict = {x_input: x_batch, y0_input: y0_batch, y_input: y_batch,
                                 learning_rate_ph: lr_i, training: True,
                                 reg_weight_ph: reg_weight, length_input: length_i}
                    _ = sess.run(train, feed_dict=feed_dict)

                    if i % summary_step == 0:
                        feed_dict[training] = False
                        loss_i, error_i, reg_i, z_arr = sess.run((loss, error, reg_loss, z), feed_dict=feed_dict)
                        r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                        loss_list.append(loss_i)
                        error_list.append(error_i)
                        reg_list.append(reg_i)
                        print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f\tCorrelation: %f"
                              % (i, loss_i, error_i, reg_i, r))
                        if np.isnan(loss_i):
                            break

                    i_length = min(i // 1000, len(length_list) - 1)
                    length_i = length_list[i_length]

            weights_d = sess.run(prop_d.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
            print(expr_v)

            print("Done. Saving results.")

            # z_arr = sess.run(z, feed_dict=feed_dict)

            # Save results
            results = {
                "summary_step": summary_step,
                "learning_rate": learning_rate,
                "n_epochs1": n_epochs1,
                "reg_weight": reg_weight,
                "timesteps": timesteps,
                "weights_d": weights_d,
                "weights_v": weights_v,
                "loss_plot": loss_list,
                "error_plot": error_list,
                "reg_plot": reg_list,
                "expr_d": expr_d,
                "expr_v": expr_v
            }
            trial_dir = helpers.get_trial_path(results_dir)  # Directory in which to save the trial results
            tf.saved_model.simple_save(sess, trial_dir,
                                       inputs={"x": x_input, "y0": y0_input, "training": training},
                                       outputs={"z": z, "y": y_hat})

            # Save a summary of the parameters and results
            with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
                pickle.dump(results, f)
            with open(os.path.join(results_dir, 'eq_summary.txt'), 'a') as f:
                f.write(str(expr_d) + "\n")
                f.write(str(expr_v) + "\n")
                f.write("Error: %f\n\n" % error_list[-1])
    # Alternative initializations, kept for reference:
    # torch.zeros(size=(width, width + n_double)),
    # torch.zeros(size=(width, 1))],
    # initial_weights=[torch.ones(size=(x_dim, width + n_double)),   # kind of a hack for truncated normal
    #                  torch.ones(size=(width, width + n_double)),
    #                  torch.ones(size=(width, width + n_double)),
    #                  torch.ones(size=(width, 1))],
    # initial_weights=[torch.fmod(torch.normal(0, 1, size=(x_dim, width + n_double)), 2),   # kind of a hack for truncated normal
    #                  torch.fmod(torch.normal(0, 1, size=(width, width + n_double)), 2),
    #                  torch.fmod(torch.normal(0, 1, size=(width, width + n_double)), 2),
    #                  torch.fmod(torch.normal(0, 1, size=(width, 1)), 2)
    #                  ]
)

with torch.no_grad():
    weights = sym.get_weights()
    expr = pretty_print.network(weights, activation_funcs, var_names[:x_dim])
    print(expr)

optimizer = torch.optim.Adam(sym.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()
y = func(x)
for i in range(1000):
    yhat = sym(x)
    # L0.5-norm penalty on all network weights
    reg = torch.tensor(0.)
    for param in sym.parameters():
        reg = reg + 0.01 * torch.norm(param, 0.5)
    loss = loss_func(yhat, y) + reg
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
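# Note: the snippet above is a fragment; it assumes that x, func, activation_funcs, and
# the SymbolicNet(...) call that the commented-out initial_weights options belong to are
# defined earlier. A hedged sketch of the kind of setup it expects (the shapes and the
# toy target function are placeholders, not the actual experiment configuration):
import torch

N_TRAIN, x_dim = 256, 1
x = torch.rand(N_TRAIN, x_dim) * 2 - 1           # inputs in [-1, 1]
func = lambda t: torch.sin(2 * torch.pi * t)     # toy target; y = func(x) as above
# activation_funcs would be the list of primitive functions (Constant, Identity,
# Square, Sin, ...) passed to SymbolicNet, as in the other training scripts.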
def train(self, func, func_name='', trials=1, func_dir='results/test'):
    """Train the network to find a given function"""
    x, y = generate_data(func, N_TRAIN)
    # x_val, y_val = generate_data(func, N_VAL)
    x_test, y_test = generate_data(func, N_TEST, range_min=DOMAIN_TEST[0], range_max=DOMAIN_TEST[1])

    # Setting up the symbolic regression network
    x_dim = len(signature(func).parameters)  # Number of input arguments to the function
    x_placeholder = tf.placeholder(shape=(None, x_dim), dtype=tf.float32)
    width = len(self.activation_funcs)
    n_double = functions.count_double(self.activation_funcs)
    sym = SymbolicNet(self.n_layers, funcs=self.activation_funcs,
                      initial_weights=[
                          tf.truncated_normal([x_dim, width + n_double], stddev=init_sd_first),
                          tf.truncated_normal([width, width + n_double], stddev=init_sd_middle),
                          tf.truncated_normal([width, width + n_double], stddev=init_sd_middle),
                          tf.truncated_normal([width, 1], stddev=init_sd_last)
                      ])
    # sym = SymbolicNet(self.n_layers, funcs=self.activation_funcs)
    y_hat = sym(x_placeholder)

    # Label and errors
    error = tf.losses.mean_squared_error(labels=y, predictions=y_hat)
    error_test = tf.losses.mean_squared_error(labels=y_test, predictions=y_hat)
    reg_loss = l12_smooth(sym.get_weights())
    loss = error + self.reg_weight * reg_loss

    # Set up the TensorFlow graph for training
    learning_rate = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    train = opt.minimize(loss)

    # Arrays to keep track of various quantities as a function of epoch
    loss_list = []        # Total loss (MSE + regularization)
    error_list = []       # MSE
    reg_list = []         # Regularization
    error_test_list = []  # Test error
    error_test_final = []
    eq_list = []

    # Only take GPU memory as needed - allows multiple jobs on a single GPU
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        for trial in range(trials):
            print("Training on function " + func_name + " Trial " + str(trial + 1) + " out of " + str(trials))

            loss_val = np.nan
            # Restart training if the loss goes to NaN (which happens when gradients blow up)
            while np.isnan(loss_val):
                sess.run(tf.global_variables_initializer())

                t0 = time.time()
                # First stage of training, preceded by a 0th warmup stage
                for i in range(self.n_epochs1 + 2000):
                    if i < 2000:
                        lr_i = self.learning_rate * 10
                    else:
                        lr_i = self.learning_rate
                    feed_dict = {x_placeholder: x, learning_rate: lr_i}
                    _ = sess.run(train, feed_dict=feed_dict)
                    if i % self.summary_step == 0:
                        loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                        error_test_val = sess.run(error_test, feed_dict={x_placeholder: x_test})
                        print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (i, loss_val, error_test_val))
                        loss_list.append(loss_val)
                        error_list.append(error_val)
                        reg_list.append(reg_val)
                        error_test_list.append(error_test_val)
                        if np.isnan(loss_val):  # If the loss goes to NaN, restart training
                            break
                t1 = time.time()

                # Masked network - weights below a threshold are set to 0 and frozen. This is the fine-tuning stage.
                sym_masked = MaskedSymbolicNet(sess, sym)
                y_hat_masked = sym_masked(x_placeholder)
                error_masked = tf.losses.mean_squared_error(labels=y, predictions=y_hat_masked)
                error_test_masked = tf.losses.mean_squared_error(labels=y_test, predictions=y_hat_masked)
                train_masked = opt.minimize(error_masked)

                # 2nd stage of training
                t2 = time.time()
                for i in range(self.n_epochs2):
                    feed_dict = {x_placeholder: x, learning_rate: self.learning_rate / 10}
                    _ = sess.run(train_masked, feed_dict=feed_dict)
                    if i % self.summary_step == 0:
                        loss_val, error_val = sess.run((loss, error_masked), feed_dict=feed_dict)
                        error_test_val = sess.run(error_test_masked, feed_dict={x_placeholder: x_test})
                        print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (i, loss_val, error_test_val))
                        loss_list.append(loss_val)
                        error_list.append(error_val)
                        error_test_list.append(error_test_val)
                        if np.isnan(loss_val):  # If the loss goes to NaN, restart training
                            break
                t3 = time.time()

            tot_time = t1 - t0 + t3 - t2
            print(tot_time)

            # Print the expressions
            weights = sess.run(sym_masked.get_weights())
            expr = pretty_print.network(weights, self.activation_funcs, var_names[:x_dim])
            print(expr)

            # Save results
            trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
            results = {
                "weights": weights,
                "loss_list": loss_list,
                "error_list": error_list,
                "reg_list": reg_list,
                "error_test": error_test_list,
                "expr": expr,
                "runtime": tot_time
            }
            with open(trial_file, "wb+") as f:
                pickle.dump(results, f)

            error_test_final.append(error_test_list[-1])
            eq_list.append(expr)

    return eq_list, error_test_final