def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    """Train a fixed-size ElmanGoalNet on the pnas2018task sequences with goal units.

    Args:
        noise: std-dev of Gaussian noise added to the hidden context at every step.
        iterations: number of training episodes (one sequence per episode).
        learning_rate: SGD learning rate assigned to the model.

    Returns:
        The trained model.
    """
    # Hidden/observation/action sizes are hard-coded to match pnas2018task
    # (9 inputs, 8 outputs); two goal units, no second goal layer.
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=2, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.
    # Rolling averages used only for console monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.
    for episode in range(num_episodes):
        # Sample a sequence according to the task's mixture probabilities.
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        # Inputs are the sequence minus its last element; targets are shifted by one.
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network under a gradient tape so train_obsolete can backprop.
        with tf.GradientTape() as tape:
            # Context starts at zeros each episode.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise to the recurrent context.
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
            # Compare winner-take-all action history against the targets.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            loss, _ = model.train_obsolete(targets, goal, None, tape)
        # Monitor progress using rolling averages; larger update speed early on
        # enables more useful evaluations for early trials.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        # ratios[0] == 1 means the whole action sequence was correct.
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)
        # Display on the console at regular (logarithmic early, then fixed) intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg=0.0,
          algorithm=nn.SGD, size_hidden=15, learning_rate=None,
          loss_type='cross_entropy', initial_context=pnas2018.ZEROS):
    """Train a (possibly pre-existing) ElmanGoalNet on the rewardtask sequences.

    Args:
        model: existing network to continue training; a fresh goal-free
            ElmanGoalNet is created when None.
        noise: std-dev of Gaussian noise added to the context at every step.
        iterations: number of training episodes.
        l1reg, l2reg: regularization strengths assigned to the model.
        algorithm: optimizer constant from nn (used only when model is None).
        size_hidden: hidden layer size (used only when model is None).
        learning_rate: overrides the model's learning rate when not None.
        loss_type: selects the training call; NOTE(review): the default string
            'cross_entropy' must equal pnas2018.CROSS_ENTROPY for the elif
            branch to fire — otherwise the generic model.train path is taken.
        initial_context: how to initialize the recurrent context each episode.

    Returns:
        (model, rng_avg_sequence): the trained model and the rolling average
        of whole-sequence accuracy.
    """
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden,
                                size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs),
                                size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Else keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg
    # Rolling averages for console monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.
    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)
        sequence = rewardtask.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        # Run the network; tape is persistent because the train_* calls use it
        # and it is deleted explicitly afterwards.
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            if loss_type == pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type == pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        # Persistent tapes hold resources; release explicitly.
        del tape
        # Monitor progress using rolling averages; faster updates early on
        # enable more useful evaluations for early trials.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        # ratios[0] == 1 means the whole action sequence was correct.
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1, speed)
        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence
def train_predictive_net(model=None, iterations=5000, learning_rate=0.1,
                         algorithm=nn.RMSPROP, hidden_units=15):
    """Train a PredictiveNet that outputs both an action and a prediction of
    the next observation, on the pnas2018task sequences.

    Fix vs. previous version: grad_avg/grad_max were computed at every display
    interval but never used; they are now included in the monitoring printout,
    consistent with the plain `train` function in this file.

    Args:
        model: existing PredictiveNet to continue training, or None to create one.
        iterations: number of training episodes.
        learning_rate: learning rate assigned to the model.
        algorithm: optimizer constant from nn (used only when model is None).
        hidden_units: hidden layer size (used only when model is None).

    Returns:
        The trained model.
    """
    if model is None:
        model = nn.PredictiveNet(size_hidden=hidden_units, algorithm=algorithm,
                                 size_observation=len(pnas2018task.all_inputs),
                                 size_action=len(pnas2018task.all_outputs))
    num_episodes = iterations
    model.learning_rate = learning_rate
    # Rolling averages for console monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.
    rng_avg_preds = 0.
    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        # For this task the prediction targets are the next inputs.
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Initial prediction = 0.
        model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)
        # Run the network under a gradient tape.
        with tf.GradientTape() as tape:
            # Context starts at zeros each episode.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(action_targets)):
                # Reset the previous action at every step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
            # Accuracy statistics for both output heads.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))
            ratios = scripts.evaluate([tchoices], [action_targets])
            tpreds = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            ratios_predictions = scripts.evaluate([tpreds], [prediction_targets])
            # Train model, record loss. NOTE: targets and predictions are
            # identical for this task!!!
            loss, gradients = model.train(tape, [action_targets, prediction_targets])
        # Monitor progress using rolling averages; faster updates early on.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_preds = utils.rolling_avg(rng_avg_preds, ratios_predictions[0], speed)
        # ratios[0] == 1 means the whole action sequence was correct.
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)
        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            # Gradient magnitude diagnostics (now actually printed).
            grad_avg = sum([np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                            for gradient in gradients]) \
                       / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                            for gradient in gradients])
            print("{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tpredictions={4}, "
                  "\tgrad_avg={5}, \tgrad_max={6}".format(
                      episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq,
                      rng_avg_preds, grad_avg, grad_max))
    return model
def accuracy_test_with_goals(model, test_number=None):
    """Run every sequence once through a goal-equipped model and report
    per-step accuracy (plain and probability-weighted).

    NOTE(review): relies on bare module-level names (seqs, goals, all_inputs,
    all_outputs, seq1, sequence_probabilities, optimal_accuracy_goals) —
    presumably imported from the task module; verify at file top.

    Returns:
        (hidden_activation, optimal): list of per-step hidden contexts across
        all sequences, and whether weighted accuracy exactly matches the
        optimal profile.
    """
    hidden_activation = []
    all_choices = []
    for j, sequence in enumerate(seqs):
        goal = goals[j]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape is opened but no training happens here).
        with tf.GradientTape() as tape:
            # Context starts at zeros; goal unit is clamped to the sequence's goal.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Winner-take-all action history for this sequence.
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)
    # Now evaluate accuracy per step position:
    accuracy = np.zeros((len(seq1) - 1))
    accuracy_weighted = np.zeros((len(seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(seqs[i][1:], all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                # Weighted by how often the sequence occurs in the task.
                accuracy_weighted[j] += 1 * sequence_probabilities[i]
                accuracy[j] += 1 / len(all_choices)
    # Exact match against the known optimal accuracy profile.
    optimal = np.array_equal(accuracy_weighted, optimal_accuracy_goals)
    if test_number is None:
        print(accuracy, accuracy_weighted, optimal)
    else:
        print("{0} ({1}) - network {2} -- {3}".format(
            accuracy, accuracy_weighted, test_number, optimal))
    if not optimal:
        # Dump the produced action strings for inspection.
        for i in range(len(seqs)):
            print([utils.onehot_to_str(all_choices[i][0][j], all_outputs)
                   for j in range(len(targets))])
    return hidden_activation, optimal
def accuracy_test_deepprednet(model, name=None, noise=0.):
    """Run every pnas2018task sequence once through a two-context deep
    predictive network and report per-step accuracy totals.

    NOTE(review): the `noise` parameter is accepted but never used in this body.

    Returns:
        (hidden_activation1, hidden_activation2, accuracy_totals): per-step
        contexts from both hidden layers, plus accuracy per step position
        (fraction of the 4 sequences correct at that step).
    """
    hidden_activation1 = []
    hidden_activation2 = []
    all_choices = []
    for sequence in pnas2018task.seqs:
        # new_episode handles context initialization for this model type.
        model.new_episode()
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            for i in range(len(targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation1.append(model.context1)
                hidden_activation2.append(model.context2)
            # Winner-take-all action history for this sequence.
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)
    # Now evaluate accuracy per step position:
    accuracy_totals = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                          pnas2018task.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    # 4 = number of sequences in pnas2018task.
    accuracy_totals /= 4
    if name is not None:
        print(name, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation1, hidden_activation2, accuracy_totals
def accuracy_test_reg_hierarchy_nogoals(model, model_num=None):
    """Run every pnas2018task sequence once through a goal-free model and
    print accuracy per step position; goal-handling code is kept commented out.

    Returns:
        hidden_activation: list of per-step hidden contexts across all sequences.
    """
    hidden_activation = []
    all_choices = []
    for j, sequence in enumerate(pnas2018task.seqs):
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            # Context starts at zeros each sequence; no goal is set.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Winner-take-all action history for this sequence.
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)
    # Now evaluate accuracy per step position:
    accuracy_totals = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                          pnas2018task.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    # 4 = number of sequences in pnas2018task.
    accuracy_totals /= 4
    if model_num is not None:
        print(model_num, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation
def accuracy_test(model, name=None, noise=0., initial_context=pnas2018.ZEROS):
    """Run every rewardtask sequence once (with optional context noise) and
    report accuracy per step position.

    Args:
        model: trained network to evaluate.
        name: optional label prefixed to the printed accuracy.
        noise: std-dev of Gaussian noise added to the context at every step.
        initial_context: context initialization passed to new_episode.

    Returns:
        (hidden_activation, accuracy_totals): per-step hidden contexts and
        accuracy per step position (fraction of the 4 sequences correct).
    """
    hidden_activation = []
    all_choices = []
    for sequence in rewardtask.seqs:
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            model.new_episode(initial_context=initial_context)
            for i in range(len(targets)):
                # Reset the previous action, then perturb the context.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Winner-take-all action history for this sequence.
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)
    # Now evaluate accuracy per step position:
    accuracy_totals = np.zeros((len(rewardtask.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(rewardtask.seqs[i][1:],
                                          rewardtask.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    # 4 = number of sequences in rewardtask (assumed — verify if task changes).
    accuracy_totals /= 4
    if name is not None:
        print(name, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation, accuracy_totals
def test_one_sequence(model, sequence_num, turn_goal_step=None, goal_to_turn=None,
                      num_trials=100):
    """Run a single pnas2018task sequence many times with random context
    initialization, optionally switching the goal mid-sequence, and print the
    distribution of distinct output sequences.

    Change vs. previous version: the trial count (previously hard-coded to 100,
    which silently doubled as the percentage denominator) is now a parameter;
    percentages are computed explicitly, so output is unchanged at the default.

    Args:
        model: trained goal-equipped network.
        sequence_num: index of the sequence/goal in pnas2018task.
        turn_goal_step: step index at which to overwrite the goal (None = never).
        goal_to_turn: goal value to switch to at turn_goal_step.
        num_trials: number of repetitions of the sequence.

    Returns:
        hidden_activation: per-step hidden contexts across all trials.
    """
    hidden_activation = []
    all_choices = []
    results = []
    for trials in range(num_trials):
        sequence = pnas2018task.seqs[sequence_num]
        goal = pnas2018task.goals[sequence_num]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            # Random binary-ish context: each unit starts at 0.1 or 0.9.
            model.context = np.float32(
                np.abs(np.random.randint(0, 2, (1, model.size_hidden)) - 0.1))
            model.goal1 = goal[0]
            for i in range(len(targets)):
                # Optionally switch the goal mid-sequence.
                if i == turn_goal_step:
                    model.goal1 = goal_to_turn
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Winner-take-all action history for this trial.
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.clear_history()
            results.append(choice)
    # Count the number of unique result sequences and their occurrences.
    unique_results = []
    unique_results_counts = []
    for result in results:
        for i, unique_result in enumerate(unique_results):
            if np.array_equal(result, unique_result):
                unique_results_counts[i] += 1
                break
        else:
            unique_results.append(result)
            unique_results_counts.append(1)
    # Sort in order of frequency (descending); stable sort preserves first-seen
    # order among ties, matching the original behavior.
    order = sorted(range(len(unique_results)),
                   key=lambda k: unique_results_counts[k], reverse=True)
    unique_results = [unique_results[k] for k in order]
    unique_results_counts = [unique_results_counts[k] for k in order]
    # Print the target sequence.
    full_sequence_str = ""
    for row in targets:
        full_sequence_str += utils.onehot_to_str(row, pnas2018task.all_outputs) + "; "
    print("target: " + full_sequence_str)
    # Now print the observed result sequences with their frequency as a percentage.
    for i, unique_result in enumerate(unique_results):
        full_sequence_str = ""
        for row in unique_result:
            full_sequence_str += utils.onehot_to_str(row, pnas2018task.all_outputs) + "; "
        percent = unique_results_counts[i] * 100 // num_trials
        print(str(percent) + "%: " + full_sequence_str)
    return hidden_activation
def accuracy_test_keepcontext(model, name=None, num_samples=100): num_actions = len(pnas2018task.seq1) - 1 # List of 4 lists of 6 empty lists hidden_activation = [[[] for _ in range(num_actions)] for _ in range(len(pnas2018task.seqs))] all_choices = [] # Make a list of 400 sequences sequences = [0, 1, 2, 3] * num_samples random.shuffle(sequences) for seq_id in sequences: sequence = pnas2018task.seqs[seq_id] seq_choices = [] all_choices.append(seq_choices) inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs) targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs) model.action = np.zeros((1, model.size_action), dtype=np.float32) # run the network with tf.GradientTape() as tape: model.new_episode() # Reset the previous action for i in range(len(targets)): model.action = np.zeros((1, model.size_action), dtype=np.float32) #model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden))) observation = inputs[i].reshape(1, -1) model.feedforward(observation) hidden_activation[seq_id][i].append(model.context.numpy()) # Get some statistics about what was correct and what wasn't choice = np.array(model.h_action_wta).reshape( (-1, len(targets[0]))) model.h_action_wta.clear() seq_choices.append(choice) hidden_activations_averaged = [] # Average the hidden activations per action for seq in hidden_activation: for action in seq: average = np.zeros_like(action[0]) for instance in action: average += instance average /= len(action) hidden_activations_averaged.append(average) # Now evaluate accuracy: accuracy_totals = np.zeros(num_actions) for i in range(len(all_choices)): targets = utils.liststr_to_onehot(pnas2018task.seqs[sequences[i]][1:], pnas2018task.all_outputs) for j in range(len(targets)): if (all_choices[i][0][j] == targets[j]).all(): accuracy_totals[j] += 1 accuracy_totals /= 4 * num_samples if name is not None: print(name, accuracy_totals) else: print(accuracy_totals) return hidden_activations_averaged, 
accuracy_totals
def accuracy_test_predictive(model, test_number=None):
    """Run every pnas2018task sequence once through a PredictiveNet and report
    accuracy of both the action head and the prediction head.

    Returns:
        (hidden_activation, ok): per-step hidden contexts, and whether BOTH
        heads exactly match their optimal weighted-accuracy profiles.
    """
    hidden_activation = []
    all_choices = []
    all_predictions = []
    for sequence in pnas2018task.seqs:
        seq_choices = []
        seq_predictions = []
        all_predictions.append(seq_predictions)
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        # Prediction targets are the next inputs.
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            # Initial prediction = 0.
            model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)
            for i in range(len(action_targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Winner-take-all histories for both heads.
            choice = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))
            prediction = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            model.h_action_wta.clear()
            model.h_prediction_wta.clear()
            seq_choices.append(choice)
            seq_predictions.append(prediction)
    # Now evaluate action accuracy per step position:
    # Optimal profile: first two steps are ambiguous (50/50), rest deterministic.
    optimal_accuracy = np.asarray([.5, .5, 1., 1., 1., 1.])
    accuracy = np.zeros((len(pnas2018task.seq1) - 1))
    accuracy_weighted = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        action_targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                                 pnas2018task.all_outputs)
        for j in range(len(action_targets)):
            if (all_choices[i][0][j] == action_targets[j]).all():
                accuracy_weighted[j] += 1 * pnas2018task.sequence_probabilities[i]
                accuracy[j] += 1/len(all_choices)
    optimal_actions = np.array_equal(accuracy_weighted, optimal_accuracy)
    # Same optimal profile for the prediction head.
    optimal_accuracy_preds = [.5, .5, 1, 1, 1, 1]
    accuracy_preds = np.zeros((len(pnas2018task.seq1) - 1))
    accuracy_preds_weighted = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_predictions)):
        prediction_targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                                     pnas2018task.all_inputs)
        for j in range(len(prediction_targets)):
            if (all_predictions[i][0][j] == prediction_targets[j]).all():
                accuracy_preds_weighted[j] += 1 * pnas2018task.sequence_probabilities[i]
                accuracy_preds[j] += 1/len(all_predictions)
    optimal_predictions = np.array_equal(accuracy_preds_weighted, optimal_accuracy_preds)
    if test_number is None:
        print(accuracy, accuracy_weighted, optimal_actions,
              accuracy_preds, accuracy_preds_weighted, optimal_predictions)
    else:
        print("Actions: {0} ({1}) - network {2} -- {3}".format(
            accuracy, accuracy_weighted, test_number, optimal_actions))
    if not optimal_actions or not optimal_predictions:
        # Dump produced actions and predictions for inspection.
        print("actions:")
        for i in range(len(pnas2018task.seqs)):
            print([utils.onehot_to_str(all_choices[i][0][j], pnas2018task.all_outputs)
                   for j in range(len(action_targets))])
        print("predictions:")
        for i in range(len(pnas2018task.seqs)):
            print([utils.onehot_to_str(all_predictions[i][0][j], pnas2018task.all_inputs)
                   for j in range(len(prediction_targets))])
    return hidden_activation, optimal_actions and optimal_predictions
def accuracy_test_predictive(model, test_number=None, type='sigmoid'):
    """Evaluate a PredictiveNet on a coffee/tea variant task where the action
    vocabulary disambiguates 'serve' into 'servetea'/'servecoffee'.

    NOTE(review): this redefines accuracy_test_predictive — if both defs live
    in the same module, this one shadows the earlier pnas2018task version.
    NOTE(review): the inner step loop reuses the outer loop variable `i`;
    harmless with range() iteration, but confusing — candidate for rename.
    NOTE(review): `seq1`, `seqs` and `sequence_probabilities` below are bare
    names not defined in this function — presumably module-level; verify.

    Returns:
        (hidden_activation, ok): per-step hidden contexts, and whether BOTH
        heads exactly match their optimal weighted-accuracy profiles.
    """
    inputs_str = ["start", "coffee", "milk", "cream", "water", "stir", "tea",
                  "serve", "sugar", "end"]
    outputs_str = ["start", "coffee", "milk", "cream", "water", "stir", "tea",
                   "servetea", "servecoffee", "sugar", "end"]
    # Input sequences use the ambiguous 'serve'; target sequences resolve it.
    seq1in = ['start', 'coffee', 'water', 'stir', 'cream', 'serve', 'end']  # 60%
    seq1t = ['start', 'coffee', 'water', 'stir', 'cream', 'servecoffee', 'end']  # 60%
    seq2in = ['start', 'coffee', 'water', 'stir', 'milk', 'serve', 'end']  # 20%
    seq2t = ['start', 'coffee', 'water', 'stir', 'milk', 'servecoffee', 'end']  # 20%
    seq3in = ['start', 'tea', 'water', 'stir', 'sugar', 'serve', 'end']  # 20%
    seq3t = ['start', 'tea', 'water', 'stir', 'sugar', 'servetea', 'end']  # 20%
    inputs_seqs = [seq1in, seq2in, seq3in]
    target_seqs = [seq1t, seq2t, seq3t]
    hidden_activation = []
    all_choices = []
    all_predictions = []
    for i in range(len(inputs_seqs)):
        sequence_i = inputs_seqs[i]
        sequence_t = target_seqs[i]
        seq_choices = []
        seq_predictions = []
        all_predictions.append(seq_predictions)
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence_i[:-1], inputs_str)
        # Action targets drop the final element; prediction targets are the next inputs.
        action_targets = utils.liststr_to_onehot(sequence_t[:-1], outputs_str)
        prediction_targets = utils.liststr_to_onehot(sequence_i[1:], inputs_str)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network (tape opened but unused — evaluation only).
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            # Initial prediction = 0.
            model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)
            for i in range(len(action_targets)):
                # Reset the previous action before each step.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation, type)
                hidden_activation.append(model.context)
            # Winner-take-all histories for both heads.
            choice = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))
            prediction = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            model.h_action_wta.clear()
            model.h_prediction_wta.clear()
            seq_choices.append(choice)
            seq_predictions.append(prediction)
    # Now evaluate action accuracy per step position:
    optimal_accuracy = np.asarray([1., 1., 1., 1., 1., 1.])
    accuracy = np.zeros((len(seq1) - 1))
    accuracy_weighted = np.zeros((len(seq1) - 1))
    for i in range(len(all_choices)):
        action_targets = utils.liststr_to_onehot(target_seqs[i][:-1], outputs_str)
        for j in range(len(action_targets)):
            if (all_choices[i][0][j] == action_targets[j]).all():
                accuracy_weighted[j] += 1 * sequence_probabilities[i]
                accuracy[j] += 1 / len(all_choices)
    optimal_actions = np.array_equal(accuracy_weighted, optimal_accuracy)
    # Predictions are only 80% predictable at the two branching steps.
    optimal_accuracy_preds = [.8, 1, 1, .8, 1, 1]
    accuracy_preds = np.zeros((len(seq1) - 1))
    accuracy_preds_weighted = np.zeros((len(seq1) - 1))
    for i in range(len(all_predictions)):
        prediction_targets = utils.liststr_to_onehot(inputs_seqs[i][1:], inputs_str)
        for j in range(len(prediction_targets)):
            if (all_predictions[i][0][j] == prediction_targets[j]).all():
                accuracy_preds_weighted[j] += 1 * sequence_probabilities[i]
                accuracy_preds[j] += 1 / len(all_predictions)
    optimal_predictions = np.array_equal(accuracy_preds_weighted, optimal_accuracy_preds)
    if test_number is None:
        print(accuracy, accuracy_weighted, optimal_actions,
              accuracy_preds, accuracy_preds_weighted, optimal_predictions)
    else:
        print("Actions: {0} ({1}) - network {2} -- {3}".format(
            accuracy, accuracy_weighted, test_number,
            optimal_actions and optimal_predictions))
    if not optimal_actions or not optimal_predictions:
        # Dump produced actions and predictions for inspection.
        print("actions:")
        for i in range(len(seqs)):
            print([utils.onehot_to_str(all_choices[i][0][j], outputs_str)
                   for j in range(len(action_targets))])
        print("predictions:")
        for i in range(len(seqs)):
            print([utils.onehot_to_str(all_predictions[i][0][j], inputs_str)
                   for j in range(len(prediction_targets))])
    return hidden_activation, optimal_actions and optimal_predictions
def train_with_goals(model=None, mse=False, learning_rate=0.1, noise=0., iterations=5000,
                     l2reg=0.0, algorithm=nn.SGD, hidden_units=15, reg_strength=0.,
                     reg_increase="square"):
    """Train a goal-equipped ElmanGoalNet with hierarchy-inducing weight
    regularization on the hidden and output layers.

    NOTE(review): redefines train_with_goals — if the L1 version lives in the
    same module, this def shadows it.
    NOTE(review): `targets_goal1` is assigned but unused (left over from the
    commented-out training call below).

    Args:
        model: existing network, or None to build one (uses bare module-level
            all_inputs/all_outputs — presumably from a task import; verify).
        mse: when True, forces learning_rate to 0.5.
        learning_rate, noise, iterations, l2reg, algorithm, hidden_units: usual
            training hyperparameters.
        reg_strength, reg_increase: parameters of the spatial weight
            regularization (pnashierarchy.weight_regularization_calculator).

    Returns:
        The trained model.
    """
    num_goals = 2
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=num_goals, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = 0.5 if mse else learning_rate
    model.L2_regularization = l2reg
    # Rolling averages for console monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.
    for episode in range(num_episodes):
        # Hand-rolled 60/20/20 sequence sampling.
        decider = np.random.uniform()
        if decider < 0.6:
            seqid = 0
        elif decider < 0.8:
            seqid = 1
        else:
            seqid = 2
        sequence = seqs[seqid]
        goal = goals[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        targets_goal1 = goal  # unused — see NOTE in docstring
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network under a gradient tape.
        with tf.GradientTape() as tape:
            # Context starts at zeros; goal unit clamped to the sequence's goal.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            cols = model.size_hidden
            # Regularization in the hidden layer weights.
            # Recurrent hidden-to-hidden connections:
            extra_loss = pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w,
                [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Prev action to hidden — intentionally disabled:
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                  [model.size_hidden+9, model.size_hidden+9+model.size_action],
            #                  [0, cols],
            #                  reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden (the 9 is the observation size, hard-coded):
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w,
                [model.size_hidden + 9 + model.size_action,
                 model.size_hidden + 9 + model.size_action + num_goals],
                [0, cols],
                reg_strength, reg_type="input_left", reg_increase=reg_increase)
            # SWITCHED OUTPUT LEFT AND OUTPUT RIGHT.
            # Regularization in the output layers (goals and actions) weights.
            # Hidden to next action:
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.action_layer.w,
                [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)
            # Hidden to next goal:
            # NOTE(review): column range uses model.size_action here even though
            # this is the goal layer — possibly should be num_goals; confirm.
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.goal1_layer.w,
                [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_left", reg_increase=reg_increase)
            # Regularization of the observation (only goes to the action side)
            # — intentionally disabled:
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                  [model.size_hidden, model.size_hidden+model.size_observation],
            #                  [0, cols],
            #                  reg_strength, reg_type="input_right", reg_increase=reg_increase)
            loss, _ = model.train_obsolete(targets, goal, None, tape, extra_loss)
            # Alternative training paths kept for reference:
            # if mse:
            #     loss = model.train_MSE(targets, None, None, tape)
            # else:
            #     loss, gradients = model.train_obsolete(targets, targets_goal1, None, tape)
        # Monitor progress using rolling averages; faster updates early on
        # enable more useful evaluations for early trials.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        # ratios[0] == 1 means the whole action sequence was correct.
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)
        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
def train(model=None, mse=False, noise=0., iterations=5000, l2reg=0.0,
          learning_rate=0.1, algorithm=nn.SGD, hidden_units=15):
    """Train a goal-free ElmanGoalNet; prints loss, accuracy and gradient
    magnitude diagnostics at monitoring intervals.

    NOTE(review): redefines train — if the L3 rewardtask version lives in the
    same module, this def shadows it. Bare names (all_inputs, all_outputs,
    seqs, sequence_probabilities) are presumably module-level; verify.

    Args:
        model: existing network, or None to build one.
        mse: selects MSE vs. cross-entropy training.
        noise, iterations, l2reg, learning_rate, algorithm, hidden_units:
            usual training hyperparameters.

    Returns:
        The trained model.
    """
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg
    # Rolling averages for console monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.
    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # Run the network under a gradient tape.
        with tf.GradientTape() as tape:
            # Context starts at zeros each episode.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                # Reset the previous action, then perturb the context.
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None, tape)
        # Monitor progress using rolling averages; faster updates early on
        # enable more useful evaluations for early trials.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        # ratios[0] == 1 means the whole action sequence was correct.
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)
        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            # Gradient magnitude diagnostics over all parameter gradients.
            grad_avg = sum([
                np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                for gradient in gradients
            ]) / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([
                np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                for gradient in gradients
            ])
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}"
                .format(episode, rng_avg_loss, rng_avg_actions,
                        rng_avg_full_seq, grad_avg, grad_max))
    return model
def train_hierarchical_nogoals(noise=0, iterations=10000, learning_rate=0.1,
                               reg_strength=0.001, reg_increase="linear"):
    """Train a no-goal Elman network with spatial weight regularization.

    Adds extra regularization losses on the recurrent hidden-to-hidden weights and
    on the hidden-to-action output weights (via utils.weight_regularization_calculator)
    to encourage a hierarchical organization of the hidden layer.

    :param noise: std-dev of Gaussian noise added to the hidden context at every step.
    :param iterations: number of training episodes (one sequence per episode).
    :param learning_rate: learning rate set on the model.
    :param reg_strength: strength of the spatial weight regularization.
    :param reg_increase: profile of the regularization increase (e.g. "linear"),
                         forwarded to utils.weight_regularization_calculator.
    :return: the trained model.
    """
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    # Rolling averages for monitoring.
    rng_avg_loss = 0.
    rng_avg_actions = 0.
    # Renamed from rng_avg_goals: this tracks whole-sequence accuracy (as in train()),
    # not goal accuracy — this model has no goal units.
    rng_avg_full_seq = 0.

    # Episodes at which early progress is printed (powers of 3). Hoisted out of the
    # loop and stored as a set instead of being rebuilt every episode.
    early_report_episodes = {3 ** n for n in range(50)}

    for episode in range(num_episodes):
        model.new_episode()
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network over the whole sequence under one gradient tape.
        # NOTE: unlike train(), the context is NOT zeroed here and the action is NOT
        # reset per timestep — model.new_episode() above handles per-episode state.
        with tf.GradientTape() as tape:
            for i in range(len(targets)):
                # Inject Gaussian noise into the recurrent context.
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Regularization in the hidden layer weights: recurrent hidden-to-hidden
            # connections. (Earlier experiments also regularized prev-action,
            # prev-goal, goal-output and observation slices; those terms were
            # commented out in the original and are intentionally not applied.)
            cols = model.size_hidden
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Regularization in the output layer weights: hidden to next action.
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)

            # Train model (cross-entropy + extra regularization loss), record loss.
            loss, _ = model.train_obsolete(targets, None, None, tape, extra_loss)

        # Monitor progress using rolling averages; large step size early on enables
        # more useful evaluations for early trials.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in early_report_episodes) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq))
    return model