def train_and_save(num_models, name, num_episodes):
    for i in range(num_models):
        model = nn.ElmanGoalNet()
        train_supervised_teacoffeeenv(model, num_episodes)
        utils.save_object(name, model)
        print('Trained and saved model #{0} of {1}\n'.format(i + 1, num_models))

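# Hedged usage sketch (not part of the original file): batch-training a set of
# tea/coffee networks under one save name. The name "teacoffee_batch" and the
# counts below are illustrative values, not the settings used in the experiments.
def example_train_and_save_batch():
    train_and_save(num_models=10, name="teacoffee_batch", num_episodes=10000)
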
def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=2, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network while recording gradients.
        with tf.GradientTape() as tape:
            # Initialize the context (hidden state) to zeros and set the goal units.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise to the context.
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about which actions were correct and which weren't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            loss, _ = model.train_obsolete(targets, goal, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model

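# Hedged usage sketch (illustrative only): training a single goal-conditioned
# network with a small amount of context noise. The noise level and iteration
# count are example values, not the settings reported with the original model.
def example_train_with_goals():
    model = train_with_goals(noise=0.05, iterations=10000, learning_rate=0.1)
    return model
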
def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg=0.0, algorithm=nn.SGD,
          size_hidden=15, learning_rate=None, loss_type='cross_entropy',
          initial_context=pnas2018.ZEROS):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden,
                                size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs),
                                size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Otherwise keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.

    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)
        sequence = rewardtask.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)

        # Run the network while recording gradients.
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            #if episode % 2 == 0:
            # Get some statistics about what was correct and what wasn't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if loss_type == pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type == pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        del tape

        #if episode % 2 == 0:
        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence

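# Hedged usage sketch (illustrative only): calling the reward-task train() defined
# above with the cross-entropy branch and light L2 regularization, then reading
# back the rolling full-sequence accuracy. All values are example settings.
def example_train_reward_task():
    model, full_seq_accuracy = train(noise=0.05, iterations=5000, l2reg=0.0001,
                                     loss_type=pnas2018.CROSS_ENTROPY,
                                     initial_context=pnas2018.ZEROS)
    return model, full_seq_accuracy
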
def run_model1_ari():  # ARI
    num_training_steps = 10000
    nnet = nn.ElmanGoalNet(size_hidden=15, initialization=nn.UNIFORM,
                           size_goal1=0, size_goal2=0,
                           size_observation=len(task.symbols), size_action=len(task.symbols),
                           learning_rate=0.01, algorithm=nn.ADAM)
    nnet.L2_regularization = 0.00001
    train_ari(nnet, num_training_steps)
    utils.save_object("cogloadtasknet_ari", nnet)
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")

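# Hedged usage sketch (illustrative only): reloading the saved ARI network and
# regenerating its RDM without retraining, using the same save name and calls
# that appear in run_model1_ari above.
def example_reanalyze_ari():
    nnet = utils.load_object("cogloadtasknet_ari")
    generate_rdm_ari(nnet, name="cogloadtasknet_ari")
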
def train_with_goals(model=None, mse=False, learning_rate=0.1, noise=0., iterations=5000,
                     l2reg=0.0, algorithm=nn.SGD, hidden_units=15,
                     reg_strength=0., reg_increase="square"):
    num_goals = 2
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs), size_action=len(all_inputs),
                                size_goal1=num_goals, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = 0.5 if mse else learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        decider = np.random.uniform()
        if decider < 0.6:
            seqid = 0
        elif decider < 0.8:
            seqid = 1
        else:
            seqid = 2
        sequence = seqs[seqid]
        goal = goals[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        targets_goal1 = goal
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network while recording gradients.
        with tf.GradientTape() as tape:
            # Initialize the context (hidden state) to zeros and set the goal units.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about which actions were correct and which weren't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            cols = model.size_hidden
            # Regularization of the hidden-layer weights:
            # recurrent hidden-to-hidden connections.
            extra_loss = pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Previous action to hidden.
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden + 9, model.size_hidden + 9 + model.size_action],
            #                                               [0, cols],
            #                                               reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Previous goal to hidden.
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w,
                [model.size_hidden + 9 + model.size_action,
                 model.size_hidden + 9 + model.size_action + num_goals],
                [0, cols],
                reg_strength, reg_type="input_left", reg_increase=reg_increase)

            # NOTE: SWITCHED OUTPUT LEFT AND OUTPUT RIGHT.
            # Regularization of the output-layer weights (goals and actions):
            # hidden to next action.
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)
            # Hidden to next goal.
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.goal1_layer.w, [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_left", reg_increase=reg_increase)
            # Regularization of the observation (only goes to the action side).
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden, model.size_hidden + model.size_observation],
            #                                               [0, cols],
            #                                               reg_strength, reg_type="input_right", reg_increase=reg_increase)

            # Train model, record loss.
            loss, _ = model.train_obsolete(targets, goal, None, tape, extra_loss)
            #if mse:
            #    loss = model.train_MSE(targets, None, None, tape)
            #else:
            #    loss, gradients = model.train_obsolete(targets, targets_goal1, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model

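# Hedged usage sketch (illustrative only): training a goal-conditioned network
# with the hierarchy-inducing weight regularization enabled. The regularization
# strength and "square" increase profile are example settings, not necessarily
# the published ones.
def example_train_with_goals_regularized():
    model = train_with_goals(noise=0.05, iterations=5000, learning_rate=0.1,
                             reg_strength=0.001, reg_increase="square")
    return model
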
def train(model=None, mse=False, noise=0., iterations=5000, l2reg=0.0, learning_rate=0.1,
          algorithm=nn.SGD, hidden_units=15):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units, algorithm=algorithm,
                                size_observation=len(all_inputs), size_action=len(all_inputs),
                                size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network while recording gradients.
        with tf.GradientTape() as tape:
            # Initialize the context (hidden state) to zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about which actions were correct and which weren't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None, tape)

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals, including gradient statistics.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([np.sum(tf.reduce_sum(tf.abs(gradient)).numpy()) for gradient in gradients]) \
                       / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([np.max(tf.reduce_max(tf.abs(gradient)).numpy()) for gradient in gradients])
            print("{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq, grad_avg, grad_max))
    return model

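# Hedged usage sketch (illustrative only): calling the flat (no-goal-units) train()
# defined directly above with the cross-entropy branch (mse=False). The noise level
# and iteration count are example values.
def example_train_without_goals():
    model = train(mse=False, noise=0.05, iterations=5000, l2reg=0.0,
                  learning_rate=0.1, hidden_units=15)
    return model
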
def run_model3_multiple(stopping_params, nnparams, blanks, from_file=None,
                        num_networks=1, name="model3", hrp=None):
    if from_file is not None:
        networks = utils.load_objects(from_file, num_networks)
    else:
        networks = []
        for i in range(num_networks):
            nnparams.size_goal1 = 2
            nnparams.size_action = len(task.output_symbols)
            nnparams.size_observation = len(task.input_symbols)
            nnet = nn.ElmanGoalNet(params=nnparams)
            train_all(stopping_params, nnet, hrp=hrp, blanks=blanks)
            utils.save_object(name, nnet)
            networks.append(nnet)
            # Print some per-network accuracy statistics.
            hidden_activation, accuracy_totals, accuracy_fullseqs = test_network_all(nnet)
            print("network {0}: ".format(i))
            print(accuracy_totals)
            print(accuracy_fullseqs)

    # Pattern of sequences, for the MDS.
    pattern = [6] * 4 + [6] * 4 + [12] * 4
    if hrp is None:
        sum_rdm = None
        labels = None
        for net in networks:
            rdm, labels = generate_rdm_all(net, name=name, from_file=False)
            if sum_rdm is None:
                sum_rdm = rdm
            else:
                sum_rdm += rdm
        average_rdm = sum_rdm / num_networks
        # Save the averaged RDM and the corresponding MDS plot.
        utils.save_rdm(average_rdm, name, labels, title="RDM training combined")
        analysis.make_mds(average_rdm, name, labels=labels,
                          title="MDS training combined", pattern=pattern)
    else:
        sum_rdm_left = sum_rdm_right = None
        labels = None
        for net in networks:
            rdmleft, rdmright, labels = generate_rdm_all_gradient(net, name=name, blanks=blanks,
                                                                  from_file=False,
                                                                  delete_blank_states=True)
            if sum_rdm_left is None:
                sum_rdm_left = rdmleft
                sum_rdm_right = rdmright
            else:
                sum_rdm_left += rdmleft
                sum_rdm_right += rdmright
        average_rdm_left = sum_rdm_left / num_networks
        average_rdm_right = sum_rdm_right / num_networks
        utils.save_rdm(average_rdm_left, name + "left", labels,
                       title="RDM training combined: left (goals)", fontsize=1.)
        utils.save_rdm(average_rdm_right, name + "right", labels,
                       title="RDM training combined: right (actions)", fontsize=1.)
        analysis.make_mds(average_rdm_left, name + "left", labels=labels,
                          title="MDS training combined: left (goals)", pattern=pattern)
        analysis.make_mds(average_rdm_right, name + "right", labels=labels,
                          title="MDS training combined: right (actions)", pattern=pattern)

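# Hedged usage sketch (illustrative only): re-analysing previously saved "model3"
# networks instead of retraining them. The stopping/parameter objects are passed
# through unchanged; from_file makes run_model3_multiple load the saved networks.
# blanks=True and num_networks=5 are example values, not necessarily the original
# settings.
def example_rerun_model3_from_file(stopping_params, nnparams):
    run_model3_multiple(stopping_params, nnparams, blanks=True,
                        from_file="model3", num_networks=5, name="model3")
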
def train_hierarchical_nogoals(noise=0, iterations=10000, learning_rate=0.1,
                               reg_strength=0.001, reg_increase="linear"):
    model = nn.ElmanGoalNet(size_hidden=15, size_observation=9, size_action=8,
                            size_goal1=0, size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        model.new_episode()
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        #goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # Run the network while recording gradients.
        with tf.GradientTape() as tape:
            # Context initialization is handled by model.new_episode() above.
            #model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            #model.goal1 = np.zeros_like(goal[0])
            for i in range(len(targets)):
                #model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise to the context.
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about which actions were correct and which weren't.
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            cols = model.size_hidden
            # Regularization of the hidden-layer weights:
            # recurrent hidden-to-hidden connections.
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength, reg_type="recurrent", reg_increase=reg_increase)
            # Previous action to hidden.
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden+9, model.size_hidden+9+model.size_action], [0, cols],
            #                                               reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Previous goal to hidden.
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden+9+model.size_action, model.size_hidden+9+model.size_action+2], [0, cols],
            #                                               reg_strength, reg_type="input_left", reg_increase=reg_increase)
            # Regularization of the output-layer weights (goals and actions):
            # hidden to next action.
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden], [0, model.size_action],
                reg_strength, reg_type="output_right", reg_increase=reg_increase)
            # Hidden to next goal.
            #extra_loss += weight_regularization_calculator(model.goal1_layer.w,
            #                                               [0, model.size_hidden], [0, model.size_action],
            #                                               reg_strength, reg_type="output_left", reg_increase=reg_increase)
            # Regularization of the observation (only goes to the action side).
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden, model.size_hidden+model.size_observation],
            #                                               [0, cols],
            #                                               reg_strength, reg_type="input_right", reg_increase=reg_increase)

            loss, _ = model.train_obsolete(targets, None, None, tape, extra_loss)
            #if episode % 100 == 0:
            #    print(loss.numpy() - extra_loss.numpy(), extra_loss.numpy())

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # faster-moving average enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1, speed)  # whole action sequence correct?

        # Display on the console at regular intervals.
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print("{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model

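# Hedged usage sketch (illustrative only): training a hierarchically regularized
# network without goal units, using a linear increase profile for the
# regularization. All values below are examples, not the experiment settings.
def example_train_hierarchical_nogoals():
    model = train_hierarchical_nogoals(noise=0.05, iterations=10000,
                                       learning_rate=0.1,
                                       reg_strength=0.001, reg_increase="linear")
    return model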