Example #1
def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=2,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize the context to zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            loss, _ = model.train_obsolete(targets, goal, None, tape)
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
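
All of these training loops monitor progress with utils.rolling_avg, using a step size ("speed") of 2/(episode+2) for the first 1000 episodes and 0.001 afterwards, so the averages become meaningful after only a few episodes. Below is a minimal sketch of that monitoring pattern, assuming rolling_avg behaves like a plain exponential moving average; the actual helper in utils may differ in detail.

# Sketch only: rolling_avg assumed to be an exponential moving average.
def rolling_avg(avg, value, speed):
    # Move the running average a fraction `speed` of the way towards the new value.
    return avg + speed * (float(value) - avg)

rng_avg_loss = 0.
for episode in range(5000):
    loss = 1.0 / (episode + 1)  # stand-in for the per-episode training loss
    # Large steps early on, then a small constant step once the average is established.
    speed = 2. / (episode + 2) if episode < 1000 else 0.001
    rng_avg_loss = rolling_avg(rng_avg_loss, loss, speed)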
Example #2
def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg=0.0, algorithm=nn.SGD,
          size_hidden=15, learning_rate=None, loss_type='cross_entropy',
          initial_context=pnas2018.ZEROS):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden, size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs), size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Else keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.

    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)

        sequence = rewardtask.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        # run the network
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            if loss_type == pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type == pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        del tape

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1,
                                          speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence
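
Every example encodes symbol sequences with utils.liststr_to_onehot: the inputs are all but the last symbol, and the targets are the same sequence shifted forward by one step. Below is a self-contained sketch of that kind of encoding with a hypothetical vocabulary; the real tasks define their own all_inputs/all_outputs lists and the real helper may differ in detail.

import numpy as np

# Sketch only: one-hot encode a list of symbols against a fixed vocabulary.
def liststr_to_onehot_sketch(symbols, vocabulary):
    onehot = np.zeros((len(symbols), len(vocabulary)), dtype=np.float32)
    for row, symbol in enumerate(symbols):
        onehot[row, vocabulary.index(symbol)] = 1.
    return onehot

# Hypothetical vocabulary and sequence, for illustration only.
vocab = ["start", "coffee", "water", "stir", "serve", "end"]
sequence = ["start", "coffee", "water", "stir", "serve", "end"]
inputs = liststr_to_onehot_sketch(sequence[:-1], vocab)   # observations: all but the last step
targets = liststr_to_onehot_sketch(sequence[1:], vocab)   # targets: the next symbol at each step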
Example #3
def train_predictive_net(model=None, iterations=5000, learning_rate=0.1, algorithm=nn.RMSPROP, hidden_units=15):
    if model is None:
        model = nn.PredictiveNet(size_hidden=hidden_units, algorithm=algorithm,
                                 size_observation=len(pnas2018task.all_inputs),
                                 size_action=len(pnas2018task.all_outputs))
    num_episodes = iterations
    model.learning_rate = learning_rate

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.
    rng_avg_preds = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)  #initial prediction = 0
        # run the network
        # Initialize the context to zeros.
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(action_targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                #model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))    # reshape to (x, 8)
            ratios = scripts.evaluate([tchoices], [action_targets])
            tpreds = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            ratios_predictions = scripts.evaluate([tpreds], [prediction_targets])

            # Train model, record loss. NOTE: targets and predictions are identical for this task!!!
            loss, gradients = model.train(tape, [action_targets, prediction_targets])

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_preds = utils.rolling_avg(rng_avg_preds, ratios_predictions[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([
                np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                for gradient in gradients
            ]) / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([
                np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                for gradient in gradients
            ])
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tpredictions={4}, \tgrad_avg={5}, \tgrad_max={6}"
                .format(episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq,
                        rng_avg_preds, grad_avg, grad_max))

    return model
Example #4
def accuracy_test_with_goals(model, test_number=None):
    hidden_activation = []
    all_choices = []
    for j, sequence in enumerate(seqs):
        goal = goals[j]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize the context to zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            # Reset the previous action
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)

    # Now evaluate accuracy:
    accuracy = np.zeros((len(seq1) - 1))
    accuracy_weighted = np.zeros((len(seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(seqs[i][1:], all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_weighted[j] += 1 * sequence_probabilities[i]
                accuracy[j] += 1 / len(all_choices)
    optimal = np.array_equal(accuracy_weighted, optimal_accuracy_goals)
    if test_number is None:
        print(accuracy, accuracy_weighted, optimal)
    else:
        print("{0} ({1}) - network {2} -- {3}".format(accuracy,
                                                      accuracy_weighted,
                                                      test_number, optimal))
    if not optimal:
        for i in range(len(seqs)):
            print([
                utils.onehot_to_str(all_choices[i][0][j], all_outputs)
                for j in range(len(targets))
            ])
    return hidden_activation, optimal
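
The accuracy bookkeeping above compares the winner-take-all choice at every step against the target and accumulates both an unweighted score (fraction of sequences correct at that step) and a score weighted by the sequence probabilities. A numpy-only sketch of that per-position comparison on made-up data:

import numpy as np

# Sketch only: per-position accuracy on toy one-hot data.
targets = np.eye(4, dtype=np.float32)            # 4 steps, 4 possible actions (toy example)
choices = targets.copy()
choices[2] = np.roll(choices[2], 1)              # simulate one wrong choice at step 2

sequence_probability = 0.6                       # toy sequence probability
num_sequences = 1                                # only one sequence in this sketch

accuracy = np.zeros(len(targets))
accuracy_weighted = np.zeros(len(targets))
for j in range(len(targets)):
    if (choices[j] == targets[j]).all():
        accuracy[j] += 1. / num_sequences              # fraction of sequences correct at step j
        accuracy_weighted[j] += sequence_probability   # weighted by how often the sequence occurs
print(accuracy, accuracy_weighted)               # -> [1. 1. 0. 1.] [0.6 0.6 0.  0.6]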
Example #5
def accuracy_test_deepprednet(model, name=None, noise=0.):
    hidden_activation1 = []
    hidden_activation2 = []
    all_choices = []
    for sequence in pnas2018task.seqs:
        model.new_episode()
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        #model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            # Reset the previous action
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation1.append(model.context1)
                hidden_activation2.append(model.context2)
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)

    # Now evaluate accuracy:
    accuracy_totals = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                          pnas2018task.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    accuracy_totals /= 4
    if name is not None:
        print(name, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation1, hidden_activation2, accuracy_totals
Example #6
def accuracy_test_reg_hierarchy_nogoals(model, model_num=None):
    hidden_activation = []
    all_choices = []
    for j, sequence in enumerate(pnas2018task.seqs):
        #goal = goals[j]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize the context to zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            #model.goal1 = np.zeros_like(goal[0])
            # Reset the previous action
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)

    # Now evaluate accuracy:
    accuracy_totals = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:],
                                          pnas2018task.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    accuracy_totals /= 4
    if model_num is not None:
        print(model_num, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation
Example #7
def accuracy_test(model, name=None, noise=0., initial_context=pnas2018.ZEROS):
    hidden_activation = []
    all_choices = []
    for sequence in rewardtask.seqs:
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            model.new_episode(initial_context=initial_context)
            # Reset the previous action
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)

    # Now evaluate accuracy:
    accuracy_totals = np.zeros((len(rewardtask.seq1) - 1))
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(rewardtask.seqs[i][1:], rewardtask.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    accuracy_totals /= 4
    if name is not None:
        print(name, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activation, accuracy_totals
Example #8
def test_one_sequence(model,
                      sequence_num,
                      turn_goal_step=None,
                      goal_to_turn=None):
    hidden_activation = []
    all_choices = []
    results = []

    for trials in range(100):
        sequence = pnas2018task.seqs[sequence_num]
        goal = pnas2018task.goals[sequence_num]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            model.context = np.float32(
                np.abs(
                    np.random.randint(0, 2, (1, model.size_hidden)) -
                    0.1))  # np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            # Reset the previous action
            for i in range(len(targets)):
                if i == turn_goal_step:
                    model.goal1 = goal_to_turn
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            model.clear_history()
            results.append(choice)

    # Now, count the number of unique result sequences, and the number of occurrences of each unique sequence
    unique_results = []
    unique_results_counts = []
    for result in results:
        unique = True
        for i, unique_result in enumerate(unique_results):
            if np.array_equal(result, unique_result):
                unique_results_counts[i] += 1
                unique = False
                break
        if unique:
            unique_results.append(result)
            unique_results_counts.append(1)

    # Sort in order of frequency
    unique_results = [
        unique_result
        for (_, unique_result
             ) in sorted(zip(unique_results_counts, unique_results),
                         key=lambda pair: pair[0],
                         reverse=True)
    ]
    unique_results_counts = sorted(unique_results_counts, reverse=True)

    # Print the target sequence
    full_sequence_str = ""
    for row in targets:
        full_sequence_str += utils.onehot_to_str(
            row, pnas2018task.all_outputs) + "; "
    print("target: " + full_sequence_str)

    # Now print the results
    for i, unique_result in enumerate(unique_results):
        full_sequence_str = ""
        for row in unique_result:
            full_sequence_str += utils.onehot_to_str(
                row, pnas2018task.all_outputs) + "; "
        print(str(unique_results_counts[i]) + "%: " + full_sequence_str)

    return hidden_activation
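
The result-counting loop above walks the list manually because the choice matrices are numpy arrays, which are not hashable. An equivalent, shorter sketch using collections.Counter over tuple keys, on toy data:

import numpy as np
from collections import Counter

# Sketch only: count how often each produced choice sequence occurs, most frequent first.
results = [np.array([[1, 0], [0, 1]]),
           np.array([[1, 0], [0, 1]]),
           np.array([[0, 1], [1, 0]])]  # toy "choice" arrays

counts = Counter(tuple(map(tuple, result.tolist())) for result in results)
for sequence, count in counts.most_common():
    print(count, sequence)  # e.g. 2 ((1, 0), (0, 1)), then 1 ((0, 1), (1, 0))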
Example #9
def accuracy_test_keepcontext(model, name=None, num_samples=100):
    num_actions = len(pnas2018task.seq1) - 1
    # List of 4 lists of 6 empty lists
    hidden_activation = [[[] for _ in range(num_actions)]
                         for _ in range(len(pnas2018task.seqs))]

    all_choices = []

    # Make a list of 4 * num_samples sequence ids (400 with the default num_samples)
    sequences = [0, 1, 2, 3] * num_samples
    random.shuffle(sequences)

    for seq_id in sequences:
        sequence = pnas2018task.seqs[seq_id]
        seq_choices = []
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            model.new_episode()
            # Reset the previous action
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                #model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation[seq_id][i].append(model.context.numpy())
            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            model.h_action_wta.clear()
            seq_choices.append(choice)

    hidden_activations_averaged = []
    # Average the hidden activations per action
    for seq in hidden_activation:
        for action in seq:
            average = np.zeros_like(action[0])
            for instance in action:
                average += instance
            average /= len(action)
            hidden_activations_averaged.append(average)

    # Now evaluate accuracy:
    accuracy_totals = np.zeros(num_actions)
    for i in range(len(all_choices)):
        targets = utils.liststr_to_onehot(pnas2018task.seqs[sequences[i]][1:],
                                          pnas2018task.all_outputs)
        for j in range(len(targets)):
            if (all_choices[i][0][j] == targets[j]).all():
                accuracy_totals[j] += 1
    accuracy_totals /= 4 * num_samples
    if name is not None:
        print(name, accuracy_totals)
    else:
        print(accuracy_totals)
    return hidden_activations_averaged, accuracy_totals
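
The accumulation loop at the end of this example averages the recorded hidden states per (sequence, step) cell by summing and dividing. The same average can be sketched with numpy.stack and numpy.mean, shown here on toy data:

import numpy as np

# Sketch only: average several recorded hidden-state vectors for one (sequence, step) cell.
instances = [np.random.rand(1, 15).astype(np.float32) for _ in range(100)]  # toy context snapshots
average = np.mean(np.stack(instances), axis=0)  # shape (1, 15), same as a single snapshot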
Example #10
def accuracy_test_predictive(model, test_number=None):
    hidden_activation = []
    all_choices = []
    all_predictions = []
    for sequence in pnas2018task.seqs:
        seq_choices = []
        seq_predictions = []
        all_predictions.append(seq_predictions)
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)  #initial prediction = 0,
            # Reset the previous action
            for i in range(len(action_targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)
                hidden_activation.append(model.context)

            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))
            prediction = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            model.h_action_wta.clear()
            model.h_prediction_wta.clear()
            seq_choices.append(choice)
            seq_predictions.append(prediction)

    # Now evaluate accuracy:
    optimal_accuracy = np.asarray([.5, .5, 1., 1., 1., 1.])
    accuracy = np.zeros((len(pnas2018task.seq1) - 1))
    accuracy_weighted = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_choices)):
        action_targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:], pnas2018task.all_outputs)
        for j in range(len(action_targets)):
            if (all_choices[i][0][j] == action_targets[j]).all():
                accuracy_weighted[j] += 1 * pnas2018task.sequence_probabilities[i]
                accuracy[j] += 1/len(all_choices)
    optimal_actions = np.array_equal(accuracy_weighted, optimal_accuracy)

    optimal_accuracy_preds = [.5, .5, 1, 1, 1, 1]
    accuracy_preds = np.zeros((len(pnas2018task.seq1) - 1))
    accuracy_preds_weighted = np.zeros((len(pnas2018task.seq1) - 1))
    for i in range(len(all_predictions)):
        prediction_targets = utils.liststr_to_onehot(pnas2018task.seqs[i][1:], pnas2018task.all_inputs)
        for j in range(len(prediction_targets)):
            if (all_predictions[i][0][j] == prediction_targets[j]).all():
                accuracy_preds_weighted[j] += 1 * pnas2018task.sequence_probabilities[i]
                accuracy_preds[j] += 1/len(all_predictions)
    optimal_predictions = np.array_equal(accuracy_preds_weighted, optimal_accuracy_preds)

    if test_number is None:
        print(accuracy, accuracy_weighted, optimal_actions, accuracy_preds, accuracy_preds_weighted, optimal_predictions)
    else:
        print("Actions: {0} ({1}) - network {2} -- {3}".format(accuracy, accuracy_weighted, test_number, optimal_actions))
    if not optimal_actions or not optimal_predictions:
        print("actions:")
        for i in range(len(pnas2018task.seqs)):
            print([utils.onehot_to_str(all_choices[i][0][j], pnas2018task.all_outputs) for j in range(len(action_targets))])
        print("predictions:")
        for i in range(len(pnas2018task.seqs)):
            print([utils.onehot_to_str(all_predictions[i][0][j], pnas2018task.all_inputs) for j in range(len(prediction_targets))])
    return hidden_activation, optimal_actions and optimal_predictions
Example #11
def accuracy_test_predictive(model, test_number=None, type='sigmoid'):
    inputs_str = [
        "start", "coffee", "milk", "cream", "water", "stir", "tea", "serve",
        "sugar", "end"
    ]
    outputs_str = [
        "start", "coffee", "milk", "cream", "water", "stir", "tea", "servetea",
        "servecoffee", "sugar", "end"
    ]
    seq1in = ['start', 'coffee', 'water', 'stir', 'cream', 'serve',
              'end']  # 60%
    seq1t = [
        'start', 'coffee', 'water', 'stir', 'cream', 'servecoffee', 'end'
    ]  # 60%
    seq2in = ['start', 'coffee', 'water', 'stir', 'milk', 'serve',
              'end']  # 20%
    seq2t = ['start', 'coffee', 'water', 'stir', 'milk', 'servecoffee',
             'end']  # 20%
    seq3in = ['start', 'tea', 'water', 'stir', 'sugar', 'serve', 'end']  # 20%
    seq3t = ['start', 'tea', 'water', 'stir', 'sugar', 'servetea',
             'end']  # 20%
    inputs_seqs = [seq1in, seq2in, seq3in]
    target_seqs = [seq1t, seq2t, seq3t]

    hidden_activation = []
    all_choices = []
    all_predictions = []
    for i in range(len(inputs_seqs)):
        sequence_i = inputs_seqs[i]
        sequence_t = target_seqs[i]
        seq_choices = []
        seq_predictions = []
        all_predictions.append(seq_predictions)
        all_choices.append(seq_choices)
        inputs = utils.liststr_to_onehot(sequence_i[:-1], inputs_str)
        action_targets = utils.liststr_to_onehot(sequence_t[:-1], outputs_str)
        prediction_targets = utils.liststr_to_onehot(sequence_i[1:],
                                                     inputs_str)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.prediction_linear = np.zeros(
                (1, model.size_observation),
                dtype=np.float32)  #initial prediction = 0,
            # Reset the previous action
            for i in range(len(action_targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation, type)
                hidden_activation.append(model.context)

            # Get some statistics about what was correct and what wasn't
            choice = np.array(model.h_action_wta).reshape(
                (-1, len(action_targets[0])))
            prediction = np.array(model.h_prediction_wta).reshape(
                (-1, len(prediction_targets[0])))
            model.h_action_wta.clear()
            model.h_prediction_wta.clear()
            seq_choices.append(choice)
            seq_predictions.append(prediction)

    # Now evaluate accuracy:
    optimal_accuracy = np.asarray([1., 1., 1., 1., 1., 1.])
    accuracy = np.zeros((len(seq1in) - 1))
    accuracy_weighted = np.zeros((len(seq1in) - 1))
    for i in range(len(all_choices)):
        action_targets = utils.liststr_to_onehot(target_seqs[i][:-1],
                                                 outputs_str)
        for j in range(len(action_targets)):
            if (all_choices[i][0][j] == action_targets[j]).all():
                accuracy_weighted[j] += 1 * sequence_probabilities[i]
                accuracy[j] += 1 / len(all_choices)
    optimal_actions = np.array_equal(accuracy_weighted, optimal_accuracy)

    optimal_accuracy_preds = [.8, 1, 1, .8, 1, 1]
    accuracy_preds = np.zeros((len(seq1in) - 1))
    accuracy_preds_weighted = np.zeros((len(seq1in) - 1))
    for i in range(len(all_predictions)):
        prediction_targets = utils.liststr_to_onehot(inputs_seqs[i][1:],
                                                     inputs_str)
        for j in range(len(prediction_targets)):
            if (all_predictions[i][0][j] == prediction_targets[j]).all():
                accuracy_preds_weighted[j] += 1 * sequence_probabilities[i]
                accuracy_preds[j] += 1 / len(all_predictions)
    optimal_predictions = np.array_equal(accuracy_preds_weighted,
                                         optimal_accuracy_preds)

    if test_number is None:
        print(accuracy, accuracy_weighted, optimal_actions, accuracy_preds,
              accuracy_preds_weighted, optimal_predictions)
    else:
        print("Actions: {0} ({1}) - network {2} -- {3}".format(
            accuracy, accuracy_weighted, test_number, optimal_actions
            and optimal_predictions))
    if not optimal_actions or not optimal_predictions:
        print("actions:")
        for i in range(len(inputs_seqs)):
            print([
                utils.onehot_to_str(all_choices[i][0][j], outputs_str)
                for j in range(len(action_targets))
            ])
        print("predictions:")
        for i in range(len(inputs_seqs)):
            print([
                utils.onehot_to_str(all_predictions[i][0][j], inputs_str)
                for j in range(len(prediction_targets))
            ])
    return hidden_activation, optimal_actions and optimal_predictions
Example #12
def train_with_goals(model=None,
                     mse=False,
                     learning_rate=0.1,
                     noise=0.,
                     iterations=5000,
                     l2reg=0.0,
                     algorithm=nn.SGD,
                     hidden_units=15,
                     reg_strength=0.,
                     reg_increase="square"):
    num_goals = 2
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units,
                                algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=num_goals,
                                size_goal2=0)
    num_episodes = iterations
    model.learning_rate = 0.5 if mse else learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        decider = np.random.uniform()
        if decider < 0.6:
            seqid = 0
        elif decider < 0.8:
            seqid = 1
        else:
            seqid = 2

        sequence = seqs[seqid]
        goal = goals[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        targets_goal1 = goal
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize the context to zeros.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength,
                reg_type="recurrent",
                reg_increase=reg_increase)
            # Prev action to hidden
            # extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                               [model.size_hidden+9, model.size_hidden+9+model.size_action],
            #                                               [0, cols],
            #                                               reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.hidden_layer.w, [
                    model.size_hidden + 9 + model.size_action,
                    model.size_hidden + 9 + model.size_action + num_goals
                ], [0, cols],
                reg_strength,
                reg_type="input_left",
                reg_increase=reg_increase)

            # SWITCHED OUTPUT LEFT AND OUTPUT RIGHT.
            #Regularization in the output layers (goals and actions) weights
            # hidden to next action
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_right",
                reg_increase=reg_increase)
            # Hidden to next goal
            extra_loss += pnashierarchy.weight_regularization_calculator(
                model.goal1_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_left",
                reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden, model.size_hidden+model.size_observation],
            #                                                     [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)

            # Train model, record loss.
            loss, _ = model.train_obsolete(targets, goal, None, tape,
                                           extra_loss)
            # Alternative MSE training path, kept for reference:
            #if mse:
            #    loss = model.train_MSE(targets, None, None, tape)
            #else:
            #    loss, gradients = model.train_obsolete(targets, targets_goal1, None, tape)
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
Example #13
def train(model=None,
          mse=False,
          noise=0.,
          iterations=5000,
          l2reg=0.0,
          learning_rate=0.1,
          algorithm=nn.SGD,
          hidden_units=15):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units,
                                algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=0,
                                size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # run the network
        # Initialize the context to zeros.
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None,
                                                       tape)

        # Monitor progress using averages
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(
            rng_avg_full_seq, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([
                np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                for gradient in gradients
            ]) / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([
                np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                for gradient in gradients
            ])
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}"
                .format(episode, rng_avg_loss, rng_avg_actions,
                        rng_avg_full_seq, grad_avg, grad_max))

    return model
Example #14
def train_hierarchical_nogoals(noise=0,
                               iterations=10000,
                               learning_rate=0.1,
                               reg_strength=0.001,
                               reg_increase="linear"):
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=0,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        model.new_episode()
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        #goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            #model.goal1 = np.zeros_like(goal[0])
            for i in range(len(targets)):
                #model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength,
                reg_type="recurrent",
                reg_increase=reg_increase)
            # Prev action to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9, model.size_hidden+9+model.size_action], [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9+model.size_action, model.size_hidden+9+model.size_action+2], [0, cols],
            #                                                     reg_strength, reg_type="input_left", reg_increase=reg_increase)

            #Regularization in the output layers (goals and actions) weights
            # hidden to next action
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_right",
                reg_increase=reg_increase)

            # Hidden to next goal
            #extra_loss += weight_regularization_calculator(model.goal1_layer.w,
            #                                                    [0, model.size_hidden], [0, model.size_action],
            #                                                     reg_strength, reg_type="output_left", reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden, model.size_hidden+model.size_observation],
            #                                                     [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)

            loss, _ = model.train_obsolete(targets, None, None, tape,
                                           extra_loss)
            #if(episode%100 == 0):
            #    print(loss.numpy()-extra_loss.numpy(), extra_loss.numpy())
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
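
The extra_loss terms above penalize specific sub-blocks of the weight matrices through utils.weight_regularization_calculator, whose reg_type and reg_increase options are specific to this codebase. As a rough illustration only, the sketch below applies a plain L2 penalty to one rectangular sub-block of a weight matrix; it is not the project's regularizer.

import tensorflow as tf

# Sketch only: L2-penalize a rectangular sub-block of a weight matrix.
# This is NOT weight_regularization_calculator; its reg_type/reg_increase
# semantics are specific to the codebase and not reproduced here.
def subblock_l2(weights, rows, cols, strength):
    block = weights[rows[0]:rows[1], cols[0]:cols[1]]
    return strength * tf.reduce_sum(tf.square(block))

w = tf.Variable(tf.random.normal((24, 15)))             # toy hidden-layer weights
extra_loss = subblock_l2(w, [0, 15], [0, 15], 0.001)    # penalize only the recurrent block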