示例#1
0
def train_with_goals(noise=0, iterations=10000, learning_rate=0.1):
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=2,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            model.goal1 = goal[0]
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            loss, _ = model.train_obsolete(targets, goal, None, tape)
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model
示例#2
0
def train(model=None, noise=0., iterations=5000, l1reg=0.0, l2reg= 0.0, algorithm=nn.SGD,
          size_hidden=15, learning_rate=None, loss_type='cross_entropy',
          initial_context=pnas2018.ZEROS):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=size_hidden, size_observation=len(rewardtask.all_inputs),
                                size_action=len(rewardtask.all_outputs), size_goal1=0, size_goal2=0,
                                algorithm=algorithm, initialization="normal")
    num_episodes = iterations
    if learning_rate is not None:  # Else keep the model's learning rate
        model.learning_rate = learning_rate
    model.L1_regularization = l1reg
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_sequence = 0.

    for episode in range(num_episodes):
        model.new_episode(initial_context=initial_context)
        seqid = utils.idx_from_probabilities(rewardtask.sequence_probabilities)

        sequence = rewardtask.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], rewardtask.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], rewardtask.all_outputs)
        # run the network
        with tf.GradientTape(persistent=True) as tape:
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            #if episode % 2 == 0:
                # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape((-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            if loss_type==pnas2018.MSE:
                loss, _ = model.train_MSE(targets, None, None, tape)
            elif loss_type==pnas2018.CROSS_ENTROPY:
                loss, _ = model.train_obsolete(targets, None, None, tape)
            else:
                loss, _ = model.train(tape, targets)
        del tape

        #if episode % 2 == 0:
            # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_sequence = utils.rolling_avg(rng_avg_sequence, ratios[0] == 1,
                                          speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_sequence))
    return model, rng_avg_sequence
示例#3
0
def train_predictive_net(model=None, iterations=5000, learning_rate=0.1, algorithm=nn.RMSPROP, hidden_units=15):
    if model is None:
        model = nn.PredictiveNet(size_hidden=hidden_units, algorithm=algorithm,
                                 size_observation=len(pnas2018task.all_inputs),
                                 size_action=len(pnas2018task.all_outputs))
    num_episodes = iterations
    model.learning_rate = learning_rate

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.
    rng_avg_preds = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(pnas2018task.sequence_probabilities)
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], pnas2018task.all_inputs)
        action_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_outputs)
        prediction_targets = utils.liststr_to_onehot(sequence[1:], pnas2018task.all_inputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        model.prediction_linear = np.zeros((1, model.size_observation), dtype=np.float32)  #initial prediction = 0
        # run the network
        # Initialize context with random/uniform values.
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(action_targets)):
                model.action = np.zeros((1, model.size_action), dtype=np.float32)
                #model.context += np.float32(np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape((-1, len(action_targets[0])))    # reshape to (x, 8)
            ratios = scripts.evaluate([tchoices], [action_targets])
            tpreds = np.array(model.h_prediction_wta).reshape((-1, len(prediction_targets[0])))
            ratios_predictions = scripts.evaluate([tpreds], [prediction_targets])

            # Train model, record loss. NOTE: targets and predictions are identical for this task!!!
            loss, gradients = model.train(tape, [action_targets, prediction_targets])

        # Monitor progress using rolling averages.
        speed = 2. / (episode + 2) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_preds = utils.rolling_avg(rng_avg_preds, ratios_predictions[0], speed)
        rng_avg_full_seq = utils.rolling_avg(rng_avg_full_seq, ratios[0] == 1, speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([np.sum(tf.reduce_sum(tf.abs(gradient)).numpy()) for gradient in gradients])/sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([np.max(tf.reduce_max(tf.abs(gradient)).numpy()) for gradient in gradients])
            print("{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tpredictions={4}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_full_seq, rng_avg_preds))

    return model
示例#4
0
def train(model=None,
          mse=False,
          noise=0.,
          iterations=5000,
          l2reg=0.0,
          learning_rate=0.1,
          algorithm=nn.SGD,
          hidden_units=15):
    if model is None:
        model = nn.ElmanGoalNet(size_hidden=hidden_units,
                                algorithm=algorithm,
                                size_observation=len(all_inputs),
                                size_action=len(all_inputs),
                                size_goal1=0,
                                size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = l2reg

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_full_seq = 0.

    for episode in range(num_episodes):
        seqid = utils.idx_from_probabilities(sequence_probabilities)
        sequence = seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1], all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:], all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)

        # run the network
        # Initialize context with random/uniform values.
        with tf.GradientTape() as tape:
            model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            for i in range(len(targets)):
                model.action = np.zeros((1, model.size_action),
                                        dtype=np.float32)
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])

            # Train model, record loss.
            if mse:
                loss, gradients = model.train_MSE(targets, None, None, tape)
            else:
                loss, gradients = model.train_obsolete(targets, None, None,
                                                       tape)

        # Monitor progress using averages
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_full_seq = utils.rolling_avg(
            rng_avg_full_seq, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            grad_avg = sum([
                np.sum(tf.reduce_sum(tf.abs(gradient)).numpy())
                for gradient in gradients
            ]) / sum([tf.size(gradient).numpy() for gradient in gradients])
            grad_max = max([
                np.max(tf.reduce_max(tf.abs(gradient)).numpy())
                for gradient in gradients
            ])
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_seq={3}, \tgrad_avg={4}, \tgrad_max={5}"
                .format(episode, rng_avg_loss, rng_avg_actions,
                        rng_avg_full_seq, grad_avg, grad_max))

    return model
示例#5
0
def train_hierarchical_nogoals(noise=0,
                               iterations=10000,
                               learning_rate=0.1,
                               reg_strength=0.001,
                               reg_increase="linear"):
    model = nn.ElmanGoalNet(size_hidden=15,
                            size_observation=9,
                            size_action=8,
                            size_goal1=0,
                            size_goal2=0)
    num_episodes = iterations
    model.learning_rate = learning_rate
    model.L2_regularization = 0.

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.

    for episode in range(num_episodes):
        model.new_episode()
        seqid = utils.idx_from_probabilities(
            pnas2018task.sequence_probabilities)

        #goal = pnas2018task.goals[seqid]
        sequence = pnas2018task.seqs[seqid]
        inputs = utils.liststr_to_onehot(sequence[:-1],
                                         pnas2018task.all_inputs)
        targets = utils.liststr_to_onehot(sequence[1:],
                                          pnas2018task.all_outputs)
        model.action = np.zeros((1, model.size_action), dtype=np.float32)
        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden), dtype=np.float32)
            #model.goal1 = np.zeros_like(goal[0])
            for i in range(len(targets)):
                #model.action = np.zeros((1, model.size_action), dtype=np.float32)
                # Add noise
                model.context += np.float32(
                    np.random.normal(0., noise, size=(1, model.size_hidden)))
                observation = inputs[i].reshape(1, -1)
                model.feedforward(observation)

            # Get some statistics about what was correct and what wasn't
            tchoices = np.array(model.h_action_wta).reshape(
                (-1, len(targets[0])))
            ratios = scripts.evaluate([tchoices], [targets])
            # Train model, record loss.
            cols = model.size_hidden
            # Regularization in the hidden layer weights
            # Recurrent hidden to hidden connections
            extra_loss = utils.weight_regularization_calculator(
                model.hidden_layer.w, [0, model.size_hidden], [0, cols],
                reg_strength,
                reg_type="recurrent",
                reg_increase=reg_increase)
            # Prev action to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9, model.size_hidden+9+model.size_action], [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)
            # Prev goal to hidden
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden+9+model.size_action, model.size_hidden+9+model.size_action+2], [0, cols],
            #                                                     reg_strength, reg_type="input_left", reg_increase=reg_increase)

            #Regularization in the output layers (goals and actions) weights
            # hidden to next action
            extra_loss += utils.weight_regularization_calculator(
                model.action_layer.w, [0, model.size_hidden],
                [0, model.size_action],
                reg_strength,
                reg_type="output_right",
                reg_increase=reg_increase)

            # Hidden to next goal
            #extra_loss += weight_regularization_calculator(model.goal1_layer.w,
            #                                                    [0, model.size_hidden], [0, model.size_action],
            #                                                     reg_strength, reg_type="output_left", reg_increase=reg_increase)

            # Regularization of the observation (only goes to the action side)
            #extra_loss += weight_regularization_calculator(model.hidden_layer.w,
            #                                                     [model.size_hidden, model.size_hidden+model.size_observation],
            #                                                     [0, cols],
            #                                                     reg_strength, reg_type="input_right", reg_increase=reg_increase)

            loss, _ = model.train_obsolete(targets, None, None, tape,
                                           extra_loss)
            #if(episode%100 == 0):
            #    print(loss.numpy()-extra_loss.numpy(), extra_loss.numpy())
        # Monitor progress using rolling averages.
        speed = 2. / (
            episode + 2
        ) if episode < 1000 else 0.001  # enables more useful evaluations for early trials
        rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
        rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0], speed)
        rng_avg_goals = utils.rolling_avg(
            rng_avg_goals, ratios[0] == 1,
            speed)  # whole action sequence correct ?
        # Display on the console at regular intervals
        if (episode < 1000 and episode in [3 ** n for n in range(50)]) or episode % 1000 == 0 \
                or episode + 1 == num_episodes:
            print(
                "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}".format(
                    episode, rng_avg_loss, rng_avg_actions, rng_avg_goals))
    return model