Example No. 1
import numpy as np
import tensorflow as tf
# Note: `tce` (the tea/coffee environment module), `state`, and `utils` are
# project-local modules assumed to be importable from this repository.


def run_model_with_noise(model, noise_level):
    env = tce.TeaCoffeeEnv()
    # Get the target actions and goals
    outputs_actions = []
    goal_list = [
        "g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
        "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"
    ]
    for goal in goal_list:
        targets = tce.target_list[goal]
        env.state = state.State(tce.TeaCoffeeData())  # Reinitialize the state
        env.state.current.set_field(goal, 1.)  # Set the goal as active
        model.context = np.random.uniform(0.01, 0.1, (1, model.size_hidden))
        for target in targets:
            # Set the action and goal inputs to the correct (target) values.
            model.action = utils.str_to_onehot(target[0],
                                               tce.TeaCoffeeData.actions_list)
            model.goal1 = utils.str_to_onehot(target[1],
                                              tce.TeaCoffeeData.goals1_list)
            model.goal2 = utils.str_to_onehot(target[2],
                                              tce.TeaCoffeeData.goals2_list)
            # Inject Gaussian noise into the recurrent context units.
            model.context += noise_level * np.random.normal(
                size=tf.shape(model.context))
            observation = env.observe()
            model.feedforward(observation)
            env.do_action(target[0])
            outputs_actions.append(model.action)
    return outputs_actions
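A minimal usage sketch, assuming `model` is a trained network from this repository (exposing `size_hidden`, `feedforward()`, and the `action`/`goal1`/`goal2` fields used above):

# Hedged usage sketch: run the trained model at increasing noise levels and
# inspect how many action vectors each run produces.
for noise in (0.0, 0.05, 0.1, 0.25):
    actions = run_model_with_noise(model, noise)
    print("noise={0}: {1} action outputs collected".format(noise, len(actions)))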
Example No. 2
def reinitialize(self, initial_state=None):
    # Reset the environment; default to a fresh GoalEnvData state.
    if initial_state is None:
        initial_state = state.State(GoalEnvData())
    self.state = initial_state
Example No. 3
def __init__(self):
    super().__init__()
    self.state = state.State(GoalEnvData())
Example No. 4
def train_supervised_teacoffeeenv(model, environment, num_episodes):
    env = environment

    rng_avg_loss = 0.
    rng_avg_actions = 0.
    rng_avg_goals = 0.
    rng_avg_goal1 = 0.
    rng_avg_goal2 = 0.
    rng_avg_action1 = 0.

    goal_list = [
        "g_1_make_coffee", "g_1_make_tea", "g_2_add_grounds", "g_2_add_cream",
        "g_2_add_sugar", "g_2_drink", "g_2_dip_teabag"
    ]
    for episode in range(num_episodes):
        goal = goal_list[episode % len(goal_list)]  # Cycle through the goals
        targets = tce.target_list[goal]  # Get the target actions and goals
        env.state = state.State(tce.TeaCoffeeData())  # Reinitialize the state
        env.state.current.set_field(goal, 1.)  # Set the goal as active

        # run the network
        with tf.GradientTape() as tape:
            # Initialize context with random/uniform values.
            #model.context = np.zeros((1, model.size_hidden))
            model.context = np.float32(
                np.random.uniform(0.01, 0.99, (1, model.size_hidden)))
            targets_onehot = [[], [], []]  # actions, goal1s, goal2s
            for target in targets:
                # Set up the input to be the correct actions and goals
                targets_onehot[0].append(
                    utils.str_to_onehot(target[0],
                                        tce.TeaCoffeeData.actions_list))
                targets_onehot[1].append(
                    utils.str_to_onehot(target[1],
                                        tce.TeaCoffeeData.goals1_list))
                targets_onehot[2].append(
                    utils.str_to_onehot(target[2],
                                        tce.TeaCoffeeData.goals2_list))
                model.action, model.goal1, model.goal2 = [
                    targets_onehot[j][-1] for j in range(3)
                ]

                observation = env.observe()
                model.feedforward(observation)
                # Transition the MDP according to the *target* action, not the chosen action!
                env.do_action(target[0])

            # Get some statistics about what was correct and what wasn't
            ratios = evaluate(
                [model.h_action_wta, model.h_goal1_wta, model.h_goal2_wta],
                targets_onehot)
            rng_avg_action1 = utils.rolling_avg(
                rng_avg_action1,
                ratio_correct([model.h_action_wta[0]], [targets_onehot[0][0]]),
                2. / (episode + 2) if episode < 1000 else 0.001)
            # Train model, record loss.
            loss = model.train_obsolete(targets_onehot[0], targets_onehot[1],
                                        targets_onehot[2], tape)

            # Monitor progress using rolling averages.
            # Faster-moving average early on gives more useful readings for early trials.
            speed = 2. / (episode + 2) if episode < 1000 else 0.001
            rng_avg_loss = utils.rolling_avg(rng_avg_loss, loss, speed)
            rng_avg_actions = utils.rolling_avg(rng_avg_actions, ratios[0],
                                                speed)
            # Whole action sequence correct?
            rng_avg_goals = utils.rolling_avg(rng_avg_goals, ratios[0] == 1,
                                              speed)
            rng_avg_goal1 = utils.rolling_avg(rng_avg_goal1, ratios[1], speed)
            rng_avg_goal2 = utils.rolling_avg(rng_avg_goal2, ratios[2], speed)
            # Display on the console at regular intervals
            if (episode < 1000 and episode in [3**n for n in range(50)]) or episode % 1000 == 0 \
                               or episode+1 == num_episodes:
                print(
                    "{0}: avg loss={1}, \tactions={2}, \tfull_sequence={3}\tgoal1={4}\tgoal2={5}\tfirst_action={6}"
                    .format(episode, rng_avg_loss, rng_avg_actions,
                            rng_avg_goals, rng_avg_goal1, rng_avg_goal2,
                            rng_avg_action1))
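The progress statistics above depend on `utils.rolling_avg`; below is a minimal self-contained sketch of that exponential rolling average, assuming the `(current_average, new_value, speed)` signature implied by the calls in the training loop:

def rolling_avg(avg, new_value, speed):
    # Exponential moving average: `speed` is the weight given to the newest sample.
    return avg + speed * (new_value - avg)

# The schedule speed = 2 / (episode + 2) weights early samples roughly like a
# plain running mean; after 1000 episodes a fixed speed of 0.001 tracks slow drift.
avg = 0.
for episode, value in enumerate([1., 0., 1., 1.]):
    speed = 2. / (episode + 2) if episode < 1000 else 0.001
    avg = rolling_avg(avg, value, speed)
print(avg)  # ~0.8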
Example No. 5
def __init__(self):
    super().__init__()
    self.state = state.State(TeaCoffeeData())
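All five examples rely on the same environment protocol: reinitialize the state, activate a goal, observe, then act. A minimal sketch of one teacher-forced step, assuming only the `TeaCoffeeEnv` methods already used in Examples 1 and 4:

# Hedged sketch of a single teacher-forced environment step.
env = tce.TeaCoffeeEnv()
env.state = state.State(tce.TeaCoffeeData())        # fresh state
env.state.current.set_field("g_1_make_coffee", 1.)  # activate a top-level goal

first_step = tce.target_list["g_1_make_coffee"][0]  # (action, goal1, goal2) names
observation = env.observe()                         # observe the current state
env.do_action(first_step[0])                        # transition on the target action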