Пример #1
0
def run(global_env):
    """Train the HAM that matches ``global_env`` and dump its rewards.

    The wrapper class of ``global_env`` selects the internal machine:
    ``M1`` for ``ArmEnvArticle``, ``M2`` for ``MazeEnvArticle`` and ``M3``
    for ``MazeEnvArticleSpecial``.  The machine is wrapped with
    ``super_runner``, ``LoopInvokerMachine`` and ``RootMachine``, its graph
    is drawn, and it is trained with ``ham_runner`` for
    ``global_env.episodes_count`` episodes.

    Afterwards two text files are written, one value per line:
    ``_<ClassName> cumulative_reward.txt`` and ``_<ClassName> reward.txt``.

    Raises:
        KeyError: ``global_env`` is none of the three supported wrappers.
    """
    # The order of the checks is significant: the first match wins.
    if isinstance(global_env, ArmEnvArticle):
        machine_factory = M1
    elif isinstance(global_env, MazeEnvArticle):
        machine_factory = M2
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_factory = M3
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_factory(env=env)
    wrapped = super_runner(call_me_maybe=internal_machine, env=env)
    machine = RootMachine(LoopInvokerMachine(wrapped))

    # NOTE(review): all three environments draw to "maze_env_special";
    # presumably a copy-paste leftover - confirm the intended file names.
    action_names = env.get_actions_as_dict()
    graph = internal_machine.get_graph_to_draw(
        action_to_name_mapping=action_names)
    draw_graph(file_name="maze_env_special", graph=graph)

    params = HAMParamsCommon(env)
    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    full_name = "_" + global_env.__class__.__name__
    with open(full_name + " cumulative_reward.txt", "w") as w:
        w.writelines(
            str(out) + '\n' for out in get_cumulative_rewards(rewards=rewards))

    with open(full_name + " reward.txt", "w") as w:
        w.writelines(str(out) + '\n' for out in rewards)
Пример #2
0
def run(global_env):
    """Train the hand-written HAM for ``global_env`` and save its rewards.

    Machine and graph file name per wrapper class:

    * ``ArmEnvArticle`` -> ``PullUpMachine``, ``"arm_env"``
    * ``MazeEnvArticle`` -> ``InterestingMachine``, ``"maze_env"``
    * ``MazeEnvArticleSpecial`` -> ``InterestingMachineLeftUpInteresting``,
      ``"maze_env_special"``

    The machine is wrapped in ``LoopInvokerMachine`` and ``RootMachine``,
    drawn, and trained for ``global_env.episodes_count`` episodes.  The
    per-episode rewards are written, one per line, to
    ``<name>_<ClassName> reward.txt``; ``name`` is not defined in this
    function and has to be provided by the enclosing scope.

    Raises:
        KeyError: ``global_env`` is none of the three supported wrappers.
    """
    # The order of the checks is significant: the first match wins.
    if isinstance(global_env, ArmEnvArticle):
        machine_factory, graph_file = PullUpMachine, "arm_env"
    elif isinstance(global_env, MazeEnvArticle):
        machine_factory, graph_file = InterestingMachine, "maze_env"
    elif isinstance(global_env, MazeEnvArticleSpecial):
        machine_factory = InterestingMachineLeftUpInteresting
        graph_file = "maze_env_special"
    else:
        raise KeyError

    env = global_env.env
    internal_machine = machine_factory(env=env)
    looped = LoopInvokerMachine(machine_to_invoke=internal_machine)
    machine = RootMachine(machine_to_invoke=looped)

    action_names = env.get_actions_as_dict()
    graph = internal_machine.get_graph_to_draw(
        action_to_name_mapping=action_names)
    draw_graph(file_name=graph_file, graph=graph)

    params = HAMParamsCommon(env)
    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]

    full_name = name + "_" + global_env.__class__.__name__
    with open(full_name + " reward.txt", "w") as w:
        w.writelines(str(out) + '\n' for out in rewards)
Пример #3
0
def part_two(env):
    """Filter the machines of part one by a single-episode reward.

    Reads stored machines from ``machines_part_one.json`` (plus one
    ``AutoMachineSimple`` converted with ``MachineStored.ms_from_machine``),
    trains a plain ``AutoMachineSimple`` for 2000 episodes and then, for
    every on-model part (in reversed order) and every stored machine, runs
    one more episode with that machine mapped to that part.  Machines whose
    episode reward sum exceeds 10 are collected and written, compressed via
    ``compress_graphs_dicts``, to ``machines_part_two.json``.
    """
    with open("machines_part_one.json") as json_file:
        machines = [MachineStored.ms_from_machine(AutoMachineSimple(env), env)]
        machines_to_save = []
        machines.extend(
            MachineStored.from_dict(graph_dict=ms_dict, env=env)
            for ms_dict in json.load(json_file))

        # Warm-up run: its q_value is handed to every evaluation below.
        params = HAMParamsCommon(env)
        am = AutoMachineSimple(env)
        runner(
            ham=am,
            num_episodes=2000,
            env=env,
            params=params,
            on_model_mapping={},
        )
        qv = params.q_value

        for on_model_part in list(reversed(env.get_all_on_model())):
            for ms in machines:
                machine = ms.get_machine()

                # Fresh params each time, but the very same q_value object.
                params = HAMParamsCommon(env)
                params.q_value = qv
                runner(
                    ham=am,
                    num_episodes=1,
                    env=env,
                    params=params,
                    on_model_mapping={on_model_part: machine},
                )

                # Built as in the original; nothing plots it here.
                to_plot = [
                    PlotParams(curve_to_draw=params.logs["ep_rewards"],
                               label="HAM_with_pull_up")
                ]
                total_reward = sum(params.logs["ep_rewards"])
                print("rewards sum:", total_reward)

                if total_reward > 10:
                    machines_to_save.append(ms)

        with open("machines_part_two.json", "w") as out_f:
            t = compress_graphs_dicts(
                [stored.to_dict() for stored in machines_to_save])
            json.dump(obj=t, fp=out_f, sort_keys=True, indent=4)
Пример #4
0
def go(transitions, brute_force, index_):
    """Evaluate one brute-forced machine and draw it if it earns reward.

    A machine is built from ``transitions`` and wrapped in
    ``LoopInvokerMachine``/``RootMachine``.  If it is runnable it gets a
    2-episode trial on ``environments[0]``; a ``ChildProcessError`` during
    the trial counts as zero reward.  When the trial reward is positive the
    machine is trained for 600 episodes on every entry of ``environments``
    and its graph is drawn to ``"<total>__<brute_force>_<index_>"``.

    ``environments`` and ``env`` are not defined here and must come from
    the enclosing scope.
    """
    machine = AbstractMachine(MachineGraph(transitions=transitions))
    am = RootMachine(LoopInvokerMachine(machine))

    if not is_it_machine_runnable(machine):
        return

    sum_rew = 0
    try:
        params = HAMParamsCommon(environments[0])
        ham_runner(ham=am,
                   num_episodes=2,
                   env=environments[0],
                   params=params)
        sum_rew = sum(params.logs["ep_rewards"])
    except ChildProcessError:
        # Deliberately ignored: the trial reward simply stays at zero.
        pass

    if not sum_rew > 0:
        return

    rew = None
    print("\n\n\n")
    for e in environments:
        params = HAMParamsCommon(e)
        ham_runner(ham=am, num_episodes=600, env=e, params=params)
        if rew is None:
            rew = 0
        to_add = sum(params.logs["ep_rewards"])
        rew += to_add
        print("to_add:", to_add)

    # rew is still None when ``environments`` is empty: nothing to draw.
    if rew is None:
        return

    graph_name = "{rew}__{brute_force}_{index_}".format(
        rew=rew, brute_force=brute_force, index_=index_)
    # NOTE(review): ``env`` is not a local of this function - confirm which
    # environment the action names are supposed to come from.
    graph = am.get_graph_to_draw(
        action_to_name_mapping=env.get_actions_as_dict())
    draw_graph(graph_name, graph)
Пример #5
0
def part_seven(env):
    """Compare the clustered machines of part six against plain Q-learning.

    Loads ``machines_part_six.json``, turns every string key back into a
    tuple, restores the machine stored under it and draws it to
    ``"ololo<key>"``.  Then ``AutoMachineSimple`` is trained for 2000
    episodes twice - once with the restored mapping, once with an empty
    mapping - and both reward curves are plotted to ``"ololo_result"``.
    The last GIF frames logged by the first run are saved as ``"olololo"``.
    """
    with open("machines_part_six.json") as json_file:
        stored_by_key = json.load(json_file)
        cluster_best_machine_mapper = {}
        for key, graph_dict in stored_by_key.items():
            stripped = key.replace("(", "").replace(")", "")
            # NOTE(review): eval() runs on text read from the JSON file;
            # only safe as long as that file is trusted.
            tuple_key = tuple(map(eval, stripped.split(",")))
            cluster_best_machine_mapper[tuple_key] = MachineStored.from_dict(
                graph_dict, env=env).get_machine()
            # A second, independent restore is used just for drawing.
            MachineStored.from_dict(graph_dict,
                                    env=env).draw("ololo" + str(key))

        params = HAMParamsCommon(env)
        runner(
            ham=AutoMachineSimple(env),
            num_episodes=2000,
            env=env,
            params=params,
            on_model_mapping=cluster_best_machine_mapper,
        )
        to_plot = [
            PlotParams(curve_to_draw=params.logs["ep_rewards"],
                       label="clustering")
        ]

        save_to_gif("olololo", params.logs["gif"][-1])

        # Baseline: same machine, no on-model mapping.
        params = HAMParamsCommon(env)
        runner(
            ham=AutoMachineSimple(env),
            num_episodes=2000,
            env=env,
            params=params,
            on_model_mapping={},
        )
        to_plot.append(
            PlotParams(curve_to_draw=params.logs["ep_rewards"],
                       label="q-learning"))

        plot_multi(to_plot, filename="ololo_result")
Пример #6
0
def test_draw_gid():
    """Run one episode on a small arm environment and save it as a GIF.

    ``ArmEnvToggleTopOnly`` is monkey-patched with the four on-model helper
    methods defined below, ``AutoMachineSimple`` is run for a single episode
    with an empty on-model mapping, and the first logged GIF is saved under
    the name ``"olololo"``.  The function ends the process with
    ``exit(0)``, so it never returns.
    """
    env = ArmEnvToggleTopOnly(size_x=5,
                              size_y=4,
                              cubes_cnt=4,
                              episode_max_length=50,
                              finish_reward=100,
                              action_minus_reward=-0.001,
                              tower_target_size=4)

    def get_on_model(self):
        # On-model state: (arm x position, whether a cube is gripped).
        return self.get_arm_x(), self.is_cube_graped()

    def get_all_on_model(self):
        # Every (height, gripped) pair except gripping at the last height.
        res = []
        for height in range(0, self._size_x):
            for graped in [True, False]:
                if height == self._size_x - 1 and graped is True:
                    continue
                res.append((height, graped))
        return res

    def get_arm_x(self):
        # The original had a second, unreachable
        # ``return self._size_x - self._arm_x`` after this line; it was dead
        # code and has been removed without changing behaviour.
        return self._arm_x

    def is_cube_graped(self):
        # Look at the cell one DOWN move away from the arm.
        cube_dx, cube_dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        cube_x, cube_y = self._arm_x + cube_dx, self._arm_y + cube_dy
        return self._magnet_toggle and self.ok(
            cube_x, cube_y) and self._grid[cube_x][cube_y] == 1

    # Patching the class also affects the already created ``env`` instance.
    ArmEnvToggleTopOnly.get_arm_x = get_arm_x
    ArmEnvToggleTopOnly.get_all_on_model = get_all_on_model
    ArmEnvToggleTopOnly.is_cube_graped = is_cube_graped
    ArmEnvToggleTopOnly.get_on_model = get_on_model

    params = HAMParamsCommon(env)

    runner(
        ham=AutoMachineSimple(env),
        num_episodes=1,
        env=env,
        params=params,
        on_model_mapping={},
        no_output=True,
    )

    save_to_gif("olololo", params.logs["gif"][0])

    exit(0)
Пример #7
0
    def run(global_env):
        """Train ``am`` and write its episode rewards to ``<name> reward.txt``.

        ``global_env`` is accepted but not used.  ``name``, ``am``, ``env``,
        ``environments`` and ``global_episodes`` all come from the enclosing
        scope.
        """
        file_prefix = name
        # NOTE(review): params are built from environments[0] while the
        # runner receives ``env`` - confirm both refer to the same environment.
        params = HAMParamsCommon(environments[0])
        ham_runner(ham=am,
                   num_episodes=global_episodes,
                   env=env,
                   params=params)
        rewards = params.logs["ep_rewards"]

        with open(file_prefix + " reward.txt", "w") as w:
            w.writelines(str(out) + '\n' for out in rewards)
    def _step(self, action):
        """Append ``action`` to the machine under construction and score it.

        The action is added to ``self.state``; if it equals one of
        ``ACTION_01`` .. ``ACTION_06`` a matching ``Action`` vertex is added
        via ``self.add``.  When the resulting machine is runnable its summed
        reward over ``self.num_of_episodes`` episodes is computed (cached in
        ``self.dp`` by state) and the machine graph is drawn under ``pics/``.

        Returns:
            ``(observation, reward, done, None)`` where ``observation`` is
            the updated state tuple and ``reward`` is ``None`` for machines
            that are not runnable.

        Raises:
            KeyError: ``action`` is ``None``.
        """
        self.state = self.state + (action,)

        # The HAM wrapper is rebuilt on every step, before validation.
        self.ham = RootMachine(
            LoopInvokerMachine(
                machine_to_invoke=super_runner(self.machine, self.env)))
        reward = None

        if action is None:
            raise KeyError

        # All six recognised actions are handled the same way; the lazy
        # generator stops at the first equal one, like the elif chain did.
        recognised = ("ACTION_01", "ACTION_02", "ACTION_03",
                      "ACTION_04", "ACTION_05", "ACTION_06")
        if any(action == getattr(self.ACTIONS, attr) for attr in recognised):
            self.add(Action(action=action))

        if is_it_machine_runnable(self.machine):
            if self.state not in self.dp:
                params = HAMParamsCommon(self.env)
                ham_runner(ham=self.ham,
                           num_episodes=self.num_of_episodes,
                           env=self.env,
                           params=params,
                           no_output=True)
                self.dp[self.state] = sum(params.logs["ep_rewards"])
            reward = self.dp[self.state]

            picture_name = ("pics/" + str(reward).rjust(10, "0") +
                            str(self.state) + " ")
            graph = self.machine.get_graph_to_draw(
                action_to_name_mapping=self.env.get_actions_as_dict())
            draw_graph(picture_name, graph)

        if len(self.state) >= self.max_size:
            self._done = True

        return self.state, reward, self._done, None
Пример #9
0
def main(begin_seed=0):
    """Generate 5000 random machines and draw every runnable one.

    For each seed in ``[begin_seed, begin_seed + 5000)`` a fresh
    ``MazeEnvArticle`` is created and a random machine is built.  Runnable
    machines are trained twice with the same params object - first wrapped
    by ``super_runner``, then directly - and their graph is drawn to
    ``"laby/<reward>:::<seed>"``.  ``KeyError`` and ``AssertionError``
    raised along the way are reported with a short marker and skipped.

    ``to_plot`` is not defined here and must exist in the enclosing scope.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        global_env, save_folder = MazeEnvArticle(), "laby/"
        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(maximal_number_of_vertex=6,
                                            maximal_number_of_edges=6,
                                            random_seed=seed,
                                            env=env)
        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            wrapped_ham = RootMachine(
                LoopInvokerMachine(
                    machine_to_invoke=super_runner(new_machine, env)))
            ham_runner(ham=wrapped_ham,
                       num_episodes=num_episodes,
                       env=env,
                       params=params,
                       no_output=True)

            plain_ham = RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine))
            ham_runner(ham=plain_ham,
                       num_episodes=num_episodes,
                       env=env,
                       params=params,
                       no_output=True)

            reward = sum(params.logs["ep_rewards"])
            picture_name = save_folder + str(reward) + ":::" + str(seed)
            graph = new_machine.get_graph_to_draw(
                action_to_name_mapping=env.get_actions_as_dict())
            draw_graph(picture_name, graph)
        except KeyError:
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")

    # NOTE(review): nothing in this function appends to ``to_plot``.
    plot_multi(to_plot)
Пример #10
0
def main():
    """Train ``AutoMachineNoLoop`` on a 5x5 arm environment and plot rewards.

    ``ArmEnvToggleTopOnly`` is first monkey-patched with three on-model
    helper methods, then an environment is created, the machine is trained
    for 2000 episodes and the per-episode rewards are plotted.
    """

    def get_on_model(self):
        # On-model state: (arm x as defined below, whether a cube is gripped).
        return self.get_arm_x(), self.is_cube_graped()

    def get_arm_x(self):
        return self._size_x - self._arm_x

    def is_cube_graped(self):
        # Look at the cell one DOWN move away from the arm.
        dx, dy = self.MOVE_ACTIONS[self.ACTIONS.DOWN]
        below_x = self._arm_x + dx
        below_y = self._arm_y + dy
        return self._magnet_toggle and self.ok(
            below_x, below_y) and self._grid[below_x][below_y] == 1

    # Same assignment order as before: get_arm_x, is_cube_graped, get_on_model.
    for patch in (get_arm_x, is_cube_graped, get_on_model):
        setattr(ArmEnvToggleTopOnly, patch.__name__, patch)

    env = ArmEnvToggleTopOnly(size_x=5,
                              size_y=5,
                              cubes_cnt=4,
                              episode_max_length=600,
                              finish_reward=100,
                              action_minus_reward=-0.001,
                              tower_target_size=4)

    params = HAMParamsCommon(env)
    runner(
        ham=AutoMachineNoLoop(env),
        num_episodes=2000,
        env=env,
        params=params,
    )

    to_plot = [
        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                   label="HAM_with_pull_up")
    ]
    plot_multi(to_plot)
Пример #11
0
def run(global_env):
    """Train one fixed-seed random machine on the special maze.

    Only ``MazeEnvArticleSpecial`` does any work: a random machine is
    created with seed 573846788, drawn to ``"maze_env_special"``, trained
    for ``global_env.episodes_count`` episodes, and its rewards are written
    to ``<name>_<ClassName> reward.txt`` (``name`` comes from the enclosing
    scope).  ``ArmEnvArticle`` and ``MazeEnvArticle`` are accepted but
    produce no output.

    Raises:
        KeyError: ``global_env`` is none of the three known wrappers.
    """
    # The order of the checks is kept: arm, maze, then special maze.
    if isinstance(global_env, ArmEnvArticle) or isinstance(
            global_env, MazeEnvArticle):
        return
    if not isinstance(global_env, MazeEnvArticleSpecial):
        raise KeyError

    env = global_env.env
    seed = 573846788
    internal_machine = create_random_machine(maximal_number_of_vertex=6,
                                             maximal_number_of_edges=6,
                                             random_seed=seed,
                                             env=env)
    looped = LoopInvokerMachine(machine_to_invoke=internal_machine)
    machine = RootMachine(machine_to_invoke=looped)

    graph = internal_machine.get_graph_to_draw(
        action_to_name_mapping=env.get_actions_as_dict())
    draw_graph(file_name="maze_env_special", graph=graph)

    params = HAMParamsCommon(env)
    ham_runner(ham=machine,
               num_episodes=global_env.episodes_count,
               env=env,
               params=params)
    rewards = params.logs["ep_rewards"]
    if rewards is None:
        return

    full_name = name + "_" + global_env.__class__.__name__
    with open(full_name + " reward.txt", "w") as w:
        w.writelines(str(out) + '\n' for out in rewards)
Пример #12
0
def main(global_env, begin_seed=0):
    """Search 5000 random machines for ones that end with positive reward.

    For each seed in ``[begin_seed, begin_seed + 5000)`` a random machine is
    created for ``global_env.env``.  Runnable machines are trained for
    ``global_env.episodes_count`` episodes; when the last 100 episode
    rewards sum to more than zero, the machine is reported, its reward
    curve is appended to ``to_plot`` and its graph is drawn to
    ``"pics/<seed>"``.  Finally all collected curves are plotted.

    ``to_plot`` is not defined here and must exist in the enclosing scope.

    Args:
        global_env: wrapper exposing ``env`` and ``episodes_count``.
        begin_seed: first random seed to try.
    """
    for seed in range(begin_seed, begin_seed + 5000):
        env = global_env.env
        num_episodes = global_env.episodes_count

        new_machine = create_random_machine(maximal_number_of_vertex=6,
                                            maximal_number_of_edges=6,
                                            random_seed=seed,
                                            env=env)

        if not is_it_machine_runnable(new_machine):
            continue

        params = HAMParamsCommon(env)
        try:
            ham_runner(ham=RootMachine(
                machine_to_invoke=LoopInvokerMachine(new_machine)),
                       num_episodes=num_episodes,
                       env=env,
                       params=params)

            episode_rewards = params.logs["ep_rewards"]
            if sum(episode_rewards[-100:]) > 0:
                # Fix: the message used "{test}".format(**locals()), but no
                # local named ``test`` exists.  The resulting KeyError was
                # swallowed by the handler below, so successful machines
                # were never plotted or drawn.  The seed is reported instead.
                print("seed {seed} done_it".format(seed=seed),
                      sum(episode_rewards))

                to_plot.append(
                    PlotParams(curve_to_draw=episode_rewards,
                               label="Random" + str(seed + 1)))
                draw_graph(
                    "pics/" + str(seed),
                    new_machine.get_graph_to_draw(
                        action_to_name_mapping=env.get_actions_as_dict()))
        except KeyError:
            # Best-effort search: a failing machine is only reported.
            print("keyError", end="")
        except AssertionError:
            print("assertion", end="")
    plot_multi(to_plot)
Пример #13
0
def part_four(env):
    """Run the best machine of every cluster together on the same env.

    Loads ``machines_part_three.json``, converts its string keys back to
    tuples and restores the stored machine of each entry.  All machines are
    then used as the on-model mapping of a 300-episode ``AutoMachineSimple``
    run, every stored machine is drawn to a file named after its cluster,
    and the reward curve is plotted to ``"a"``.
    """
    with open("machines_part_three.json") as json_file:
        stored_by_key = json.load(json_file)
        cluster_best_machine_mapper = {}

        for key, entry in stored_by_key.items():
            stripped = key.replace("(", "").replace(")", "")
            # NOTE(review): eval() runs on text read from the JSON file;
            # only safe as long as that file is trusted.
            tuple_key = tuple(map(eval, stripped.split(",")))
            cluster_best_machine_mapper[tuple_key] = MachineStored.from_dict(
                entry["graph_dict"], env=env)

        cluster_best_machine_mapper_machine = {
            cluster: stored.get_machine()
            for cluster, stored in cluster_best_machine_mapper.items()
        }

        params = HAMParamsCommon(env)
        runner(
            ham=AutoMachineSimple(env),
            num_episodes=300,
            env=env,
            params=params,
            on_model_mapping=cluster_best_machine_mapper_machine,
            no_output=True,
        )

        for cluster, ms in cluster_best_machine_mapper.items():
            ms.draw(filename=str(cluster))

        to_plot = [
            PlotParams(curve_to_draw=params.logs["ep_rewards"],
                       label="clustering, same env")
        ]
        plot_multi(to_plot, filename="a")
Пример #14
0
def runner(ham, num_episodes, env, params, no_output=None):
    """Run ``num_episodes`` episodes, alternating between two machines.

    Episodes whose number modulo 10 is 5..9 are driven by ``ham`` with
    ``params``; the remaining ones by an internal ``AutoMachineNoLoop`` with
    its own params.  The on-model state is printed before every machine
    run and the environment is rendered after each episode.

    Args:
        ham: machine exposing ``run(params)``.
        num_episodes: number of episodes to play.
        env: environment exposing ``reset``, ``is_done``, ``get_on_model``
            and ``render``.
        params: parameters passed to ``ham.run``.
        no_output: the progress line is printed only while this is ``None``.
    """
    fallback_ham = AutoMachineNoLoop(env)
    fallback_params = HAMParamsCommon(env)

    for i_episode in range(1, num_episodes + 1):
        env.reset()
        print("****" * 10)

        use_given_ham = i_episode % 10 >= 5
        while not env.is_done():
            print(env.get_on_model())
            if use_given_ham:
                ham.run(params)
            else:
                fallback_ham.run(fallback_params)

        env.render()
        assert env.is_done(
        ), "The machine is STOPPED before STOP(done) of the environment"

        if i_episode % 10 == 0 and no_output is None:
            progress = "\r{ham} episode {i_episode}/{num_episodes}.".format(
                ham=ham, i_episode=i_episode, num_episodes=num_episodes)
            print(progress, end="")
            sys.stdout.flush()
Пример #15
0
def part_six(env):
    """Greedily pick the clusters whose machine improves the total reward.

    Loads ``machines_part_three.json`` and visits its clusters from the
    highest to the lowest stored ``total_reward``.  For each candidate the
    summed reward of ten 800-episode runs with
    ``best_clusters + candidate`` is compared with ten baseline runs; the
    candidate is kept when the first sum is strictly larger.  The kept
    clusters are written to ``machines_part_six.json`` keyed by
    ``str(cluster)``.
    """
    with open("machines_part_three.json") as json_file:
        stored_by_key = json.load(json_file)
        ololo_mapping = {}
        ololo_to_sort = []
        for key_text, entry in stored_by_key.items():
            stripped = key_text.replace("(", "").replace(")", "")
            # NOTE(review): eval() runs on text read from the JSON file;
            # only safe as long as that file is trusted.
            tuple_key = tuple(map(eval, stripped.split(",")))
            ololo_mapping[tuple_key] = MachineStored.from_dict(
                entry["graph_dict"], env=env)
            ololo_to_sort.append([entry["total_reward"], tuple_key])

        best_clusters = {}

        for _, key in sorted(ololo_to_sort, reverse=True):
            print(key, type(key), key[0])

            # Variant A: clusters accepted so far plus the candidate.
            total_reward_a = 0
            for _attempt in range(10):
                params = HAMParamsCommon(env)
                candidate = {**best_clusters, key: ololo_mapping[key]}
                to_run = {
                    cluster: stored.get_machine()
                    for cluster, stored in candidate.items()
                }
                runner(
                    ham=AutoMachineSimple(env),
                    num_episodes=800,
                    env=env,
                    params=params,
                    on_model_mapping=to_run,
                )
                total_reward_a += sum(params.logs["ep_rewards"])

            # Variant B: baseline.
            # NOTE(review): as in the original, the machines of the accepted
            # clusters are built and then thrown away - the baseline always
            # runs with an empty mapping.  Confirm whether that is intended.
            total_reward_b = 0
            for _attempt in range(10):
                for stored in best_clusters.values():
                    stored.get_machine()
                params = HAMParamsCommon(env)
                runner(
                    ham=AutoMachineSimple(env),
                    num_episodes=800,
                    env=env,
                    params=params,
                    on_model_mapping={},
                )
                total_reward_b += sum(params.logs["ep_rewards"])

            if total_reward_a > total_reward_b:
                best_clusters[key] = ololo_mapping[key]
            print()
            print(total_reward_a, " ::: ", total_reward_b)

        clusters_to_save = {
            str(cluster): stored.to_dict()
            for cluster, stored in best_clusters.items()
        }
        with open("machines_part_six.json", "w") as out_f:
            json.dump(obj=clusters_to_save, fp=out_f, sort_keys=True, indent=4)
Пример #16
0
from HAM.HAM_core import AutoBasicMachine
from HAM.HAM_experiments.HAM_utils import HAMParamsCommon, maze_world_input_01, plot_multi, ham_runner, PlotParams
from environments.grid_maze_env.maze_world_env import MazeWorld

# Train the basic automatic HAM for 300 episodes on the maze produced by
# maze_world_input_01() and plot the per-episode rewards.
env = MazeWorld(maze_world_input_01())
params = HAMParamsCommon(env)
basic_ham = AutoBasicMachine(env)
ham_runner(ham=basic_ham, num_episodes=300, env=env, params=params)

reward_curve = PlotParams(curve_to_draw=params.logs["ep_rewards"],
                          label="HAM_basic")
plot_multi((reward_curve, ))
Пример #17
0
def part_three(env):
    """Find, for every on-model part, the stored machine with most reward.

    Loads the machines of ``machines_part_two.json``.  Every machine is
    evaluated on every on-model part (in reversed order) by summing the
    rewards of five 30-episode runs.  The best total and machine per part
    are tracked (ties keep the earlier machine) and finally written to
    ``machines_part_three.json`` as
    ``{str(part): {"total_reward": ..., "graph_dict": ...}}``.
    """
    with open("machines_part_two.json") as json_file:
        machines = [
            MachineStored.from_dict(graph_dict=ms_dict, env=env)
            for ms_dict in json.load(json_file)
        ]

        cluster_best_result_mapper = {}
        cluster_best_machine_mapper = {}
        clusters_to_save = {}
        for on_model_part in list(reversed(env.get_all_on_model())):
            for index, ms in enumerate(machines):
                machine = ms.get_machine()

                total_reward = 0
                for _attempt in range(5):
                    params = HAMParamsCommon(env)
                    runner(
                        ham=AutoMachineSimple(env),
                        num_episodes=30,
                        env=env,
                        params=params,
                        on_model_mapping={on_model_part: machine},
                        no_output=True,
                    )
                    # Built as in the original; nothing plots it here.
                    to_plot = [
                        PlotParams(curve_to_draw=params.logs["ep_rewards"],
                                   label="HAM_with_pull_up")
                    ]
                    total_reward += sum(params.logs["ep_rewards"])

                on_model_part_str = str(on_model_part)
                # Record the first result for a part, or a strictly better one.
                is_new_best = (
                    on_model_part_str not in cluster_best_result_mapper
                    or cluster_best_result_mapper[on_model_part_str] <
                    total_reward)
                if is_new_best:
                    best_graph = ms.to_dict()
                    cluster_best_result_mapper[on_model_part_str] = total_reward
                    cluster_best_machine_mapper[on_model_part_str] = best_graph
                    clusters_to_save[on_model_part_str] = {
                        "total_reward": total_reward,
                        "graph_dict": ms.to_dict()
                    }

                print("****")
                print("machine {index} of {ms_len}".format(
                    index=index, ms_len=len(machines)))
                print()
                for vertex_type in ms.vertex_types:
                    print(vertex_type)
                print(on_model_part_str, total_reward)

        with open("machines_part_three.json", "w") as out_f:
            json.dump(obj=clusters_to_save, fp=out_f, sort_keys=True, indent=4)
Пример #18
0
def q_learning(env, num_episodes, eps=0.1, alpha=0.1, gamma=0.9):
    """Run tabular Q-learning, cluster the visited states and train HAMs.

    The function has three phases:

    1. Epsilon-greedy Q-learning on ``env`` for ``num_episodes`` episodes,
       recording a value tuple ``V[s]`` and a per-episode visit count
       ``bns_count[s]`` for every visited state.
    2. Agglomerative clustering of the visited states into 4 clusters and
       rendering of the environment with the 10 most visited states of each
       cluster highlighted through ``env.mark``.
    3. One ``AutoMachineSimple`` per cluster is run for 2000 episodes by the
       nested ``runner`` and the logged episode rewards are plotted.

    Args:
        env: environment exposing ``reset``, ``step``, ``decode``,
            ``action_space.n``, ``render``, ``is_done`` and ``s``.
        num_episodes: number of Q-learning episodes of phase 1.
        eps: exploration probability; multiplied by 0.9 at the start of
            every episode.
        alpha: learning rate of the Q-value update.
        gamma: discount factor of the Q-value update.

    Returns:
        ``(to_plot, q_table)`` where ``to_plot`` is the one-element list of
        ``PlotParams`` built in phase 3 and ``q_table`` the learned table.
    """
    # Collects the per-episode rewards of phase 1; rebound to a fresh list
    # before plotting, so these values are not part of the result.
    to_plot = []

    # q_table is keyed by (state, action); missing entries read as 0.
    q_table = defaultdict(lambda: 0)
    # bns_count[s]: number of episodes in which state s was visited.
    bns_count = defaultdict(lambda: 0)
    V = defaultdict(lambda: None)

    for _ in tqdm(range(num_episodes)):
        ep_reward = 0
        eps *= 0.9
        s = env.reset()

        # States already counted in bns_count during this episode.
        bn_added = {}
        while True:
            # Epsilon-greedy action selection.
            if np.random.rand(1) < eps:
                action = np.random.choice(env.action_space.n, size=1)[0]
            else:
                action = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)

            next_s, reward, done, _ = env.step(action)
            # Greedy action for the *current* state s.
            a = arg_max_action(q_dict=q_table, state=s, action_space=env.action_space.n)
            # noinspection PyTypeChecker
            V[s] = (*env.decode(s), q_table[s, a])
            # making +1 to bn_counts once for each episode
            if not bn_added.get(s, False):
                bns_count[s] += 1
                bn_added[s] = True
            # NOTE(review): the bootstrap term uses q_table[next_s, a] where
            # ``a`` is the greedy action of s, not of next_s - confirm this
            # is intended rather than the greedy value of next_s.
            q_table[s, action] = (1 - alpha) * q_table[s, action] + alpha * (reward + gamma * q_table[next_s, a])

            ep_reward += reward
            if done:
                break

            s = next_s
        to_plot.append(ep_reward)
    # presumably gives the tqdm progress bar time to finish its output
    sleep(0.1)

    def get_clusters(V, n_clusters, affinity):
        """Cluster the states of ``V`` and return ``{state: cluster_index}``."""
        states = sorted(V.keys())
        ss = {"state": states}
        # One column per component of the value tuples, named "0", "1", ...
        # noinspection PyTypeChecker
        for i in range(len(V[states[0]])):
            ss[str(i)] = [V[_][i] for _ in states]
        df = pd.DataFrame(ss).set_index("state")
        sc = MinMaxScaler()
        # assumes env.decode yields exactly two values, so that column "2"
        # is the Q-value appended in the learning loop - TODO confirm
        df = df.rename(index=str, columns={"0": "x", "1": "y", "2": 'V'})
        X = df[["x", "y", "V"]]
        X[["V"]] *= 0.5
        # The scaler is fitted on x and y stacked into a single column.
        sc.fit(np.vstack((df[["x"]], df[["y"]])))
        # NOTE(review): X was selected before df is rescaled here - confirm
        # whether the clustering below is meant to see the scaled x/y.
        df[["x", "y"]] = sc.transform(df[["x", "y"]])
        ag = AgglomerativeClustering(n_clusters=n_clusters, affinity=affinity)
        clustered = list(ag.fit_predict(X))
        cluster_state_mapping = {}
        for i in range(len(states)):
            cluster_state_mapping[states[i]] = clustered[i]
        return cluster_state_mapping

    # all_states = V.keys()
    n_clusters = 4
    map_state_to_cluster = get_clusters(V=V, n_clusters=n_clusters, affinity="euclidean")

    def get_bns_in_increasing_order(bns_count):
        """Return the states ordered by visit count, highest count first.

        Despite the name, the sort uses ``reverse=True``.
        """
        state_count_pairs = sorted([(bns_count[_], _) for _ in bns_count], reverse=True)
        return list(map(lambda x: x[1], state_count_pairs, ))

    def get_mapping_for_cluster_to_sorted_bns(sorted_bns, map_state_to_cluster):
        """Group ``sorted_bns`` by cluster, keeping their relative order."""
        res = defaultdict(lambda: list())
        for state in sorted_bns:
            res[map_state_to_cluster[state]].append(state)
        return res

    # bns = bottlenecks
    sorted_bns = get_bns_in_increasing_order(bns_count=bns_count)
    map_cluster_to_sorted_bns = get_mapping_for_cluster_to_sorted_bns(sorted_bns=sorted_bns,
                                                                      map_state_to_cluster=map_state_to_cluster)

    env.mark = {}

    # Mark every clustered state with its cluster index.
    for current_state in map_state_to_cluster:
        env.mark[current_state] = str(map_state_to_cluster[current_state])

    class colors:
        """ANSI escape sequences used to colour the marks."""
        HEADER = '\033[95m'
        OKBLUE = '\033[94m'
        OKGREEN = '\033[92m'
        WARNING = '\033[93m'
        FAIL = '\033[91m'
        ENDC = '\033[0m'
        BOLD = '\033[1m'
        UNDERLINE = '\033[4m'

        COLOR_LIST = [HEADER, OKBLUE, OKGREEN, WARNING, FAIL]

    # draw best bns for clusters
    BNS_FOR_CLUSTER = 10
    for q in map_cluster_to_sorted_bns:
        for j in map_cluster_to_sorted_bns[q][:BNS_FOR_CLUSTER]:
            env.mark[j] = colors.COLOR_LIST[q % len(colors.COLOR_LIST)] + str(q) + colors.ENDC
    env.render()
    env.mark = {}

    def runner(hams, num_episodes, env):
        """Play ``num_episodes`` episodes with the per-cluster machines.

        ``params`` is not an argument: it is read from the enclosing
        function, where it is assigned before this runner is called.
        """
        for i_episode in range(1, num_episodes + 1):
            env.reset()
            while not env.is_done():
                for ham in hams:
                    if env.s in ham.states_in_my_cluster:
                        # First run until one of the cluster's bns states is
                        # reached, then keep running while inside the cluster.
                        while not env.is_done() and env.s not in ham.bns:
                            ham.machine.run(params)
                        while not env.is_done() and env.s in ham.states_in_my_cluster:
                            ham.machine.run(params)

            # {ham} below is resolved through locals(), i.e. it is the loop
            # variable left over from the ``for ham in hams`` loop.
            if i_episode % 10 == 0:
                    print("\r{ham} episode {i_episode}/{num_episodes}.".format(**locals()), end="")
                    sys.stdout.flush()

    class BnsMachine:
        """Bundle of one ``AutoMachineSimple`` with the states of a cluster."""

        def __init__(self, params, cluster_index, list_of_bns, states_in_my_cluster):
            self.machine = AutoMachineSimple(env)
            self.cluster_index = cluster_index
            self.bns = set(list_of_bns)
            self.states_in_my_cluster = states_in_my_cluster
            self.params = params

    # A single params object is shared by all cluster machines.
    params = HAMParamsCommon(env)
    hams = [BnsMachine(params=params, cluster_index=_, list_of_bns=map_cluster_to_sorted_bns[_][:BNS_FOR_CLUSTER], states_in_my_cluster=set(map_cluster_to_sorted_bns[_])) for _ in
            map_cluster_to_sorted_bns]


    runner(hams = hams,
           num_episodes=2000,
           env=env,
           )
    # Rebinds to_plot: the Q-learning rewards collected above are dropped.
    to_plot = list()
    to_plot.append(PlotParams(curve_to_draw=params.logs["ep_rewards"], label="HAM_with_pull_up"))
    plot_multi(to_plot)
    # print(params.logs["ep_rewards"])
    return to_plot, q_table