예제 #1
0
    def __init__(self, metadata):
        """Create the window and the MDP, then hook every button to its handler.

        Args:
            metadata: Passed unchanged as the ``metadata`` keyword to both
                ``GridWorldWindow`` and ``GridMDP``.
        """
        window = GridWorldWindow(metadata=metadata)
        self.gridworldwindow = window
        self.mdp = GridMDP(metadata=metadata)

        # Each entry names both a ``btn_<action>`` attribute on the window and
        # the ``_<action>`` handler on this object; bound in listed order.
        solver_actions = (
            'value_iteration_1_step',
            'value_iteration_100_steps',
            'value_iteration_slow',
            'policy_iteration_1_step',
            'policy_iteration_100_steps',
            'policy_iteration_slow',
        )
        for action in solver_actions:
            button = getattr(window, 'btn_' + action)
            button.configure(command=getattr(self, '_' + action))

        # The reset button's handler does not follow the naming scheme above.
        window.btn_reset.configure(command=self._reset_grid)
예제 #2
0
class ViewController(object):
    """Connects the grid-world window's buttons to the MDP solver routines.

    The controller owns a ``GridWorldWindow`` (the view) and a ``GridMDP``
    (the model). Each button handler runs a solver step, pushes the resulting
    values and policy to the window, and stores them back on the MDP.
    """

    # Upper bound on iterations for the animated ("slow") runs.
    MAX_SLOW_STEPS = 100
    # Seconds to pause between iterations of an animated run.
    SLOW_STEP_DELAY = 0.25

    def __init__(self, metadata):
        """Build the view and the model and bind the buttons.

        Args:
            metadata: Passed unchanged as the ``metadata`` keyword to both
                ``GridWorldWindow`` and ``GridMDP``.
        """
        self.gridworld = GridWorldWindow(metadata=metadata)
        self.mdp = GridMDP(metadata=metadata)

        # bind buttons
        self.gridworld.btn_value_iteration_1_step.configure(
            command=self._value_iteration_1_step)
        self.gridworld.btn_value_iteration_100_steps.configure(
            command=self._value_iteration_100_steps)
        self.gridworld.btn_value_iteration_slow.configure(
            command=self._value_iteration_slow)
        self.gridworld.btn_policy_iteration_1_step.configure(
            command=self._policy_iteration_1_step)
        self.gridworld.btn_policy_iteration_100_steps.configure(
            command=self._policy_iteration_100_steps)
        self.gridworld.btn_policy_iteration_slow.configure(
            command=self._policy_iteration_slow)

        self.gridworld.btn_reset.configure(command=self._reset_grid)

    # ------------------------------------------------------------------
    # shared helpers
    # ------------------------------------------------------------------

    def _publish(self, values, policy):
        """Show ``values``/``policy`` in the window and store them on the MDP."""
        self.gridworld.update_grid(values, policy)
        self.mdp.update_values(values)
        self.mdp.update_policy(policy)

    def _pause_for_redraw(self):
        """Refresh the window, wait ``SLOW_STEP_DELAY`` seconds, refresh again."""
        self.gridworld.window.update()
        time.sleep(self.SLOW_STEP_DELAY)
        self.gridworld.window.update()

    def _run_value_iteration(self, num_iter):
        """Run ``num_iter`` value-iteration sweeps and publish the result.

        Returns:
            The values returned by ``value_iteration``.
        """
        values = value_iteration(self.mdp.values, self.mdp, num_iter=num_iter)
        policy = policy_extraction(values, self.mdp)
        self._publish(values, policy)
        return values

    def _run_policy_iteration(self, num_iter):
        """Run ``num_iter`` policy-iteration rounds and publish the result.

        Every policy handler goes through this one call form so they all
        agree on the ``policy_iteration`` signature.
        NOTE(review): assumes ``policy_iteration(policy, mdp, num_iter=...)``
        returns ``(policy, values)``, as ``_policy_iteration_1_step`` always
        did -- confirm against the solver module.

        Returns:
            The policy returned by ``policy_iteration``.
        """
        policy, values = policy_iteration(self.mdp.policy,
                                          self.mdp,
                                          num_iter=num_iter)
        self._publish(values, policy)
        return policy

    # ------------------------------------------------------------------
    # button handlers
    # ------------------------------------------------------------------

    def _value_iteration_1_step(self):
        """Handler: advance value iteration by a single sweep."""
        self._run_value_iteration(1)

    def _value_iteration_100_steps(self):
        """Handler: advance value iteration by 100 sweeps at once."""
        self._run_value_iteration(100)

    def _value_iteration_slow(self):
        """Handler: animate value iteration one sweep at a time.

        Stops as soon as ``values_converged`` reports convergence between two
        consecutive sweeps, or after ``MAX_SLOW_STEPS`` sweeps, and then shows
        a dialog stating which of the two happened.
        """
        previous = dict(self.mdp.values)
        for step in range(1, self.MAX_SLOW_STEPS + 1):
            values = self._run_value_iteration(1)
            self._pause_for_redraw()

            # Snapshot so the next comparison is unaffected by later updates.
            current = dict(values)
            if values_converged(current, previous):
                message = (
                    'Value Iteration has converged in {} steps!'.format(step))
                break
            previous = current
        else:
            # Loop ran out without a break: do not claim convergence.
            message = 'Value Iteration did not converge within {} steps.'.format(
                self.MAX_SLOW_STEPS)
        self.gridworld.show_dialog(message)

    def _policy_iteration_1_step(self):
        """Handler: advance policy iteration by a single round."""
        self._run_policy_iteration(1)

    def _policy_iteration_100_steps(self):
        """Handler: advance policy iteration by 100 rounds at once."""
        self._run_policy_iteration(100)

    def _policy_iteration_slow(self):
        """Handler: animate policy iteration one round at a time.

        Stops as soon as ``policy_converged`` reports that two consecutive
        policies agree, or after ``MAX_SLOW_STEPS`` rounds, and then shows a
        dialog stating which of the two happened.
        """
        previous = dict(self.mdp.policy)
        for step in range(1, self.MAX_SLOW_STEPS + 1):
            policy = self._run_policy_iteration(1)
            self._pause_for_redraw()

            # Snapshot so the next comparison is unaffected by later updates.
            current = dict(policy)
            if policy_converged(current, previous):
                message = (
                    'Policy Iteration has converged in {} steps!'.format(step))
                break
            previous = current
        else:
            # Loop ran out without a break: do not claim convergence.
            message = (
                'Policy Iteration did not converge within {} steps.'.format(
                    self.MAX_SLOW_STEPS))
        self.gridworld.show_dialog(message)

    def _reset_grid(self):
        """Handler: clear the MDP state and then the window."""
        self.mdp.clear()
        self.gridworld.clear()

    def run(self):
        """Hand control to the window's ``run`` method (the main UI loop)."""
        self.gridworld.run()