Exemplo n.º 1
0
 def __init__(self, boundaries, nparams, type, is_dynamic):
     """
     Store the action configuration and build the underlying simulator.

     :param boundaries: Domain boundaries, forwarded to ``Simulator``.
     :param nparams: Dimension of the expected ld_params.
     :param type: Action type; one of 'disc_spline', 'cont_spline',
         'cont_spline*' or 'kernel_traj'.
     :param is_dynamic: whether or not the time dimension has to be included
         in computations. NOTE(review): this constructor does not read it.
     :raises ValueError: if ``type`` is not one of the recognised action types.
     """
     known_types = ('disc_spline', 'cont_spline', 'cont_spline*',
                    'kernel_traj')
     self.nparams = nparams
     self.type = type
     # Default number of samples per trajectory.
     self.sampling_step_nb = 5
     # ValueError is an Exception subclass, so existing ``except Exception``
     # handlers keep working while the failure reason becomes more specific.
     if self.type not in known_types:
         raise ValueError('Unrecognized action provider type {}'.format(
             self.type))
     self.simulator = Simulator(boundaries)
Exemplo n.º 2
0
def qlearning_rewards(alpha=0.1,
                      epsilon=0.1,
                      num_episodes=10001,
                      interval=100):
    """
    Run ``qlearning`` on a freshly constructed ``Simulator``.

    :param alpha: forwarded to ``qlearning`` as ``alpha``.
    :param epsilon: forwarded to ``qlearning`` as ``epsilon``.
    :param num_episodes: forwarded to ``qlearning`` as ``num_episodes``.
    :param interval: forwarded to ``qlearning`` as ``interval``.
    :return: whatever ``qlearning`` returns.
    """
    # A new simulator is built for every call, so no state is shared
    # between invocations through this wrapper.
    simulator = Simulator()
    settings = dict(alpha=alpha,
                    num_episodes=num_episodes,
                    interval=interval,
                    epsilon=epsilon)
    return qlearning(simulator, **settings)
Exemplo n.º 3
0
def sarsa_rewards(k=1,
                  alpha=0.1,
                  epsilon=0.1,
                  num_episodes=10001,
                  interval=100,
                  decay=None):
    """
    Run ``sarsa`` on a freshly constructed ``Simulator``.

    :param k: forwarded to ``sarsa`` as ``k``.
    :param alpha: forwarded to ``sarsa`` as ``alpha``.
    :param epsilon: forwarded to ``sarsa`` as ``initial_epsilon``.
    :param num_episodes: forwarded to ``sarsa`` as ``num_episodes``.
    :param interval: forwarded to ``sarsa`` as ``interval``.
    :param decay: forwarded to ``sarsa`` as ``decay``.
    :return: whatever ``sarsa`` returns.
    """
    # A new simulator is built for every call.
    simulator = Simulator()
    settings = dict(k=k,
                    alpha=alpha,
                    num_episodes=num_episodes,
                    interval=interval,
                    initial_epsilon=epsilon,  # note the keyword rename
                    decay=decay)
    return sarsa(simulator, **settings)
Exemplo n.º 4
0
def tdlambda_rewards(l=0.5,
                     alpha=0.1,
                     epsilon=0.1,
                     num_episodes=10001,
                     interval=1000,
                     decay=None):
    """
    Run ``tdlambda`` on a freshly constructed ``Simulator``.

    :param l: forwarded to ``tdlambda`` as ``l``.
    :param alpha: forwarded to ``tdlambda`` as ``alpha``.
    :param epsilon: forwarded to ``tdlambda`` as ``initial_epsilon``.
    :param num_episodes: forwarded to ``tdlambda`` as ``num_episodes``.
    :param interval: forwarded to ``tdlambda`` as ``interval``.
    :param decay: forwarded to ``tdlambda`` as ``decay``.
    :return: whatever ``tdlambda`` returns.
    """
    # A new simulator is built for every call.
    simulator = Simulator()
    settings = dict(l=l,
                    alpha=alpha,
                    initial_epsilon=epsilon,  # note the keyword rename
                    num_episodes=num_episodes,
                    interval=interval,
                    decay=decay)
    return tdlambda(simulator, **settings)
Exemplo n.º 5
0
    while not done:
        s = state.state_rep()
        action = index_table(PItable, s)
        states.append(s)

        # Take action
        state, reward, done = sim.step(state, action)

    assert (len(states) > 0)

    return states, reward


if __name__ == '__main__':
    # Script entry point: builds the simulator, the run parameters and the
    # policy table, plots the policy, then creates the Q-table.
    # NOTE(review): the visible excerpt stops after the Q-table is created;
    # the code that consumes `sim`, `runs`, `num_episodes`, `firstVisit` and
    # `Qtable` is not shown here.

    # Create the simulator
    sim = Simulator()

    # Running Parameters
    runs = 1000
    num_episodes = 1000  # Number of episodes in each run
    PItable = create_pi_table()
    plot = 'plots/MC'  # Directory where to save plots
    firstVisit = True  # presumably selects first-visit Monte-Carlo -- TODO confirm

    # Visualizing the policy (passes `plot` as the `path` argument)
    plot_PItable(PItable,
                 title='Monte-Carlo policy',
                 path=plot,
                 name='MC Policy')

    Qtable = create_q_table()
Exemplo n.º 6
0
class ActionProvider:
    """
    Interfaces between the behaviors algorithms and the several action types.
    Actions can be discrete splines (disc_spline), continuous splines (cont_spline or cont_spline* to use discrete
    spline initialization), or Kernel trajectories (kernel_traj).
    """
    # Action types whose sampling is delegated to the simulator's spline
    # routines.
    _SPLINE_TYPES = ('disc_spline', 'cont_spline', 'cont_spline*')
    # Every action type accepted by the constructor.
    _KNOWN_TYPES = _SPLINE_TYPES + ('kernel_traj',)

    def __init__(self, boundaries, nparams, type, is_dynamic):
        """
        Store the action configuration and build the underlying simulator.

        :param boundaries: Domain boundaries, forwarded to ``Simulator``.
        :param nparams: Dimension of the expected ld_params.
        :param type: Action type; one of 'disc_spline', 'cont_spline',
            'cont_spline*' or 'kernel_traj'.
        :param is_dynamic: whether or not the time dimension has to be
            included in computations. NOTE(review): this constructor does
            not read it.
        :raises ValueError: if ``type`` is not a recognised action type.
        """
        self.nparams = nparams
        self.type = type
        # Default number of samples used by sample_trajectory.
        self.sampling_step_nb = 5
        # ValueError is an Exception subclass, so existing
        # ``except Exception`` handlers keep working.
        if self.type not in self._KNOWN_TYPES:
            raise ValueError('Unrecognized action provider type {}'.format(
                self.type))
        self.simulator = Simulator(boundaries)

    def get_low_dim(self, action):
        """
        Returns a low dimension representation of the action.
        :param action: the action to get a low dimension representation from.
        :return: the action's ``ld_params`` attribute.
        """
        return action.ld_params

    def get_boundary_discount(self, p):
        """
        Retrieves the UCB exploration discount for being near the domain boundaries.
        :param p: an iterable of Poses; each element is passed to the
            simulator's ``get_boundary_discount``. NOTE(review): a single
            Pose is only accepted if it is itself iterable.
        :return: a list with one discount factor per element of ``p``.
        """
        # Build a real list: ``map`` is a lazy, one-shot iterator on
        # Python 3, which callers expecting a list cannot index or re-read.
        return [self.simulator.get_boundary_discount(pose) for pose in p]

    def sample_trajectory(self, action, sampling_step_nb=0):
        """
        Samples the given trajectory at multiple times.
        :param action: action to sample.
        :param sampling_step_nb: Number of samples in trajectory, 0 means using default class value. (default 0)
        :return: for spline types, the simulator's
            ``sample_array_trajectory`` result; for 'kernel_traj', a list of
            the action's samples each passed through the simulator's
            ``restrict_to_bounds``.
        """
        if sampling_step_nb == 0:
            sampling_step_nb = self.sampling_step_nb
        if self.type in self._SPLINE_TYPES:
            return self.simulator.sample_array_trajectory(
                action, sampling_step_nb)
        elif self.type == 'kernel_traj':
            # List comprehension instead of ``map`` so the documented list
            # is returned on Python 3 as well.
            return [self.simulator.restrict_to_bounds(pose)
                    for pose in action.get_samp_traj(sampling_step_nb)]

    def sample_trajectory_at(self, action, time):
        """
        Samples the given trajectory at a specific time.
        :param action: action to sample.
        :param time: time at which to sample.
        :return: for spline types, the simulator's ``sample_trajectory_at``
            result; for 'kernel_traj', the action's sample at ``time``
            passed through the simulator's ``restrict_to_bounds``.
        """
        if self.type in self._SPLINE_TYPES:
            return self.simulator.sample_trajectory_at(action, time)
        elif self.type == 'kernel_traj':
            return self.simulator.restrict_to_bounds(action.samp_traj_at(time))

    def build_action_from_params(self, ld_params, pose):
        """
        Constructs an action from low dimension parameters.
        :param ld_params: a list of low dimensions parameters; not used for
            the 'disc_spline' type.
        :param pose: a starting Pose.
        :return: the result of ``DiscreteSplines2D.get_splines``,
            ``Splines2D.gen_spline_from_lowdim`` or ``KernelTraj``,
            depending on the action type.
        """
        if self.type == 'disc_spline':
            return DiscreteSplines2D.get_splines(pose)
        elif self.type == 'cont_spline' or self.type == 'cont_spline*':
            return Splines2D.gen_spline_from_lowdim(ld_params, pose)
        elif self.type == 'kernel_traj':
            return KernelTraj(ld_params, pose)