예제 #1
0
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim=1):
        self.type = type
        self.action_dim = action_dim
        self.observation_space = spaces.Box(
            low=np.array([0, -np.pi / 2, 0, -4, -0.2]),
            high=np.array([150, np.pi / 2, 5.0, 4.0, 0.2]))
        self.init_space = spaces.Box(
            low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]),
            high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        self.ship_data = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None

    def step(self, action):
        side = np.sign(self.last_pos[1])
        angle_action = action[0] * side
        rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action,
                                          rot_level=rot_action)
        # convert simulator states into obervable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generated the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side * state[2]  # radians
        vx = state[3]  # m/s
        vy = side * state[4]  # m/s
        thetadot = side * state[5]  # graus/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1] * 180 / np.pi, obs[2], obs[
            3], obs[4] * 180 / np.pi
        if not self.observation_space.contains(obs):
            print(
                "\n Action: %f,  State[%f %f %f], Velocidade [%f , %f] , Theta: %f, Distance: %f thetadot: %f \n"
                % (self.last_action[0], self.last_pos[0], self.last_pos[1],
                   self.last_pos[2], vx, vy, theta, d, thetadot))
            return -1000
        else:
            return 1 - 8 * np.abs(theta / 90) - np.abs(
                thetadot / 20) - 5 * np.abs(d) / 150 - np.abs(
                    vy / 4) - np.abs(vx - 2) / 2

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[
                0] or state_prime[0] > self.max_x_episode[
                    0] or 160 < state_prime[1] or state_prime[1] < -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        init_states = np.array([
            self.start_pos[0], init[0], init[1], init[2] * np.cos(init[1]),
            init[2] * np.sin(init[1]), 0
        ])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0], init[1]])
        print('Reseting position')
        state = self.simulator.get_state()
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)

        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] //
                                                          self.point_b[0])
        if self.last_pos[0] // self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1],
                                      self.last_pos[2],
                                      20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1],
                                      self.last_pos[2],
                                      20 * self.last_action[0])

    def close(self, ):
        self.viewer.freeze_scream()
예제 #2
0
class ShipEnv(Env):
    def __init__(self, type='continuous', action_dim = 2):
        self.type = type
        self.action_dim = action_dim
        assert type == 'continuous' or type == 'discrete', 'type must be continuous or discrete'
        assert action_dim > 0 and action_dim <=2, 'action_dim must be 1 or 2'
        if type == 'continuous':
            self.action_space = spaces.Box(low=np.array([-1.0, 0]), high=np.array([1.0, 0.2]))
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.2]), high=np.array([150, np.pi / 2, 4.0, 4.0, 0.2]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]), high=np.array([30, np.pi / 15, 1.5, 0.3, 0.01]))
        elif type == 'discrete':
            if action_dim == 2:
                self.action_space = spaces.Discrete(63)
            else:
                self.action_space = spaces.Discrete(21)
            self.observation_space = spaces.Box(low=np.array([0, -np.pi / 2, 0, -4, -0.4]), high=np.array([150, np.pi / 2, 4.0, 4.0, 0.4]))
            self.init_space = spaces.Box(low=np.array([0, -np.pi / 15, 1.0, 0.2, -0.01]), high=np.array([30, np.pi / 15, 2.0, 0.3, 0.01]))
        self.ship_data = None
        self.name_experiment = None
        self.last_pos = np.zeros(3)
        self.last_action = np.zeros(self.action_dim)
        self.simulator = Simulator()
        self.point_a = (0.0, 0.0)
        self.point_b = (2000, 0.0)
        self.max_x_episode = (5000, 0)
        self.guideline = LineString([self.point_a, self.max_x_episode])
        self.start_pos = np.zeros(1)
        self.number_loop = 0  # loops in the screen -> used to plot
        self.borders = [[0, 150], [2000, 150], [2000, -150], [0, -150]]
        self.viewer = None
        self.test_performance = False
        self.init_test_performance = np.linspace(0, np.pi / 15, 10)
        self.counter = 0

    def step(self, action):
        # According to the action stace a different kind of action is selected
        if self.type == 'continuous' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = action[0]*side
            rot_action = (action[1]+1)/10
        elif self.type == 'continuous' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = action[0]*side
            rot_action = 0.2
        elif self.type == 'discrete' and self.action_dim == 2:
            side = np.sign(self.last_pos[1])
            angle_action = (action // 3 - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.1 * (action % 3)
        elif self.type == 'discrete' and self.action_dim == 1:
            side = np.sign(self.last_pos[1])
            angle_action = (action - 10) / 10
            angle_action = angle_action * side
            rot_action = 0.2
        state_prime = self.simulator.step(angle_level=angle_action, rot_level=rot_action)
        # convert simulator states into obervable states
        obs = self.convert_state(state_prime)
        # print('Observed state: ', obs)
        dn = self.end(state_prime=state_prime, obs=obs)
        rew = self.calculate_reward(obs=obs)
        self.last_pos = [state_prime[0], state_prime[1], state_prime[2]]
        self.last_action = np.array([angle_action, rot_action])
        if self.ship_data is not None:
            self.ship_data.new_transition(state_prime, obs, self.last_action, rew)
        info = dict()
        return obs, rew, dn, info

    def convert_state(self, state):
        """
        This method generated the features used to build the reward function
        :param state: Global state of the ship
        """
        ship_point = Point((state[0], state[1]))
        side = np.sign(state[1] - self.point_a[1])
        d = ship_point.distance(self.guideline)  # meters
        theta = side*state[2]  # radians
        vx = state[3]  # m/s
        vy = side*state[4]  # m/s
        thetadot = side * state[5]  # graus/min
        obs = np.array([d, theta, vx, vy, thetadot])
        return obs

    def calculate_reward(self, obs):
        d, theta, vx, vy, thetadot = obs[0], obs[1]*180/np.pi, obs[2], obs[3], obs[4]*180/np.pi
        if self.last_pos[0] > 5000:
            print("\n Got there")
        if not self.observation_space.contains(obs):
            return -1000
        else:
            return (4*(vx-1.5) + 5*(1-d/20) + 2*(1-vy**2/10) + 5*(1-np.abs(theta/30)) + 3*(1 - np.abs(thetadot)/12)) / 24

    def end(self, state_prime, obs):
        if not self.observation_space.contains(obs) or -1 > state_prime[0] or state_prime[0] > self.max_x_episode[0] or 160 < state_prime[1] or state_prime[1]< -160:
            if not self.observation_space.contains(obs):
                print("\n Smashed")
            if self.viewer is not None:
                self.viewer.end_episode()
            if self.ship_data is not None:
                if self.ship_data.iterations > 0:
                    self.ship_data.save_experiment(self.name_experiment)
            return True
        else:
            return False

    def set_init_space(self, low, high):
        self.init_space = spaces.Box(low=np.array(low), high=np.array(high))

    def reset(self):
        init = list(map(float, self.init_space.sample()))
        if self.test_performance:
            angle = self.init_test_performance[self.counter]
            v_lon = 1.5
            init_states = np.array([self.start_pos[0], 30, angle, v_lon * np.cos(angle), v_lon * np.sin(angle), 0])
            self.counter += 1
            init[0] = 30
            init[1] = angle
        else:
            init_states = np.array([self.start_pos[0], init[0], init[1], init[2] * np.cos(init[1]), init[2] * np.sin(init[1]), 0])
        self.simulator.reset_start_pos(init_states)
        self.last_pos = np.array([self.start_pos[0], init[0],  init[1]])
        print('Reseting position')
        state = self.simulator.get_state()
        if self.ship_data is not None:
            if self.ship_data.iterations > 0:
                self.ship_data.save_experiment(self.name_experiment)
            self.ship_data.new_iter(state, self.convert_state(state), np.zeros(len(self.last_action)), np.array([0]))
        if self.viewer is not None:
            self.viewer.end_episode()
        return self.convert_state(state)

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = Viewer()
            self.viewer.plot_boundary(self.borders)
            self.viewer.plot_guidance_line(self.point_a, self.point_b)

        img_x_pos = self.last_pos[0] - self.point_b[0] * (self.last_pos[0] // self.point_b[0])
        if self.last_pos[0]//self.point_b[0] > self.number_loop:
            self.viewer.end_episode()
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])
            self.viewer.restart_plot()
            self.number_loop += 1
        else:
            self.viewer.plot_position(img_x_pos, self.last_pos[1], self.last_pos[2], 20 * self.last_action[0])

    def close(self, ):
        self.viewer.freeze_scream()

    def set_save_experice(self, name='experiment_ssn_ddpg_10iter'):
        assert type(name) == type(""), 'name must be a string'
        self.ship_data = ShipExperiment()
        self.name_experiment = name

    def set_test_performace(self):
        self.test_performance = True