Exemplo n.º 1
0
    def playActor(self):
        self.load("NetworkParam/FinalParam")

        hdg0_rand_vec=[0,7,12]
        '''
        WIND CONDITIONS
        '''
        mean = 45 * TORAD
        std = 0.1 * TORAD
        wind_samples = 10
        w = wind(mean=mean, std=std, samples = wind_samples)

        try:
            for i in range(len(hdg0_rand_vec)):
                # Initial state
                WH = w.generateWind()
                hdg0_rand = hdg0_rand_vec[i]
                hdg0 = hdg0_rand * TORAD * np.ones(10)
                s = self.env.reset(hdg0,WH)
                episode_reward = 0
                episode_step=0
                v_episode=[]
                i_episode=[]
                while episode_step < 40: #not done:
                    if episode_step==0:
                        i_episode.append(hdg0_rand+WH[0]/TORAD-40)
                    else:
                        i_episode.append(s[0][-1]/TORAD)
                    s = np.reshape([s[0,:], s[1,:]], [self.state_size,1])

                    a, = self.sess.run(self.network.actions,
                                       feed_dict={self.network.state_ph: s[None]})
                    a = np.clip(a, self.low_bound, self.high_bound)
                    s_, r   = self.env.act(a,WH)
                    episode_reward += r
                    v_episode.append(r)
                    episode_step += 1
                    s = s_
                DISPLAYER.displayVI(v_episode,i_episode,i)
                print("Episode reward :", episode_reward," for incidence: ",hdg0_rand)

        except KeyboardInterrupt as e:
            pass

        except Exception as e:
            print("Exception :", e)

        finally:
            print("End of the demo")
Exemplo n.º 2
0
    def play(self, sess, number_run, path=''):
        print("Playing", self.name, "for", number_run, "runs")

        with sess.as_default(), sess.graph.as_default():
            hdg0_rand_vec = [0, 7, 13]
            '''
            WIND CONDITIONS
            '''
            mean = 45 * TORAD
            std = 0 * TORAD
            wind_samples = 10
            w = wind(mean=mean, std=std, samples=wind_samples)

            try:
                for i in range(number_run):

                    # Reset the local network to the global
                    if self.name != 'global':
                        sess.run(self.update_local_vars)

                    WH = w.generateWind()
                    hdg0_rand = hdg0_rand_vec[i]
                    hdg0 = hdg0_rand * TORAD * np.ones(10)
                    s = self.env.reset(hdg0, WH)
                    episode_reward = 0
                    episode_step = 0
                    v_episode = []
                    i_episode = []
                    done = False

                    #self.lstm_state = self.network.lstm_state_init

                    while (not done and episode_step < 70):
                        i_episode.append(round(s[0][-1] / TORAD))
                        s = np.reshape([s[0, :], s[1, :]],
                                       [2 * self.state_size, 1])
                        # Prediction of the policy
                        feed_dict = {self.network.inputs: [s]}
                        policy, value = sess.run(
                            [self.network.policy, self.network.value],
                            feed_dict=feed_dict)

                        policy = policy[0]

                        # Choose an action according to the policy
                        action = np.random.choice([1.5, 0, -1.5], p=policy)
                        s_, r = self.env.act(action, WH)
                        if episode_step > 12:
                            if np.mean(v_episode[-4:]) > 0.8:
                                #done=True
                                print("Done!")
                            else:
                                done = False
                        episode_reward += r
                        v_episode.append(r)
                        episode_step += 1
                        s = s_
                    DISPLAYER.displayVI(v_episode, i_episode, i)

                    print("Episode reward :", episode_reward)

            except KeyboardInterrupt as e:
                pass

            finally:
                print("End of the demo")
Exemplo n.º 3
0
    def playCritic(self):
        self.load("NetworkParam/FinalParam")

        hdg0_rand_vec=[0,7,12]
        '''
        WIND CONDITIONS
        '''
        mean = 45 * TORAD
        std = 0.1 * TORAD
        wind_samples = 10
        w = wind(mean=mean, std=std, samples = wind_samples)

        try:
            for i in range(len(hdg0_rand_vec)):
                # Initial state
                WH = w.generateWind()
                hdg0_rand = hdg0_rand_vec[i]
                hdg0 = hdg0_rand * TORAD * np.ones(10)
                s = self.env.reset(hdg0,WH)
                
                episode_reward = 0
                episode_step=0
                v_episode=[]
                i_episode=[]
                while episode_step < 30: #not done:
                    if episode_step==0:
                        i_episode.append(hdg0_rand+WH[0]/TORAD-40)
                    else:
                        i_episode.append(s[0][-1]/TORAD)
                    
                    # Critic policy
                    critic = [self.evaluate(s, -1.5),self.evaluate(s, -1.25),self.evaluate(s, -1),
                        self.evaluate(s, -0.75),self.evaluate(s, -0.5),self.evaluate(s, -0.25),self.evaluate(s, 0),self.evaluate(s, 0.25),
                            self.evaluate(s, 0.5),self.evaluate(s, 0.75),self.evaluate(s, 1),self.evaluate(s, 1.25),
                            self.evaluate(s, 1.5)]
                    a = np.argmax(critic)
                    if a == 0:
                        a = -1.5
                    if a == 1:
                        a = -1.25
                    if a == 2:
                        a = -1
                    if a == 3:
                        a = -0.75
                    if a == 4:
                        a = -0.5
                    if a == 5:
                        a = -0.25
                    if a == 6:
                        a = 0
                    if a == 7:
                        a = 0.25
                    if a == 8:
                        a = 0.5
                    if a == 9:
                        a = 0.75
                    if a == 10:
                        a = 1
                    if a == 11:
                        a = 1.25
                    if a == 12:
                        a = 1.5

                    s_, r   = self.env.act(a,WH)
                    episode_reward += r
                    v_episode.append(r)
                    episode_step += 1
                    s = s_
                DISPLAYER.displayVI(v_episode,i_episode,i+3)
                print("Episode reward :", episode_reward," for incidence: ",hdg0_rand)

        except KeyboardInterrupt as e:
            pass

        except Exception as e:
            print("Exception :", e)

        finally:
            print("End of the demo")