Python DISPLAYER.displayVI примеры использования

Язык программирования: Python

Пространство имен/Пакет: Displayer

Класс/Тип: DISPLAYER

Метод/Функция: displayVI

Примеров на hotexamples.com: 3

Python DISPLAYER.displayVI — найдено 3 примера. Это лучшие примеры кода на Python для Displayer.DISPLAYER.displayVI, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

add_reward(10)

displayVI(3)

disp(2)

dispR(1)

disp_all(1)

disp_one(1)

disp_q(1)

disp_seq(1)

reset(1)

Пример #1

Показать файл

Файл: Agent.py Проект: guillemaru/IBOAT_RL

    def playActor(self):
        """Replay the saved actor on three fixed initial headings and plot each run.

        Loads the parameters stored under ``NetworkParam/FinalParam``, then, for
        each initial heading in ``[0, 7, 12]``, resets the environment, rolls the
        actor network out for 40 steps and passes the per-step rewards and
        incidences to ``DISPLAYER.displayVI``. The episode total is printed
        after every run.

        A ``KeyboardInterrupt`` stops the demo silently; any other exception is
        printed instead of being raised. "End of the demo" is printed in every
        case.
        """
        self.load("NetworkParam/FinalParam")

        start_headings = [0, 7, 12]
        max_steps = 40

        # Wind conditions.
        # NOTE(review): TORAD is presumably a degrees-to-radians factor - confirm.
        wind_mean = 45 * TORAD
        wind_std = 0.1 * TORAD
        wind_source = wind(mean=wind_mean, std=wind_std, samples=10)

        try:
            for run_index, start_heading in enumerate(start_headings):
                # Initial state
                wind_sample = wind_source.generateWind()
                state = self.env.reset(start_heading * TORAD * np.ones(10), wind_sample)

                total_reward = 0
                rewards = []
                incidences = []

                for step in range(max_steps):
                    # The first sample is built from the start heading and the
                    # wind; later ones are read from the last entry of the
                    # state's first row.
                    if step == 0:
                        incidences.append(start_heading + wind_sample[0] / TORAD - 40)
                    else:
                        incidences.append(state[0][-1] / TORAD)

                    # Stack the two state rows into the column the network is fed.
                    net_input = np.reshape([state[0, :], state[1, :]],
                                           [self.state_size, 1])
                    (raw_action,) = self.sess.run(
                        self.network.actions,
                        feed_dict={self.network.state_ph: net_input[None]})
                    action = np.clip(raw_action, self.low_bound, self.high_bound)

                    state, reward = self.env.act(action, wind_sample)
                    total_reward += reward
                    rewards.append(reward)

                DISPLAYER.displayVI(rewards, incidences, run_index)
                print("Episode reward :", total_reward," for incidence: ",start_heading)

        except KeyboardInterrupt:
            pass

        except Exception as e:
            print("Exception :", e)

        finally:
            print("End of the demo")

Пример #2

Показать файл

    def play(self, sess, number_run, path=''):
        """Run ``number_run`` demonstration episodes, sampling actions from the policy.

        Each episode resets the environment on one of the initial headings in
        ``[0, 7, 13]`` (cycled when ``number_run`` exceeds three), then runs up
        to 70 steps. At every step the policy network is evaluated and an
        action is drawn from ``[1.5, 0, -1.5]`` with the policy output as
        probabilities. Per-step rewards and incidences are passed to
        ``DISPLAYER.displayVI`` and the episode total is printed.

        Args:
            sess: Session used to run the network; it must provide ``run``,
                ``as_default`` and ``graph``.
            number_run: Number of episodes to play.
            path: Unused; kept for interface compatibility.

        A ``KeyboardInterrupt`` stops the demo silently. "End of the demo" is
        printed in every case.
        """
        print("Playing", self.name, "for", number_run, "runs")

        with sess.as_default(), sess.graph.as_default():
            hdg0_rand_vec = [0, 7, 13]

            # Wind conditions.
            # NOTE(review): TORAD is presumably a degrees-to-radians factor - confirm.
            mean = 45 * TORAD
            std = 0 * TORAD
            wind_samples = 10
            w = wind(mean=mean, std=std, samples=wind_samples)

            try:
                for i in range(number_run):

                    # Reset the local network to the global
                    if self.name != 'global':
                        sess.run(self.update_local_vars)

                    WH = w.generateWind()
                    # Cycle through the initial headings so that asking for
                    # more runs than there are headings does not raise
                    # IndexError.
                    hdg0_rand = hdg0_rand_vec[i % len(hdg0_rand_vec)]
                    hdg0 = hdg0_rand * TORAD * np.ones(10)
                    s = self.env.reset(hdg0, WH)
                    episode_reward = 0
                    episode_step = 0
                    v_episode = []
                    i_episode = []
                    # NOTE(review): `done` is never set to True below, so every
                    # episode runs the full 70 steps - confirm this is intended.
                    done = False

                    while not done and episode_step < 70:
                        i_episode.append(round(s[0][-1] / TORAD))
                        s = np.reshape([s[0, :], s[1, :]],
                                       [2 * self.state_size, 1])
                        # Prediction of the policy (the value output is fetched
                        # but not used here).
                        feed_dict = {self.network.inputs: [s]}
                        policy, _ = sess.run(
                            [self.network.policy, self.network.value],
                            feed_dict=feed_dict)

                        policy = policy[0]

                        # Choose an action according to the policy
                        action = np.random.choice([1.5, 0, -1.5], p=policy)
                        s_, r = self.env.act(action, WH)

                        # After a warm-up of 12 steps, report when the mean of
                        # the last four recorded rewards exceeds 0.8. This only
                        # prints; it does not end the episode.
                        if episode_step > 12 and np.mean(v_episode[-4:]) > 0.8:
                            print("Done!")

                        episode_reward += r
                        v_episode.append(r)
                        episode_step += 1
                        s = s_

                    # NOTE(review): with more than three runs the third argument
                    # exceeds 2 - confirm displayVI accepts that.
                    DISPLAYER.displayVI(v_episode, i_episode, i)

                    print("Episode reward :", episode_reward)

            except KeyboardInterrupt:
                # Let the user stop the demo with Ctrl-C.
                pass

            finally:
                print("End of the demo")

Пример #3

Показать файл

Файл: Agent.py Проект: guillemaru/IBOAT_RL

    def playCritic(self):
        """Replay three episodes, acting greedily on the critic's scores.

        Loads the parameters stored under ``NetworkParam/FinalParam``, then, for
        each initial heading in ``[0, 7, 12]``, resets the environment and runs
        30 steps. At every step ``self.evaluate`` is called once per candidate
        action (13 values from -1.5 to 1.5 in steps of 0.25) and the
        highest-scoring candidate is applied via ``self.env.act``. Per-step
        rewards and incidences are passed to ``DISPLAYER.displayVI`` and the
        episode total is printed.

        A ``KeyboardInterrupt`` stops the demo silently; any other exception is
        printed instead of being raised. "End of the demo" is printed in every
        case.
        """
        self.load("NetworkParam/FinalParam")

        hdg0_rand_vec = [0, 7, 12]

        # One shared list of candidate actions drives both the scoring and the
        # index-to-action lookup, so the two cannot drift apart.
        candidate_actions = (-1.5, -1.25, -1, -0.75, -0.5, -0.25, 0,
                             0.25, 0.5, 0.75, 1, 1.25, 1.5)

        # Wind conditions.
        # NOTE(review): TORAD is presumably a degrees-to-radians factor - confirm.
        mean = 45 * TORAD
        std = 0.1 * TORAD
        wind_samples = 10
        w = wind(mean=mean, std=std, samples=wind_samples)

        try:
            for i, hdg0_rand in enumerate(hdg0_rand_vec):
                # Initial state
                WH = w.generateWind()
                hdg0 = hdg0_rand * TORAD * np.ones(10)
                s = self.env.reset(hdg0, WH)

                episode_reward = 0
                episode_step = 0
                v_episode = []
                i_episode = []
                while episode_step < 30:
                    # The first sample is built from the start heading and the
                    # wind; later ones are read from the last entry of the
                    # state's first row.
                    if episode_step == 0:
                        i_episode.append(hdg0_rand + WH[0] / TORAD - 40)
                    else:
                        i_episode.append(s[0][-1] / TORAD)

                    # Critic policy: score every candidate and take the best.
                    # Assumes self.evaluate returns a single score per call, so
                    # the argmax is a valid index - TODO confirm.
                    critic = [self.evaluate(s, action)
                              for action in candidate_actions]
                    a = candidate_actions[int(np.argmax(critic))]

                    s_, r = self.env.act(a, WH)
                    episode_reward += r
                    v_episode.append(r)
                    episode_step += 1
                    s = s_

                # The index is offset by 3 when passed to displayVI.
                # NOTE(review): presumably to avoid reusing indices 0-2 - confirm.
                DISPLAYER.displayVI(v_episode, i_episode, i + 3)
                print("Episode reward :", episode_reward," for incidence: ",hdg0_rand)

        except KeyboardInterrupt:
            # Let the user stop the demo with Ctrl-C.
            pass

        except Exception as e:
            print("Exception :", e)

        finally:
            print("End of the demo")