Example #1
    def __init__(self, sess, gui, displayer, saver):
        """
        Build a new instance of Environment, QNetwork and ExperienceBuffer.

        Args:
            sess     : the tensorflow session in which to build the network
            gui      : a GUI instance to manage the control of the agent
            displayer: a Displayer instance to keep track of the episode rewards
            saver    : a Saver instance to periodically save the network
        """
        print("Initializing the agent...")

        self.sess = sess
        self.gui = gui
        self.displayer = displayer
        self.saver = saver

        self.env = Environment()
        self.network = Network(sess)
        self.buffer = ExperienceBuffer()

        self.best_run = -1e10
        self.n_gif = 0

        print("Agent initialized !")
Example #2
    def __init__(self, game, headless=True):
        if headless:
            logging.info("Running in headless mode")

        self.actions = [
            self.build_ship, self.move_south, self.move_west, self.move_north,
            self.move_east, self.stay_still
        ]

        self.network = Network(headless, len(self.actions))
        self.observer = Observer(game, headless)

        self.reward_map = {
            self.build_ship: [0, -10],
            self.move_south: [0, -10],
            self.move_west: [0, -10],
            self.move_north: [0, -10],
            self.move_east: [0, -10],
            self.stay_still: [1, -10]
        }

        self.network.draw()

        self.observer.new_observation()
        self.observer.draw(game)
        self.state = self.observer.show()
        self.command_queue = []
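One plausible reading of the two-element lists in reward_map is a [success, failure] reward pair per action; the helper below is a hypothetical sketch of that convention, not code from the example:

    def reward_for(self, action, succeeded):
        # Assumed convention: index 0 = reward when the action succeeds, index 1 = penalty otherwise.
        success_reward, failure_penalty = self.reward_map[action]
        return success_reward if succeeded else failure_penalty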
Example #3
    def __init__(self, dispatch):
        self.dispatch = dispatch
        self.reset()
        self.solver = Network(OBSERVATION_SPACE, len(self.actions) - 1)
        self.state_last = agent.AgentState()
        self.state_last_last = agent.AgentState()
        self.init()
        self.solver.load_model()
        self.lastRenderingTime = time.perf_counter()
        self.episodes = 0
        self.action_memory = []
        self.state_memory = []
        self.reward_memory = []
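The three *_memory lists point to per-step trajectory storage; a minimal sketch of how a step might be recorded (the store_transition name is hypothetical):

    def store_transition(self, state, action, reward):
        # Keep one entry per time step so the whole episode can be replayed for a later update.
        self.state_memory.append(state)
        self.action_memory.append(action)
        self.reward_memory.append(reward)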
Example #4
    def __init__(self, sess):
        print("Initializing the agent...")

        self.sess = sess
        self.env = Environment()
        self.state_size = self.env.get_state_size()[0]
        self.action_size = self.env.get_action_size()
        self.low_bound, self.high_bound = self.env.get_bounds()

        self.buffer = ExperienceBuffer()

        print("Creation of the actor-critic network")
        self.network = Network(self.state_size, self.action_size,
                               self.low_bound, self.high_bound)

        self.sess.run(tf.global_variables_initializer())
        DISPLAYER.reset()
Example #5
    def __init__(self, sess):
        print("Initializing the agent...")

        self.sess = sess
        self.env = Environment()
        self.state_size = self.env.get_state_size()[0]
        self.action_size = self.env.get_action_size()
        self.bounds = self.env.get_bounds()

        print("Creation of the actor-critic network")
        self.network = Network(self.sess, self.state_size, self.action_size,
                               self.bounds)

        self.critic_lr = settings.CRITIC_LEARNING_RATE
        self.actor_lr = settings.ACTOR_LEARNING_RATE

        self.delta_critic_lr = self.critic_lr / settings.TRAINING_EPS
        self.delta_actor_lr = self.actor_lr / settings.TRAINING_EPS

        self.sess.run(tf.global_variables_initializer())
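delta_critic_lr and delta_actor_lr read as per-episode decrements, so a linear schedule that reaches zero after settings.TRAINING_EPS episodes is the natural interpretation; a sketch of applying it at the end of each episode (the method name is hypothetical):

    def decay_learning_rates(self):
        # Linear annealing: remove one fixed slice per episode, never going below zero.
        self.critic_lr = max(0.0, self.critic_lr - self.delta_critic_lr)
        self.actor_lr = max(0.0, self.actor_lr - self.delta_actor_lr)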
Example #6
    def __init__(self, sess):
        print("Initializing the agent...")

        self.sess = sess
        self.env = Environment()
        self.state_size = self.env.get_state_size()[0]
        self.action_size = self.env.get_action_size()
        self.low_bound, self.high_bound = self.env.get_bounds()

        self.buffer = PrioritizedReplayBuffer(parameters.BUFFER_SIZE,
                                              parameters.ALPHA)

        print("Creation of the actor-critic network...")
        self.network = Network(self.state_size, self.action_size,
                               self.low_bound, self.high_bound)
        print("Network created !\n")

        self.epsilon = parameters.EPSILON_START
        self.beta = parameters.BETA_START

        self.best_run = -1e10

        self.sess.run(tf.global_variables_initializer())
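With a prioritized replay buffer, BETA_START is normally the initial importance-sampling exponent that is annealed toward 1 over training; the sketch below assumes that standard practice and takes the total number of training steps as an argument (the method itself is hypothetical):

    def anneal_beta(self, total_steps):
        # Increase beta linearly toward 1 so importance-sampling corrections reach full strength by the end of training.
        self.beta = min(1.0, self.beta + (1.0 - parameters.BETA_START) / total_steps)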
Example #7
    def __init__(self, sess):
        print("Initializing the agent...")

        self.sess = sess
        self.env = Environment()
        self.state_size = self.env.get_state_size()[0]
        self.action_size = self.env.get_action_size()
        self.low_bound, self.high_bound = self.env.get_bounds()

        self.buffer = ExperienceBuffer()

        print("Creation of the actor-critic network")
        self.network = Network(self.state_size, self.action_size,
                               self.low_bound, self.high_bound)

        self.epsilon = parameters.EPSILON_START
        self.epsilon_decay = (parameters.EPSILON_START -
                              parameters.EPSILON_STOP) \
            / parameters.EPSILON_STEPS

        self.best_run = -1e10
        self.n_gif = 0

        self.sess.run(tf.global_variables_initializer())
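Here epsilon_decay is a per-step decrement, so the matching exploration schedule is a linear decay from EPSILON_START to EPSILON_STOP over EPSILON_STEPS steps; a hypothetical sketch:

    def decay_epsilon(self):
        # Linear epsilon decay, stopping at the configured floor.
        if self.epsilon > parameters.EPSILON_STOP:
            self.epsilon -= self.epsilon_decay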