Example #1
File: play.py Project: bic4907/mario_rl
    def run(self):
        super(MarioEnv, self).run()

        self.env = gym_super_mario_bros.make(self.env_id)
        self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
        self.reset()
        print('[ Worker %2d ] ' % (self.idx), end='')
        print('Playing <', self.env_id, '>')

        self.request_action(0, False)

        while True:
            action = self.child_conn.recv()
            next_state, reward, done, info = self.env.step(action)
            self.steps += 1
            self.accum_reward += reward
            next_state = rgb2dataset(next_state)

            if self.is_render and self.idx == 0:
                self.env.render()

            # keep a sliding window of the last 4 frames as the state
            self.transition.append(next_state)
            if len(self.transition) > 4:
                self.transition.pop(0)

            if done:
                self.send_result(info['x_pos'])
                self.reset()
                self.request_action(reward, True)
            else:
                self.request_action(reward, False)
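
The worker above blocks on self.child_conn.recv() for an action and reports back through request_action, which implies a multiprocessing.Pipe shared with a driver process. The actual driver is not part of this excerpt; below is a minimal, self-contained sketch of the same handshake pattern (the dummy worker, what travels through the pipe, and all names are assumptions, not the project's code):

import multiprocessing as mp
import random


def dummy_worker(child_conn, num_steps):
    # Stand-in for MarioEnv.run(): request an action, step, report back.
    child_conn.send((0.0, False))           # mirrors request_action(0, False)
    for _ in range(num_steps):
        action = child_conn.recv()          # same blocking recv as in play.py
        reward, done = random.random(), False
        child_conn.send((reward, done))     # mirrors request_action(reward, done)


def drive_worker(num_steps=10, num_actions=7):
    parent_conn, child_conn = mp.Pipe()
    proc = mp.Process(target=dummy_worker, args=(child_conn, num_steps))
    proc.start()
    for i in range(num_steps + 1):
        reward, done = parent_conn.recv()   # wait for the worker's request
        if i < num_steps:
            parent_conn.send(random.randrange(num_actions))
    proc.join()


if __name__ == '__main__':
    drive_worker()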
Example #2
File: train.py Project: bic4907/mario_rl
    def run(self):
        super(MarioEnv, self).run()

        self.model = A3C(
            self.s_dim,
            self.a_dim,
            gamma=0.95,
            epsilon_start=1.0,
            epsilon_end=0.1,
            epsilon_length=100000,
            use_cuda=self.use_cuda,
        )
        # sync the local network with the shared global network
        self.model.l_net.load_state_dict(self.g_net.state_dict())

        self.env = gym_super_mario_bros.make(self.env_id)
        self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
        self.reset()
        print('[ Worker %2d ] ' % (self.idx), end='')
        print('Playing <', self.env_id, '>')

        while True:
            # act randomly until the 4-frame stack is full
            if len(self.transition) != 4:
                action = self.model.get_action(self.transition, is_random=True)
            else:
                action = self.model.get_action(self.transition,
                                               is_random=False)

            next_state, reward, done, info = self.env.step(action)
            self.steps += 1
            self.accum_reward += reward
            next_state = rgb2dataset(next_state)

            if self.is_render and self.idx == 0:
                self.env.render()

            # copy the frame stack so later pops/appends don't mutate stored states
            self.buffer_state.append(list(self.transition))
            self.buffer_action.append(action)
            self.buffer_reward.append(reward)

            # every update_iter steps, train on the buffered rollout
            if len(self.buffer_state) > 0 and self.steps % self.update_iter == 0:
                next_transition = self.transition[1:]
                next_transition.append(next_state)

                self.train(next_transition, done)

                self.buffer_state.clear()
                self.buffer_action.clear()
                self.buffer_reward.clear()

            # keep a sliding window of the last 4 frames as the state
            self.transition.append(next_state)
            if len(self.transition) > 4:
                self.transition.pop(0)

            if done:
                self.send_result(info['x_pos'])
                self.reset()
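
self.train(next_transition, done) is not shown in this excerpt; in an A3C worker it typically turns buffer_reward into discounted n-step targets, bootstrapping from the critic's value of the final frame stack when the rollout did not end the episode. A small sketch of that return computation (the function name, the gamma value, and the bootstrap handling are assumptions, not the project's actual train()):

import numpy as np


def n_step_returns(rewards, bootstrap_value, done, gamma=0.95):
    # Work backwards through the rollout; when the episode ended inside the
    # rollout the bootstrap value is dropped and the return restarts from 0.
    running = 0.0 if done else bootstrap_value
    targets = []
    for r in reversed(rewards):
        running = r + gamma * running
        targets.append(running)
    return np.array(targets[::-1], dtype=np.float32)


# e.g. n_step_returns([1.0, 0.0, 1.0], bootstrap_value=2.0, done=False)
# -> [3.617, 2.755, 2.9]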
Example #3
File: play.py Project: bic4907/mario_rl
    def reset(self):
        state = self.env.reset()
        state = rgb2dataset(state)
        self.transition.clear()
        self.transition.append(state)

        self.steps = 0
        self.episodes += 1
        self.accum_reward = 0
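
rgb2dataset is defined elsewhere in the project; given that the other reset() variant stores frames in a [4, 84, 84] array, it presumably converts the raw RGB observation into a single 84x84 plane. A rough stand-in (the grayscale conversion, resize, and scaling are assumptions):

import cv2
import numpy as np


def rgb2dataset_sketch(frame):
    # Approximate stand-in: RGB frame (240, 256, 3) -> (84, 84) float in [0, 1].
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (84, 84), interpolation=cv2.INTER_AREA)
    return small.astype(np.float32) / 255.0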
Example #4
File: train.py Project: bic4907/mario_rl
    def reset(self):
        state = self.env.reset()
        state = rgb2dataset(state)

        # preallocate the 4-frame stack; only the newest slot holds a real frame
        self.transition = np.zeros([4, 84, 84])
        self.transition[-1, :] = state

        self.steps = 0
        self.episodes += 1
        self.accum_reward = 0
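
Unlike the list-based reset() in play.py, this variant preallocates the whole [4, 84, 84] stack, so the network always receives four planes even on the first step. Before the stack reaches model.get_action() it presumably becomes a batched float tensor; a hypothetical helper for that conversion (not part of the project):

import numpy as np
import torch


def stack_to_tensor(transition, use_cuda=False):
    # [4, 84, 84] frame stack -> [1, 4, 84, 84] float tensor for the network.
    x = torch.from_numpy(np.asarray(transition, dtype=np.float32)).unsqueeze(0)
    return x.cuda() if use_cuda else x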
Example #5
File: train.py Project: bic4907/mario_rl
    def run(self):
        super(MarioEnv, self).run()

        self.env = gym_super_mario_bros.make(self.env_id)
        self.env = BinarySpaceToDiscreteSpaceEnv(self.env, SIMPLE_MOVEMENT)
        self.reset()
        print('[ Worker %2d ] ' % (self.idx), end='')
        print('Playing <', self.env_id, '>')

        self.request_action(0, False)

        while True:
            action = self.child_conn.recv()
            #            print(SIMPLE_MOVEMENT[action])
            next_state, reward, done, info = self.env.step(action)

            # in gym-super-mario-bros a reward of -15 is the death penalty,
            # so treat it as the end of a life even if done is not set yet
            force_done = False
            if reward == -15:
                force_done = True

            self.steps += 1
            self.accum_reward += reward
            next_state = rgb2dataset(next_state)

            if self.is_render and self.idx == 0:
                self.env.render()

            # shift the frame stack: drop the oldest plane, append the newest
            self.transition[:3, :, :] = self.transition[1:, :, :]
            self.transition[3, :, :] = next_state

            if done:
                self.send_result(self.prev_xpos)

                self.reset()
                self.request_action(reward, force_done)
            else:
                self.request_action(reward, force_done)
            self.prev_xpos = info['x_pos']
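
The two assignments above shift self.transition in place: the oldest 84x84 plane falls off the front and the newest frame lands in slot 3, so the stack always holds the last four frames. A standalone demonstration of the same update (the names and toy frames are illustrative):

import numpy as np


def push_frame(stack, frame):
    # Same rolling update as in run(): drop the oldest plane, append the newest.
    stack[:3] = stack[1:]
    stack[3] = frame
    return stack


stack = np.zeros([4, 84, 84], dtype=np.float32)
for t in range(6):
    push_frame(stack, np.full((84, 84), t, dtype=np.float32))
print(stack[:, 0, 0])   # [2. 3. 4. 5.] - only the last four frames remain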
Example #6
File: train.py Project: bic4907/mario_rl
                epsilon_end=0.01,
                epsilon_length=300000,
                use_cuda=use_cuda,
                lr=lr,
                replay_buffer_size=replay_buffer_size,
                train_start_step=train_start_step,
                batch_size=batch_size,
                target_update_interval=target_update_interval,
                train_step_interval=train_step_interval)

    writer = SummaryWriter(log_dir='runs/ddqn')

    while model.episode < max_episode:

        state = env.reset()
        state = rgb2dataset(state)
        model.episode += 1
        accum_reward = 0

        # start a fresh frame stack for the new episode
        transition = []
        transition.append(state)

        while True:
            if len(transition) == 4:
                action = model.get_action(transition, is_random=False)
            else:
                action = model.get_action(transition, is_random=True)

            state_, reward, done, info = env.step(action)
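
The excerpt ends before the SummaryWriter created above is used. A typical way to record per-episode progress with that writer would be the following sketch (the tag names and which values get logged are assumptions; writer refers to the SummaryWriter instantiated above):

def log_episode(writer, episode, accum_reward, x_pos):
    # Hypothetical per-episode logging for the DDQN loop above.
    writer.add_scalar('reward/accum', accum_reward, episode)
    writer.add_scalar('env/x_pos', x_pos, episode)


# e.g. once done is True inside the training loop:
# log_episode(writer, model.episode, accum_reward, info['x_pos'])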