Example #1
class EnvWrapper:
    def __init__(self, frame_size, skip_frames, stack_size):
        self.env = gym_super_mario_bros.make('SuperMarioBrosRandomStages-v0')
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
        self.agent = None
        self.frame_size = frame_size
        self.stack_size = stack_size
        self.action_size = self.env.action_space.n
        self.skip_frames = skip_frames
        self.render = False
        self.state_generator = StateGenerator(self.frame_size, self.stack_size)

        self.env.reset()
        raw_state, _, _, self.info = self.env.step(0)
        self.state = self.state_generator.get_stacked_frames(raw_state, True)

        self.states = []
        self.policies = []
        self.actions = []
        self.rewards = []
        self.values = []
        self.dones = []

        self.episode = 0
        self.episodeReward = 0
        self.maxEpisodeReward = 0
        self.current_episode_reward = 0

        self.done = False
Example #2
    def solver_DFS(self):

        if self.capacity >= sum(self.weights):

            return (StateGenerator(0, [1]*len(self.prices), self.prices, self.weights))

        max_index = len(self.prices)  # the number of items is constant, so compute it once instead of on every loop iteration

        root_state = StateGenerator(0, [0] * len(self.prices), self.prices, self.weights)
        best_state = root_state
        stack = [root_state]
        counter = 0
        while len(stack) > 0:
            current_state = stack.pop()
            index = current_state.index

            if current_state.value > best_state.value:
                best_state = current_state

            if index < max_index:
                if current_state.weight + self.items[index][1] <= self.capacity:        # If possible take in the item
                    next_taken = current_state.get_NextState()

                    stack.append(next_taken)

                next_nonTaken = StateGenerator(index+1, current_state.taken, self.prices, self.weights)
                stack.append(next_nonTaken)     # in any case, also try the state where the item is not taken

                counter += 1
        print(counter)
        return best_state
Example #3
    def solver_BFS(self):   # the only difference from DFS is that states are popped from the front of the queue

        if self.capacity >= sum(self.weights):

            return (StateGenerator(0, [1]*len(self.prices), self.prices, self.weights))

        max_index = len(self.prices)  # the number of items is constant, so compute it once instead of on every loop iteration
        root_state = StateGenerator(0, [0] * len(self.prices), self.prices, self.weights)
        best_state = root_state
        queue = [root_state]

        while len(queue) > 0:
            current_state = queue.pop(0)

            if current_state.value > best_state.value:
                best_state = current_state

            index = current_state.index

            if index < max_index:
                if current_state.weight + self.items[index][1] <= self.capacity:        # If possible take in the item
                    next_taken = current_state.get_NextState()

                    queue.append(next_taken)

                next_nonTaken = StateGenerator(index+1, current_state.taken, self.prices, self.weights)
                queue.append(next_nonTaken)     # in any case, also try the state where the item is not taken

        return best_state
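Both solvers are methods of a knapsack-solver class that is not shown in these excerpts; they rely on self.prices, self.weights, self.items and self.capacity. A minimal sketch of such an enclosing class and of how the solvers might be called; the class name KnapsackSolver and its constructor are assumptions made here for illustration:

class KnapsackSolver:
    """Hypothetical container for the solver_* methods shown above."""

    def __init__(self, prices, weights, capacity):
        # prices[i] and weights[i] describe item i; items pairs them up,
        # matching the self.items[index][1] weight lookups in the solvers
        self.prices = prices
        self.weights = weights
        self.items = list(zip(prices, weights))
        self.capacity = capacity


# usage sketch: with capacity 5 the best value is 35 (items at index 1 and 2)
solver = KnapsackSolver(prices=[5, 10, 25], weights=[1, 2, 3], capacity=5)
# best = solver.solver_DFS()   # or solver.solver_BFS()
# print(best.value, best.taken)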
Example #4
File: main.py Project: Braamling/KR2016
def create_graph():
    init_logging()

    generator = StateGenerator()
    states, paths = generator.generate()

    pprint.pprint(states)
    pprint.pprint(paths)

    graph = Graph(states, paths)
    graph.render_graph()
Example #5
class TestStateGenerator(unittest.TestCase):
    def setUp(self):
        prices = [5, 10, 25]
        weights = [1, 2, 3]
        wrong_prices = [1, 2]

        self.empty_state = StateGenerator(0, [0] * len(prices), prices,
                                          weights)  # empty beginning state
        self.end_state = StateGenerator(
            2, [0, 0, 1], prices, weights)  # state that cannot be expanded further
        self.state1 = StateGenerator(
            1, [0, 1, 0], prices,
            weights)  # ordinary state whose index has already been explored

        self.a = StateGenerator(0, [0] * len(prices), prices, weights)

    def test_get_NextState_true(self):

        self.assertEqual([1, 0, 0], self.empty_state.get_NextState().taken)
        self.assertEqual(1, self.empty_state.get_NextState().index)
        self.assertEqual([0, 1, 1], self.state1.get_NextState().taken)

        self.empty_state.get_NextState()  # check that the original state remains unchanged
        self.assertEqual(0, self.empty_state.index)

    def test_get_NextState_None(self):
        self.assertIsNone(self.empty_state.get_NextState().get_NextState()
                          .get_NextState().get_NextState())
        self.assertIsNone(self.state1.get_NextState().get_NextState())
        self.assertIsNone(self.end_state.get_NextState())
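The StateGenerator class these tests exercise is not included in the excerpts. A minimal sketch that satisfies the behaviour the tests expect (take the first not-yet-taken item at or after index and return a new state, or None when nothing is left); how value and weight are derived is an assumption:

class StateGenerator:
    """Sketch of a knapsack search state; not the original implementation."""

    def __init__(self, index, taken, prices, weights):
        self.index = index    # position of the next item to consider
        self.taken = taken    # 0/1 flag per item
        self.prices = prices
        self.weights = weights
        # value/weight derived from the taken flags (assumption)
        self.value = sum(p for p, t in zip(prices, taken) if t)
        self.weight = sum(w for w, t in zip(weights, taken) if t)

    def get_NextState(self):
        # take the first not-yet-taken item at or after self.index,
        # without mutating the current state; None when nothing is left
        for i in range(self.index, len(self.prices)):
            if not self.taken[i]:
                new_taken = list(self.taken)
                new_taken[i] = 1
                return StateGenerator(i + 1, new_taken, self.prices, self.weights)
        return None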
Example #6
    def solver_optimized(self):

        if self.capacity >= sum(self.weights):

            return StateGenerator(0, [1] * len(self.items), self.prices, self.weights)

        max_index = len(self.prices)  # the number of items is constant, so compute it once instead of on every loop iteration

        root_state = StateGenerator(0, [0] * len(self.prices), self.prices, self.weights)
        best_state = root_state

        stack = [root_state]
        counter = 0        # keep track of the number of states traversed, just to see
                           # whether the optimization has an effect on solving the problem

        while len(stack) > 0:
            current_state = stack.pop()
            index = current_state.index
            if current_state.value > best_state.value:
                best_state = current_state

            if index < max_index:
                if current_state.weight + self.items[index][1] <= self.capacity:        # If possible take in the item
                    next_taken = current_state.get_NextState()

                    if self.upper_bound(index + 1, next_taken.value, next_taken.weight) > best_state.value:
                        stack.append(next_taken)  # only expand this state if its upper-bound estimate
                                                  # exceeds best_state's value
                    if next_taken.value > best_state.value:
                        best_state = next_taken

                next_nonTaken = StateGenerator(index+1, current_state.taken, self.prices, self.weights)

                # in any case, consider the state where the item is not taken, subject to the same upper-bound check

                if self.upper_bound(index + 1, next_nonTaken.value, next_nonTaken.weight) > best_state.value:
                    stack.append(next_nonTaken)

                counter += 1

        print(counter)
        return best_state
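solver_optimized prunes branches with self.upper_bound(index, value, weight), whose implementation is not part of this excerpt. A common choice is the fractional-knapsack relaxation: fill the remaining capacity with the not-yet-decided items in order of value density, taking the last one fractionally. A minimal sketch under that assumption:

    def upper_bound(self, index, value, weight):
        # optimistic estimate of the best total value reachable from a partial
        # state; sketch only -- the original method may compute its bound differently
        remaining = self.capacity - weight
        bound = value
        rest = sorted(zip(self.prices[index:], self.weights[index:]),
                      key=lambda pw: pw[0] / pw[1], reverse=True)
        for price, w in rest:
            if w <= remaining:
                remaining -= w
                bound += price
            else:
                # take a fraction of the first item that no longer fits completely
                bound += price * remaining / w
                break
        return bound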
Example #7
    def setUp(self):
        prices = [5, 10, 25]
        weights = [1, 2, 3]
        wrong_prices = [1, 2]

        self.empty_state = StateGenerator(0, [0] * len(prices), prices,
                                          weights)  # empty beginning state
        self.end_state = StateGenerator(
            2, [0, 0, 1], prices, weights)  # state that cannot be expanded further
        self.state1 = StateGenerator(
            1, [0, 1, 0], prices,
            weights)  # ordinary state whose index has already been explored

        self.a = StateGenerator(0, [0] * len(prices), prices, weights)
Example #8
import gym_super_mario_bros
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv  # older nes_py wrapper matching the usage below
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

from dqn_agent import DQNAgent  # project-local module; import path assumed
from memory_db import MemoryDB
from state_generator import StateGenerator
from training_parameters import (esplison, esplison_decay, gamma, input_size,
                                 frame_size, stack_size, max_steps, render,
                                 max_episodes, sample_size, epoch,
                                 experiences_before_training,
                                 training_before_update_target, e, a, beta,
                                 beta_increment_per_sampling, capacity,
                                 max_priority)

if __name__ == "__main__":
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
    print(SIMPLE_MOVEMENT, env.action_space.n)
    # get size of state and action from environment
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    memorydb_instance = MemoryDB(e, a, beta, beta_increment_per_sampling,
                                 capacity, max_priority)
    agent_instance = DQNAgent(input_size, action_size, esplison,
                              esplison_decay, True)
    state_generator_instance = StateGenerator(frame_size, stack_size)
    scores, episodes = [], []

    for e in range(max_episodes):  # note: the loop variable shadows the imported hyperparameter "e"
        done = False
        score = 0
        raw_state = env.reset()
        state = state_generator_instance.get_stacked_frames(raw_state, True)

        steps = 0  # up to 500

        while not done and steps < max_steps:
            if render:  # if True
                env.render()
            steps += 1
            # get epsilon-greedy action
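The excerpt stops at the epsilon-greedy comment. Purely for illustration, the body of such a step usually looks roughly like the sketch below; get_q_values and add are hypothetical method names (the real DQNAgent and MemoryDB APIs are not shown here), and the sketch also assumes import numpy as np.

# sketch only -- not part of the original file, which is truncated above
if np.random.rand() <= esplison:
    action = np.random.randrange(action_size)                    # explore
else:
    action = int(np.argmax(agent_instance.get_q_values(state)))  # exploit the Q-network

raw_state, reward, done, info = env.step(action)
next_state = state_generator_instance.get_stacked_frames(raw_state, False)
memorydb_instance.add(state, action, reward, next_state, done)   # store the transition
state = next_state
score += reward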
Example #9
import numpy as np

import gym_super_mario_bros
from nes_py.wrappers import JoypadSpace
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from state_generator import StateGenerator  # project-local module, as imported in Example #8


class EnvWrapper:
    def __init__(self, frame_size, skip_frames, stack_size):
        self.env = gym_super_mario_bros.make('SuperMarioBrosRandomStages-v0')
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
        self.agent = None
        self.frame_size = frame_size
        self.stack_size = stack_size
        self.action_size = self.env.action_space.n
        self.skip_frames = skip_frames
        self.render = False
        self.state_generator = StateGenerator(self.frame_size, self.stack_size)

        self.env.reset()
        raw_state, _, _, self.info = self.env.step(0)
        self.state = self.state_generator.get_stacked_frames(raw_state, True)

        self.states = []
        self.policies = []
        self.actions = []
        self.rewards = []
        self.values = []
        self.dones = []

        self.episode = 0
        self.episodeReward = 0
        self.maxEpisodeReward = 0
        self.current_episode_reward = 0

        self.done = False

    def step(self, n):
        for _ in range(n):
            policy, value = self.agent.get_actions_and_values(
                np.array([self.state]))
            action = np.random.choice(self.action_size, p=np.squeeze(policy))
            reward = 0

            for i in range(0, self.skip_frames):
                raw_state, frame_reward, done, info = self.env.step(action)
                if frame_reward == -15 or done:
                    self.episode += 1
                    done = True
                    if frame_reward == -15:
                        reward = -15 * self.skip_frames
                    else:
                        reward = 15 * self.skip_frames

                    raw_state = self.env.reset()

                    break
                else:
                    reward += frame_reward
                    reward += (5 if
                               (info["score"] - self.info["score"]) > 0 else 0)

            reward /= (15 * self.skip_frames)

            self.current_episode_reward += reward

            next_state = self.state_generator.get_stacked_frames(
                raw_state, done, frame_reward == 15
                or (done and self.episode % 100 == 0),
                self.current_episode_reward)

            self.states.append(self.state)
            self.policies.append(np.squeeze(policy))
            self.actions.append(action)
            self.rewards.append(reward)
            self.values.append(np.squeeze(value))
            self.dones.append(done)

            self.state = next_state
            self.done = done
            self.info = info

            if self.done:
                self.episodeReward = self.current_episode_reward

                if self.maxEpisodeReward < self.episodeReward:
                    self.maxEpisodeReward = self.episodeReward

                self.current_episode_reward = 0

    def get_experiences(self):
        if self.done:
            next_state_value = 0
        else:
            next_state_value = np.squeeze(
                self.agent.get_value(np.array([self.state])))

        states = self.states
        actions = self.actions
        policies = self.policies
        rewards = self.rewards
        values = self.values
        dones = [1 if done else 0 for done in self.dones]
        next_values = values[1:] + [next_state_value]

        self.states = []
        self.policies = []
        self.actions = []
        self.rewards = []
        self.values = []
        self.dones = []

        return states, policies, actions, rewards, values, next_values, dones

    def get_action_size(self):
        return self.action_size

    def set_agent(self, agent):
        self.agent = agent

    def set_render(self, render):
        self.render = render

    def get_max_and_current_episode_reward(self):
        return self.maxEpisodeReward, self.episodeReward
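The wrapper only collects rollouts; the training side is not shown. Since get_experiences returns rewards, values, next_values and dones, one plausible next step is generalized advantage estimation (the A2C agent in Example #10 is built with GAMMA and LAMBDA, which suggests GAE). A minimal sketch, with compute_gae as a hypothetical helper:

import numpy as np

def compute_gae(rewards, values, next_values, dones, gamma=0.99, lam=0.95):
    # generalized advantage estimation over one rollout
    advantages = np.zeros(len(rewards), dtype=np.float32)
    last_adv = 0.0
    for t in reversed(range(len(rewards))):
        mask = 1.0 - dones[t]                   # 0 at episode boundaries
        delta = rewards[t] + gamma * next_values[t] * mask - values[t]
        last_adv = delta + gamma * lam * mask * last_adv
        advantages[t] = last_adv
    returns = advantages + np.asarray(values, dtype=np.float32)
    return advantages, returns

# states, policies, actions, rewards, values, next_values, dones = wrapper.get_experiences()
# advantages, returns = compute_gae(rewards, values, next_values, dones)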
Example #10
if __name__ == "__main__":
    tf.reset_default_graph()
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    env = gym_super_mario_bros.make('SuperMarioBrosRandomStages-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    action_size = env.action_space.n

    # envs[0].set_render(True)

    train_model = A2CAgent("train_model", False, sess, input_shape, action_size,
                           lr, GAMMA, LAMBDA, max_grad_norm, ent_coef, vf_coef, clip_range, True)
    while True:
        state_generator = StateGenerator(frame_size, stack_size)
        state = state_generator.get_stacked_frames(env.reset(), True)

        episodes_reward = 0
        while True:
            policy, value = train_model.get_actions_and_values(np.array([state]))
            action = np.random.choice(np.arange(action_size), p=np.squeeze(policy))

            for i in range(0, skip_frames):
                env.render()
                raw_state, frame_reward, done, info = env.step(action)
                if frame_reward == -15 or done:
                    raw_state = env.reset()
                    break