def fn():
    # env_id, seed and rank are expected to come from the enclosing scope.
    import pickle
    with open('env_data.npy', 'rb') as f:
        datasets = pickle.load(f)
    env = PalleteWorld(env_id=env_id, datasets=datasets)
    env.seed(seed + rank)
    return env
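Closures like this are usually the inner function of an environment factory whose arguments become the free variables env_id, seed and rank, e.g. when building a vectorized environment. A minimal sketch of such a factory, assuming that pattern (make_env and the SubprocVecEnv comment are illustrative, not from the original):

def make_env(env_id, seed, rank):
    def fn():
        import pickle
        with open('env_data.npy', 'rb') as f:
            datasets = pickle.load(f)
        env = PalleteWorld(env_id=env_id, datasets=datasets)
        env.seed(seed + rank)  # give each worker a distinct seed
        return env
    return fn

# e.g. envs = SubprocVecEnv([make_env(0, seed=1, rank=r) for r in range(4)])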
Example #2
    def __init__(self, seed=None, id=0):
        self.id = id

        # import pickle
        # l = []
        # with open('env_data.npy', 'rb') as f:
        #     l = pickle.load(f)
        # self.env = PalleteWorld(datasets=l)

        self.env = PalleteWorld(n_random_fixed=1, env_id=id)
        self.episode_step = 0

        if seed is not None:
            self.env.seed(seed)
        print('env {} is created with seed {}'.format(self.id, seed))
Example #3
    def __init__(self, num_env):
        self.envs = []
        for _ in range(num_env):
            self.envs.append(PalleteWorld(n_random_fixed=10))
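A container like this typically fans one action out to each environment and collects the per-env results. A minimal sketch of such a step method, assuming the (observation, reward, done, info) tuple that PalleteWorld.step returns in the other examples (the method name and return layout are illustrative, not from the original):

    def step(self, actions):
        # One action per environment; gather the per-env results.
        results = [env.step(a) for env, a in zip(self.envs, actions)]
        obs, rewards, dones, infos = zip(*results)
        return list(obs), list(rewards), list(dones), list(infos)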
Example #4
from environment import PalleteWorld
import pickle
from sample_agent.reinforce_agg import *

fn = 'env_data.npy'

# One-time dataset generation, left commented out in the original:
# env = PalleteWorld(env_id=0, n_random_fixed=5)
# env.seed(1)
# env.save_dataset(fn)

with open(fn, 'rb') as f:
    l = pickle.load(f)

for i, items in enumerate(l):
    env = PalleteWorld(datasets=[items], env_id=i)
    main(env)
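Before involving the REINFORCE agent, the loaded data can be sanity-checked by rolling one environment forward with arbitrary actions. A minimal sketch, assuming env_cfg is importable as in Example #8 and that bin_index ranges over env_cfg.ENV.BIN_MAX_COUNT (both assumptions; only the step contract is taken from the examples):

import random

# Hypothetical smoke test: random bin choices until the episode ends.
env = PalleteWorld(datasets=[l[0]], env_id=0)
obs = env.reset()
done = False
while not done:
    # priority and rotate follow the fixed values used in Example #7.
    a = env_cfg.Action(bin_index=random.randrange(env_cfg.ENV.BIN_MAX_COUNT),
                       priority=1, rotate=1)
    obs, reward, done, info = env.step(a)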
Example #5
def fn():
    # env_id, seed and rank are free variables from the enclosing scope,
    # as in Example #1.
    env = PalleteWorld(env_id=env_id, n_random_fixed=10)
    env.seed(seed + rank)
    return env
Example #6
            if env_cfg.ENV.RENDER:
                env.render()
                clock.tick(env_cfg.ENV.TICK_INTERVAL)

            policy.rewards.append(reward)
            ep_reward += reward
            if done:
                # print('episode end')
                # print('len of action : {}, episode actions : {}'.format(len(env.previous_actions), env.previous_actions))
                ep_rewards.append(ep_reward)
                n_placed_items = len(info['placed_items'])
                break

        finish_episode()
        if i_episode % 10 == 0:
            print('Episode {}\tLast reward: {:.2f}'.format(i_episode, ep_reward))
        writer.add_scalar('data/final_reward', ep_reward, i_episode)
        writer.add_scalar('data/n_placed_items', n_placed_items, i_episode)

        def checkEqual(iterator):
            return len(set(iterator)) <= 1

        if checkEqual(ep_rewards[-20:]) and len(ep_rewards) > 100:
            print("Solved! last reward is {}".format(ep_rewards[-1]))
            return ep_rewards[-1]

if __name__ == '__main__':
    env = PalleteWorld(n_random_fixed=1)
    main()
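The loop stores per-step rewards on policy.rewards and then calls finish_episode(), which is not shown. In the standard PyTorch REINFORCE example this mirrors, the helper converts the stored rewards into discounted, normalized returns and takes one policy-gradient step. A sketch under that assumption (gamma, eps, optimizer and policy.saved_log_probs are names from that example, not from this snippet; policy and optimizer are the training script's globals):

import torch

gamma, eps = 0.99, 1e-8  # assumed hyperparameters

def finish_episode():
    # Discounted returns, computed back-to-front from the stored rewards.
    R, returns = 0, []
    for r in policy.rewards[::-1]:
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    returns = (returns - returns.mean()) / (returns.std() + eps)  # normalize
    # REINFORCE loss: -log pi(a|s) * return, summed over the episode.
    policy_loss = [-log_prob * R
                   for log_prob, R in zip(policy.saved_log_probs, returns)]
    optimizer.zero_grad()
    torch.stack(policy_loss).sum().backward()
    optimizer.step()
    del policy.rewards[:]
    del policy.saved_log_probs[:]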
Example #7
class Game:
    """
    Game wrapper.
    """
    def __init__(self, seed=None, id=0):
        self.id = id

        # import pickle
        # l = []
        # with open('env_data.npy', 'rb') as f:
        #     l = pickle.load(f)
        # self.env = PalleteWorld(datasets=l)

        self.env = PalleteWorld(n_random_fixed=1, env_id=id)
        self.episode_step = 0

        if seed is not None:
            self.env.seed(seed)
        print('env {} is created with seed {}'.format(self.id, seed))

    def step(self, action):
        """
        Apply action to the game.
        
        Args:
            action : action of the action_space to take.
        Returns:
            The new observation, the reward, and a boolean indicating whether the game has ended.
        """
        self.episode_step += 1
        a = env_cfg.Action(bin_index=action, priority=1, rotate=1)
        o, reward, done, _ = self.env.step(a)

        # Visual state representation: one 10x10 binary channel per possible
        # box, with each box's (x, y) footprint marked from the top-left corner.
        import torch
        boxes = torch.zeros(10, 10, env_cfg.ENV.BIN_MAX_COUNT)  # x, y, box count
        for i, box in enumerate(o[0]):  # box = (x, y)
            boxes[0:box[1], 0:box[0], i] = 1
        o = np.concatenate((o[1], boxes.numpy()), axis=-1)

        print('Agent is taking action {} at step {} in game {}.'.format(
            action, self.episode_step, self.id))

        # this is for visual state representation.
        # import torch
        # embedding = torch.nn.Embedding(num_embeddings=env_cfg.ENV.BIN_MAX_X_SIZE*env_cfg.ENV.BIN_MAX_X_SIZE+env_cfg.ENV.BIN_MAX_Y_SIZE+1, embedding_dim=1)
        # i = [b[0] * env_cfg.ENV.BIN_MAX_X_SIZE + b[1] for b in o[0]]
        # boxlist = embedding(torch.Tensor(i).long()).squeeze(1).detach().numpy()
        # o = np.concatenate((o[1][:,:,0].flatten(), boxlist))

        return o, reward, done

    def to_play(self):
        """
        Return the current player.

        Returns:
            The current player; it should be an element of the players list in the config.
        """
        return 0

    def reset(self):
        """
        Reset the environment for a new episode.
        Returns:
            Initial observation of the game.
        """
        o = self.env.reset()
        print('game {} is reset.'.format(self.id))
        self.episode_step = 0

        # import torch
        # embedding = torch.nn.Embedding(num_embeddings=env_cfg.ENV.BIN_MAX_X_SIZE*env_cfg.ENV.BIN_MAX_X_SIZE+env_cfg.ENV.BIN_MAX_Y_SIZE+1, embedding_dim=1)
        # i = [b[0] * env_cfg.ENV.BIN_MAX_X_SIZE + b[1] for b in o[0]]
        # boxlist = embedding(torch.Tensor(i).long()).squeeze(1).detach().numpy()
        # o = np.concatenate((o[1][:,:,0].flatten(), boxlist))

        # Visual state representation, mirroring step(): one 10x10 binary
        # channel per possible box.
        import torch
        boxes = torch.zeros(10, 10, env_cfg.ENV.BIN_MAX_COUNT)  # x, y, box count
        for i, box in enumerate(o[0]):  # box = (x, y)
            boxes[0:box[1], 0:box[0], i] = 1
        o = np.concatenate((o[1], boxes.numpy()), axis=-1)

        return o

    def close(self):
        """
        Properly close the game.
        """
        self.env.close()

    def render(self):
        """
        Display the game observation.
        """
        self.env.render()
        input("Press enter to take a step ")
Example #8
from env_config import *
import logging
import pygame

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from tensorboardX import SummaryWriter

LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
logging.basicConfig(level=LOGLEVEL)

writer = SummaryWriter()

if __name__ == '__main__':

    env = PalleteWorld(n_random_fixed=1)
    agent = BDQNAgent()

    if ENV.RENDER:
        clock = pygame.time.Clock()

    total_step = 0

    for e in range(ENV.N_EPISODES):
        state = env.reset()
        d = False
        step = 0

        while True:
            while True:
                action = agent.get_action(state)
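The snippet is cut off inside the interaction loop. A typical continuation for a value-based agent would step the environment, store the transition, and train; a rough sketch in which every method on agent is an assumption, not BDQNAgent's real API:

                # Illustrative continuation only; agent.remember and
                # agent.train are hypothetical method names.
                next_state, reward, d, info = env.step(action)
                agent.remember(state, action, reward, next_state, d)
                agent.train()
                state = next_state
                step += 1
                total_step += 1
                if d:
                    break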