def fn():
    """Build one seeded PalleteWorld env from a pickled dataset file.

    Reads ``env_id``, ``seed`` and ``rank`` from the enclosing scope
    (presumably an env-factory for vectorized training — TODO confirm
    against the outer function).

    Returns:
        A PalleteWorld instance seeded with ``seed + rank``.
    """
    import pickle

    dataset_path = 'env_data.npy'  # was rebound onto the name `fn` itself; renamed for clarity
    # NOTE(review): pickle.load is unsafe on untrusted input; acceptable only
    # because this file is produced locally by the project itself.
    with open(dataset_path, 'rb') as f:
        datasets = pickle.load(f)
    env = PalleteWorld(env_id=env_id, datasets=datasets)
    env.seed(seed + rank)
    return env
def __init__(self, seed=None, id=0):
    """Create a single-game wrapper around one PalleteWorld environment.

    Args:
        seed: Optional RNG seed forwarded to the environment.
        id: Numeric identifier for this game instance (parameter name
            shadows the builtin ``id``; kept for caller compatibility).
    """
    self.id = id
    # Dead commented-out dataset-loading code removed (see the dataset
    # replay driver script for the pickle-loading variant).
    self.env = PalleteWorld(n_random_fixed=1, env_id=id)
    # Steps taken in the current episode; reset() zeroes it again.
    self.episode_step = 0
    if seed is not None:
        self.env.seed(seed)
    print('env {} is created with seed {}'.format(self.id, seed))
def __init__(self, num_env):
    """Hold ``num_env`` independent PalleteWorld instances.

    Args:
        num_env: Number of environments to create.
    """
    # Build the pool in one pass instead of append-in-a-loop.
    self.envs = [PalleteWorld(n_random_fixed=10) for _ in range(num_env)]
from environment import PalleteWorld
import pickle
from sample_agent.reinforce_agg import *

# Replay driver: trains once per stored item set in the pickled dataset.
# (Dead commented-out dataset-generation code removed; regenerate with
# PalleteWorld.save_dataset when needed.)
fn = 'env_data.npy'

# NOTE(review): pickle.load is unsafe on untrusted files; this dataset is
# assumed to be produced locally by PalleteWorld.save_dataset.
with open(fn, 'rb') as f:
    l = pickle.load(f)

for i, items in enumerate(l):
    # One fresh environment per stored item set, identified by its index.
    env = PalleteWorld(datasets=[items], env_id=i)
    main(env)
def fn():
    """Construct and return a freshly seeded PalleteWorld instance.

    Uses ``env_id``, ``seed`` and ``rank`` from the enclosing scope so
    each worker gets a distinct seed.
    """
    environment = PalleteWorld(env_id=env_id, n_random_fixed=10)
    environment.seed(seed + rank)
    return environment
        # NOTE(review): this chunk begins mid-function (inner step loop of a
        # training `main`); indentation below is reconstructed — confirm
        # nesting against the original file.
        if env_cfg.ENV.RENDER:
            env.render()
            clock.tick(env_cfg.ENV.TICK_INTERVAL)
        policy.rewards.append(reward)
        ep_reward += reward
        if done:
            # print('episode end')
            # print('len of action : {}, episode actions : {}'.format(len(env.previous_actions), env.previous_actions))
            ep_rewards.append(ep_reward)
            n_placed_items = len(info['placed_items'])
            break
    # End of episode: run the policy-gradient update, then log.
    finish_episode()
    if i_episode % 10 == 0:
        print('Episode {}\tLast reward: {:.2f}'.format(i_episode, ep_reward))
    # NOTE(review): scalars may belong inside the `% 10` logging branch in
    # the original file; placement reconstructed here.
    writer.add_scalar('data/final_reward', ep_reward, i_episode)
    writer.add_scalar('data/n_placed_items', n_placed_items, i_episode)

    # True when all elements of the iterable are equal (or it is empty).
    def checkEqual(iterator):
        return len(set(iterator)) <= 1

    # Early-stop once the last 20 episode rewards are identical and at
    # least 100 episodes have been collected ("converged").
    if checkEqual(ep_rewards[-20:]) and len(ep_rewards) > 100:
        print("Solved! last reward is {}".format(ep_rewards[-1]))
        return ep_rewards[-1]


if __name__ == '__main__':
    env = PalleteWorld(n_random_fixed=1)
    main()
class Game:
    """
    Game wrapper around a single PalleteWorld environment.

    Tracks a per-episode step counter and converts the raw environment
    observation into a single stacked array for the agent.
    """

    def __init__(self, seed=None, id=0):
        """Create the wrapped environment.

        Args:
            seed: Optional RNG seed forwarded to the environment.
            id: Numeric identifier for this game instance.
        """
        self.id = id
        self.env = PalleteWorld(n_random_fixed=1, env_id=id)
        # Steps taken in the current episode; reset() zeroes it again.
        self.episode_step = 0
        if seed is not None:
            self.env.seed(seed)
        print('env {} is created with seed {}'.format(self.id, seed))

    def _encode_observation(self, o):
        """Convert a raw (boxes, grid) observation into one stacked array.

        Each pending box ``(x, y)`` in ``o[0]`` is painted as a filled
        rectangle on its own 10x10 channel, then the channels are
        concatenated onto the grid observation ``o[1]`` along the last
        axis. Extracted here because `step` and `reset` previously
        duplicated this logic verbatim.
        """
        # Local import kept to match the file's original lazy-import style.
        import torch
        boxes = torch.zeros(10, 10, env_cfg.ENV.BIN_MAX_COUNT)  # x, y, box count
        for i, box in enumerate(o[0]):  # box = (x, y)
            boxes[0:box[1], 0:box[0], i] = 1
        return np.concatenate((o[1], boxes), axis=-1)

    def step(self, action):
        """
        Apply action to the game.

        Args:
            action : action of the action_space to take.

        Returns:
            The new observation, the reward and a boolean if the game has ended.
        """
        self.episode_step = self.episode_step + 1
        a = env_cfg.Action(bin_index=action, priority=1, rotate=1)
        o, reward, done, _ = self.env.step(a)
        # Visual state representation (dead embedding-based variant removed).
        o = self._encode_observation(o)
        print('A agent is taking action {} in {} step in game {}.'.format(
            action, self.episode_step, self.id))
        return o, reward, done

    def to_play(self):
        """
        Return the current player.

        Returns:
            The current player, it should be an element of the players list
            in the config. Single-player game: always 0.
        """
        return 0

    def reset(self):
        """
        Reset the game for a new game.

        Returns:
            Initial observation of the game.
        """
        o = self.env.reset()
        print('game {} is reset.'.format(self.id))
        self.episode_step = 0
        return self._encode_observation(o)

    def close(self):
        """
        Properly close the game.
        """
        self.env.close()

    def render(self):
        """
        Display the game observation.
        """
        self.env.render()
        input("Press enter to take a step ")
from env_config import *
import logging
import pygame
import os
# Pin training to the second GPU; must run before any CUDA library init.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
from tensorboardX import SummaryWriter

# Log level is configurable via the LOGLEVEL environment variable.
LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
logging.basicConfig(level=LOGLEVEL)
writer = SummaryWriter()

if __name__ == '__main__':
    env = PalleteWorld(n_random_fixed=1)
    agent = BDQNAgent()
    if ENV.RENDER:
        clock = pygame.time.Clock()
    total_step = 0
    for e in range(ENV.N_EPISODES):
        state = env.reset()
        d = False
        step = 0
        while True:
            # NOTE(review): chunk is truncated here — the inner action loop
            # continues in the original file past this view.
            while True:
                action = agent.get_action(state)