class Environment:
    def __init__(self, num_items, use_all=False, val=False):
        self.env = PointingEnv(num_items=num_items, use_all=use_all, val=val)

    def run(self, agent, inspect=False):
        s = self.env.reset()
        R = 0
        global failed_cnt
        global done_cnt
        global R_total
        while True:
            # encode the raw observation into the latent state used by the agent
            sbar = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(sbar)
            s_, r, done = self.env.step(a)
            s = s_
            R += r
            if done:
                # count the episode as failed unless the final reward signals success
                if r < 1:
                    failed_cnt += 1
                else:
                    done_cnt += 1
                R_total += R
                break
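# A hypothetical evaluation sketch (not part of the original code): assuming a
# trained 'agent' and placeholder names NUM_ITEMS and VAL_EPISODES, the
# module-level counters used by run() above could be summarised like this:
#
#   failed_cnt, done_cnt, R_total = 0, 0, 0
#   val_env = Environment(NUM_ITEMS, use_all=True, val=True)
#   for _ in range(VAL_EPISODES):
#       val_env.run(agent)
#   print('success rate:', done_cnt / float(done_cnt + failed_cnt))
#   print('mean return:', R_total / float(VAL_EPISODES))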
class Environment:
    def __init__(self, num_items, use_all=False, val=False):
        self.env = PointingEnv(num_items=num_items, use_all=use_all, val=val)

    def run(self, agent):
        s = self.env.reset()
        R = 0
        global r_history
        imaginary = False  # flag to start using imaginary rollouts for training
        if episodes > I_START and USE_IMAGINARY:
            imaginary = True  # start using imaginary rollouts
        # TODO: decide if imaginary or not
        while True:
            # encode the observation into a latent state (mean z, deviation sigma)
            z, sigma = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(z)
            if imaginary:
                # I_B rollouts of depth I_D through the learned environment model
                for i_b in range(I_B):
                    agent.brain.env_model.init_model(z)
                    zhat = z
                    for i_d in range(I_D):
                        zhat_, rhat, donehat = agent.brain.env_model.step(a)
                        rhat, donehat = round(rhat), round(donehat)
                        # if donehat == 1:
                        #     zhat_ = None
                        agent.observe((zhat, a, rhat, zhat_, donehat), imaginary=True)
                        if donehat == 1:
                            break
                        zhat = zhat_
            # step the real environment and store the real transition
            s_, r, done = self.env.step(a)
            z_, sigma_ = agent.brain.encode(
                np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            # if done:  # terminal state
            #     z_ = None
            agent.observe((z, a, r, z_, done, sigma, sigma_), imaginary=False)
            # train the environment model once enough real episodes have been seen
            if episodes >= ENV_LEARN_START and USE_IMAGINARY and ENV_LEARN:
                for i in range(R_ENV):
                    agent.train_env()
            # replay on real experience, and on imaginary experience when enabled
            for i in range(R_CR):
                agent.replay(imaginary=False)
            if imaginary:
                for i in range(R_CI):
                    agent.replay(imaginary=True)
            s = s_
            R += r
            r_history[episodes] = R
            if done:
                break
        print("Total reward:", R, ", episode: ", episodes)
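# A hypothetical training-driver sketch (not part of the original code): run()
# reads the module-level 'episodes' counter and writes into 'r_history', so a
# driver loop is assumed to maintain both. 'Agent', 'NUM_ITEMS', and
# 'MAX_EPISODES' are placeholder names for illustration only.
#
#   agent = Agent()
#   env = Environment(NUM_ITEMS)
#   r_history = np.zeros(MAX_EPISODES)
#   for episodes in range(MAX_EPISODES):  # imaginary rollouts start once
#       env.run(agent)                    # episodes > I_START (if enabled)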
class Environment:
    def __init__(self, num_items):
        self.env = PointingEnv(num_items)
        self.env_model = EnvironmentModel()

    def run(self, agent):
        s = self.env.reset()
        sbar = agent.brain.encode(
            np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
        R = 0
        imaginary = False
        if episodes > 0:
            # after the first episode, act and train on the learned environment model
            imaginary = True
            self.env_model.init_model(sbar)
        # TODO: decide if imaginary or not
        while True:
            a = agent.act(sbar)
            if imaginary:
                # step the environment model in latent space
                sbar_, r, done = self.env_model.step(a)
                r, done = round(r), round(done)
            else:
                # step the real environment and encode the next observation
                s_, r, done = self.env.step(a)
                sbar_ = agent.brain.encode(
                    np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            if done:  # terminal state
                sbar_ = None
            agent.observe((sbar, a, r, sbar_, done), imaginary=imaginary)
            agent.replay(imaginary=imaginary)
            if imaginary:
                sbar = sbar_
            else:
                s = s_
                sbar = agent.brain.encode(
                    np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            R += r
            if done:
                break
        print("Total reward:", R, ", episode: ", episodes)
class Environment:
    def __init__(self, num_items):
        self.env = PointingEnv(num_items)

    def run(self, agent, inspect=False):
        s = self.env.reset()
        R = 0
        global selected
        global ss
        global r_history
        if not selected:
            # keep one fixed encoded state around for inspection/debugging
            ss = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            selected = True
        while True:
            if inspect:
                self.env.printState()
            # encode the current and next observations into latent states
            sbar = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(sbar)
            s_, r, done = self.env.step(a)
            sbar_ = agent.brain.encode(
                np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            if done:  # terminal state
                sbar_ = None
            agent.observe((sbar, a, r, sbar_, done))
            agent.replay()
            s = s_
            R += r
            r_history[episodes] = R
            if done:
                break
        print("Total reward:", R, ", episode: ", episodes)