Example #1
 def step(self, action):
     # Render the predicted strokes onto the canvas: decode() works in [0, 1]
     # float space, so the uint8 canvas is normalised in and scaled back out.
     self.canvas = (decode(action, self.canvas.float() / 255) * 255).byte()
     self.stepnum += 1
     ob = self.observation()
     # The episode ends after a fixed number of painting steps.
     done = (self.stepnum == self.max_step)
     reward = self.cal_reward()  # np.array([0.] * self.batch_size)
     return ob.detach(), reward, np.array([done] * self.batch_size), None
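
For orientation, here is a minimal, self-contained sketch of how such a step() method is typically driven from a rollout loop. The ToyPaintEnv stub, its reset() method, and the 13-dimensional random actions are illustrative assumptions that only mimic the return signature of Example 1; they are not part of the original code.

import numpy as np
import torch

class ToyPaintEnv:
    # Stub environment that reproduces only the step() return shape of Example 1.
    def __init__(self, batch_size=4, max_step=40, width=128):
        self.batch_size, self.max_step = batch_size, max_step
        self.canvas = torch.zeros(batch_size, 3, width, width, dtype=torch.uint8)
        self.stepnum = 0

    def reset(self):
        self.canvas.zero_()
        self.stepnum = 0
        return self.canvas.float() / 255

    def step(self, action):
        # A real implementation would decode `action` into strokes here (see Example 1).
        self.stepnum += 1
        ob = self.canvas.float() / 255
        done = (self.stepnum == self.max_step)
        reward = np.array([0.] * self.batch_size)
        return ob.detach(), reward, np.array([done] * self.batch_size), None

env = ToyPaintEnv()
ob = env.reset()
done = np.array([False] * env.batch_size)
while not done.all():
    action = torch.rand(env.batch_size, 13)     # placeholder for the agent's stroke output
    ob, reward, done, _ = env.step(action)      # same 4-tuple as Example 1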
Example #2
 def step(self, action, episode_num):
     # Paint with either a per-episode renderer or the single default renderer;
     # decode() and decode_multiple_renderers() both work in [0, 1] float space.
     if self.use_multiple_renderers:
         self.canvas = (decode_multiple_renderers(
             action,
             self.canvas.float() / 255, episode_num) * 255).byte()
     else:
         self.canvas = (decode(action,
                               self.canvas.float() / 255) * 255).byte()
     self.stepnum += 1
     ob, mask = self.observation()
     done = (self.stepnum == self.max_step)
     reward = self.cal_reward()  # np.array([0.] * self.batch_size)
     return ob.detach(), reward, np.array([done] * self.batch_size), None, mask
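
Unlike Example 1, this variant also threads an episode number through to the renderer selection and returns a fifth element, the mask produced by observation(), so a caller unpacks ob, reward, done, info, mask rather than a 4-tuple.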
Example #3
 def step(self, action):
     # Disabled experiment: join consecutive strokes by building a connecting
     # action from the previous stroke's end point and the new stroke's start.
     #if torch.is_tensor(self.prev_action):
         #new_action = [
         #    self.prev_action[:,4:6],
         #    self.prev_action[:,4:6],
         #    action[:,:2],
         #    self.prev_action[:,7:8],
         #    action[:,6:7],
         #    action[:,8:13]
         #]
         #connect_action = torch.cat(new_action, dim=1)
         #self.canvas = (decode(connect_action, self.canvas.float() / 255) * 255).byte()
     self.canvas = (decode(action, self.canvas.float() / 255) * 255).byte()
     self.stepnum += 1
     self.prev_action = action  # remembered for the (disabled) stroke-connection logic
     ob = self.observation()
     done = (self.stepnum == self.max_step)
     reward = self.cal_reward()  # np.array([0.] * self.batch_size)
     return ob.detach(), reward, np.array([done] * self.batch_size), None
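
The commented-out block in Example 3 sketches joining consecutive strokes by concatenating pieces of the previous and current actions. Below is a standalone illustration of just that concatenation using dummy tensors; the 13-value stroke layout and the meaning attached to each slice are assumptions inferred from the slicing indices, not documented by the example.

import torch

batch_size = 4
prev_action = torch.rand(batch_size, 13)   # previous stroke (dummy values)
action = torch.rand(batch_size, 13)        # current stroke (dummy values)

connect_action = torch.cat([
    prev_action[:, 4:6],   # assumed: previous stroke's end point, used as the start
    prev_action[:, 4:6],   # assumed: control point collapsed onto that same point
    action[:, :2],         # assumed: current stroke's start point, used as the end
    prev_action[:, 7:8],   # assumed: width carried over from the previous stroke
    action[:, 6:7],        # assumed: width blending into the current stroke
    action[:, 8:13],       # assumed: transparency/colour taken from the current stroke
], dim=1)

print(connect_action.shape)  # torch.Size([4, 13]) -- same shape as a single stroke action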