def _is_done(self):
    """Decide whether the episode is over and what terminal reward applies.

    The checks run in this order:

    1. When ``config['edge_detection']`` is off and the coverage ratio
       ``draw_p / target_p`` exceeds 0.99, the episode ends with a reward
       scaled by ``self.total_repeat`` (zeroed when too many wrong pixels
       were drawn).
    2. When ``self.before_pen`` is set and the pixel at the clamped
       ``(tx, ty)`` position is ``FULL`` in the target channel, a cropped
       snapshot is written to ``1.jpg`` and the episode ends with
       ``config['end_state_reward']``.
    3. Otherwise the current position may be appended to
       ``self.start_candidates``; the episode then ends with reward 0 only
       if ``self.remaining_steps`` is exhausted.

    Returns:
        tuple: ``(done, reward)``.
    """
    coverage = float(self.graphics.draw_p) / float(self.graphics.target_p)

    if not config['edge_detection'] and coverage > 0.99:
        # The reward is cancelled when wrongly drawn pixels reach a tenth of
        # the target pixel count.
        wrong_limit = self.graphics.target_p / 10
        end_reward_coeff = 1 if self.graphics.draw_wrong_p < wrong_limit else 0

        # NOTE(review): the original indentation was lost; fill_the_point is
        # assumed to belong to the draw_random_start branch -- confirm.
        if config['draw_random_start']:
            self.graphics.pixels[:, :, config['helper_channel']] = EMPTY
            self.fill_the_point(self.start_candidates)

        # NOTE(review): this divides by self.total_repeat and raises
        # ZeroDivisionError if it is 0 -- confirm it always starts at >= 1.
        repeat_scaled = (
            (config['end_state_reward'] - self.total_repeat * 200)
            / self.total_repeat)
        return True, max(repeat_scaled, 200) * end_reward_coeff

    if self.before_pen and self.graphics.pixels[
            min(self.tx, self.width - 1),
            min(self.ty, self.height - 1),
            config['target_channel']] == FULL:
        # Crop window around self.origin, kept on the instance.
        self.fx = self.origin[0] - 10
        self.sx = self.origin[0] + 30
        self.fy = self.origin[1] - int(config['target_line_width'] / 2)
        self.sy = self.origin[1] + 40
        # The bounds live on self; the bare names sx/fy/sy/fx used previously
        # were never defined in this scope.
        self.pixels = self.graphics.pixels[
            self.fx:self.sx, self.fy:self.sy, :]

        # Build a 3-channel image: channel 0, channels 1 + 3 summed, channel 2.
        temp = self.graphics.pixels[:, :, 1] + self.graphics.pixels[:, :, 3]
        data = np.uint8(
            np.stack(
                (self.graphics.pixels[:, :, 0], temp,
                 self.graphics.pixels[:, :, 2]),
                axis=2))
        shape = data[self.fx:self.sx, self.fy:self.sy, :]
        header = data[0:40, 0:18, :]
        cut = np.hstack((header, shape))
        # NOTE(review): scipy.misc.imsave is not available in recent SciPy
        # releases -- confirm the pinned SciPy version or migrate the writer.
        scipy.misc.imsave('1.jpg', np.flip(cut, axis=0))
        return True, config['end_state_reward']

    if config["draw"] and (config["edge_detection"]
                           or config['draw_random_start']):
        # Only consider the position when the latest action was 0 and its
        # reward was better than the plain time punishment.
        if (len(self.recent_actions) > 0
                and self.recent_rewards[-1] > (-config['time_punish'])
                and self.recent_actions[-1] == 0):
            right_reward = self.calc_prospective_reward(
                self.angle + config['rotate_degree'] * DEGREE)
            left_reward = self.calc_prospective_reward(
                self.angle - config['rotate_degree'] * DEGREE)
            forward_reward = self.calc_prospective_reward(self.angle)

            # A turn in either direction has a positive prospective reward.
            if right_reward > 0 or left_reward > 0:
                self.start_candidates.append((self.tx, self.ty))
            # No direction has a positive prospective reward.
            if right_reward <= 0 and left_reward <= 0 and forward_reward <= 0:
                self.start_candidates.append((self.tx, self.ty))

    if self.remaining_steps <= 0:
        return True, 0
    return False, 0
def forward(self, X, mode):
    """Run the recurrent step over every position and project the states.

    Args:
        X: index array whose first two dimensions are read as batch size
            and sequence length; used to index ``self.params['W_Emb']``.
        mode: accepted for interface compatibility; not used here.

    Returns:
        The output of ``layers.affine`` reshaped to
        ``(batch_size, seq_len, self.WORD_DIM)``.
    """
    batch_size, seq_len = X.shape[0], X.shape[1]
    X_emb = self.params['W_Emb'][X]

    # The hidden state starts at zero and is threaded through every step.
    hm1 = np.zeros((batch_size, self.HID_DIM))
    hs = []
    # range() instead of xrange(): xrange is undefined on Python 3, while
    # range behaves the same here on both major versions.
    for t in range(seq_len):
        hm1 = self.one_step(X_emb[:, t, :], hm1)
        hs.append(hm1)

    # Flatten (batch, time) so the affine layer sees one row per position.
    hs = np.stack(hs, axis=1).reshape((batch_size * seq_len, self.HID_DIM))
    pred_out = layers.affine(
        hs, self.params['W_Softmax'], self.params['b_Softmax'])
    return pred_out.reshape((batch_size, seq_len, self.WORD_DIM))
def forward(self, data, mode='training'):
    """Embed the input, run the RNN cell over each position, then project.

    Args:
        data: input passed to ``self._embedding``; the embedding result is
            unpacked as a 3-D ``(N, length, D)`` array.
        mode: accepted for interface compatibility; not used here.

    Returns:
        The output of ``self._linear`` reshaped to ``(N, length, D)``.
    """
    embs = self._embedding(data)
    N, length, D = embs.shape

    # The first step receives None as the previous hidden state.
    hm1 = None
    hs = []
    # range() instead of xrange(): xrange is undefined on Python 3, while
    # range behaves the same here on both major versions.
    for i in range(length):
        hm1 = self._rnn(embs[:, i, :], hm1)
        hs.append(hm1)
    hs = np.stack(hs, axis=1)

    # NOTE(review): both reshapes reuse the embedding width D, so this
    # presumes the hidden and output widths equal D -- confirm.
    flat_out = self._linear(hs.reshape((N * length, D)))
    return flat_out.reshape((N, length, D))
def render(self, mode='human', close=False):
    """Blit the current canvas into a pygame window.

    The window is created on the first call and kept on ``self.display``
    so later calls reuse it.

    Args:
        mode: accepted for interface compatibility; not used here.
        close: accepted for interface compatibility; not used here.

    Returns:
        None.
    """
    if self.display is None:
        pygame.init()
        # Cache the surface on the instance. The previous code bound it to a
        # local, so the guard above never became false and the window was
        # re-created on every call.
        self.display = pygame.display.set_mode((self.width, self.height))

    # Build a 3-channel image: channel 0, channels 1 + 3 summed, channel 2.
    pixels = self.graphics.pixels
    merged = pixels[:, :, 1] + pixels[:, :, 3]
    frame = np.uint8(
        np.stack((pixels[:, :, 0], merged, pixels[:, :, 2]), axis=2))

    surface = pygame.surfarray.make_surface(frame)
    self.display.blit(surface, (0, 0))
    pygame.display.update()
def test_stack():
    """Stacking ten (3, 4) arrays with np.stack gives a (10, 3, 4) array."""
    count = 10
    item_shape = (3, 4)
    pieces = [rnd.randn(*item_shape) for _ in range(count)]
    stacked = np.stack(pieces)
    assert stacked.shape == (count,) + item_shape