def forward(self, x, info=None):
    """Run the network on ``x`` and return the raw output of ``self.head``.

    Args:
        x: Network input. Anything that is not a ``torch.FloatTensor`` is
            first passed through ``process_x``.
        info: Accepted for interface compatibility; not used by this method.

    Returns:
        The output of ``self.head``. No softmax is applied here.
    """
    if not isinstance(x, torch.FloatTensor):
        x = process_x(x)

    # Three identical stages: convolution -> batch norm -> ReLU.
    stages = (
        (self.conv1, self.bn1),
        (self.conv2, self.bn2),
        (self.conv3, self.bn3),
    )
    for conv, norm in stages:
        x = F.relu(norm(conv(x)))

    # Keep dim 0 and collapse every remaining dim into one before the
    # fully connected layer.
    flattened = x.reshape(x.size(0), -1)
    hidden = F.relu(self.fc(flattened))
    return self.head(hidden)
def choose_action(self, x):
    """Select an action for one observation using an epsilon-greedy rule.

    ``self.eval_net`` is switched to eval mode first. When a uniform draw
    falls below ``EPSILON`` the action with the highest value predicted by
    ``self.eval_net`` is returned; otherwise a random integer in
    ``[0, N_A)`` is returned.

    Args:
        x: A single observation; it is converted with ``process_x`` before
            being fed to the network.

    Returns:
        The chosen action index.
    """
    self.eval_net.eval()
    n_actions = N_A
    state = process_x(x)  # input only one sample

    if np.random.uniform() < EPSILON:
        # Greedy branch: index of the largest value along dim 1, taken for
        # the first (and only) sample.
        q_values = self.eval_net.forward(state)
        _, best = torch.max(q_values, 1)
        return best.data.numpy()[0]

    # Exploration branch: uniformly random action index.
    return np.random.randint(0, n_actions)
def choose_action(self, x):
    """Select an action for one observation using an epsilon-greedy rule.

    ``self.eval_net`` is switched to eval mode first. When a uniform draw
    falls below ``EPSILON`` the action with the highest value predicted by
    ``self.eval_net`` is returned; otherwise an element of
    ``arg.action_space`` is drawn with ``np.random.choice``.

    Args:
        x: A single observation; it is converted with ``process_x`` and
            moved to ``device`` before being fed to the network.

    Returns:
        The chosen action.
    """
    self.eval_net.eval()

    # Tensor.to() returns the moved tensor instead of modifying it in
    # place, so the result has to be kept.
    x = process_x(x).to(device)  # input only one sample

    if np.random.uniform() < EPSILON:  # greedy
        # Action selection needs no gradients.
        with torch.no_grad():
            actions_value = self.eval_net.forward(x)
        # .cpu() is a no-op for tensors already on the CPU, so no branching
        # on the device is needed. Comparing `device` against the strings
        # 'cuda' / 'cpu' left `action` unbound for values such as 'cuda:0'.
        best = torch.max(actions_value, 1)[1].cpu().numpy()
        return best[0]

    # random
    return np.random.choice(arg.action_space)
def forward(self, x, info=None):
    """Run the network on ``x`` and return the raw output of ``self.head``.

    Args:
        x: Network input. Anything that is not a ``torch.FloatTensor`` is
            first passed through ``process_x``; the result is then moved
            to ``device``.
        info: Optional extra tensor (or tuple of tensors, which is joined
            with ``torch.cat``) that is concatenated onto the flattened
            convolutional features before the fully connected layer.

    Returns:
        The output of ``self.head``. No softmax is applied here.
    """
    # NOTE(review): a CUDA float tensor is not an instance of
    # torch.FloatTensor, so it would also be routed through process_x.
    # Confirm that this is intended.
    if not isinstance(x, torch.FloatTensor):
        x = process_x(x)
    x = x.to(device)

    x = F.relu(self.bn1(self.conv1(x)))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))

    # Keep dim 0 and collapse every remaining dim into one.
    x = x.reshape(x.size(0), -1)

    # Identity test instead of `!= None`.
    if info is not None:
        if isinstance(info, tuple):
            info = torch.cat(info)
        # torch.cat requires all inputs on the same device.
        # NOTE(review): torch.cat joins along dim 0 by default, which here
        # is the dim kept by the reshape above. If `info` is meant to add
        # extra per-sample features, dim=1 is probably what is wanted.
        # Left unchanged; confirm against the shape of `info` and the input
        # size of self.fc.
        x = torch.cat((x, info.to(x.device)))

    x = F.relu(self.fc(x))
    return self.head(x)
# if (arg.show_pre_image and cv_img(s_[-1])): break_flag = 1; break # tt.sleep(0.5) break s = s_ cv2.destroyAllWindows() f = round(tt.now() / steps, 3) step_per_second = round(1 / f, 3) print('##############################################################') print(i_episode, ' episodes--steps: ', steps, 'cost_time: ', tt.now(), 'frequence: ', f, '-- step_per_second: ', step_per_second) 1 s_ s_ = process_x(s_) # model.eval_net.train() model.eval_net(s_) model.target_net(s_) # torch.manual_seed(1) # model.target_net.load_state_dict(model.eval_net.state_dict()) # --------------- plot import matplotlib.pyplot as plt PLOT = True Plot_mean_reward = 0 Rewards = model.Rewards # def plot_mean_loss(ys): # cut_len = 10 # # y = np.zeros(len(ys))