policy_freq = 2  # Number of iterations to wait before the policy network (Actor model) is updated
episode_reward = 0
maxepisode_timesteps = 500
torch.manual_seed(seed)
np.random.seed(seed)
state_dim = 5
action_dim = 1
max_action = 5
min_action = -5

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
# brain = Dqn(5, 3, 0.9)
action2rotation = [0, 5, -5]
# spacenetwork = ObsSpaceNetwork()
policy = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []

im = CoreImage("./images/MASK1.png")
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
episode_timesteps = 0
done = True
t0 = time.time()
# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True
# Initializing the last distance
last_distance = 0
orientation = 0
obs_img = np.zeros((50, 50))
obs_dis = last_distance
obs_ori = orientation
new_obs_img = np.zeros((50, 50))
new_obs_dis = last_distance
new_obs_ori = orientation

##################################################
''' We create the policy network (the Actor model) '''
policy = TD3(action_dim, max_action)

##################################################
''' We create the Experience Replay memory '''
replay_buffer = ReplayBuffer(sample_size=sample_size)

##################################################
im = CoreImage("./images/MASK1.png")
imgCV2 = cv2.imread('./images/MASK1.png')
rows, cols, dims = imgCV2.shape

# Initializing the map
first_update = True
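# --- Illustrative sketch (not from the original files) -----------------------
# The ReplayBuffer used above is imported from ai.py and is not shown in this
# section. A minimal buffer compatible with how these snippets use it
# (add() takes a (obs, new_obs, action, reward, done) tuple, sample() returns
# batched arrays for training) could look like the standard TD3 buffer below.
# The actual class in ai.py may differ; the sample_size argument is assumed to
# be a default batch size.
import numpy as np

class ReplayBuffer(object):

    def __init__(self, max_size=1e6, sample_size=100):
        self.storage = []
        self.max_size = max_size
        self.sample_size = sample_size  # assumed: default batch size for sampling
        self.ptr = 0

    def add(self, transition):
        # transition = (obs, new_obs, action, reward, done)
        if len(self.storage) == self.max_size:
            self.storage[int(self.ptr)] = transition
            self.ptr = (self.ptr + 1) % self.max_size
        else:
            self.storage.append(transition)

    def sample(self, batch_size=None):
        batch_size = batch_size or self.sample_size
        ind = np.random.randint(0, len(self.storage), size=batch_size)
        states, next_states, actions, rewards, dones = [], [], [], [], []
        for i in ind:
            s, s2, a, r, d = self.storage[i]
            states.append(np.array(s, copy=False))
            next_states.append(np.array(s2, copy=False))
            actions.append(np.array(a, copy=False))
            rewards.append(r)
            dones.append(d)
        return (np.array(states), np.array(next_states), np.array(actions),
                np.array(rewards).reshape(-1, 1), np.array(dones).reshape(-1, 1))
# ------------------------------------------------------------------------------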
from ai import TD3

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = TD3((1, 40, 40), 1, 5)
# action2rotation = [0, 5, -5]
last_reward = 0
scores = []

im = CoreImage("./images/MASK1.png")
# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True


def init():
    global sand
    global goal_x
    global goal_y
def update(self, dt):
    global longueur
    global largeur
    longueur = self.width
    largeur = self.height
    if first_update:
        init()


def evaluate_policy(policy, eval_episodes=10):
    avg_reward = 0.
    for _ in range(eval_episodes):
        obs = reset()
        done = False
        while not done:
            action = policy.select_action(obs)
            obs, reward, done, _ = Car.move(action)
            avg_reward += reward
    avg_reward /= eval_episodes
    print("---------------------------------------")
    print("Average Reward over the Evaluation Step: %f" % (avg_reward))
    print("---------------------------------------")
    return avg_reward


file_name = "%s_%s_%s" % ("TD3", env_name, str(seed))
print("---------------------------------------")
print("Settings: %s" % (file_name))
print("---------------------------------------")

if not os.path.exists("./results"):
    os.makedirs("./results")
if save_models and not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

torch.manual_seed(seed)
np.random.seed(seed)
state_dim = [32, 32, 1]
action_dim = 1
max_action = 5
policy = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
evaluations = [evaluate_policy(policy)]


def mkdir(base, name):
    path = os.path.join(base, name)
    if not os.path.exists(path):
        os.makedirs(path)
    return path


work_dir = mkdir('exp', 'brs')
monitor_dir = mkdir(work_dir, 'monitor')
max_episode_steps = 400
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
t0 = time.time()

# We start the main loop over 40,000 timesteps
while total_timesteps < max_timesteps:

    # If the episode is done
    if done:

        # If we are not at the very beginning, we start the training process of the model
        if total_timesteps != 0 and total_timesteps > batch_size:
            print("Total Timesteps: {} Episode Num: {} Reward: {}".format(total_timesteps, episode_num, episode_reward))
            policy.train(replay_buffer, episode_timesteps, batch_size, discount, tau, policy_noise, noise_clip, policy_freq)

        # We evaluate the episode and we save the policy
        if timesteps_since_eval >= eval_freq:
            timesteps_since_eval %= eval_freq
            evaluations.append(evaluate_policy(policy))
            policy.save(file_name, directory="./pytorch_models")
            np.save("./results/%s" % (file_name), evaluations)

        # When the training step is done, we reset the state of the environment
        obs = reset()

        # Set the Done to False
        done = False

        # Set rewards and episode timesteps to zero
        episode_reward = 0
        episode_timesteps = 0
        episode_num += 1

    # Before 10000 timesteps, we play random actions
    if total_timesteps < start_timesteps:
        action = np.random.normal(0, 1, size=1).clip(-1, 1).astype(np.float32)
    else:
        # After 10000 timesteps, we switch to the model
        action = policy.select_action(obs)
        # If the explore_noise parameter is not 0, we add noise to the action and we clip it
        if expl_noise != 0:
            action = (action + np.random.normal(0, expl_noise, size=1)).clip(-1, 1)

    # The agent performs the action in the environment, then reaches the next state and receives the reward
    new_obs, reward, done, _ = move(action)

    # We check if the episode is done
    # done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done)
    if episode_timesteps + 1 == max_episode_steps:
        done = True
    done_bool = float(done)

    # We increase the total reward
    episode_reward += reward

    # We store the new transition into the Experience Replay memory (ReplayBuffer)
    replay_buffer.add((obs, new_obs, action, reward, done_bool))

    # We update the state, the episode timestep, the total timesteps, and the timesteps since the evaluation of the policy
    obs = new_obs
    episode_timesteps += 1
    total_timesteps += 1
    timesteps_since_eval += 1

# After the training loop: final evaluation, saving the model, and stopping the app
t1 = time.time()
print("Total time taken: {}".format(t1 - t0))
evaluations.append(evaluate_policy(policy))
if save_models:
    policy.save("%s" % (file_name), directory="./pytorch_models")
np.save("./results/%s" % (file_name), evaluations)
CarApp().stop()
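# --- Illustrative sketch (not from the original files) -----------------------
# The training loop above reads several hyperparameters (start_timesteps,
# eval_freq, max_timesteps, expl_noise, batch_size, discount, tau, policy_noise,
# noise_clip, policy_freq, save_models, env_name, seed) that are defined
# elsewhere in the project. The values below are the defaults of the original
# TD3 implementation and are assumptions, not necessarily the values used here;
# max_timesteps follows the "40,000 timesteps" comment on the loop.
env_name = "SelfDrivingCar"  # assumed label, only used to build file_name
seed = 0               # random seed
start_timesteps = 1e4  # timesteps of purely random actions before the policy is used
eval_freq = 5e3        # how often (in timesteps) the policy is evaluated
max_timesteps = 4e4    # total number of training timesteps
save_models = True     # whether to checkpoint the trained policy
expl_noise = 0.1       # std of the Gaussian exploration noise added to the chosen action
batch_size = 100       # size of each batch sampled from the replay buffer
discount = 0.99        # discount factor gamma
tau = 0.005            # soft-update rate of the target networks
policy_noise = 0.2     # std of the noise added to target actions (target policy smoothing)
noise_clip = 0.5       # range to which the target-policy noise is clipped
policy_freq = 2        # the actor and targets are updated once every policy_freq critic updates
# ------------------------------------------------------------------------------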
# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0
counter = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
policy = TD3(state_dim=22, action_dim=2, max_action=np.asarray([10., 2.]))
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []

im = CoreImage("./images/MASK1.png")
max_velocity = 6.
min_velocity = 0
max_angle = +5.
# min_angle = -5
max_stuck = 100
stuck_count = 0

# Initializing the map
first_update = True
state_dim = 5   # position, velocity, orientation
action_dim = 1  # moving
max_action = 5
min_action = -5
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
episode_timesteps = 0
done = True
t0 = time.time()

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
# brain = Dqn(5, 3, 0.9)  # CHANGE
# brain = TD3(6, 3, 5)  # states, action, max_action
action2rotation = [0, 5, -5]  # angle of rotation
brain = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []

im = CoreImage("./images/MASK1.png")

# Initializing the map, keep i as 0
first_update = True
i = 0


def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    global img
from ai import TD3

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = TD3((1, 40, 40), 1, 10)
# action2rotation = [0, 5, -5]
last_reward = 0
reward = 0
# scores = []

im = CoreImage("./images/MASK1.png")
# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True


def init():
    global sand
    global img
import cv2

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain", and that contains our neural network that represents our Q-function
brain = TD3(3, 1, 5)
last_reward = 0
scores = []
crop_size = 80
border_size = 5

# Initializing the map
first_update = True


def init():
    global sand
    global img
    global goal_x
    global goal_y
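# --- Illustrative sketch (not from the original files) -----------------------
# crop_size and border_size above are typically used to cut a square patch of
# the sand map around the car, which then serves as the image part of the
# state. The helper below is a hypothetical example of that idea; the function
# name, padding strategy, and coordinate convention are assumptions.
import numpy as np

def crop_sand_patch(sand, car_x, car_y, crop_size=80, border_size=5):
    pad = crop_size // 2 + border_size
    # Pad with 1s (treated as sand) so patches near the map edges keep a constant shape
    padded = np.pad(sand, pad, mode='constant', constant_values=1)
    cx, cy = int(car_x) + pad, int(car_y) + pad
    half = crop_size // 2
    return padded[cx - half:cx + half, cy - half:cy + half]
# ------------------------------------------------------------------------------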