# NOTE(review): this physical line is a whitespace-collapsed paste — many
# statements fused onto one line with the original line breaks and indentation
# lost. As written it is NOT valid Python; the original structure must be
# restored (re-indent / re-wrap) before it can run.
#
# What the visible tokens show:
#   * six per-run bookkeeping dicts are (re)initialized: score_book,
#     value_loss_book, actor_loss_book, critic_1_loss_book,
#     critic_2_loss_book, critic_loss_book
#   * a loop over range(n_trials) that, per trial, prints the 1-based trial
#     number and constructs a fresh Agent with 200x200 hidden layers and
#     n_actions=env.action_space.shape[0] — i.e. a continuous action space
#     (contrast with the sibling fragment below that uses env.action_space.n)
#   * per-trial history lists (score_history, loss, value_loss, actor_loss,
#     critic_1_loss, critic_2_loss, critic_loss) are reset
#   * the fragment then ends with a dangling `if load_checkpoint:` whose body
#     is cut off here — presumably `agent.load_models()` as in the parallel
#     fragment, but that is a guess; TODO confirm against the original file.
#
# Left byte-identical because the truncation makes any rewrite a guess.
score_book = {} value_loss_book = {} actor_loss_book = {} critic_1_loss_book = {} critic_2_loss_book = {} critic_loss_book = {} for trial_num in range(n_trials): print('\nTrial num:', trial_num + 1) agent = Agent(input_dims=env.observation_space.shape, layer1_size=200, layer2_size=200, env=env, n_actions=env.action_space.shape[0], alpha=alpha, beta=beta, gamma=gamma, max_size=max_size, tau=tau, ent_alpha=ent_alpha, batch_size=batch_size, reward_scale=reward_scale, chkpt_dir=chkpt_dir) score_history = [] loss = [] value_loss = [] actor_loss = [] critic_1_loss = [] critic_2_loss = [] critic_loss = [] if load_checkpoint:
# NOTE(review): like the fragment above, this physical line is a
# whitespace-collapsed paste of a multi-statement loop — NOT valid Python as
# written; the original line breaks and indentation must be restored first.
#
# What the visible tokens show:
#   * an outer sweep over perturbations = [0.3, 0.5, 0.7, 0.9]; for each
#     perturbation, reward_history and all six bookkeeping dicts are reset
#     (note: the dicts being reset *inside* the perturbation loop means
#     results are per-perturbation, unlike the fragment above)
#   * an inner loop over range(n_trials) that builds a fresh Agent per trial
#     with 256x256 hidden layers and n_actions=env.action_space.n — i.e. a
#     *discrete* action space, differing from the 200x200 / shape[0]
#     configuration in the fragment above; presumably these are two variants
#     of the same experiment script — verify which one is current
#   * per-trial history lists are reset; `if load_checkpoint:
#     agent.load_models()` restores saved weights; then the game loop
#     `for i in tqdm(range(n_games)):` begins
#   * the fragment ends with a dangling `if render:` whose body is cut off
#     here — the fragment is truncated mid-statement.
#
# Left byte-identical because the truncation makes any rewrite a guess.
perturbations = [0.3, 0.5, 0.7, 0.9] for perturbation in perturbations: reward_history = [] score_book = {} value_loss_book = {} actor_loss_book = {} critic_1_loss_book = {} critic_2_loss_book = {} critic_loss_book = {} print("Perturbation amount: {}".format(perturbation)) for trial_num in range(n_trials): print('\nTrial num:', trial_num+1) agent = Agent(input_dims=env.observation_space.shape, layer1_size=256, layer2_size=256, env=env, n_actions=env.action_space.n, alpha=alpha, beta=beta, gamma=gamma, max_size=max_size, tau=tau, ent_alpha=ent_alpha, batch_size=batch_size, reward_scale = reward_scale, chkpt_dir=chkpt_dir) score_history = [] loss = [] value_loss = [] actor_loss = [] critic_1_loss = [] critic_2_loss = [] critic_loss = [] if load_checkpoint: agent.load_models() for i in tqdm(range(n_games)): if render:
# NOTE(review): script entry point, also a whitespace-collapsed paste — NOT
# valid Python as written; restore the original line breaks and indentation
# before running.
#
# What the visible tokens show:
#   * under `if __name__ == '__main__':`, TensorFlow GPU memory growth is
#     enabled on physical_devices[0] — this will raise IndexError on a
#     machine with no visible GPU; consider guarding on an empty list
#   * run flags: render=True, debugging=False, training=True,
#     load_checkpoint=True; n_episodes = 7000; env = CarRacing(obstacles=False)
#     and agent = Agent(env); total_steps = 0
#   * checkpoint resume path: loads 'total' and 'avg' reward histories,
#     sets episode_offset = len(total_rewards) so episode numbering continues
#     where it left off, then restores model weights and the replay buffer;
#     otherwise both reward lists start empty. episode_offset is only defined
#     when load_checkpoint is True, which is consistent with the guarded use
#     inside the loop
#   * the main loop `for episode in range(n_episodes):` rebinds the loop
#     variable with `episode += episode_offset` — this only shifts the local
#     name for logging/bookkeeping; it does not change how many iterations
#     run, and the raw value is re-supplied by the iterator each pass
#   * the fragment is truncated right after `env.reset()` — the rest of the
#     episode body is not visible here.
#
# Left byte-identical because the truncation makes any rewrite a guess.
if __name__ == '__main__': #=========================Enable GPU usage=========================== physical_devices = tf.config.experimental.list_physical_devices('GPU') tf.config.experimental.set_memory_growth(physical_devices[0], True) #========Operational Instructions========= render = True debugging = False training = True load_checkpoint = True #============================Constants================ n_episodes = 7000 env = CarRacing(obstacles=False) agent = Agent(env) total_steps = 0 if load_checkpoint: total_rewards = list(agent.load_rewards('total')) avg_rewards = list(agent.load_rewards('avg')) episode_offset = len(total_rewards) agent.load_models() agent.load_buffer() else: total_rewards = [] avg_rewards = [] #================Main loop=============================== for episode in range(n_episodes): if load_checkpoint: episode += episode_offset env.reset()