# --- Strong-play training setup: remaining CLI args, env, agent checkpoint, ERE memory ---
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N')
parser.add_argument('--hidden_size', type=int, default=512, metavar='N')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N')
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()

# torch.cuda.is_available() already returns a bool — no ternary needed.
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)

# Agent: SAC over the full observation/action space, warm-started from a
# pretrained checkpoint (NOTE(review): checkpoint was trained with
# hidden_size-256 per its filename, while --hidden_size defaults to 512 —
# confirm load_model reconciles this).
agent = SAC(env.observation_space.shape[0], env.action_space, args)
agent.load_model(
    'full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000',
    'full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000',
)
# opponent = copy.deepcopy(agent)
basic_strong = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Tensorboard: run name encodes every hyperparameter for later comparison.
writer = SummaryWriter(f"strongplay-runs/ERE{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}")

# Memory: Emphasizing-Recent-Experience prioritized replay.
memory = ERE_PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0
# --- Self-play pairing setup: pick two checkpoint runs, load agent and opponent ---
player2 = 3
basic1 = False
basic2 = False

# (removed an exact commented-out duplicate of this print)
print(f"{runs[player1]} vs {runs[player2]}")

# Checkpoint files are discovered by sorted directory listing: index 0 is the
# actor, 1 the critic, and an optional third file is the target network.
# NOTE(review): plain string concatenation assumes `root` and each `runs[...]`
# entry end with a path separator — confirm against how `root`/`runs` are built.
models1 = sorted(os.listdir(root + runs[player1]))
actor = root + runs[player1] + models1[0]
critic = root + runs[player1] + models1[1]
target = root + runs[player1] + models1[2] if len(models1) == 3 else None

models2 = sorted(os.listdir(root + runs[player2]))
o_actor = root + runs[player2] + models2[0]
o_critic = root + runs[player2] + models2[1]
o_target = root + runs[player2] + models2[2] if len(models2) == 3 else None

agent.load_model(actor, critic, target)
opponent.load_model(o_actor, o_critic, o_target)
basic = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Tensorboard (disabled for this run)
# writer = SummaryWriter(f"hockey-runs-defence/{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}")

# Memory: plain uniform replay for this script variant.
# memory = PrioritizedReplay(args.replay_size)
memory = ReplayMemory(args.replay_size, args.seed)

# Training Loop
total_numsteps = 0
updates = 0
# --- Basic-opponent training setup: CLI tail, env, pretrained agent, prioritized memory ---
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()

# torch.cuda.is_available() already returns a bool — no ternary needed.
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)

# Agent, warm-started from a pretrained actor/critic checkpoint pair.
agent = SAC(env.observation_space.shape[0], env.action_space, args)
actor = "full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000"
critic = "full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000"
agent.load_model(actor, critic)

# Fixed scripted opponent (strong variant) instead of a copied agent.
# opponent = copy.deepcopy(agent)
opponent = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Tensorboard

# Memory
memory = PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0

o = env.reset()
# _ = env.render()
# --- Defence-training setup (hidden_size-64 checkpoint): CLI tail, env, agent, memory ---
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()

# torch.cuda.is_available() already returns a bool — no ternary needed.
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.TRAIN_DEFENSE)

# Agent: alternative checkpoints for other hidden sizes kept for reference.
agent = SAC(env.observation_space.shape[0], env.action_space, args)
# actor512 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.385833864540086_episode-41000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-512_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-40-41'
# critic512 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.385833864540086_episode-41000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-512_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-40-41'
# actor128 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.184820100545167_episode-39000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-128_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-16'
# critic128 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.184820100545167_episode-39000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-128_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-16'
actor64 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.407677291229737_episode-33000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-64_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-10'
critic64 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.407677291229737_episode-33000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-64_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-10'
agent.load_model(actor64, critic64)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Tensorboard: run name encodes every hyperparameter for later comparison.
writer = SummaryWriter(
    f"hockey-hidden-runs-defence/{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}"
)

# Memory
memory = PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0

o = env.reset()