# NOTE(review): the commented-out block below appears to be a collapsed
# (newlines lost) and truncated Unity ML-Agents "Reacher" training fragment.
# Line breaks are restored for readability; it is kept entirely as comments
# because the fragment is cut off mid-loop (the final `if` has no body) and
# `b_agent` is never constructed (its creation is itself commented out).
# env = UnityEnvironment(file_name="Reacher_Linux/Reacher.x86_64", no_graphics=args.no_graphics)
# env = UnityEnvironment(file_name="Reacher.app", no_graphics=args.no_graphics)
# brain_name = env.brain_names[0]
# brain = env.brains[brain_name]
# env_info = env.reset(train_mode=False)[brain_name]
# action_size = brain.vector_action_space_size
# state = env_info.vector_observations[0]
# reward = env_info.rewards[0]
# action = 0
# state_size = len(state)
# # b_agent = Agent(args.model_name, state_size, action_size)
# try:
#     b_agent.load()  # try to load to continue training
# except:
#     pass
# for epx in range(1, args.episodes + 1):
#     at_step = 0
#     env_info = env.reset(train_mode=False)[brain_name]
#     b_agent.reset_episode()
#     while True:
#         action = b_agent.act(state)
#         env_info = env.step(action)[brain_name]
#         at_step += 1
#         next_state = env_info.vector_observations[0]
#         reward = env_info.rewards[0]
#         done = env_info.local_done[0]
#         if at_step % 100 == 0:
# --- Script entry: configure environment + agent, run DDPG, plot results ---
# (line breaks restored; this chunk relies on `parser`, the UPPER_CASE
# constants, `Agent`, `run_ddpg`, `plt`, and `np` defined elsewhere in
# the file, and its top-level names are used by code past this chunk)
args = parser.parse_args()
is_training = args.mode == 'training'

# RLBench "ReachTarget" task; STATE_TYPE_LIST selects which observation
# types the wrapper exposes.
env = RLBenchEnv("ReachTarget", state_type_list=STATE_TYPE_LIST)
state = env.reset()
action_dim = env.action_space.shape[0]
state_space = env.observation_space

# DDPG agent wired from module-level hyperparameter constants.
# NOTE(review): the bare positional `1` presumably is the number of agents —
# confirm against Agent's signature.
agent = Agent(state_space, HIDDEN_SIZE, action_dim, 1,
              seed=SEED,
              buffer_size=MEMORY_BUFFER_SIZE,
              actor_lr=ACTOR_LR,
              actor_hidden_sizes=ACTOR_HIDDEN_UNITS,
              actor_weight_decay=ACTOR_WEIGHT_DECAY,
              critic_lr=CRITIC_LR,
              critic_hidden_sizes=CRITIC_HIDDEN_UNITS,
              critic_weight_decay=CRITIC_WEIGHT_DECAY,
              batch_size=BATCH_SIZE,
              gamma=GAMMA,
              tau=TAU
              )
print(agent)
# NOTE(review): load() is called unconditionally — assumes it tolerates
# missing checkpoint files; verify.
agent.load()

# Exploration epsilon: full decay schedule when training; pinned at the
# floor (EPS_END) when evaluating.
scores, actor_losses, critic_losses = run_ddpg(n_episodes=N_EPISODES,
                                               is_training=is_training,
                                               eps_start=EPS_START if is_training else EPS_END,
                                               eps_decay=EPS_DECAY,
                                               eps_end=EPS_END,
                                               max_t=MAX_STEPS,
                                               learn_every_step=LEARN_EVERY_STEP)

# Persist weights only after a training run, never after evaluation.
if is_training:
    agent.save()

# Score curve goes in the first of three stacked subplots (311); the
# remaining subplots (actor/critic losses) are built past this chunk.
fig = plt.figure()
ax1 = fig.add_subplot(311)
ax1.plot(np.arange(1, len(scores) + 1), scores)
ax1.set_ylabel('Score')
ax1.set_xlabel('Episode #')