from time import time

from tqdm import tqdm

# CustomAgent, Environment and get_points are defined elsewhere in the project.


class Trainer:
    def __init__(self, game_dir):
        self.agent = CustomAgent()
        self.env = Environment(game_dir)

    def train(self):
        self.start_time = time()
        for epoch_no in range(1, self.agent.nb_epochs + 1):
            for game_no in tqdm(range(len(self.env.games))):
                obs, infos = self.env.reset()
                self.agent.train()

                scores = [0] * len(obs)
                dones = [False] * len(obs)
                steps = [0] * len(obs)
                while not all(dones):
                    # Increase step counts.
                    steps = [step + int(not done)
                             for step, done in zip(steps, dones)]
                    commands = self.agent.act(obs, scores, dones, infos)
                    obs, scores, dones, infos = self.env.step(commands)

                # Let the agent know the game is done.
                self.agent.act(obs, scores, dones, infos)

                score = sum(scores) / self.agent.batch_size
                score, possible_points, percentage = get_points(
                    score, infos['extra.walkthrough'][0])
                print('Score: {}/{}'.format(score, possible_points))
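# The `Environment` used by Trainer is not shown in the original; below is a
# minimal sketch of what the Trainer assumes, built on the same textworld.gym
# calls as the train() function that follows. The defaults (requested_infos,
# batch_size, max_episode_steps) are placeholders, not values from the source.
import gym
import textworld.gym
from pathlib import Path


class Environment:
    def __init__(self, game_dir, requested_infos=None, batch_size=10,
                 max_episode_steps=100):
        # Collect the .ulx game files from the given directory.
        self.games = [str(f) for f in Path(game_dir).iterdir()
                      if f.suffix == '.ulx']
        env_id = textworld.gym.register_games(
            self.games, requested_infos,
            max_episode_steps=max_episode_steps, name="training")
        env_id = textworld.gym.make_batch(env_id, batch_size=batch_size,
                                          parallel=True)
        self._env = gym.make(env_id)

    def reset(self):
        return self._env.reset()

    def step(self, commands):
        return self._env.step(commands)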
import gym
from tqdm import tqdm

import textworld
import textworld.gym

# CustomAgent and _validate_requested_infos live alongside this script.


def train(game_files):
    agent = CustomAgent()
    requested_infos = agent.select_additional_infos()
    _validate_requested_infos(requested_infos)

    env_id = textworld.gym.register_games(
        game_files, requested_infos,
        max_episode_steps=agent.max_nb_steps_per_episode,
        name="training")
    env_id = textworld.gym.make_batch(env_id, batch_size=agent.batch_size,
                                      parallel=True)
    env = gym.make(env_id)

    for epoch_no in range(1, agent.nb_epochs + 1):
        stats = {
            "scores": [],
            "steps": [],
        }
        for game_no in tqdm(range(len(game_files))):
            obs, infos = env.reset()
            agent.train()

            scores = [0] * len(obs)
            dones = [False] * len(obs)
            steps = [0] * len(obs)
            while not all(dones):
                # Increase step counts.
                steps = [step + int(not done)
                         for step, done in zip(steps, dones)]
                commands = agent.act(obs, scores, dones, infos)
                obs, scores, dones, infos = env.step(commands)

            # Let the agent know the game is done.
            agent.act(obs, scores, dones, infos)

            stats["scores"].extend(scores)
            stats["steps"].extend(steps)

        score = sum(stats["scores"]) / agent.batch_size
        steps = sum(stats["steps"]) / agent.batch_size
        print("Epoch: {:3d} | {:2.1f} pts | {:4.1f} steps".format(
            epoch_no, score, steps))

    agent.finish(final=True)
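# Hypothetical entry point for the train() function above: gather the .ulx
# games from a directory and launch training. The directory path is
# illustrative, not from the original.
if __name__ == "__main__":
    from pathlib import Path
    game_files = [str(f) for f in Path("games/train").iterdir()
                  if f.suffix == ".ulx"]
    train(game_files)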
def __init__(self, game_dir):
    self.agent = CustomAgent()
    self.env = Environment(game_dir)
def __init__(self, game_dir):
    self.agent = CustomAgent(verbose=True)
    self.env = Environment(game_dir)
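# Sketch of the interface the trainers above expect from CustomAgent.
# Attribute values and the EnvInfos flags are assumptions inferred from the
# call sites (e.g. infos['extra.walkthrough'] implies extras=["walkthrough"]);
# the method bodies are placeholders, not the real agent.
import textworld


class CustomAgent:
    nb_epochs = 5                    # placeholder value
    batch_size = 10                  # placeholder value
    max_nb_steps_per_episode = 100   # placeholder value

    def __init__(self, verbose=False):
        self.verbose = verbose

    def select_additional_infos(self):
        # Declare which extra state information each env step should return.
        return textworld.EnvInfos(description=True, inventory=True,
                                  admissible_commands=True,
                                  extras=["walkthrough"])

    def train(self):
        pass  # switch the policy (e.g. its network) to training mode

    def act(self, obs, scores, dones, infos):
        # Must return one command string per game in the batch.
        return ["look"] * len(obs)

    def finish(self, final=False):
        pass  # save weights / reset per-episode bookkeeping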
from keras.optimizers import Adam

from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# nb_actions, actor, critic, action_input, env and Processor are built
# earlier in the script.

# Finally, we configure and compile our agent. You can use every built-in
# Keras optimizer and even the metrics!
memory = SequentialMemory(limit=100, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0.,
                                          sigma=.1)
# agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
#                   critic_action_input=action_input, memory=memory,
#                   nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
#                   random_process=random_process, gamma=.99,
#                   target_model_update=1e-3, processor=Processor())
agent = CustomAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                    critic_action_input=action_input, memory=memory,
                    nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                    random_process=random_process, gamma=.99,
                    target_model_update=1e-3, processor=Processor())
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
# agent.load_weights('ddpg_init_experiences_Ship_Env_()_()_weights.h5f')

steps = 20000
for i in range(10):
    # agent.fit(env, nb_steps=steps, visualize=False, verbose=1, log_interval=5)
    agent.fit(env, nb_steps=steps, visualize=False, verbose=1)
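# Fitting in 20000-step chunks presumably allows intermediate progress to be
# checkpointed. keras-rl agents provide save_weights() for this; the filename
# below is illustrative. The call would go inside the loop above, right after
# agent.fit:
#
#     agent.save_weights('ddpg_weights_chunk_{}.h5f'.format(i), overwrite=True)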
import gym
from pathlib import Path
from pprint import pprint as print

import textworld
import textworld.gym

from custom_agent import CustomAgent
from train import _validate_requested_infos

# games
# train_dir = Path('/home/nik-96/Documents/datasets/tw_train_data/')
# games = [str(file.relative_to(".")) for file in train_dir.iterdir()
#          if file.suffix == '.ulx'][0]

agent = CustomAgent()
requested_infos = agent.select_additional_infos()
_validate_requested_infos(requested_infos)

env_id = textworld.gym.register_games(
    ['/home/nik-96/Documents/datasets/tw_train_data/train/tw-cooking-recipe2+cut+drop+go12-XQ7oC7pxS1OPsy6D.ulx'],
    requested_infos,
    max_episode_steps=agent.max_nb_steps_per_episode,
    name="training")
env_id = textworld.gym.make_batch(env_id, batch_size=agent.batch_size,
                                  parallel=True)
env = gym.make(env_id)

state, infos = env.reset()
print(infos)
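# With the batch env reset above, a hedged example of advancing it one step:
# env.step takes one command per game in the batch ("look" is illustrative)
# and returns the same (obs, scores, dones, infos) tuple used in train.py.
obs, scores, dones, infos = env.step(['look'] * agent.batch_size)
print(obs[0])  # pretty-printed, thanks to the pprint alias at the top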