from time import time

from tqdm import tqdm

# CustomAgent, Environment, and get_points are defined elsewhere in the
# surrounding project.


class Trainer:
    def __init__(self, game_dir):
        self.agent = CustomAgent()
        self.env = Environment(game_dir)

    def train(self):
        self.start_time = time()

        for epoch_no in range(1, self.agent.nb_epochs + 1):
            for game_no in tqdm(range(len(self.env.games))):
                obs, infos = self.env.reset()
                self.agent.train()

                scores = [0] * len(obs)
                dones = [False] * len(obs)
                steps = [0] * len(obs)
                while not all(dones):
                    # Increase step counts.
                    steps = [
                        step + int(not done)
                        for step, done in zip(steps, dones)
                    ]
                    commands = self.agent.act(obs, scores, dones, infos)
                    obs, scores, dones, infos = self.env.step(commands)

                # Let the agent know the game is done.
                self.agent.act(obs, scores, dones, infos)
                score = sum(scores) / self.agent.batch_size

                score, possible_points, percentage = get_points(
                    score, infos['extra.walkthrough'][0])
                print('Score: {}/{}'.format(score, possible_points))
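A minimal driver for this class might look like the following sketch; the games directory path is a placeholder, not part of the original code:

if __name__ == '__main__':
    trainer = Trainer('games/train')  # placeholder directory of .ulx game files
    trainer.train()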
Example #2
import gym
import textworld
import textworld.gym
from tqdm import tqdm

from custom_agent import CustomAgent

# _validate_requested_infos is defined in this same module (Example #6 below
# imports it from `train`).


def train(game_files):

    agent = CustomAgent()
    requested_infos = agent.select_additional_infos()
    _validate_requested_infos(requested_infos)

    env_id = textworld.gym.register_games(game_files, requested_infos,
                                          max_episode_steps=agent.max_nb_steps_per_episode,
                                          name="training")
    env_id = textworld.gym.make_batch(env_id, batch_size=agent.batch_size, parallel=True)
    env = gym.make(env_id)

    for epoch_no in range(1, agent.nb_epochs + 1):
        stats = {
            "scores": [],
            "steps": [],
        }
        for game_no in tqdm(range(len(game_files))):
            obs, infos = env.reset()
            agent.train()

            scores = [0] * len(obs)
            dones = [False] * len(obs)
            steps = [0] * len(obs)
            while not all(dones):
                # Increase step counts.
                steps = [step + int(not done) for step, done in zip(steps, dones)]
                commands = agent.act(obs, scores, dones, infos)
                obs, scores, dones, infos = env.step(commands)

            # Let the agent know the game is done.
            agent.act(obs, scores, dones, infos)

            stats["scores"].extend(scores)
            stats["steps"].extend(steps)

        score = sum(stats["scores"]) / agent.batch_size
        steps = sum(stats["steps"]) / agent.batch_size
        print("Epoch: {:3d} | {:2.1f} pts | {:4.1f} steps".format(epoch_no, score, steps))
    agent.finish(final=True)
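For context, a plausible way to invoke this function is to glob the training games from disk; the directory path below is a placeholder, not taken from the original code:

from pathlib import Path

game_files = sorted(str(p) for p in Path('games/train').glob('*.ulx'))  # placeholder path
train(game_files)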
Example #3
# Variant without verbose output:
def __init__(self, game_dir):
    self.agent = CustomAgent()
    self.env = Environment(game_dir)

# Variant with verbose output enabled on the agent:
def __init__(self, game_dir):
    self.agent = CustomAgent(verbose=True)
    self.env = Environment(game_dir)
# SequentialMemory, OrnsteinUhlenbeckProcess, Processor, and Adam come from
# keras-rl and Keras; the actor, critic, action_input, env, and nb_actions
# objects are built earlier in the original file.
from keras.optimizers import Adam
from rl.core import Processor
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer, and the built-in metrics as well.
memory = SequentialMemory(limit=100, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.1)
# agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
#                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
#                   random_process=random_process, gamma=.99, target_model_update=1e-3, processor=Processor())
agent = CustomAgent(nb_actions=nb_actions,
                    actor=actor,
                    critic=critic,
                    critic_action_input=action_input,
                    memory=memory,
                    nb_steps_warmup_critic=100,
                    nb_steps_warmup_actor=100,
                    random_process=random_process,
                    gamma=.99,
                    target_model_update=1e-3,
                    processor=Processor())
# The first optimizer is applied to the actor, the second to the critic.
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
# agent.load_weights('ddpg_init_experiences_Ship_Env_()_()_weights.h5f')

steps = 20000
for i in range(10):
    # agent.fit(env, nb_steps=steps, visualize=False, verbose=1, log_interval=5)
    agent.fit(env,
              nb_steps=steps,
              visualize=False,
              verbose=1,
              log_interval=5)
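    # Sketch (not in the original snippet): checkpoint the weights after each
    # round so the commented-out load_weights call above has something to
    # restore. The filename pattern is a placeholder.
    agent.save_weights('ddpg_weights_round_{}.h5f'.format(i), overwrite=True)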
Example #6
import gym
from pathlib import Path
from pprint import pprint as print  # shadow built-in print with pprint for readable infos output
import textworld

from custom_agent import CustomAgent
from train import _validate_requested_infos

# games
#train_dir = Path('/home/nik-96/Documents/datasets/tw_train_data/')
#games = [str(file.relative_to(".")) for file in train_dir.iterdir() if file.suffix == '.ulx'][0]

agent = CustomAgent()
requested_infos = agent.select_additional_infos()
_validate_requested_infos(requested_infos)

env_id = textworld.gym.register_games(
    ['/home/nik-96/Documents/datasets/tw_train_data/train/tw-cooking-recipe2+cut+drop+go12-XQ7oC7pxS1OPsy6D.ulx'],
    requested_infos,
    max_episode_steps=agent.max_nb_steps_per_episode,
    name="training")
env_id = textworld.gym.make_batch(env_id,
                                  batch_size=agent.batch_size,
                                  parallel=True)
env = gym.make(env_id)

state, infos = env.reset()
print(infos)
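From here, one batched interaction step follows the same pattern as the training loops in the earlier examples; the "look" command is only an illustrative placeholder:

commands = ['look'] * agent.batch_size  # placeholder command for every game in the batch
obs, scores, dones, infos = env.step(commands)
print(scores)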