Example No. 1
def main():
    config = configure()
    session = tf.Session()
    task = tasks.load(config)
    channel = channels.load(config)
    model = models.load(config)
    desc_model = models.desc_im.DescriptionImitationModel()
    translator = translators.load(config)

    rollout_ph = experience.RolloutPlaceholders(task, config)
    replay_ph = experience.ReplayPlaceholders(task, config)
    reconst_ph = experience.ReconstructionPlaceholders(task, config)
    channel.build(config)
    model.build(task, rollout_ph, replay_ph, channel, config)
    desc_model.build(task, rollout_ph, replay_ph, channel, config)
    translator.build(task, reconst_ph, channel, model, config)

    if config.task.train:
        trainer.run(task, rollout_ph, replay_ph, reconst_ph, model, desc_model,
                    translator, session, config)
    else:
        trainer.load(session, config)

    if config.task.lexicon:
        lex = lexicographer.run(task, rollout_ph, reconst_ph, model,
                                desc_model, translator, session, config)

    if config.task.visualize:
        visualizer.run(lex, task, config)

    if config.task.calibrate:
        calibrator.run(task, rollout_ph, model, desc_model, lexicographer,
                       session, config)

    if config.task.evaluate:
        evaluator.run(task, rollout_ph, replay_ph, reconst_ph, model,
                      desc_model, lex, session, config)
        sem_evaluator.run(task, rollout_ph, reconst_ph, model, desc_model,
                          translator, lex, session, config)

    if config.task.turkify:
        turkifier.run(task, rollout_ph, model, lex, session, config)
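The stages above are gated by boolean flags read from config.task. As a minimal, hypothetical sketch (not the project's actual configure()), the flags that main() reads could be mocked like this:

from types import SimpleNamespace

# Hypothetical stand-in for the object returned by configure(); only the
# flags that main() reads above are modelled here.
config = SimpleNamespace(task=SimpleNamespace(
    train=True, lexicon=True, visualize=False,
    calibrate=False, evaluate=True, turkify=False))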
Example No. 2
def train_loop(trainer, lr):
    print(f"Current learning rate: {lr}")
    for epoch in range(0, 80):
        trainer.train(epoch, lr)
        if trainer.test(epoch):
            no_progress = 0
        else:
            no_progress += 1
        if no_progress >= 6:
            break
    lr = lr/10
    for i in range(0, 2):
        print(f"Current learning rate: {lr}")
        start_epoch = trainer.load()
        no_progress = 0
        for epoch in range(start_epoch, start_epoch+80):
            trainer.train(epoch, lr)
            if trainer.test(epoch):
                no_progress = 0
            else:
                no_progress += 1
            if no_progress >= 6:
                break
        lr = lr/10
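train_loop only relies on an object exposing train(epoch, lr), test(epoch) returning True when the epoch improved, and load() returning the epoch to resume from. A hedged usage sketch with a hypothetical stub (not the project's real trainer):

class _StubTrainer:
    """Hypothetical stand-in exposing only the interface train_loop uses."""
    def train(self, epoch, lr):
        pass  # one epoch of optimisation would happen here
    def test(self, epoch):
        return epoch % 2 == 0  # treated as "made progress" when True
    def load(self):
        return 0  # epoch to resume from after each learning-rate drop

train_loop(_StubTrainer(), lr=0.1)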
Example No. 3

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 200

    targets = {('purple', 'square'), ('purple', 'circle'), ('blue', 'square'), ('blue', 'circle'), ('beige', 'square'),
               ('beige', 'circle')}
    task = MaxLength(WarpFrame(
        MultiCollectEnv(lambda collected: targets.issubset({(c.colour, c.shape) for c in collected}),
                        lambda x: (x.colour, x.shape) in targets)), max_trajectory)

    #agent = train('./models/temporal3/results', task) # 1 million
    #save('./models/temporal3/model.dqn', agent)

    dqn = load('./models/temporal3/model.dqn', task)  # dqn trained on full task

    max_episodes = 50000
    max_trajectory = 50

    dqn1 = load('./models/purple/model.dqn', task)
    dqn2 = load('./models/blue/model.dqn', task)
    dqn3 = load('./models/beige/model.dqn', task)
    dqn_composed = ComposedDQN([dqn1, dqn2, dqn3])
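    # dqn_composed combines the three single-colour policies; it is evaluated below
    # against the 'full_task' DQN that was trained directly on the full task.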

    for dqn, name in [(dqn, 'full_task'),  (dqn_composed, 'composed')]:
        env = Monitor(task, './experiment_temporal/' + name + '/', video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
Example No. 4
config = data.parse_config(config_path)

# Manually set seed (for reproducibility)
np.random.seed(config.seed)
torch.manual_seed(config.seed)

if train:
    # Get datasets as pandas dataframes
    train_data, valid_data, test_data = data.get_dataset(config)
    # Initialize the model
    model = models.Model(config)
    # Initialize the training class (note: this rebinds the imported `trainer` module name)
    trainer = trainer.Trainer(model, config)
    # If continuing, load previous checkpoint
    if cont:
        trainer.load()
    # Train the model
    for epoch in range(config.num_epochs):
        print("----------------Epoch #%d of %d" %
              (epoch + 1, config.num_epochs))
        # Train the model on the training dataset
        train_accuracy, train_loss = trainer.train(train_data)
        valid_accuracy, valid_loss = trainer.test(valid_data)
        # Print the results of this epoch of training
        print("-------Results: training accuracy: %.2f, training loss: %.2f, "
              "valid accuracy: %.2f, valid loss: %.2f"
              % (train_accuracy, train_loss, valid_accuracy, valid_loss))
        # Save model at end of each epoch
        trainer.save()
    # Get final test set results
    test_accuracy, test_loss = trainer.test(test_data)
Example No. 5
from gym.wrappers import Monitor

from dqn import ComposedDQN, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
                     max_trajectory)

    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)

    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)
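    # or_compose=False switches the composition from the default 'or' to 'and'.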

    for dqn, name in [(dqn_blue_crate, 'blue_crate'), (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:

        env = Monitor(task, './experiment_approx_and/' + name + '/', video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
Example No. 6
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=7):

        env = CollectEnv()
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_blue.cuda()
            dqn_crate.cuda()

        dqn_not = ComposedDQN([dqn_blue], compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], compose="not")
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        # else:
        #     print("Invalid name")
        #     return

        # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,goal_condition=lambda x: True)), max_trajectory)

        poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                start_positions['crate_beige'] = poss[episode]
                env = (WarpFrame(
                    CollectEnv(start_positions=start_positions,
                               changePlayerPos=False,
                               goal_condition=lambda x: True)))
                obs = env.reset()

                trajectory = []
                for _ in range(max_trajectory):
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_blue(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuesb = values.data.max(0)[0]
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn_crate(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    valuess = values.data.max(0)[0]
                    values = torch.stack((valuesb, valuess), 0).min(0)[0]
                    action = values.max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        break
                trajectories += trajectory[:-1]
                episode += 1

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)
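The inner loop above composes the two constituent policies by hand: each goal-conditioned DQN is maximised over the candidate goals, the two resulting action-value vectors are combined with an element-wise minimum (the 'and' composition), and the greedy action is taken. A self-contained numeric sketch of that final step, with made-up tensor values:

import torch

# Hypothetical per-action Q-vectors, already maximised over goals for each policy.
q_blue = torch.tensor([0.2, 0.9, 0.1, 0.4])
q_crate = torch.tensor([0.8, 0.3, 0.1, 0.7])
q_and = torch.min(torch.stack((q_blue, q_crate)), dim=0)[0]  # element-wise min acts as 'and'
action = int(q_and.argmax())  # greedy action under the composed values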
Example No. 7
            done = True
        return ob, reward, done, info


if __name__ == '__main__':

    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50

    task = MaxLength(WarpFrame(CollectEnv(goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                                   or (x.colour == 'purple' and x.shape == 'circle'))),
                     max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False, force=True)

    dqn_purple_circle = load('./models/purple_circle/model.dqn', task)  # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task)  # entropy regularised functions
    weights = np.arange(1/3, 3.01, 0.05)

    tally = {i: [] for i in range(len(weights))}

    for iter in range(max_iterations):
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            weight = 1  # NOTE: this fixes weight to 1 and overrides the swept value from enumerate(weights)
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle], [weight, 1])
            for episode in range(max_episodes):
                if episode % 1000 == 0:
                    print(episode)
                obs = env.reset()
Example No. 8
    def exp(name='or',
            save_trajectories=True,
            max_episodes=4,
            max_trajectory=20):

        env = CollectEnv()
        dqn_purple = load('./models/purple/model.dqn', env)
        dqn_blue = load('./models/blue/model.dqn', env)
        dqn_crate = load('./models/crate/model.dqn', env)
        if torch.cuda.is_available():
            dqn_purple.cuda()
            dqn_blue.cuda()
            dqn_crate.cuda()

        dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
        dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
        dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
        dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
        dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
        dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")
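        # dqn_xor is built structurally as and(or(blue, crate), not(and(blue, crate))).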

        goals = []
        if os.path.exists('./goals.h5'):
            goals = dd.io.load('goals.h5')

        if name == 'blue':
            dqn = dqn_blue
            goal_condition = lambda x: x.colour == 'blue'
        elif name == 'purple':
            dqn = dqn_purple
            goal_condition = lambda x: x.colour == 'purple'
        elif name == 'square':
            dqn = dqn_crate
            goal_condition = lambda x: x.shape == 'square'
        elif name == 'not':
            dqn = dqn_not
            goal_condition = lambda x: not x.colour == 'blue'
        elif name == 'or':
            dqn = dqn_or
            goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
        elif name == 'and':
            dqn = dqn_and
            goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
        elif name == 'xor':
            dqn = dqn_xor
            goal_condition = lambda x: (
                x.colour == 'blue' or x.shape == 'square') and not (
                    x.colour == 'blue' and x.shape == 'square')
        else:
            print("Invalid name")
            return

        env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                        max_trajectory)

        trajectories = []
        with torch.no_grad():
            episode = 0
            while episode < max_episodes:
                obs = env.reset()
                trajectory = []
                for _ in range(max_trajectory):
                    trajectory.append(
                        Image.fromarray(np.uint8(env.render(mode='rgb_img'))))

                    obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                    values = []
                    for goal in goals:
                        goal = torch.from_numpy(
                            np.array(goal)).type(FloatTensor).unsqueeze(0)
                        x = torch.cat((obs, goal), dim=3)
                        values.append(dqn(x).squeeze(0))
                    values = torch.stack(values, 1).t()
                    action = values.data.max(0)[0].max(0)[1].item()
                    obs, reward, done, _ = env.step(action)
                    if done:
                        episode += 1
                        trajectories += trajectory[:-1]
                        break

        if save_trajectories:
            trajectories[0].save('./trajectories/' + name + '.gif',
                                 save_all=True,
                                 append_images=trajectories[1:],
                                 optimize=False,
                                 duration=250,
                                 loop=0)
Example No. 9
                   'crate_purple': (8, 1),
                   'circle_blue': (1, 8)}
all_goals = ['BC','BS','bS','PS','bC','PC']
all_goals_P = [(1,8),(8,1),(1,1),(6,3),(1,7),(7,7)]

# Tasks = ["B.S", "B.-S", "S.-B", "-(B+S)", "B", "-B", "S", "-S", "B+S", "B+-S", "S+-B", "-(B.S)", "-BxorS", "BxorS"]
# Tasks_N = [1, 1, 2, 2, 2, 4, 3, 3, 4, 4, 5, 5, 3, 3]
Tasks = ["B", "S", "B+S", "B.S", "BxorS"]
Tasks_P = [[(1,8),(8,1)], [(8,1),(1,1),(6,3)], [(1,8),(8,1),(1,1),(6,3)], [(8,1)], [(1,8),(1,1),(6,3)]]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_square.cuda()
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_square = ComposedDQN([dqn_square], compose="not")
dqn_or = ComposedDQN([dqn_blue,dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], compose="not")
dqn_and = ComposedDQN([dqn_blue,dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], compose="not")
dqn_xor = ComposedDQN([dqn_or,dqn_not_and], compose="and")
    
def evaluate(name='or', max_trajectory=20):

    if name == 'B.S':
Example No. 10
start_positions = {
    'player': (3, 4),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
env = WarpFrame(
    CollectEnv(
        start_positions=start_positions,
        goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))

dqn_blue = load('../../models/blue/model.dqn', env)
dqn_purple = load('../../models/purple/model.dqn', env)
dqn = ComposedDQN([dqn_blue, dqn_purple], [1, 1])

values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)

    positions = remove(positions, pos)

    positions['player'] = pos
    env = WarpFrame(
        CollectEnv(start_positions=positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour ==
                   'blue'))
    obs = env.reset()
Example No. 11
if __name__ == '__main__':
    start_positions = {
        'player': (5, 5),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour ==
                   'blue'))

    dqn1 = load('../../models/crate/model.dqn', env)
    dqn2 = load('../../models/blue/model.dqn', env)
    dqn = ComposedDQN([dqn1, dqn2], [1, 1], or_compose=False)
    obs = env.reset()
    positions = list()
    positions.append(env.env.player.position)
    env.render()

    for _ in range(100):
        obs = np.array(obs)
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)

        action = get_action(dqn, obs)
        obs, reward, done, _ = env.step(action)
        env.render()
        positions.append(env.env.player.position)
Example No. 12
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }

    targets = {('purple', 'square'), ('blue', 'circle'), ('blue', 'square'),
               ('beige', 'square')}

    env = WarpFrame(
        MultiCollectEnv(termination_condition=lambda collected: targets.
                        issubset({(c.colour, c.shape)
                                  for c in collected}),
                        reward_condition=lambda x:
                        (x.colour, x.shape) in targets,
                        start_positions=start_positions))

    dqn1 = load('../../models/purple/model.dqn', env)
    # dqn2 = load('../../models/purple_circle/model.dqn', env)
    dqn3 = load('../../models/blue/model.dqn', env)
    dqn4 = load('../../models/beige/model.dqn', env)
    # dqn = ComposedDQN([dqn1, dqn2, dqn3, dqn4], [1,1,1,1])
    #dqn1 = load('../../models/crate/model.dqn', env)
    #dqn2 = load('../../models/blue/model.dqn', env)
    dqn = ComposedDQN([dqn1, dqn3, dqn4])

    obs = env.reset()
    positions = list()
    positions.append(env.env.player.position)
    env.render()

    for _ in range(100):
        obs = np.array(obs)
Example No. 13
if __name__ == '__main__':

    start_positions = {'player': (3, 4),
                       'crate_purple': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_beige': (2, 2),
                       'crate_blue': (8, 1),
                       'circle_blue': (2, 8)}

    env = WarpFrame(CollectEnv(start_positions=start_positions,
                               goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                        or (x.colour == 'purple' and x.shape == 'circle')))

    dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
    dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
    dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2])  # TODO put weights here!

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(CollectEnv(start_positions=positions,
                                   goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
                                                            or (x.colour == 'purple' and x.shape == 'circle')))

        obs = env.reset()
Example No. 14
import torch
from gym.wrappers import Monitor

from dqn import ComposedDQN, FloatTensor, get_action
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':

    max_episodes = 50000
    max_trajectory = 50

    task = MaxLength(
        WarpFrame(
            CollectEnv(goal_condition=lambda x: x.colour == 'blue' or x.colour
                       == 'purple')), max_trajectory)

    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_purple = load('./models/purple/model.dqn', task)
    dqn_composed = ComposedDQN([dqn_blue, dqn_purple], [1, 1])
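    # Equal weights for the two constituent policies; Example No. 7 sweeps this weighting instead.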

    for dqn, name in [(dqn_blue, 'blue'), (dqn_purple, 'purple'),
                      (dqn_composed, 'composed')]:
        env = Monitor(task,
                      './experiment_or/' + name + '/',
                      video_callable=False,
                      force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
Example No. 15
 def __init__(self):
     X, y = load()
     X, y = preprocess(X, y)
     self.model, self.sc = train(X, y)
     self.rec = Recorder(NUM_WINDOWS, FFT_SIZE)
Example No. 16
if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour ==
                   'blue'))

    dqn_blue = load('../../models/blue/model.dqn', env)
    dqn_crate = load('../../models/crate/model.dqn', env)
    dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
                       goal_condition=lambda x: x.shape == 'square' and x.
                       colour == 'blue'))
        obs = env.reset()
Example No. 17
import os

import deepdish as dd  # assumed: provides the dd.io.load used below
import numpy as np
import torch

from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw

if __name__ == '__main__':
    env = CollectEnv()

    all_goals = np.array([('beige', 'circle'), ('beige', 'square'),
                          ('blue', 'circle'), ('blue', 'square'),
                          ('purple', 'circle'), ('purple', 'square')])
    base_tasks = np.array([[0, 0, 0, 0, 1, 1], [0, 0, 1, 1, 0, 0],
                           [0, 1, 0, 1, 0, 1]])

    dqn_purple = load('./models/purple/model.dqn', env)
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_purple.cuda()
        dqn_blue.cuda()
        dqn_crate.cuda()
    models = {'P': dqn_purple, 'B': dqn_blue, 'S': dqn_crate}
    mgoals = []
    if os.path.exists('./goals.h5'):
        mgoals = dd.io.load('goals.h5')

    def experiment(task=None,
                   save_trajectories=True,
                   max_episodes=4,
                   max_trajectory=20):
Example No. 18
if __name__ == '__main__':
    start_positions = {
        'player': (2, 1),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour ==
                   'blue'))

    dqn1 = load('../../models/purple/model.dqn', env)
    dqn2 = load('../../models/blue/model.dqn', env)
    dqn = ComposedDQN([dqn1, dqn2], [1, 1.1])
    obs = env.reset()
    positions = list()
    positions.append(env.env.player.position)
    env.render()

    for _ in range(100):
        obs = np.array(obs)
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)

        action = get_action(dqn, obs)
        obs, reward, done, _ = env.step(action)
        env.render()
        positions.append(env.env.player.position)
Example No. 19
 def __init__(self,
              modelfilestring,
              lrows,
              lcols,
              m_test_set="",
              m_model="",
              context="",
              device="cpu"):
     self.nb_proc = len(os.sched_getaffinity(0))
     self.device = device
     # self.nb_proc = 1
     self.is_ready = False
     # Semi-constants :
     self.quiet = False
     self.epsilon = 1e-30
     self.batch_vol = 1024
     self.randwords_minlen = 0
     self.randwords_maxlen = 100
     self.randwords_nb = 1000
     # Debug Warning !
     if self.randwords_nb < 1000:
         print("DEBUG - DEBUG - DEBUG - DEBUG - DEBUG")
         print("Low random words number for debug purpose ?")
         print("DEBUG - DEBUG - DEBUG - DEBUG - DEBUG")
     self.patience = 250
     self.rand_temperature = 6  # >= 1
     # Arguments :
     self.rnn_model = trainer.load(*(modelfilestring.split()))  # pytorch OK
     self.rnn_model = self.rnn_model.to(self.device)
     self.lrows = lrows
     self.lcols = lcols
     self.metrics_test_set = m_test_set
     self.metrics_model = m_model
     self.context = context
     # Attributes derived from arguments :
     self.nalpha = self.rnn_model.nalpha
     # self.pad = int(self.rnn_model.input.shape[1])
     self.metrics_calc_level = 0
     if m_test_set != "":
         # We have access to a test set, like in SPICE and PAUTOMAC
         self.metrics_calc_level += 1
         if m_model != "":
             # We have access to a target WA, like in PAUTOMAC
             self.metrics_calc_level += 1
     # Computed attributes
     self.prefixes = None
     self.suffixes = None
     self.words = None
     self.words_probas = None
     self.lhankels = None
     self.last_extr_aut = None
     # metrics calculations attributes
     self.ranks = []
     self.true_automaton = None
     self.metrics = dict()
     self.x_test = None
     self.x_rand = None
     self.x_rnnw = None
     self.y_test_target = None
     self.y_test_rnn = None
     self.y_test_extr = None
     self.y_rand_target = None
     self.y_rand_rnn = None
     self.y_rand_extr = None
     self.y_rnnw_rnn = None
     self.y_rnnw_extr = None
     self.y_test_target_prefixes = None
     self.y_test_rnn_prefixes = None
     self.y_test_extr_prefixes = None
     self.y_rnnw_rnn_prefixes = None
     self.y_rnnw_extr_prefixes = None
     self.perp_test_target = None
     self.perp_test_rnn = None
     self.perp_test_extr = None
     self.perp_rand_target = None
     self.perp_rand_rnn = None
     self.kld_test_target_rnn = None
     self.kld_test_rnn_extr = None
     self.kld_test_target_extr = None
     self.kld_rand_target_rnn = None
     self.kld_rand_rnn_extr = None
     self.kld_rand_target_extr = None
     self.kld_rand_extr_rnn = None
     self.wer_test_target = None
     self.wer_test_rnn = None
     self.wer_rnnw_rnn = None
     self.wer_test_extr = None
     self.wer_rnnw_extr = None
     self.ndcg1_test_target_rnn = None
     self.ndcg1_test_rnn_extr = None
     self.ndcg1_test_target_extr = None
     self.ndcg1_rnnw_rnn_extr = None
     self.ndcg5_test_target_rnn = None
     self.ndcg5_test_rnn_extr = None
     self.ndcg5_test_target_extr = None
     self.ndcg5_rnnw_rnn_extr = None
     self.perp_rand_extr = None
     self.eps_test_zeros_extr = None
     self.l2dis_target_extr = None
     self.eps_rand_zeros_target = None
     self.eps_rand_zeros_rnn = None
     self.eps_rand_zeros_extr = None
     self.eps_kl_rand_target_extr = None
     self.eps_kl_rand_target_rnn = None
     #
     self.perprnn_test_rnn = None
     self.perprnn_test_extr = None
     self.perprnn_rnnw_rnn = None
     self.perprnn_rnnw_extr = None
     self.eps_rnnw_zeros_extr = None
Example No. 20
if __name__ == '__main__':
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour ==
                   'blue'))

    dqn = load('../../models/blue_crate/model.dqn', env)

    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)

        positions = remove(positions, pos)

        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
                       goal_condition=lambda x: x.shape == 'square' and x.
                       colour == 'blue'))
        obs = env.reset()
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
        v = dqn(Variable(obs, volatile=True)).data.max(1)[0].view(1, 1)[0][0]
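        # Note: Variable(..., volatile=True) is pre-0.4 PyTorch; newer versions use torch.no_grad() instead.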