def main():
    """Top-level experiment driver.

    Builds every component from the parsed configuration (task, channel,
    models, translator, placeholder bundles), wires up their graphs, then
    dispatches to whichever sub-tasks are enabled under ``config.task``.
    """
    config = configure()
    session = tf.Session()
    # Load the pluggable pieces selected by the config.
    task = tasks.load(config)
    channel = channels.load(config)
    model = models.load(config)
    desc_model = models.desc_im.DescriptionImitationModel()
    translator = translators.load(config)
    # Placeholder bundles for the rollout, replay and reconstruction graphs.
    rollout_ph = experience.RolloutPlaceholders(task, config)
    replay_ph = experience.ReplayPlaceholders(task, config)
    reconst_ph = experience.ReconstructionPlaceholders(task, config)
    # Build the channel first; the models and translator take it as input.
    channel.build(config)
    model.build(task, rollout_ph, replay_ph, channel, config)
    desc_model.build(task, rollout_ph, replay_ph, channel, config)
    translator.build(task, reconst_ph, channel, model, config)
    # Either train from scratch or restore a saved session.
    if config.task.train:
        trainer.run(task, rollout_ph, replay_ph, reconst_ph, model,
                    desc_model, translator, session, config)
    else:
        trainer.load(session, config)
    # NOTE(review): `lex` is only bound when config.task.lexicon is set, yet
    # the visualize/evaluate/turkify branches below read it — a config that
    # enables those without `lexicon` would raise NameError.  Confirm the
    # intended coupling between these flags.
    if config.task.lexicon:
        lex = lexicographer.run(task, rollout_ph, reconst_ph, model,
                                desc_model, translator, session, config)
    if config.task.visualize:
        visualizer.run(lex, task, config)
    if config.task.calibrate:
        # NOTE(review): this passes the `lexicographer` module itself rather
        # than the `lex` result above — verify calibrator.run expects the
        # module.
        calibrator.run(task, rollout_ph, model, desc_model, lexicographer,
                       session, config)
    if config.task.evaluate:
        evaluator.run(task, rollout_ph, replay_ph, reconst_ph, model,
                      desc_model, lex, session, config)
        sem_evaluator.run(task, rollout_ph, reconst_ph, model, desc_model,
                          translator, lex, session, config)
    if config.task.turkify:
        turkifier.run(task, rollout_ph, model, lex, session, config)
def train_loop(trainer, lr):
    """Train in three phases: an initial run plus two fine-tuning runs.

    Each phase trains for up to 80 epochs and stops early once
    ``trainer.test`` reports no improvement for 6 consecutive epochs.  The
    learning rate is divided by 10 between phases, and each fine-tuning
    phase resumes from the epoch returned by ``trainer.load()``.

    Args:
        trainer: object exposing ``train(epoch, lr)``,
            ``test(epoch) -> bool`` (truthy means the epoch improved) and
            ``load() -> int`` (the epoch index to resume from).
        lr: initial learning rate.
    """

    def _run_phase(start_epoch, phase_lr):
        # One bounded training phase with early stopping.
        # BUG FIX: the original first phase never initialised its
        # no-progress counter, so the first non-improving epoch raised
        # UnboundLocalError; the counter now always starts at 0.
        no_progress = 0
        for epoch in range(start_epoch, start_epoch + 80):
            trainer.train(epoch, phase_lr)
            if trainer.test(epoch):
                no_progress = 0
            else:
                no_progress += 1
            if no_progress >= 6:
                break

    print(f"Current learning rate: {lr}")
    _run_phase(0, lr)
    lr = lr / 10
    for _ in range(2):
        print(f"Current learning rate: {lr}")
        # Fine-tuning resumes from the checkpointed epoch.
        _run_phase(trainer.load(), lr)
        lr = lr / 10
if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 200
    # Every (colour, shape) pair that must be collected to finish the task.
    targets = {('purple', 'square'), ('purple', 'circle'),
               ('blue', 'square'), ('blue', 'circle'),
               ('beige', 'square'), ('beige', 'circle')}
    # First callable: episode ends once all targets are collected;
    # second: reward for collecting any target item.
    task = MaxLength(WarpFrame(
        MultiCollectEnv(lambda collected: targets.issubset(
            {(c.colour, c.shape) for c in collected}),
            lambda x: (x.colour, x.shape) in targets)),
        max_trajectory)
    #agent = train('./models/temporal3/results', task) # 1 million
    #save('./models/temporal3/model.dqn', agent)
    dqn = load('./models/temporal3/model.dqn', task) # dqn trained on full task
    max_episodes = 50000  # NOTE(review): re-assigns the same value as above
    max_trajectory = 50   # tighter step cap for the evaluation rollouts
    dqn1 = load('./models/purple/model.dqn', task)
    dqn2 = load('./models/blue/model.dqn', task)
    dqn3 = load('./models/beige/model.dqn', task)
    dqn_composed = ComposedDQN([dqn1, dqn2, dqn3])
    # Compare the monolithic DQN against the composition.  The loop variable
    # shadows `dqn`; that is safe because the pair list is evaluated first.
    for dqn, name in [(dqn, 'full_task'), (dqn_composed, 'composed')]:
        env = Monitor(task, './experiment_temporal/' + name + '/',
                      video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            # NOTE: the per-episode rollout continues beyond this excerpt.
# Parse the experiment configuration from disk.
config = data.parse_config(config_path)

# Manually set seeds (for reproducibility).
np.random.seed(config.seed)
torch.manual_seed(config.seed)

if train:
    # Get datasets as pandas dataframes.
    train_data, valid_data, test_data = data.get_dataset(config)

    # Initialize the model.
    model = models.Model(config)

    # Initialize the training class.
    # BUG FIX: the original bound this to the name `trainer`, shadowing the
    # `trainer` module and breaking any later use of that module.
    model_trainer = trainer.Trainer(model, config)

    # If continuing, load the previous checkpoint.
    if cont:
        model_trainer.load()

    # Train the model.
    for epoch in range(config.num_epochs):
        print("----------------Epoch #%d of %d" % (epoch + 1, config.num_epochs))
        # Train on the training set, then score the validation set.
        train_accuracy, train_loss = model_trainer.train(train_data)
        valid_accuracy, valid_loss = model_trainer.test(valid_data)
        # Print the results of this epoch of training.
        print("-------Results: training accuracy: %.2f, training loss: %.2f, "
              "valid accuracy: %.2f, valid loss %.2f"
              % (train_accuracy, train_loss, valid_accuracy, valid_loss))
        # Save a checkpoint at the end of each epoch.
        model_trainer.save()

    # Get final test set results.
    test_accuracy, test_loss = model_trainer.test(test_data)
from gym.wrappers import Monitor
from dqn import ComposedDQN, get_action
from gym_repoman.envs import CollectEnv
from trainer import load
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 50
    # Task: collect the blue square, capped at max_trajectory steps.
    task = MaxLength(WarpFrame(CollectEnv(
        goal_condition=lambda x: x.colour == 'blue' and x.shape == 'square')),
        max_trajectory)
    # Monolithic baseline vs. OR- and AND-compositions of the base DQNs.
    dqn_blue_crate = load('./models/blue_crate/model.dqn', task)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_crate = load('./models/crate/model.dqn', task)
    dqn_composed_or = ComposedDQN([dqn_blue, dqn_crate])
    dqn_composed_and = ComposedDQN([dqn_blue, dqn_crate], or_compose=False)
    for dqn, name in [(dqn_blue_crate, 'blue_crate'),
                      (dqn_composed_or, 'blue_or_crate'),
                      (dqn_composed_and, 'blue_and_crate')]:
        # Monitor writes episode statistics under the experiment directory.
        env = Monitor(task, './experiment_approx_and/' + name + '/',
                      video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
                # NOTE: loop body truncated in this excerpt.
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=7):
    """Roll out a hand-composed AND policy over fixed beige-crate positions
    and optionally save the rendered frames as a GIF."""
    env = CollectEnv()
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_blue.cuda()
        dqn_crate.cuda()
    # Boolean compositions of the base value functions.
    dqn_not = ComposedDQN([dqn_blue], compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")
    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')
    # Select a composed policy and goal predicate by name.
    # NOTE(review): `dqn` and `goal_condition` are never used below — the
    # rollout always evaluates dqn_blue and dqn_crate directly (an
    # elementwise-min / AND composition) regardless of `name`.  Confirm
    # this is intended.
    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (
            x.colour == 'blue' or x.shape == 'square') and not (
            x.colour == 'blue' and x.shape == 'square')
    # else:
    #     print("Invalid name")
    #     return
    # env = MaxLength(WarpFrame(CollectEnv(start_positions=start_positions,goal_condition=lambda x: True)), max_trajectory)
    # Candidate beige-crate positions, one per episode (max_episodes <= 4).
    poss = [(3, 4), (1, 2), (5, 7), (5, 2)]
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            # NOTE(review): `start_positions` is not defined in this
            # function; it must come from an enclosing/module scope —
            # confirm.
            start_positions['crate_beige'] = poss[episode]
            env = (WarpFrame(
                CollectEnv(start_positions=start_positions,
                           changePlayerPos=False,
                           goal_condition=lambda x: True)))
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                # Capture the pre-step frame for the GIF.
                trajectory.append(
                    Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                # Max over goals of the goal-conditioned blue Q-values...
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_blue(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuesb = values.data.max(0)[0]
                # ...and of the crate Q-values.
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn_crate(x).squeeze(0))
                values = torch.stack(values, 1).t()
                valuess = values.data.max(0)[0]
                # Elementwise min of the two value vectors, act greedily.
                values = torch.stack((valuesb, valuess), 0).min(0)[0]
                action = values.max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    break
            # The final frame of each episode is dropped.
            trajectories += trajectory[:-1]
            episode += 1
    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
# --- Orphan tail of an enclosing step() wrapper; its `def` lies before this
# excerpt, so these two lines are shown at their original method indent. ---
        done = True
        return ob, reward, done, info

if __name__ == '__main__':
    max_iterations = 80
    max_episodes = 100
    max_trajectory = 50
    # Task: collect the beige square or the purple circle.
    task = MaxLength(WarpFrame(CollectEnv(
        goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
        or (x.colour == 'purple' and x.shape == 'circle'))), max_trajectory)
    env = Monitor(task, './experiment_weighted_or/', video_callable=False,
                  force=True)
    dqn_purple_circle = load('./models/purple_circle/model.dqn', task) # entropy regularised functions
    dqn_beige_crate = load('./models/beige_crate/model.dqn', task) # entropy regularised functions
    # Sweep the relative weighting of the two sub-task value functions.
    weights = np.arange(1/3, 3.01, 0.05)
    tally = {i: [] for i in range(len(weights))}
    for iter in range(max_iterations):  # (shadows the builtin `iter`)
        for i, weight in enumerate(weights):
            collected_count = [0, 0]
            # NOTE(review): this overwrites the swept `weight` with 1 on
            # every iteration, so the whole sweep effectively runs at
            # weight 1 — looks like a leftover debug line; confirm before
            # trusting results produced by this script.
            weight = 1
            dqn_composed = ComposedDQN([dqn_beige_crate, dqn_purple_circle],
                                       [weight, 1])
            for episode in range(max_episodes):
                if episode % 1000 == 0:
                    print(episode)
                obs = env.reset()
                # NOTE: the rollout continues beyond this excerpt.
def exp(name='or', save_trajectories=True, max_episodes=4, max_trajectory=20):
    """Roll out a (possibly composed) DQN policy and optionally save a GIF.

    Args:
        name: which task/composition to run: 'blue', 'purple', 'square',
            'not', 'or', 'and' or 'xor'.  Anything else prints
            "Invalid name" and returns.
        save_trajectories: if True, write the rendered frames to
            './trajectories/<name>.gif'.
        max_episodes: number of completed episodes to collect.
        max_trajectory: per-episode step cap.
    """
    env = CollectEnv()
    dqn_purple = load('./models/purple/model.dqn', env)
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_purple.cuda()
        dqn_blue.cuda()
        dqn_crate.cuda()

    # Boolean compositions of the base value functions; the OR over all
    # base tasks (dqn_max) is the reference used by the negations.
    dqn_max = ComposedDQN([dqn_purple, dqn_blue, dqn_crate], compose="or")
    dqn_not = ComposedDQN([dqn_blue], dqn_max=dqn_max, compose="not")
    dqn_or = ComposedDQN([dqn_blue, dqn_crate], compose="or")
    dqn_and = ComposedDQN([dqn_blue, dqn_crate], compose="and")
    dqn_not_and = ComposedDQN([dqn_and], dqn_max=dqn_max, compose="not")
    dqn_xor = ComposedDQN([dqn_or, dqn_not_and], compose="and")

    goals = []
    if os.path.exists('./goals.h5'):
        goals = dd.io.load('goals.h5')

    # Select the policy and the matching goal predicate by name.
    if name == 'blue':
        dqn = dqn_blue
        goal_condition = lambda x: x.colour == 'blue'
    elif name == 'purple':
        dqn = dqn_purple
        goal_condition = lambda x: x.colour == 'purple'
    elif name == 'square':
        dqn = dqn_crate
        goal_condition = lambda x: x.shape == 'square'
    # BUG FIX: this branch was `if name == 'not':`, which started a second,
    # independent chain — 'blue'/'purple'/'square' then fell through to that
    # chain's `else` and the function bailed out with "Invalid name".
    elif name == 'not':
        dqn = dqn_not
        goal_condition = lambda x: not x.colour == 'blue'
    elif name == 'or':
        dqn = dqn_or
        goal_condition = lambda x: x.colour == 'blue' or x.shape == 'square'
    elif name == 'and':
        dqn = dqn_and
        goal_condition = lambda x: x.colour == 'blue' and x.shape == 'square'
    elif name == 'xor':
        dqn = dqn_xor
        goal_condition = lambda x: (
            x.colour == 'blue' or x.shape == 'square') and not (
            x.colour == 'blue' and x.shape == 'square')
    else:
        print("Invalid name")
        return

    env = MaxLength(WarpFrame(CollectEnv(goal_condition=goal_condition)),
                    max_trajectory)
    trajectories = []
    with torch.no_grad():
        episode = 0
        while episode < max_episodes:
            obs = env.reset()
            trajectory = []
            for _ in range(max_trajectory):
                # Capture the pre-step frame for the GIF.
                trajectory.append(
                    Image.fromarray(np.uint8(env.render(mode='rgb_img'))))
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                # Greedy action over the goal-conditioned action values.
                values = []
                for goal in goals:
                    goal = torch.from_numpy(
                        np.array(goal)).type(FloatTensor).unsqueeze(0)
                    x = torch.cat((obs, goal), dim=3)
                    values.append(dqn(x).squeeze(0))
                values = torch.stack(values, 1).t()
                action = values.data.max(0)[0].max(0)[1].item()
                obs, reward, done, _ = env.step(action)
                if done:
                    # Only completed episodes contribute frames (minus the
                    # final frame of each).
                    episode += 1
                    trajectories += trajectory[:-1]
                    break
    if save_trajectories:
        trajectories[0].save('./trajectories/' + name + '.gif',
                             save_all=True,
                             append_images=trajectories[1:],
                             optimize=False,
                             duration=250,
                             loop=0)
# Orphan tail of a start-positions dict whose opening brace lies before this
# excerpt.
                   'crate_purple': (8, 1),
                   'circle_blue': (1, 8)}

# Goal labels and their positions — presumably B/P/b = blue/purple/beige and
# C/S = circle/square; confirm the encoding against the environment.
all_goals = ['BC','BS','bS','PS','bC','PC']
all_goals_P = [(1,8),(8,1),(1,1),(6,3),(1,7),(7,7)]
# Tasks = ["B.S", "B.-S", "S.-B", "-(B+S)", "B", "-B", "S", "-S", "B+S", "B+-S", "S+-B", "-(B.S)", "-BxorS", "BxorS"]
# Tasks_N = [1, 1, 2, 2, 2, 4, 3, 3, 4, 4, 5, 5, 3, 3]
Tasks = ["B", "S", "B+S", "B.S", "BxorS"]
Tasks_P = [[(1,8),(8,1)], [(8,1),(1,1),(6,3)], [(1,8),(8,1),(1,1),(6,3)], [(8,1)], [(1,8),(1,1),(6,3)]]

goals = []
if os.path.exists('./goals.h5'):
    goals = dd.io.load('goals.h5')

env = CollectEnv()
dqn_blue = load('./models/blue/model.dqn', env)
dqn_square = load('./models/crate/model.dqn', env)
if torch.cuda.is_available():
    dqn_blue.cuda()
    dqn_square.cuda()
# Boolean compositions of the two base value functions.
dqn_not_blue = ComposedDQN([dqn_blue], compose="not")
dqn_not_square = ComposedDQN([dqn_square], compose="not")
dqn_or = ComposedDQN([dqn_blue,dqn_square], compose="or")
dqn_not_or = ComposedDQN([dqn_or], compose="not")
dqn_and = ComposedDQN([dqn_blue,dqn_square], compose="and")
dqn_not_and = ComposedDQN([dqn_and], compose="not")
dqn_xor = ComposedDQN([dqn_or,dqn_not_and], compose="and")

def evaluate(name='or', max_trajectory = 20):
    # NOTE: function body truncated in this excerpt.
    if name == 'B.S':
# Fixed board layout for the value-map sweep.
start_positions = {
    'player': (3, 4),
    'crate_purple': (6, 3),
    'circle_purple': (7, 7),
    'circle_beige': (1, 7),
    'crate_beige': (2, 2),
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
# Task: collect anything purple or blue.
env = WarpFrame(
    CollectEnv(
        start_positions=start_positions,
        goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
dqn_blue = load('../../models/blue/model.dqn', env)
dqn_purple = load('../../models/purple/model.dqn', env)
# Equal-weight OR-composition of the two base policies.
dqn = ComposedDQN([dqn_blue, dqn_purple], [1, 1])
# One value estimate per board cell, with the player moved to that cell.
values = np.zeros_like(env.env.board, dtype=float)
for pos in env.env.free_spaces:
    positions = copy.deepcopy(start_positions)
    # Presumably clears whatever occupies `pos` — confirm `remove` semantics.
    positions = remove(positions, pos)
    positions['player'] = pos
    env = WarpFrame(
        CollectEnv(start_positions=positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
    obs = env.reset()
    # NOTE: loop body continues beyond this excerpt.
if __name__ == '__main__':
    # Fixed board layout; the player starts at (5, 5).
    start_positions = {
        'player': (5, 5),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    # Task: collect the blue square.
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
    dqn1 = load('../../models/crate/model.dqn', env)
    dqn2 = load('../../models/blue/model.dqn', env)
    # AND-composition (or_compose=False) of 'crate' and 'blue', equal weights.
    dqn = ComposedDQN([dqn1, dqn2], [1, 1], or_compose=False)
    obs = env.reset()
    positions = list()  # trail of player cells, starting position included
    positions.append(env.env.player.position)
    env.render()
    # Greedy rollout for at most 100 steps, rendering each frame.
    for _ in range(100):
        obs = np.array(obs)
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
        action = get_action(dqn, obs)
        obs, reward, done, _ = env.step(action)
        env.render()
        positions.append(env.env.player.position)
# Orphan tail of a start-positions dict (opening brace before this excerpt).
    'crate_blue': (8, 1),
    'circle_blue': (2, 8)
}
# The four (colour, shape) items that must all be collected.
targets = {('purple', 'square'), ('blue', 'circle'),
           ('blue', 'square'), ('beige', 'square')}
# Episode terminates once every target is in the collected set; reward is
# given for collecting any target item.
env = WarpFrame(
    MultiCollectEnv(termination_condition=lambda collected: targets.issubset(
        {(c.colour, c.shape) for c in collected}),
        reward_condition=lambda x: (x.colour, x.shape) in targets,
        start_positions=start_positions))
dqn1 = load('../../models/purple/model.dqn', env)
# dqn2 = load('../../models/purple_circle/model.dqn', env)
dqn3 = load('../../models/blue/model.dqn', env)
dqn4 = load('../../models/beige/model.dqn', env)
# dqn = ComposedDQN([dqn1, dqn2, dqn3, dqn4], [1,1,1,1])
#dqn1 = load('../../models/crate/model.dqn', env)
#dqn2 = load('../../models/blue/model.dqn', env)
# Compose the three colour policies with default composition settings.
dqn = ComposedDQN([dqn1, dqn3, dqn4])
obs = env.reset()
positions = list()  # trail of player cells, starting position included
positions.append(env.env.player.position)
env.render()
for _ in range(100):
    obs = np.array(obs)
    # NOTE: loop body continues beyond this excerpt.
if __name__ == '__main__':
    # Fixed board layout for the value-map sweep.
    start_positions = {'player': (3, 4),
                       'crate_purple': (6, 3),
                       'circle_purple': (7, 7),
                       'circle_beige': (1, 7),
                       'crate_beige': (2, 2),
                       'crate_blue': (8, 1),
                       'circle_blue': (2, 8)}
    # Task: collect the beige square or the purple circle.
    env = WarpFrame(CollectEnv(
        start_positions=start_positions,
        goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
        or (x.colour == 'purple' and x.shape == 'circle')))
    dqn_purple_circle = load('../../models/purple_circle/model.dqn', env)
    dqn_beige_crate = load('../../models/beige_crate/model.dqn', env)
    # Weighted composition, 3:2 toward purple-circle — confirm the weight
    # semantics of ComposedDQN before reading results off this.
    dqn = ComposedDQN([dqn_purple_circle, dqn_beige_crate], [3, 2]) # TODO put weights here!
    # One value estimate per board cell, with the player moved to that cell.
    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)
        # Presumably clears whatever occupies `pos` — confirm `remove`
        # semantics.
        positions = remove(positions, pos)
        positions['player'] = pos
        env = WarpFrame(CollectEnv(
            start_positions=positions,
            goal_condition=lambda x: (x.colour == 'beige' and x.shape == 'square')
            or (x.colour == 'purple' and x.shape == 'circle')))
        obs = env.reset()
        # NOTE: loop body continues beyond this excerpt.
from dqn import ComposedDQN, FloatTensor, get_action
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength

if __name__ == '__main__':
    max_episodes = 50000
    max_trajectory = 50
    # Task: collect anything blue or purple, capped at 50 steps.
    task = MaxLength(
        WarpFrame(
            CollectEnv(goal_condition=lambda x: x.colour == 'blue' or x.colour == 'purple')),
        max_trajectory)
    dqn_blue = load('./models/blue/model.dqn', task)
    dqn_purple = load('./models/purple/model.dqn', task)
    # Equal-weight OR-composition of the two base policies.
    dqn_composed = ComposedDQN([dqn_blue, dqn_purple], [1, 1])
    # Evaluate each base policy and the composition on the same task.
    for dqn, name in [(dqn_blue, 'blue'), (dqn_purple, 'purple'),
                      (dqn_composed, 'composed')]:
        env = Monitor(task, './experiment_or/' + name + '/',
                      video_callable=False, force=True)
        for episode in range(max_episodes):
            if episode % 1000 == 0:
                print(episode)
            obs = env.reset()
            for _ in range(max_trajectory):
                obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
                # NOTE: loop body continues beyond this excerpt.
def __init__(self):
    """Fit the classifier from the stored dataset and set up recording.

    Loads the raw data, preprocesses it, trains the model together with
    `sc` (presumably a fitted scaler — confirm against `train`), and
    attaches a Recorder for live input.
    """
    features, labels = load()
    features, labels = preprocess(features, labels)
    self.model, self.sc = train(features, labels)
    self.rec = Recorder(NUM_WINDOWS, FFT_SIZE)
if __name__ == '__main__':
    # Fixed board layout for the value-map sweep.
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    # Task: collect the blue square.
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
    dqn_blue = load('../../models/blue/model.dqn', env)
    dqn_crate = load('../../models/crate/model.dqn', env)
    # Equal-weight AND-composition (or_compose=False) of 'blue' and 'crate'.
    dqn = ComposedDQN([dqn_blue, dqn_crate], [1, 1], or_compose=False)
    # One value estimate per board cell, with the player moved to that cell.
    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)
        # Presumably clears whatever occupies `pos` — confirm `remove`
        # semantics.
        positions = remove(positions, pos)
        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
                       goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
        obs = env.reset()
        # NOTE: loop body continues beyond this excerpt.
from trainer import load
from gym_repoman.envs import CollectEnv
from wrappers import WarpFrame, MaxLength
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw

if __name__ == '__main__':
    env = CollectEnv()
    # All six (colour, shape) goals in a fixed order.
    all_goals = np.array([('beige', 'circle'), ('beige', 'square'),
                          ('blue', 'circle'), ('blue', 'square'),
                          ('purple', 'circle'), ('purple', 'square')])
    # Rows mark which entries of all_goals belong to each base task; the row
    # order matches the 'P', 'B', 'S' keys of `models` below (purple goals
    # are indices 4-5, blue 2-3, squares 1/3/5).
    base_tasks = np.array([[0, 0, 0, 0, 1, 1],
                           [0, 0, 1, 1, 0, 0],
                           [0, 1, 0, 1, 0, 1]])
    dqn_purple = load('./models/purple/model.dqn', env)
    dqn_blue = load('./models/blue/model.dqn', env)
    dqn_crate = load('./models/crate/model.dqn', env)
    if torch.cuda.is_available():
        dqn_purple.cuda()
        dqn_blue.cuda()
        dqn_crate.cuda()
    models = {'P': dqn_purple, 'B': dqn_blue, 'S': dqn_crate}
    mgoals = []
    if os.path.exists('./goals.h5'):
        mgoals = dd.io.load('goals.h5')

    def experiment(task=None, save_trajectories=True, max_episodes=4, max_trajectory=20):
        # NOTE: function body truncated in this excerpt.
if __name__ == '__main__':
    # Fixed board layout; the player starts at (2, 1).
    start_positions = {
        'player': (2, 1),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    # Task: collect anything purple or blue.
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.colour == 'purple' or x.colour == 'blue'))
    dqn1 = load('../../models/purple/model.dqn', env)
    dqn2 = load('../../models/blue/model.dqn', env)
    # Weighted composition; 1.1 presumably biases toward the blue policy —
    # confirm ComposedDQN's weight semantics.
    dqn = ComposedDQN([dqn1, dqn2], [1, 1.1])
    obs = env.reset()
    positions = list()  # trail of player cells, starting position included
    positions.append(env.env.player.position)
    env.render()
    # Greedy rollout for at most 100 steps, rendering each frame.
    for _ in range(100):
        obs = np.array(obs)
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
        action = get_action(dqn, obs)
        obs, reward, done, _ = env.step(action)
        env.render()
        positions.append(env.env.player.position)
def __init__(self, modelfilestring, lrows, lcols, m_test_set="", m_model="",
             context="", device="cpu"):
    """Prepare an extraction run around a trained RNN language model.

    Stores the run settings, restores the RNN, and declares (as ``None``)
    every result slot later filled by the extraction and metric
    computations.  Judging by `lhankels`, `ranks` and the "target WA"
    comment, this appears to drive a Hankel-matrix / weighted-automaton
    extraction — confirm against the rest of the class.

    Args:
        modelfilestring: whitespace-separated tokens forwarded to
            ``trainer.load`` to restore the RNN.
        lrows: rows specification for the Hankel blocks.
        lcols: columns specification for the Hankel blocks.
        m_test_set: optional test-set identifier; non-empty enables the
            test-set metrics.
        m_model: optional target-model identifier; non-empty enables the
            target-automaton metrics.
        context: free-form label for this run.
        device: torch device string the restored RNN is moved to.
    """
    self.nb_proc = len(os.sched_getaffinity(0))  # CPUs usable by this process
    self.device = device
    # self.nb_proc = 1
    self.is_ready = False
    # Semi-constants :
    self.quiet = False
    self.epsilon = 1e-30
    self.batch_vol = 1024
    self.randwords_minlen = 0
    self.randwords_maxlen = 100
    self.randwords_nb = 1000
    # Debug Warning !
    if self.randwords_nb < 1000:
        print("DEBUG - DEBUG - DEBUG - DEBUG - DEBUG")
        print("Low random words number for debug purpose ?")
        print("DEBUG - DEBUG - DEBUG - DEBUG - DEBUG")
    self.patience = 250
    self.rand_temperature = 6  # >= 1
    # Arguments :
    self.rnn_model = trainer.load(*(modelfilestring.split()))  # pytorch OK
    self.rnn_model = self.rnn_model.to(self.device)
    self.lrows = lrows
    self.lcols = lcols
    self.metrics_test_set = m_test_set
    self.metrics_model = m_model
    self.context = context
    # Attributes derived from arguments :
    self.nalpha = self.rnn_model.nalpha  # alphabet size, taken from the RNN
    # self.pad = int(self.rnn_model.input.shape[1])
    self.metrics_calc_level = 0
    if m_test_set != "":
        # We have access to a test set, like in SPICE and PAUTOMAC
        self.metrics_calc_level += 1
    if m_model != "":
        # We have access to a target WA, like in PAUTOMAC
        self.metrics_calc_level += 1
    # Computed attributes
    self.prefixes = None
    self.suffixes = None
    self.words = None
    self.words_probas = None
    self.lhankels = None
    self.last_extr_aut = None
    # metrics calculations attributes
    self.ranks = []
    self.true_automaton = None
    self.metrics = dict()
    # Evaluation inputs (x_*) and per-model outputs (y_*); the suffixes name
    # the evaluation set (test / rand / rnnw) and the scored model
    # (target / rnn / extr) — inferred from the names, confirm in the
    # metric-computation code.
    self.x_test = None
    self.x_rand = None
    self.x_rnnw = None
    self.y_test_target = None
    self.y_test_rnn = None
    self.y_test_extr = None
    self.y_rand_target = None
    self.y_rand_rnn = None
    self.y_rand_extr = None
    self.y_rnnw_rnn = None
    self.y_rnnw_extr = None
    self.y_test_target_prefixes = None
    self.y_test_rnn_prefixes = None
    self.y_test_extr_prefixes = None
    self.y_rnnw_rnn_prefixes = None
    self.y_rnnw_extr_prefixes = None
    # perp_* : perplexity-style metrics (by name).
    self.perp_test_target = None
    self.perp_test_rnn = None
    self.perp_test_extr = None
    self.perp_rand_target = None
    self.perp_rand_rnn = None
    # kld_* : KL-divergence-style metrics (by name).
    self.kld_test_target_rnn = None
    self.kld_test_rnn_extr = None
    self.kld_test_target_extr = None
    self.kld_rand_target_rnn = None
    self.kld_rand_rnn_extr = None
    self.kld_rand_target_extr = None
    self.kld_rand_extr_rnn = None
    # wer_* : word-error-rate-style metrics (by name).
    self.wer_test_target = None
    self.wer_test_rnn = None
    self.wer_rnnw_rnn = None
    self.wer_test_extr = None
    self.wer_rnnw_extr = None
    # ndcg1_* / ndcg5_* : NDCG@1 and NDCG@5 ranking scores (by name).
    self.ndcg1_test_target_rnn = None
    self.ndcg1_test_rnn_extr = None
    self.ndcg1_test_target_extr = None
    self.ndcg1_rnnw_rnn_extr = None
    self.ndcg5_test_target_rnn = None
    self.ndcg5_test_rnn_extr = None
    self.ndcg5_test_target_extr = None
    self.ndcg5_rnnw_rnn_extr = None
    self.perp_rand_extr = None
    # eps_* / l2dis_* slots; semantics not shown in this excerpt.
    self.eps_test_zeros_extr = None
    self.l2dis_target_extr = None
    self.eps_rand_zeros_target = None
    self.eps_rand_zeros_rnn = None
    self.eps_rand_zeros_extr = None
    self.eps_kl_rand_target_extr = None
    self.eps_kl_rand_target_rnn = None
    #
    self.perprnn_test_rnn = None
    self.perprnn_test_extr = None
    self.perprnn_rnnw_rnn = None
    self.perprnn_rnnw_extr = None
    self.eps_rnnw_zeros_extr = None
if __name__ == '__main__':
    # Fixed board layout for the value-map sweep.
    start_positions = {
        'player': (3, 4),
        'crate_purple': (6, 3),
        'circle_purple': (7, 7),
        'circle_beige': (1, 7),
        'crate_beige': (2, 2),
        'crate_blue': (8, 1),
        'circle_blue': (2, 8)
    }
    # Task: collect the blue square; uses the monolithic blue_crate DQN.
    env = WarpFrame(
        CollectEnv(start_positions=start_positions,
                   goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
    dqn = load('../../models/blue_crate/model.dqn', env)
    # One value estimate per board cell, with the player moved to that cell.
    values = np.zeros_like(env.env.board, dtype=float)
    for pos in env.env.free_spaces:
        positions = copy.deepcopy(start_positions)
        # Presumably clears whatever occupies `pos` — confirm `remove`
        # semantics.
        positions = remove(positions, pos)
        positions['player'] = pos
        env = WarpFrame(
            CollectEnv(start_positions=positions,
                       goal_condition=lambda x: x.shape == 'square' and x.colour == 'blue'))
        obs = env.reset()
        obs = torch.from_numpy(obs).type(FloatTensor).unsqueeze(0)
        # Greedy state value: max over actions of the Q-vector.
        # NOTE(review): Variable(..., volatile=True) is the pre-0.4 PyTorch
        # inference API (removed in current releases; torch.no_grad() is
        # the modern equivalent).
        v = dqn(Variable(obs, volatile=True)).data.max(1)[0].view(1, 1)[0][0]
        # NOTE: loop body continues beyond this excerpt.