def __init__(
        self, expname='key', goalspecs='F P_[KE][1,none,==]',
        keys=['LO', 'FW', 'KE'], actions=list(range(5)),
        seed=None, maxtracelen=40, trainc=False, epoch=80):
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    if seed is not None:
        env = ReseedWrapper(env, seeds=[seed])
    env = FullyObsWrapper(env)
    self.env = env
    self.env.max_steps = min(env.max_steps, 200)
    # self.env.agent_view_size = 1
    self.env.reset()
    self.expname = expname
    self.goalspecs = goalspecs
    self.epoch = epoch
    self.maxtracelen = maxtracelen
    self.trainc = trainc
    self.allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO',
        'CK', 'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM']
    self.keys = keys
    self.actions = actions
    root = goalspec2BT(goalspecs, planner=None, node=CompetentNode)
    self.behaviour_tree = BehaviourTree(root)
    self.blackboard = Blackboard()
def get_init_state(size, num_crossings, seed=None):
    env = CrossingEnv(size=size, num_crossings=num_crossings,
                      obstacle_type=Wall, seed=seed)
    env = FullyObsWrapper(env)
    obs = env.reset()
    return MiniGridState(obs, env, False, 0)
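# Hedged usage sketch (added, not from the original source): build an initial
# search state for a small crossing task. The size/num_crossings/seed values
# below are illustrative assumptions only.
example_state = get_init_state(size=9, num_crossings=1, seed=0)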
def _build_environment(name, n_actions=3, max_steps=500):
    raw_env = gym.make(name)
    raw_env.action_space.n = n_actions
    raw_env.max_steps = max_steps
    env = ImgFlatObsWrapper(FullyObsWrapper(raw_env))
    env = gym_wrapper.GymWrapper(env)
    env = CustomSinglePrecisionWrapper(env)
    spec = specs.make_environment_spec(env)
    return env, spec
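# Hedged usage sketch (added): build the wrapped environment and inspect its
# spec. The environment id is an assumed example, and the spec attribute names
# follow acme's EnvironmentSpec convention; neither is taken from this codebase.
example_env, example_spec = _build_environment('MiniGrid-Empty-8x8-v0')
print(example_spec.observations, example_spec.actions)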
def e16xn(offset=0):
    env = EmptyEnvV2(size=16, agent_pos=(1, 1), agent_dir=None,
                     goal_pos=(8, 8), train=True, goal_rand_offset=offset)
    env = FreeMove(env)
    env = ConstantReward(env)
    env = FullyObsWrapper(env)
    return env
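# Hedged usage sketch (added): instantiate the 16x16 variant with a small goal
# offset; the offset value and the reset call are illustrative assumptions.
example_env = e16xn(offset=2)
example_obs = example_env.reset()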
def get_minigrid_environment(environment_name='MiniGrid-UnlockPickup-v0',
                             domain_file="domains/gridworld_abstract.pddl",
                             render=False, use_executor=False, actions=None):
    from agent.env_wrappers import ExecutorWrapper
    env = gym.make(environment_name)
    env.seed(seed=seed())
    env = FullyObsWrapper(env)
    env = NamedObjectWrapper(env)
    env = LastObsWrapper(env)
    # TODO note: domain_file is a relative path, so this code needs to be run
    # from a file in the uppermost directory. If you want a different relative
    # path, you'll have to specify it yourself.
    if use_executor and actions is not None:
        env = ExecutorWrapper(env, domain_file, MiniGridDetector,
                              MiniGridExecutor, render, actions)
    return env
def make_single_env(game):
    """Make a preprocessed gym.Env."""
    if 'MiniGrid' in game:
        env = PreprocessEnv(FullyObsWrapper(gym.make(game)))
    else:
        env = gym.make(game)
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    print('action space: %s obs space: %s'
          % (env.action_space, env.observation_space))
    print('XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX')
    # sys.exit()
    return GrayscaleEnv(DownsampleEnv(env, 2))
def __init__(self, env, args):
    super().__init__(
        env,
        args,
        # default values for this algorithm
        default_learning_rate=0.1,
        default_discount_factor=0.99,
        default_start_eps=0.5,
        default_end_eps=0.05,
        default_annealing_steps=2500,
        default_num_updates=4000,
    )
    try:
        # for MiniGrid environments
        self.env: MiniGridEnv = FullyObsWrapper(self.env)
        width, height = self.env.observation_space.shape[0:2]
        self.in_features = width * height * DIRECTIONS
        # really Discrete(7) for this env, but we don't need the pick up, drop... actions
        self.env.action_space = Discrete(3)
        self.discrete_obs_space = False
    except Exception:
        # for other gym environments like FrozenLake-v0
        if isinstance(self.env.observation_space, Discrete):
            self.in_features = self.env.observation_space.n
            self.discrete_obs_space = True
        else:
            # for other environments, we don't know how in_features is
            # calculated from the observation space
            raise RuntimeError(
                "Don't know how to handle this observation space: "
                f"{self.env.observation_space}")
    self.model = {
        "q_network": Net(self.in_features, self.env.action_space).to(device)
    }
def test_competency():
    # import py_trees
    # behaviour_tree = BehaviourTree(root)
    # Remember to comment set_state in GenRecProp before
    # running this test case
    one = BehaviourTree(Sequence(name=str(1)))
    two = Sequence(name=str(2))
    three = Sequence(name=str(3))
    four = Selector(name=str(4))
    five = Sequence(name=str(5))
    six = Sequence(name=str(6))
    # seven = Parallel(name=str(7))
    seven = Selector(name=str(7))
    exenodes = [
        CompetentNode(name=chr(ord('A') + i), planner=None)
        for i in range(0, 11)]
    three.add_children(exenodes[:3])
    four.add_children(exenodes[3:6])
    six.add_children(exenodes[6:9])
    seven.add_children(exenodes[9:])
    two.add_children([three, four])
    five.add_children([six, seven])
    one.root.add_children([two, five])
    # py_trees.logging.level = py_trees.logging.Level.DEBUG
    # py_trees.display.print_ascii_tree(one.root)
    blackboard = Blackboard()
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    env = ReseedWrapper(env, seeds=[3])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    state, reward, done, _ = env.step(2)
    # print(state['image'].shape, reward, done, _)

    # Find the key
    goalspec = 'F P_[KE][1,none,==]'
    # keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO',
        'CK', 'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM']
    keys = ['LO', 'FW', 'KE']
    actions = [0, 1, 2, 3, 4, 5]

    def fn_c(child):
        pass

    def fn_eset(child):
        planner = GenRecPropMultiGoal(
            env, keys, goalspec, dict(), actions=actions, max_trace=40,
            seed=None, allkeys=allkeys, id=child.name)
        child.setup(0, planner, True, 50)

    def fn_einf(child):
        child.train = False
        child.planner.epoch = 5
        child.planner.tcount = 0

    def fn_ecomp(child):
        child.planner.compute_competency()
        print(child.name,
              child.planner.blackboard.shared_content['curve'][child.name])

    recursive_setup(one.root, fn_eset, fn_c)

    # Train
    for i in range(100):
        one.tick(pre_tick_handler=reset_env(env))
        print(i, 'Training', one.root.status)

    # Inference
    recursive_setup(one.root, fn_einf, fn_c)
    for i in range(5):
        one.tick(pre_tick_handler=reset_env(env))
        print(i, 'Inference', one.root.status)

    recursive_setup(one.root, fn_ecomp, fn_c)

    # Manually set the competency of the execution nodes
    ckeys = [chr(ord('A') + i) for i in range(0, 11)]
    manval = [
        np.array([0.84805786, 4.76735384, 0.20430223]),
        np.array([0.54378425, 4.26958399, 3.50727315]),
        np.array([0.50952059, 5.54225945, 5.28025611])]
    j = 0
    for c in ckeys:
        blackboard.shared_content['curve'][c] = manval[j % 3]
        j += 1

    # Recursively compute competency for the control nodes
    recursive_com(one.root, blackboard)
    # print(exenodes[0].planner.blackboard.shared_content['curve'])

    # Manually compare the recursively computed competency values
    # for the control nodes

    # First sub-tree
    a = exenodes[0].planner.blackboard.shared_content['curve']['A']
    b = exenodes[0].planner.blackboard.shared_content['curve']['B']
    c = exenodes[0].planner.blackboard.shared_content['curve']['C']
    threec = sequence([a, b, c])
    # print('three', threec)
    assert threec == exenodes[0].planner.blackboard.shared_content['curve']['3']

    # Second sub-tree
    d = exenodes[0].planner.blackboard.shared_content['curve']['D']
    e = exenodes[0].planner.blackboard.shared_content['curve']['E']
    f = exenodes[0].planner.blackboard.shared_content['curve']['F']
    fourc = selector([d, e, f])
    assert fourc == exenodes[0].planner.blackboard.shared_content['curve']['4']

    # Third sub-tree
    g = exenodes[0].planner.blackboard.shared_content['curve']['G']
    h = exenodes[0].planner.blackboard.shared_content['curve']['H']
    i = exenodes[0].planner.blackboard.shared_content['curve']['I']
    sixc = sequence([g, h, i])
    assert sixc == exenodes[0].planner.blackboard.shared_content['curve']['6']

    # Fourth sub-tree
    j = exenodes[0].planner.blackboard.shared_content['curve']['J']
    k = exenodes[0].planner.blackboard.shared_content['curve']['K']
    sevenc = selector([j, k])
    assert sevenc == exenodes[0].planner.blackboard.shared_content['curve']['7']

    twoc = sequence([threec, fourc])
    assert twoc == exenodes[0].planner.blackboard.shared_content['curve']['2']
    fivec = sequence([sixc, sevenc])
    assert fivec == exenodes[0].planner.blackboard.shared_content['curve']['5']
    onec = sequence([twoc, fivec])
    assert onec == exenodes[0].planner.blackboard.shared_content['curve']['1']
    print(onec)
writer = SummaryWriter(f"runs/{experiment_name}") writer.add_text('hyperparameters', "|param|value|\n|-|-|\n%s" % ( '\n'.join([f"|{key}|{value}|" for key, value in vars(args).items()]))) if args.prod_mode: import wandb wandb.init(project=args.wandb_project_name, entity=args.wandb_entity, sync_tensorboard=True, config=vars(args), name=experiment_name, monitor_gym=True, save_code=True) writer = SummaryWriter(f"/tmp/{experiment_name}") # TRY NOT TO MODIFY: seeding device = torch.device('cuda' if torch.cuda.is_available() and args.cuda else 'cpu') env = gym.make(args.gym_id) #env = wrap_atari(env) if args.fully_observable: env = FullyObsWrapper(env) print("Fully Observable Obs space: ", env.observation_space) env = ImgObsWrapper(env) print("Obs space: ", env.observation_space) #env = gym.wrappers.RecordEpisodeStatistics(env) # records episode reward in `info['episode']['r']` if args.capture_video: env = Monitor(env, f'videos/{experiment_name}') #env = wrap_deepmind( # env, # clip_rewards=True, # frame_stack=True, # scale=False, #)
                entryDoorWall=nextEntryWall,
                entryDoorPos=exitDoorPos)
            if success:
                break
        return True


class MultiRoomEnvN6(MultiRoomEnv):
    def __init__(self):
        super().__init__(minNumRooms=MIN_NUM_ROOMS,
                         maxNumRooms=MAX_NUM_ROOMS)


register(id='MiniGrid-Maze-v0',
         entry_point=lambda: FullyObsWrapper(MultiRoomEnvN6()),
         reward_threshold=1000.0)


def BFS(grid, q, visited, paths):
    current_index = q.get()
    current_x, current_y = current_index[0], current_index[1]
    element = grid[current_x, current_y]
    visited[current_x, current_y] = 1
    if element == 9:
        return current_x, current_y
    for x in range(current_x - 1, current_x + 2):
        for y in range(current_y - 1, current_y + 2):
def run_transfer(orig_level, transfer_level, transfer_goal, run_id, domain_file,
                 min_episodes, render, checkpoint_every, log_every, ops_every):
    orig_pickle_filename = get_pickle_file(orig_level, run_id)
    agent_render = 'HUMAN' if render else None
    with open(orig_pickle_filename, "rb") as file:
        eps_so_far, brain = pickle.load(file)
    agent = Solver(None, None, None, None, None, brain, eps_so_far, None,
                   MinigridStateHasher, None, 0, "", "",
                   get_operator_filename(orig_level, run_id),
                   render=None, ops_every=ops_every)
    inherited_executor = agent.brain.motor.executor
    inherited_executor.clear_learners()
    inherited_operators = [
        op for op in agent.brain.wm.task.operators
        if op.name.startswith("new_action")
    ]
    num_new_ops = 0
    for op in inherited_operators:
        inherited_executor.rename_op(op, "transfer_" + str(num_new_ops).zfill(4))
        num_new_ops += 1

    env = gym.make(transfer_level)
    env.seed(seed=seed())
    env = FullyObsWrapper(env)
    env = NamedObjectWrapper(env)
    env = LastObsWrapper(env)
    env.reset()
    os.makedirs("results" + os.sep + "spotter" + os.sep + str(transfer_level)
                + os.sep + "operators", exist_ok=True)
    os.makedirs("results" + os.sep + "spotter" + os.sep + str(transfer_level)
                + os.sep + "pickles", exist_ok=True)
    inherited_executor.set_environment(env)
    results_filename = get_results_filename(transfer_level, run_id)
    pickle_filename = get_pickle_file(transfer_level, run_id)
    agent_render = 'HUMAN' if render else None
    agent = Solver(env,
                   domain_bias=domain_file,
                   goal=transfer_goal,
                   detector=MiniGridDetector,
                   executor_class=MiniGridExecutor,
                   state_hasher=MinigridStateHasher,
                   executor=inherited_executor,
                   operators=inherited_operators,
                   min_episodes=min_episodes,
                   results_filename=results_filename,
                   operator_filename=get_operator_filename(transfer_level, run_id),
                   pickle_filename=pickle_filename,
                   render=agent_render,
                   checkpoint_every=checkpoint_every,
                   log_every=log_every)
    agent.solve()
    agent.evaluate()
    # Final pickle of the agent's brain at the conclusion of the episode
    with open(pickle_filename, "wb") as file:
        pickle.dump((agent.episode_counter, agent.brain), file)
from hrl.envs.four_rooms import FourRooms
from hrl.experiments import EXPERIMENT_DIR
from hrl.frameworks.options.SMDP import SMDPValueLearning
from hrl.frameworks.options.hard_coded_options import HallwayOption, PrimitiveOption
from hrl.frameworks.options.intra_option import IntraOptionValueLearning
from hrl.project_logger import ProjectLogger
from hrl.visualization import PlotterOneHot

SAVEPATH = Path(f'{EXPERIMENT_DIR}/value_learning')

if __name__ == '__main__':
    # Create environment
    tasks = iter([(15, 15), (10, 17), (17, 10), (17, 1), (8, 8)])
    env = FullyObsWrapper(FourRooms(goal_pos=next(tasks)))
    env.unwrapped.max_steps = 1000000

    # Create loggers
    LOGLEVEL = 10
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)
    SAVEPATH /= env.unwrapped.__class__.__name__
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
def make_env(env_key, seed=None):
    env = FullyObsWrapper(gym.make(env_key))
    env.seed(seed)
    return env
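# Hedged usage sketch (added): the environment id and the old-style four-tuple
# step API below are assumptions consistent with the surrounding snippets.
example_env = make_env('MiniGrid-Empty-8x8-v0', seed=0)
example_obs = example_env.reset()
example_obs, reward, done, info = example_env.step(example_env.action_space.sample())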
""" error = 0 for i, option in enumerate(options): if isinstance(option, PrimitiveOption): continue abs_rew = np.abs(true_R[i] - R[i]) error += np.sum(np.multiply(abs_rew, option.initiation_set)) error /= len(options) return error if __name__ == "__main__": # Specify the environment env = RandomRewards( FullyObsWrapper(FourRooms(agent_pos=(1, 1), goal_pos=(0, 0)))) env.unwrapped.max_steps = 1000000 # env.step = partial(stochastic_step, env) # Use hard-coded hallway options options = [ HallwayOption(o, env.observation_space.shape[::-1]) for o in sorted(HallwayOption.hallway_options) ] options += [ PrimitiveOption(o, env.observation_space.shape[::-1]) for o in sorted(PrimitiveOption.primitive_options) ] @ray.remote def single_run(env, options, seed, record_every: int = 1000):
def find_key():
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    # env = ReseedWrapper(env, seeds=[3])  # Easy
    env = ReseedWrapper(env, seeds=[5])  # Medium
    # env = ReseedWrapper(env, seeds=[7])  # Hard
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    # time.sleep(10)
    # state, reward, done, _ = env.step(2)
    # print(state['image'].shape, reward, done, _)

    # Find the key
    goalspec = 'F P_[KE][1,none,==]'
    # keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO',
        'CK', 'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM']
    keys = ['LO', 'FW', 'KE']
    actions = [0, 1, 2]

    root = goalspec2BT(goalspec, planner=None, node=CompetentNode)
    behaviour_tree = BehaviourTree(root)
    child = behaviour_tree.root
    planner = GenRecPropMultiGoal(
        env, keys, child.name, dict(), actions=actions,
        max_trace=50, seed=None, allkeys=allkeys)

    def run(pepoch=50, iepoch=10):
        # pepoch = 50
        child.setup(0, planner, True, pepoch)
        # Train
        for i in range(pepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))
        # Inference
        child.train = False
        child.planner.epoch = iepoch
        child.planner.tcount = 0
        for i in range(iepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))

    competency = []
    epochs = [(80, 10)] * 2
    datas = []
    for i in range(2):
        run(epochs[i][0], epochs[i][1])
        datas.append(
            np.mean(
                planner.blackboard.shared_content['ctdata'][planner.goalspec],
                axis=0))
        competency.append(planner.compute_competency())
    print(competency)
    compare_curve(competency, datas)
    train_obs = []
    train_act = []
    elite_batch = []
    for example, discounted_reward in zip(batch, disc_rewards):
        if discounted_reward > reward_bound:
            train_obs.extend(map(lambda step: step.observation, example.steps))
            train_act.extend(map(lambda step: step.action, example.steps))
            elite_batch.append(example)
    return elite_batch, train_obs, train_act, reward_bound


if __name__ == "__main__":
    env = FullyObsWrapper(gym.make("MiniGrid-Empty-5x5-v0"))
    env = FlatteningFullyObsWrapper(env)
    env = ReducingActionWrapper(env)
    env = Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    net = Net(obs_size, HIDDEN_SIZE, n_actions)
    objective = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=net.parameters(), lr=0.001)
    writer = SummaryWriter(comment="-minigrid-empty-5x5")

    full_batch = []
    for iter_no, batch in enumerate(iterate_batches(env, net, BATCH_SIZE)):
        reward_mean = float(np.mean(list(map(
        nchannels = 1
        img = np.resize(img, (height, width, nchannels))
        print('RESIZED: ', img.shape)
        return img

    def to_grayscale(self, img):
        img = Image.fromarray(img, 'RGB').convert('L')
        img = np.array(img)
        return img

    def downsample(self, img, rate):
        return img[::rate, ::rate]


import gym

env = PreprocessEnv(FullyObsWrapper(gym.make('MiniGrid-Empty-8x8-v1')))
# env = gym.make('MiniGrid-Empty-8x8-v1')
print('FIRST obs: %s, act: %s' % (env.observation_space, env.action_space))
env.reset()
env.render()
for s in range(1000):
    if s % 100 == 0:
        # env.render()
        pass
    if s == 20:
        env.render()
        print('obs: %s, act: %s' % (env.observation_space, env.action_space))
    # gym's step returns (obs, reward, done, info)
    obs, reward, done, info = env.step(
        env.action_space.sample())  # take a random action
    if s == 20:
        print(obs)
def create_gymenv(flags):
    if flags.env in [
            "seaquest", "breakout", "asterix", "freeway", "space_invaders"]:
        env_type = "minatar"
    elif flags.env == "random":
        env_type = "random"
    elif "block-" in flags.env:
        env_type = "blockworld"
    elif flags.env in ["rtfm", "rtfm-onehop"]:
        env_type = "rtfm"
    elif flags.env == "boxworld":
        env_type = "boxworld"
    else:
        env_type = "minigrid"

    portal_pairs = []
    if env_type == "minigrid":
        env = gym.make(flags.env)
        # env = ReseedWrapper(env)
        env = FullyObsWrapper(env)
        env = PaddingWrapper(env)
        if flags.action == "moveto":
            env = MoveToActionWrapper(env)
        elif flags.action == "move_dir":
            env = MoveDirActionWrapper(env)
        if flags.env == "MiniGrid-LavaCrossingClosed-v0":
            env = ProtalWrapper(env, portal_pairs)
    elif env_type == "minatar":
        from environment.minatarwarpper import MinAtarEnv
        env = MinAtarEnv(flags.env, flags.sticky_prob)
    elif env_type == "random":
        from environment.random import RandomEnv
        env = RandomEnv()
    elif env_type == "blockworld":
        from environment.blockworld import BlockEnv, GridActionWrapper, BlockActionWrapper
        state_block_spec = False if flags.state != "block" and flags.action == "propositional" else True
        env = BlockEnv(flags.env,
                       nb_blocks=flags.nb_blocks,
                       variation=flags.variation,
                       rand_env=flags.rand_env,
                       state_block_spec=state_block_spec)
        if flags.state != "block" and flags.action == "relational":
            env = GridActionWrapper(env)
        if flags.state == "block" and flags.action == "relational":
            env = BlockActionWrapper(env)
    elif env_type in ["rtfm"]:
        from environment.rtfmkbenv import RTFMEnv, RTFMAbstractEnv, RTFMOneHopEnv
        with_vkb = False if flags.agent in ["CNN", "MHA"] or flags.disable_wiki else True
        if with_vkb:
            if flags.env == "rtfm":
                env = RTFMAbstractEnv(flags.room_size)
            elif flags.env == "rtfm-onehop":
                env = RTFMOneHopEnv(flags.room_size)
            else:
                raise ValueError()
        else:
            env = RTFMEnv()

    if flags.agent in ["NLM", "KBMLP", "GCN"]:
        if env_type == "minigrid":
            env = DirectionWrapper(env)
        if flags.state == "absolute":
            env = AbsoluteVKBWrapper(env, flags.bg_code, portal_pairs)
        elif flags.state == "block":
            from environment.blockworld import BlockVKBWarpper
            env = BlockVKBWarpper(env)
        else:
            raise ValueError(f"state encoding cannot be {flags.state}")
    elif flags.agent in ["SNLM"]:
        if env_type == "minigrid":
            env = DirectionWrapper(env, type="onehot")
            env = OneHotFullyObsWrapper(env)
    return env
from agent.planning_terminator import DepthPlanningTerminator
from agent.policy_terminator import StrictGoalTerminator
from env.minigrid.wrappers import OnehotWrapper, find, onehot2directedpoint
from env.minigrid import MinigridBacktrackingAgent, SimpleMinigridGenerator, VModel, QModel, Evaluator
from misc.typevars import Option

states = []
initials = []

settings = {'random': 2, 'device': torch.device("cuda:0")}
N_EPISODES = 10

env = gym.make('MiniGrid-LavaGapS7-v0')
# env = gym.make("MiniGrid-SimpleCrossingS9N2-v0")
env.seed(settings['random'])
env = FullyObsWrapper(env)
env = ImgObsWrapper(env)
env = OnehotWrapper(env)
env.render()
assert isinstance(env.observation_space, gym.spaces.Box)

low_level_agent = MinigridBacktrackingAgent()
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])
v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])
planning_terminator = DepthPlanningTerminator(max_depth=3)
evaluator = Evaluator(v_model, q_model,
def carry_key():
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    env = ReseedWrapper(env, seeds=[3])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    state, reward, done, _ = env.step(2)

    # Find the key
    goalspec = 'F P_[KE][1,none,==] U F P_[CK][1,none,==]'
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO',
        'CK', 'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM']
    keys = ['LO', 'FW', 'KE', 'CK']
    actions = [0, 1, 2, 3, 4, 5]

    root = goalspec2BT(goalspec, planner=None, node=CompetentNode)
    behaviour_tree = BehaviourTree(root)
    epoch = 80

    def fn_c(child):
        pass

    def fn_eset(child):
        planner = GenRecPropMultiGoal(
            env, keys, child.name, dict(), actions=actions,
            max_trace=40, seed=None, allkeys=allkeys)
        child.setup(0, planner, True, epoch)

    def fn_einf(child):
        child.train = False
        child.planner.epoch = 5
        child.planner.tcount = 0

    def fn_ecomp(child):
        child.planner.compute_competency()

    recursive_setup(behaviour_tree.root, fn_eset, fn_c)
    # recursive_setup(behaviour_tree.root, fn_c, fn_c)
    # py_trees.logging.level = py_trees.logging.Level.DEBUG
    # py_trees.display.print_ascii_tree(behaviour_tree.root)

    # Train
    for i in range(100):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Training', behaviour_tree.root.status)

    # Inference
    recursive_setup(behaviour_tree.root, fn_einf, fn_c)
    for i in range(5):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
        print(i, 'Inference', behaviour_tree.root.status)

    recursive_setup(behaviour_tree.root, fn_ecomp, fn_c)
    # recursive_setup(behaviour_tree.root, fn_c, fn_c)
    blackboard = Blackboard()
    print(recursive_com(behaviour_tree.root, blackboard))
from hrl.envs.four_rooms import FourRooms
from hrl.experiments import EXPERIMENT_DIR
from hrl.learning_algorithms.SMDP import SMDPModelLearning, SMDPPlanning
from hrl.frameworks.options.hard_coded_options import HallwayOption, PrimitiveOption
from hrl.project_logger import ProjectLogger
from hrl.utils import cache
from hrl.visualization.plotter_one_hot import PlotterOneHot

"""
Evaluate the benefits of planning with options.
"""

SAVEPATH = Path(f'{EXPERIMENT_DIR}/SMDP_planning')

if __name__ == '__main__':
    # Create environment
    env = FullyObsWrapper(FourRooms(goal_pos=(15, 15)))

    # Create loggers
    LOGLEVEL = 20
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)
    SAVEPATH /= env.unwrapped.__class__.__name__
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
    options += [
        actions, advantages = tf.split(acts_and_advs, 2, axis=-1)
        # sparse categorical CE loss obj that supports sample_weight arg on call()
        # from_logits argument ensures transformation into normalized probabilities
        weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
        # policy loss is defined by policy gradients, weighted by advantages
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)
        # entropy loss can be calculated via CE over itself
        entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
        # here signs are flipped because the optimizer minimizes
        return policy_loss - self.params['entropy'] * entropy_loss


if __name__ == '__main__':
    # Create environment
    env = SimplifyActionSpace(SimplifyObsSpace(FullyObsWrapper(FourRooms())))
    env.max_steps = 1000000

    # Create model
    model = Model(num_actions=env.action_space.n)

    # Create agent
    agent = A2CAgent(model)
    rewards_history = agent.train(env, updates=100000)
    print("Finished training, testing...")
    print(f"Took {agent.test(env)} steps")
def callback(_locals, _globals):
    n_steps = _locals['_']
    if n_steps and (n_steps % 1000 == 0):
        print(n_steps)
        print(_locals['episode_successes'])
        # env.render()
        # time.sleep(0.2)
    n_steps += 1
    # Returning False will stop training early
    return True


# Create log dir
log_dir = f"{EXPERIMENT_DIR}/sb/gym"
os.makedirs(log_dir, exist_ok=True)

# Create environment
env_name = 'MiniGrid-FourRooms-v1'
env = FullyObsWrapper(ImgObsWrapper(gym.make(env_name)))
env.max_steps = 100000
# env.step = partial(stochastic_step, env)
env = DummyVecEnv([lambda: env])

# Train a model
model = DQN(policy=MlpPolicy,
            env=env,
            tensorboard_log=f"{EXPERIMENT_DIR}/sb/tensorboard/{env_name}")
model.learn(total_timesteps=10000000, callback=callback)
                elif k == 1:
                    n = max(COLOR_TO_IDX.values()) + 1
                elif k == 2:
                    n = 4
                else:
                    raise Exception("Bad k")
                npo[i, j, k] = Discrete(n)
        ospace = tuple(npo.flat)
        sz = np.cumsum([o.n for o in ospace])
        sz = sz - sz[0]
        self.sz = sz
        # from gym.spaces.box import Box
        self.observation_space = ospace

    def observation(self, obs):
        s = obs['image'].reshape((obs['image'].size, ))
        return s


if __name__ == "__main__":
    """
    Example use:
    """
    env = gym.make("MiniGrid-Empty-5x5-v0")
    env = FullyObsWrapper(env)  # use this
    env = LinearSpaceWrapper(env)
    s = env.reset()
    print(s)
    # Use with, for instance:
    # agent = LinearSemiGradSarsa(env, gamma=1, epsilon=0.1, alpha=0.5)
    'N_EPISODES': 5,
    'TEST_FREQ': 1,
    'VIZ_FREQ': 1,
    'max_depth': 1,
    'environment_name': 'MiniGrid-SimpleCrossingS9N1-v0'
}

runtime = datetime.now().strftime("%Y-%m-%d @ %H-%M-%S")
writer = SummaryWriter(os.path.join("..", "runs", runtime))

env = gym.make(settings['environment_name'])
# env = gym.make("MiniGrid-SimpleCrossingS9N2-v0")
env.seed(settings['random'])
env = FullyObsWrapper(env)
env = ImgObsWrapper(env)
env = OnehotWrapper(env)
assert isinstance(env.observation_space, gym.spaces.Box)

low_level_agent = MinigridBacktrackingAgent()
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])
v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])
planning_terminator = DepthPlanningTerminator(max_depth=settings['max_depth'])
evaluator = Evaluator(v_model, q_model,
                      planning_terminator,
                      settings,
                      get_beta=lambda step: 3,
                      gamma=0.99)
generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)


def goal_met(s, o):
if __name__ == "__main__": a = Ana() a.test() # ================================================================================================== import gym import gym_minigrid from gym_minigrid.wrappers import RGBImgObsWrapper, FullyObsWrapper, RGBImgPartialObsWrapper import numpy as np import cv2 env_key = "MiniGrid-FourRooms-v0" seed = 0 env = gym.make(env_key, agent_pos=(1, 1), goal_pos=None, doors=True) env.max_steps = 400 env = FullyObsWrapper(env) env.seed(seed) obs = env.reset()["image"] while True: act = np.random.randint(3) obs, r, done, info = env.step(act) img = obs["image"] * 15 img = cv2.resize(img, (0, 0), fx=20, fy=20) cv2.imshow("test", img) cv2.waitKey(1) if done: env.reset() print("RESET")
def run_consecutive_levels(level_goal_episodes, domain_file, render=False,
                           checkpoint_every=500, log_every=100,
                           freeze_task=False, ops_every=1):
    inherited_executor = None
    inherited_operators = None
    num_new_ops = 0
    run_id = uuid.uuid4().hex
    for level, goal, min_episodes in level_goal_episodes:
        env = gym.make(level)
        env.seed(seed=seed())
        env = FullyObsWrapper(env)
        env = NamedObjectWrapper(env)
        env = LastObsWrapper(env)
        env.reset()
        os.makedirs("results" + os.sep + "spotter" + os.sep + str(level)
                    + os.sep + "operators", exist_ok=True)
        os.makedirs("results" + os.sep + "spotter" + os.sep + str(level)
                    + os.sep + "pickles", exist_ok=True)
        if inherited_executor:
            inherited_executor.set_environment(env)
        results_filename = get_results_filename(level, run_id)
        pickle_filename = get_pickle_file(level, run_id)
        agent_render = 'HUMAN' if render else None
        agent = Solver(env,
                       domain_bias=domain_file,
                       goal=goal,
                       detector=MiniGridDetector,
                       executor_class=MiniGridExecutor,
                       state_hasher=MinigridStateHasher,
                       executor=inherited_executor,
                       operators=inherited_operators,
                       min_episodes=min_episodes,
                       results_filename=results_filename,
                       operator_filename=get_operator_filename(level, run_id),
                       pickle_filename=pickle_filename,
                       render=agent_render,
                       checkpoint_every=checkpoint_every,
                       log_every=log_every,
                       freeze_task=freeze_task,
                       ops_every=ops_every)
        agent.solve()
        agent.evaluate()
        # Final pickle of the agent's brain at the conclusion of the episode
        with open(pickle_filename, "wb") as file:
            pickle.dump((agent.episode_counter, agent.brain), file)
        inherited_executor = agent.brain.motor.executor
        inherited_executor.clear_learners()
        inherited_operators = [
            op for op in agent.brain.wm.task.operators
            if op.name.startswith("new_action")
        ]
        for op in inherited_operators:
            inherited_executor.rename_op(
                op, "transfer_" + str(num_new_ops).zfill(4))
            num_new_ops += 1
def setup_env(env):
    # ReseedWrapper
    env = Torch(FullyObsWrapper(SimplifyActionSpace(env)))
    # env.step = partial(stochastic_step, env)
    return env
def full_state_train(env):
    return FullyObsWrapper(env)
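# Hedged usage sketch (added): wrap a freshly created MiniGrid environment so
# training sees the full grid instead of the partial egocentric view; the
# environment id is an assumed example.
example_env = full_state_train(gym.make('MiniGrid-Empty-5x5-v0'))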