Example #1
 def __init__(
         self, expname='key', goalspecs='F P_[KE][1,none,==]',
         keys=['LO', 'FW', 'KE'], actions=list(range(5)),
         seed=None, maxtracelen=40, trainc=False, epoch=80):
     env_name = 'MiniGrid-Goals-v0'
     env = gym.make(env_name)
     if seed is not None:
         env = ReseedWrapper(env, seeds=[seed])
     env = FullyObsWrapper(env)
     self.env = env
     self.env.max_steps = min(env.max_steps, 200)
     # self.env.agent_view_size = 1
     self.env.reset()
     self.expname = expname
     self.goalspecs = goalspecs
     self.epoch = epoch
     self.maxtracelen = maxtracelen
     self.trainc = trainc
     self.allkeys = [
         'LO', 'FW', 'KE', 'DR',
         'BOB', 'BOR', 'BAB', 'BAR',
         'LV', 'GO', 'CK',
         'CBB', 'CBR', 'CAB', 'CAR',
         'DO', 'RM']
     self.keys = keys
     self.actions = actions
     root = goalspec2BT(goalspecs, planner=None, node=CompetentNode)
     self.behaviour_tree = BehaviourTree(root)
     self.blackboard = Blackboard()
Example #2
def get_init_state(size, num_crossings, seed=None):
    env = CrossingEnv(size=size,
                      num_crossings=num_crossings,
                      obstacle_type=Wall,
                      seed=seed)
    env = FullyObsWrapper(env)
    obs = env.reset()
    return MiniGridState(obs, env, False, 0)
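All of the examples on this page share the same core pattern: a MiniGrid environment is wrapped in FullyObsWrapper so the agent observes the whole grid instead of its egocentric partial view. A minimal sketch of the difference, assuming the classic gym_minigrid API in which reset() returns the observation dict directly:

import gym
import gym_minigrid  # noqa: F401 -- registers the MiniGrid-* environment ids
from gym_minigrid.wrappers import FullyObsWrapper

env = gym.make('MiniGrid-Empty-8x8-v0')
obs = env.reset()
print(obs['image'].shape)  # partial egocentric view, (7, 7, 3) by default

env = FullyObsWrapper(env)
obs = env.reset()
print(obs['image'].shape)  # full grid, (8, 8, 3) for this environment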
Example #3
def _build_environment(name, n_actions=3, max_steps=500):
  raw_env = gym.make(name)
  raw_env.action_space.n = n_actions
  raw_env.max_steps = max_steps
  env = ImgFlatObsWrapper(FullyObsWrapper(raw_env))
  env = gym_wrapper.GymWrapper(env)
  env = CustomSinglePrecisionWrapper(env)
  spec = specs.make_environment_spec(env)
  return env, spec
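Example #3 yields a dm_env-style environment and its spec; GymWrapper and make_environment_spec here appear to come from DeepMind's acme. A sketch of consuming the pair, under that assumption:

import numpy as np

env, spec = _build_environment('MiniGrid-Empty-8x8-v0')
timestep = env.reset()
while not timestep.last():
    action = np.random.randint(spec.actions.num_values)  # random action from the discrete spec
    timestep = env.step(action)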
Example #4
def e16xn(offset=0):
    env = EmptyEnvV2(size=16,
                     agent_pos=(1, 1),
                     agent_dir=None,
                     goal_pos=(8, 8),
                     train=True,
                     goal_rand_offset=offset)
    env = FreeMove(env)
    env = ConstantReward(env)
    env = FullyObsWrapper(env)
    return env
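e16xn exposes the goal's random offset as its only parameter, so a small curriculum of variants can be built by varying it; a sketch (the offsets are illustrative):

envs = [e16xn(offset) for offset in (0, 2, 4)]
obs = envs[0].reset()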
Example #5
def get_minigrid_environment(environment_name='MiniGrid-UnlockPickup-v0',
                             domain_file="domains/gridworld_abstract.pddl",
                             render=False,
                             use_executor=False,
                             actions=None):
    from agent.env_wrappers import ExecutorWrapper
    env = gym.make(environment_name)
    env.seed(seed=seed())
    env = FullyObsWrapper(env)
    env = NamedObjectWrapper(env)
    env = LastObsWrapper(env)
    # TODO note: this is a relative path, so this code needs to be run from a file in the uppermost directory.
    # if you want a different relative path, you'll have to specify it yourself.
    if use_executor and actions is not None:
        env = ExecutorWrapper(env, domain_file, MiniGridDetector,
                              MiniGridExecutor, render, actions)
    return env
Example #6
def make_single_env(game):
    """Make a preprocessed gym.Env."""
    if 'MiniGrid' in game:
        env = PreprocessEnv(FullyObsWrapper(gym.make(game)))
    else:
        env = gym.make(game)

    print(
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
    )

    print('action space: %s obs space: %s' %
          (env.action_space, env.observation_space))
    print(
        'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
    )

    #sys.exit()

    return GrayscaleEnv(DownsampleEnv(env, 2))
Example #7
File: q_network.py Project: ollema/purl
    def __init__(self, env, args):
        super().__init__(
            env,
            args,
            # default values for this algorithm
            default_learning_rate=0.1,
            default_discount_factor=0.99,
            default_start_eps=0.5,
            default_end_eps=0.05,
            default_annealing_steps=2500,
            default_num_updates=4000,
        )

        try:
            # for MiniGrid environments
            self.env: MiniGridEnv = FullyObsWrapper(self.env)
            width, height = self.env.observation_space.shape[0:2]
            self.in_features = width * height * DIRECTIONS
            # really Discrete(7) for this env but we don't need the pick up, drop... actions
            self.env.action_space = Discrete(3)
            self.discrete_obs_space = False

        except Exception:
            # for other gym environments like FrozenLake-v0
            if isinstance(self.env.observation_space, Discrete):
                self.in_features = self.env.observation_space.n
                self.discrete_obs_space = True
            # for other environments, we don't know how in_features is calculated from the obs space
            else:
                raise RuntimeError(
                    f"Don't know how to handle this observation space: {self.env.observation_space}"
                )

        self.model = {
            "q_network": Net(self.in_features,
                             self.env.action_space).to(device)
        }
Example #8
def test_competency():
    # import py_trees
    # behaviour_tree = BehaviourTree(root)
    # # Remember to comment set_state in GenRecProp before
    # # running this test case
    one = BehaviourTree(Sequence(name=str(1)))
    two = Sequence(name=str(2))
    three = Sequence(name=str(3))
    four = Selector(name=str(4))
    five = Sequence(name=str(5))
    six = Sequence(name=str(6))
    # seven = Parallel(name=str(7))
    seven = Selector(name=str(7))
    exenodes = [
        CompetentNode(name=chr(ord('A') + i), planner=None)
        for i in range(0, 11)
    ]
    three.add_children(exenodes[:3])
    four.add_children(exenodes[3:6])
    six.add_children(exenodes[6:9])
    seven.add_children(exenodes[9:])
    two.add_children([three, four])
    five.add_children([six, seven])
    one.root.add_children([two, five])
    # py_trees.logging.level = py_trees.logging.Level.DEBUG
    # py_trees.display.print_ascii_tree(one.root)
    blackboard = Blackboard()
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    env = ReseedWrapper(env, seeds=[3])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    state, reward, done, _ = env.step(2)
    # print(state['image'].shape, reward, done, _)
    # Find the key
    goalspec = 'F P_[KE][1,none,==]'
    # keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM'
    ]

    keys = ['LO', 'FW', 'KE']

    actions = [0, 1, 2, 3, 4, 5]

    def fn_c(child):
        pass

    def fn_eset(child):
        planner = GenRecPropMultiGoal(env,
                                      keys,
                                      goalspec,
                                      dict(),
                                      actions=actions,
                                      max_trace=40,
                                      seed=None,
                                      allkeys=allkeys,
                                      id=child.name)

        child.setup(0, planner, True, 50)

    def fn_einf(child):
        child.train = False
        child.planner.epoch = 5
        child.planner.tcount = 0

    def fn_ecomp(child):
        child.planner.compute_competency()
        print(child.name,
              child.planner.blackboard.shared_content['curve'][child.name])

    recursive_setup(one.root, fn_eset, fn_c)
    # Train
    for i in range(100):
        one.tick(pre_tick_handler=reset_env(env))
    print(i, 'Training', one.root.status)

    # Inference
    recursive_setup(one.root, fn_einf, fn_c)
    for i in range(5):
        one.tick(pre_tick_handler=reset_env(env))
    print(i, 'Inference', one.root.status)
    recursive_setup(one.root, fn_ecomp, fn_c)

    # Manually setting the competency
    ckeys = [chr(ord('A') + i) for i in range(0, 11)]
    manval = [
        np.array([0.84805786, 4.76735384, 0.20430223]),
        np.array([0.54378425, 4.26958399, 3.50727315]),
        np.array([0.50952059, 5.54225945, 5.28025611])
    ]
    j = 0
    for c in ckeys:
        blackboard.shared_content['curve'][c] = manval[j % 3]
        j += 1
    # Recursively compute competency for control nodes
    recursive_com(one.root, blackboard)
    # print(exenodes[0].planner.blackboard.shared_content['curve'])

    # Manually compare the recursively computed competency values
    # for the control
    # First sub-tree
    a = exenodes[0].planner.blackboard.shared_content['curve']['A']
    b = exenodes[0].planner.blackboard.shared_content['curve']['B']
    c = exenodes[0].planner.blackboard.shared_content['curve']['C']
    threec = sequence([a, b, c])
    # print('three', threec)
    # print(
    # 'three', exenodes[0].planner.blackboard.shared_content['curve']['3'])
    assert threec == exenodes[0].planner.blackboard.shared_content['curve'][
        '3']
    # Second sub-tree
    d = exenodes[0].planner.blackboard.shared_content['curve']['D']
    e = exenodes[0].planner.blackboard.shared_content['curve']['E']
    f = exenodes[0].planner.blackboard.shared_content['curve']['F']
    fourc = selector([d, e, f])
    # print(
    # 'four', exenodes[0].planner.blackboard.shared_content['curve']['4'])
    assert fourc == exenodes[0].planner.blackboard.shared_content['curve']['4']
    # Third sub-tree
    g = exenodes[0].planner.blackboard.shared_content['curve']['G']
    h = exenodes[0].planner.blackboard.shared_content['curve']['H']
    i = exenodes[0].planner.blackboard.shared_content['curve']['I']
    sixc = sequence([g, h, i])
    # print(
    # 'six', exenodes[0].planner.blackboard.shared_content['curve']['6'])
    assert sixc == exenodes[0].planner.blackboard.shared_content['curve']['6']
    # Fourth sub-tree
    j = exenodes[0].planner.blackboard.shared_content['curve']['J']
    k = exenodes[0].planner.blackboard.shared_content['curve']['K']
    sevenc = selector([j, k])
    # print(
    # 'seven', exenodes[0].planner.blackboard.shared_content['curve']['7'])
    assert sevenc == exenodes[0].planner.blackboard.shared_content['curve'][
        '7']

    twoc = sequence([threec, fourc])
    assert twoc == exenodes[0].planner.blackboard.shared_content['curve']['2']

    fivec = sequence([sixc, sevenc])
    assert fivec == exenodes[0].planner.blackboard.shared_content['curve']['5']

    onec = sequence([twoc, fivec])
    assert onec == exenodes[0].planner.blackboard.shared_content['curve']['1']

    print(onec)
Example #9
writer = SummaryWriter(f"runs/{experiment_name}")
writer.add_text('hyperparameters', "|param|value|\n|-|-|\n%s" % (
        '\n'.join([f"|{key}|{value}|" for key, value in vars(args).items()])))
if args.prod_mode:
    import wandb
    wandb.init(project=args.wandb_project_name, entity=args.wandb_entity, sync_tensorboard=True, config=vars(args), name=experiment_name, monitor_gym=True, save_code=True)
    writer = SummaryWriter(f"/tmp/{experiment_name}")

# TRY NOT TO MODIFY: seeding
device = torch.device('cuda' if torch.cuda.is_available() and args.cuda else 'cpu')
env = gym.make(args.gym_id)
#env = wrap_atari(env)


if args.fully_observable:
    env = FullyObsWrapper(env)
    print("Fully Observable Obs space: ", env.observation_space)

env = ImgObsWrapper(env)
print("Obs space: ", env.observation_space)
#env = gym.wrappers.RecordEpisodeStatistics(env) # records episode reward in `info['episode']['r']`

if args.capture_video:
    env = Monitor(env, f'videos/{experiment_name}')

#env = wrap_deepmind(
#    env,
#    clip_rewards=True,
#    frame_stack=True,
#    scale=False,
#)
Example #10
                                      entryDoorWall=nextEntryWall,
                                      entryDoorPos=exitDoorPos)

            if success:
                break

        return True


class MultiRoomEnvN6(MultiRoomEnv):
    def __init__(self):
        super().__init__(minNumRooms=MIN_NUM_ROOMS, maxNumRooms=MAX_NUM_ROOMS)


register(id='MiniGrid-Maze-v0',
         entry_point=lambda: FullyObsWrapper(MultiRoomEnvN6()),
         reward_threshold=1000.0)
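Once registered, the fully observable maze can be created by id like any other gym environment; a sketch (the random action is illustrative):

env = gym.make('MiniGrid-Maze-v0')
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())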


def BFS(grid, q, visited, paths):
    current_index = q.get()
    current_x, current_y = current_index[0], current_index[1]

    element = grid[current_x, current_y]
    visited[current_x, current_y] = 1

    if element == 9:
        return current_x, current_y

    for x in range(current_x - 1, current_x + 2):
        for y in range(current_y - 1, current_y + 2):
Example #11
def run_transfer(orig_level, transfer_level, transfer_goal, run_id,
                 domain_file, min_episodes, render, checkpoint_every,
                 log_every, ops_every):
    orig_pickle_filename = get_pickle_file(orig_level, run_id)
    agent_render = 'HUMAN' if render else None
    with open(orig_pickle_filename, "rb") as file:
        eps_so_far, brain = pickle.load(file)
        agent = Solver(None,
                       None,
                       None,
                       None,
                       None,
                       brain,
                       eps_so_far,
                       None,
                       MinigridStateHasher,
                       None,
                       0,
                       "",
                       "",
                       get_operator_filename(orig_level, run_id),
                       render=None,
                       ops_every=ops_every)
        inherited_executor = agent.brain.motor.executor
        inherited_executor.clear_learners()
        inherited_operators = [
            op for op in agent.brain.wm.task.operators
            if op.name.startswith("new_action")
        ]
        num_new_ops = 0
        for op in inherited_operators:
            inherited_executor.rename_op(
                op, "transfer_" + str(num_new_ops).zfill(4))
            num_new_ops += 1
        env = gym.make(transfer_level)
        env.seed(seed=seed())
        env = FullyObsWrapper(env)
        env = NamedObjectWrapper(env)
        env = LastObsWrapper(env)
        env.reset()
        os.makedirs("results" + os.sep + "spotter" + os.sep +
                    str(transfer_level) + os.sep + "operators",
                    exist_ok=True)
        os.makedirs("results" + os.sep + "spotter" + os.sep +
                    str(transfer_level) + os.sep + "pickles",
                    exist_ok=True)
        inherited_executor.set_environment(env)
        results_filename = get_results_filename(transfer_level, run_id)
        pickle_filename = get_pickle_file(transfer_level, run_id)
        agent_render = 'HUMAN' if render else None
        agent = Solver(env,
                       domain_bias=domain_file,
                       goal=transfer_goal,
                       detector=MiniGridDetector,
                       executor_class=MiniGridExecutor,
                       state_hasher=MinigridStateHasher,
                       executor=inherited_executor,
                       operators=inherited_operators,
                       min_episodes=min_episodes,
                       results_filename=results_filename,
                       operator_filename=get_operator_filename(
                           transfer_level, run_id),
                       pickle_filename=pickle_filename,
                       render=agent_render,
                       checkpoint_every=checkpoint_every,
                       log_every=log_every)
        agent.solve()
        agent.evaluate()
        # Final pickle of the agent's brain at the conclusion of the episode
        with open(pickle_filename, "wb") as file:
            pickle.dump((agent.episode_counter, agent.brain), file)
Example #12
from hrl.envs.four_rooms import FourRooms
from hrl.experiments import EXPERIMENT_DIR
from hrl.frameworks.options.SMDP import SMDPValueLearning
from hrl.frameworks.options.hard_coded_options import HallwayOption, PrimitiveOption
from hrl.frameworks.options.intra_option import IntraOptionValueLearning
from hrl.project_logger import ProjectLogger
from hrl.visualization import PlotterOneHot

SAVEPATH = Path(f'{EXPERIMENT_DIR}/value_learning')

if __name__ == '__main__':

    # Create environment
    tasks = iter([(15, 15), (10, 17), (17, 10), (17, 1), (8, 8)])
    env = FullyObsWrapper(FourRooms(goal_pos=next(tasks)))
    env.unwrapped.max_steps = 1000000

    # Create loggers
    LOGLEVEL = 10
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)
    SAVEPATH /= env.unwrapped.__class__.__name__
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
Example #13
def make_env(env_key, seed=None):
    env = FullyObsWrapper(gym.make(env_key))
    env.seed(seed)
    return env
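A sketch of calling the helper above (environment id and seed are illustrative):

env = make_env('MiniGrid-Empty-8x8-v0', seed=1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())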
Example #14
    """
    error = 0
    for i, option in enumerate(options):
        if isinstance(option, PrimitiveOption):
            continue
        abs_rew = np.abs(true_R[i] - R[i])
        error += np.sum(np.multiply(abs_rew, option.initiation_set))
    error /= len(options)
    return error


if __name__ == "__main__":

    # Specify the environment
    env = RandomRewards(
        FullyObsWrapper(FourRooms(agent_pos=(1, 1), goal_pos=(0, 0))))
    env.unwrapped.max_steps = 1000000
    # env.step = partial(stochastic_step, env)

    # Use hard-coded hallway options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
    options += [
        PrimitiveOption(o, env.observation_space.shape[::-1])
        for o in sorted(PrimitiveOption.primitive_options)
    ]

    @ray.remote
    def single_run(env, options, seed, record_every: int = 1000):
Example #15
def find_key():
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    # env = ReseedWrapper(env, seeds=[3])   # Easy
    env = ReseedWrapper(env, seeds=[5])  # Medium
    # env = ReseedWrapper(env, seeds=[7])     # Hard
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    # time.sleep(10)
    # state, reward, done, _ = env.step(2)
    # print(state['image'].shape, reward, done, _)
    # Find the key
    goalspec = 'F P_[KE][1,none,==]'
    # keys = ['L', 'F', 'K', 'D', 'C', 'G', 'O']
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM'
    ]

    keys = ['LO', 'FW', 'KE']

    actions = [0, 1, 2]

    root = goalspec2BT(goalspec, planner=None, node=CompetentNode)
    behaviour_tree = BehaviourTree(root)
    child = behaviour_tree.root

    planner = GenRecPropMultiGoal(env,
                                  keys,
                                  child.name,
                                  dict(),
                                  actions=actions,
                                  max_trace=50,
                                  seed=None,
                                  allkeys=allkeys)

    def run(pepoch=50, iepoch=10):
        # pepoch = 50
        child.setup(0, planner, True, pepoch)
        # Train
        for i in range(pepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))
        # Inference
        child.train = False
        child.planner.epoch = iepoch
        child.planner.tcount = 0
        for i in range(iepoch):
            behaviour_tree.tick(pre_tick_handler=reset_env(env))

    competency = []
    epochs = [(80, 10)] * 2
    datas = []
    for i in range(2):
        run(epochs[i][0], epochs[i][1])
        datas.append(
            np.mean(
                planner.blackboard.shared_content['ctdata'][planner.goalspec],
                axis=0))
        competency.append(planner.compute_competency())
    print(competency)
    compare_curve(competency, datas)
Example #16
    train_obs = []
    train_act = []
    elite_batch = []
    for example, discounted_reward in zip(batch, disc_rewards):
        if discounted_reward > reward_bound:
            train_obs.extend(map(lambda step: step.observation,
                                 example.steps))
            train_act.extend(map(lambda step: step.action,
                                 example.steps))
            elite_batch.append(example)

    return elite_batch, train_obs, train_act, reward_bound


if __name__ == "__main__":
    env = FullyObsWrapper(gym.make("MiniGrid-Empty-5x5-v0"))
    env = FlatteningFullyObsWrapper(env)
    env = ReducingActionWrapper(env)
    env = Monitor(env, directory="mon", force=True)
    obs_size = env.observation_space.shape[0]
    n_actions = env.action_space.n

    net = Net(obs_size, HIDDEN_SIZE, n_actions)
    objective = nn.CrossEntropyLoss()
    optimizer = optim.Adam(params=net.parameters(), lr=0.001)
    writer = SummaryWriter(comment="-minigrid-empty-5x5")

    full_batch = []
    for iter_no, batch in enumerate(iterate_batches(
            env, net, BATCH_SIZE)):
        reward_mean = float(np.mean(list(map(
Example #17
        nchannels = 1
        img = np.resize(img, (height, width, nchannels))
        print('RESIZED: ', img.shape)
        return img

    def to_grayscale(self, img):
        img = Image.fromarray(img, 'RGB').convert('L')
        img = np.array(img)
        return img

    def downsample(self, img, rate):
        return img[::rate, ::rate]


import gym
env = PreprocessEnv(FullyObsWrapper(gym.make('MiniGrid-Empty-8x8-v1')))
#env = gym.make('MiniGrid-Empty-8x8-v1')
print('FIRST obs: %s, act: %s' % (env.observation_space, env.action_space))
env.reset()
env.render()
for s in range(1000):
    if s % 100 == 0:
        #env.render()
        pass
    if s == 20:
        env.render()
        print('obs: %s, act: %s' % (env.observation_space, env.action_space))
    obs, reward, done, info = env.step(
        env.action_space.sample())  # take a random action
    if s == 20:
        print(obs)
Example #18
def create_gymenv(flags):
    if flags.env in [
            "seaquest", "breakout", "asterix", "freeway", "space_invaders"
    ]:
        env_type = "minatar"
    elif flags.env == "random":
        env_type = "random"
    elif "block-" in flags.env:
        env_type = "blockworld"
    elif flags.env in ["rtfm", "rtfm-onehop"]:
        env_type = "rtfm"
    elif flags.env == "boxworld":
        env_type = "boxworld"
    else:
        env_type = "minigrid"

    portal_pairs = []
    if env_type == "minigrid":
        env = gym.make(flags.env)
        #env = ReseedWrapper(env)
        env = FullyObsWrapper(env)
        env = PaddingWrapper(env)
        if flags.action == "moveto":
            env = MoveToActionWrapper(env)
        elif flags.action == "move_dir":
            env = MoveDirActionWrapper(env)
        if flags.env == "MiniGrid-LavaCrossingClosed-v0":
            env = ProtalWrapper(env, portal_pairs)
    elif env_type == "minatar":
        from environment.minatarwarpper import MinAtarEnv
        env = MinAtarEnv(flags.env, flags.sticky_prob)
    elif env_type == "random":
        from environment.random import RandomEnv
        env = RandomEnv()
    elif env_type == "blockworld":
        from environment.blockworld import BlockEnv, GridActionWrapper, BlockActionWrapper
        state_block_spec = flags.state == "block" or flags.action != "propositional"
        env = BlockEnv(flags.env,
                       nb_blocks=flags.nb_blocks,
                       variation=flags.variation,
                       rand_env=flags.rand_env,
                       state_block_spec=state_block_spec)
        if flags.state != "block" and flags.action == "relational":
            env = GridActionWrapper(env)
        if flags.state == "block" and flags.action == "relational":
            env = BlockActionWrapper(env)
    elif env_type in ["rtfm"]:
        from environment.rtfmkbenv import RTFMEnv, RTFMAbstractEnv, RTFMOneHopEnv
        with_vkb = not (flags.agent in ["CNN", "MHA"] or flags.disable_wiki)
        if with_vkb:
            if flags.env == "rtfm":
                env = RTFMAbstractEnv(flags.room_size)
            elif flags.env == "rtfm-onehop":
                env = RTFMOneHopEnv(flags.room_size)
            else:
                raise ValueError()
        else:
            env = RTFMEnv()

    if flags.agent in ["NLM", "KBMLP", "GCN"]:
        if env_type == "minigrid":
            env = DirectionWrapper(env)
        if flags.state == "absolute":
            env = AbsoluteVKBWrapper(env, flags.bg_code, portal_pairs)
        elif flags.state == "block":
            from environment.blockworld import BlockVKBWarpper
            env = BlockVKBWarpper(env)
        else:
            raise ValueError(f"state encoding cannot be {flags.state}")
    elif flags.agent in ["SNLM"]:
        if env_type == "minigrid":
            env = DirectionWrapper(env, type="onehot")
            env = OneHotFullyObsWrapper(env)

    return env
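create_gymenv only reads a handful of attributes from flags, so a plain namespace is enough to drive it; a sketch (attribute names are taken from the function body, values are illustrative):

from argparse import Namespace

flags = Namespace(env='MiniGrid-Empty-8x8-v0', action='moveto',
                  agent='CNN', state='absolute', disable_wiki=True)
env = create_gymenv(flags)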
Example #19
from agent.planning_terminator import DepthPlanningTerminator
from agent.policy_terminator import StrictGoalTerminator
from env.minigrid.wrappers import OnehotWrapper, find, onehot2directedpoint
from env.minigrid import MinigridBacktrackingAgent, SimpleMinigridGenerator, VModel, QModel, Evaluator
from misc.typevars import Option

states = []
initials = []

settings = {'random': 2, 'device': torch.device("cuda:0")}

N_EPISODES = 10
env = gym.make('MiniGrid-LavaGapS7-v0')
# env = gym.make("MiniGrid-SimpleCrossingS9N2-v0")
env.seed(settings['random'])
env = FullyObsWrapper(env)
env = ImgObsWrapper(env)
env = OnehotWrapper(env)

env.render()

assert isinstance(env.observation_space, gym.spaces.Box)

low_level_agent = MinigridBacktrackingAgent()
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])
v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])
planning_terminator = DepthPlanningTerminator(max_depth=3)
evaluator = Evaluator(v_model,
                      q_model,
Example #20
def carry_key():
    env_name = 'MiniGrid-Goals-v0'
    env = gym.make(env_name)
    env = ReseedWrapper(env, seeds=[3])
    env = FullyObsWrapper(env)
    env.max_steps = min(env.max_steps, 200)
    env.agent_view_size = 1
    env.reset()
    # env.render(mode='human')
    state, reward, done, _ = env.step(2)

    # Find the key
    goalspec = 'F P_[KE][1,none,==] U F P_[CK][1,none,==]'
    allkeys = [
        'LO', 'FW', 'KE', 'DR', 'BOB', 'BOR', 'BAB', 'BAR', 'LV', 'GO', 'CK',
        'CBB', 'CBR', 'CAB', 'CAR', 'DO', 'RM'
    ]

    keys = ['LO', 'FW', 'KE', 'CK']

    actions = [0, 1, 2, 3, 4, 5]

    root = goalspec2BT(goalspec, planner=None, node=CompetentNode)
    behaviour_tree = BehaviourTree(root)
    epoch = 80

    def fn_c(child):
        pass

    def fn_eset(child):
        planner = GenRecPropMultiGoal(env,
                                      keys,
                                      child.name,
                                      dict(),
                                      actions=actions,
                                      max_trace=40,
                                      seed=None,
                                      allkeys=allkeys)

        child.setup(0, planner, True, epoch)

    def fn_einf(child):
        child.train = False
        child.planner.epoch = 5
        child.planner.tcount = 0

    def fn_ecomp(child):
        child.planner.compute_competency()

    recursive_setup(behaviour_tree.root, fn_eset, fn_c)
    # recursive_setup(behaviour_tree.root, fn_c, fn_c)
    # py_trees.logging.level = py_trees.logging.Level.DEBUG
    # py_trees.display.print_ascii_tree(behaviour_tree.root)

    # Train
    for i in range(100):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    print(i, 'Training', behaviour_tree.root.status)

    # Inference
    recursive_setup(behaviour_tree.root, fn_einf, fn_c)
    for i in range(5):
        behaviour_tree.tick(pre_tick_handler=reset_env(env))
    print(i, 'Inference', behaviour_tree.root.status)
    recursive_setup(behaviour_tree.root, fn_ecomp, fn_c)
    # recursive_setup(behaviour_tree.root, fn_c, fn_c)
    blackboard = Blackboard()
    print(recursive_com(behaviour_tree.root, blackboard))
Example #21
from hrl.envs.four_rooms import FourRooms
from hrl.experiments import EXPERIMENT_DIR
from hrl.learning_algorithms.SMDP import SMDPModelLearning, SMDPPlanning
from hrl.frameworks.options.hard_coded_options import HallwayOption, PrimitiveOption
from hrl.project_logger import ProjectLogger
from hrl.utils import cache
from hrl.visualization.plotter_one_hot import PlotterOneHot
""" Evaluate the benefits of planning with options. """

SAVEPATH = Path(f'{EXPERIMENT_DIR}/SMDP_planning')

if __name__ == '__main__':

    # Create environment
    env = FullyObsWrapper(FourRooms(goal_pos=(15, 15)))

    # Create loggers
    LOGLEVEL = 20
    logger = ProjectLogger(level=LOGLEVEL, printing=False)
    logger.critical(env)
    plotter = PlotterOneHot(env=env)
    SAVEPATH /= env.unwrapped.__class__.__name__
    SAVEPATH.mkdir(parents=True, exist_ok=True)

    # Create hard-coded options
    options = [
        HallwayOption(o, env.observation_space.shape[::-1])
        for o in sorted(HallwayOption.hallway_options)
    ]
    options += [
Example #22
        actions, advantages = tf.split(acts_and_advs, 2, axis=-1)
        # sparse categorical CE loss obj that supports sample_weight arg on call()
        # from_logits argument ensures transformation into normalized probabilities
        weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
        # policy loss is defined by policy gradients, weighted by advantages
        # note: we only calculate the loss on the actions we've actually taken
        actions = tf.cast(actions, tf.int32)
        policy_loss = weighted_sparse_ce(actions, logits,
                                         sample_weight=advantages)
        # entropy loss can be calculated via CE over itself
        entropy_loss = kls.categorical_crossentropy(logits, logits,
                                                    from_logits=True)
        # here signs are flipped because optimizer minimizes
        return policy_loss - self.params['entropy'] * entropy_loss


if __name__ == '__main__':
    # Create environment
    env = SimplifyActionSpace(SimplifyObsSpace(FullyObsWrapper(FourRooms())))
    env.max_steps = 1000000
    
    # Create model
    model = Model(num_actions=env.action_space.n)
    
    # Create agent
    agent = A2CAgent(model)
    
    rewards_history = agent.train(env, updates=100000)
    print("Finished training, testing...")
    print(f"Took {agent.test(env)} steps")
Example #23
File: sb.py Project: konichuvak/hrl
def callback(_locals, _globals):
    n_steps = _locals['_']
    if n_steps and (n_steps % 1000 == 0):
        print(n_steps)
        print(_locals['episode_successes'])
        # env.render()
        # time.sleep(0.2)

    n_steps += 1
    # Returning False will stop training early
    return True


# Create log dir
log_dir = f"{EXPERIMENT_DIR}/sb/gym"
os.makedirs(log_dir, exist_ok=True)

# Create environment
env_name = 'MiniGrid-FourRooms-v1'
env = FullyObsWrapper(ImgObsWrapper(gym.make(env_name)))
env.max_steps = 100000
# env.step = partial(stochastic_step, env)
env = DummyVecEnv([lambda: env])

# Train a model
model = DQN(policy=MlpPolicy,
            env=env,
            tensorboard_log=f"{EXPERIMENT_DIR}/sb/tensorboard/{env_name}")
model.learn(total_timesteps=10000000, callback=callback)
Example #24
                    elif k == 1:
                        n = max(COLOR_TO_IDX.values()) + 1
                    elif k == 2:
                        n = 4
                    else:
                        raise Exception("Bad k")

                    npo[i, j, k] = Discrete(n)
        ospace = tuple(npo.flat)

        sz = np.cumsum([o.n for o in ospace])
        sz = sz - sz[0]
        self.sz = sz
        # from gym.spaces.box import Box
        self.observation_space = ospace

    def observation(self, obs):
        s = obs['image'].reshape((obs['image'].size, ))
        return s


if __name__ == "__main__":
    """ Example use: """
    env = gym.make("MiniGrid-Empty-5x5-v0")
    env = FullyObsWrapper(env)  # use this
    env = LinearSpaceWrapper(env)
    s = env.reset()
    print(s)
    # Use with for instance:
    # agent = LinearSemiGradSarsa(env, gamma=1, epsilon=0.1, alpha=0.5)
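The cumulative offsets in self.sz suggest a concatenated per-cell one-hot encoding for a linear learner; a sketch of that reading (this interpretation of the wrapper's intent is an assumption):

s = env.reset()      # flat vector with one small integer code per grid cell channel
active = env.sz + s  # index of the active feature inside each cell's one-hot block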
Example #25
    'N_EPISODES': 5,
    'TEST_FREQ': 1,
    'VIZ_FREQ': 1,
    'max_depth': 1,
    'environment_name': 'MiniGrid-SimpleCrossingS9N1-v0'
}

runtime = datetime.now().strftime("%Y-%m-%d @ %H-%M-%S")

writer = SummaryWriter(os.path.join("..", "runs", runtime))


env = gym.make(settings['environment_name'])
# env = gym.make("MiniGrid-SimpleCrossingS9N2-v0")
env.seed(settings['random'])
env = FullyObsWrapper(env)
env = ImgObsWrapper(env)
env = OnehotWrapper(env)

assert isinstance(env.observation_space, gym.spaces.Box)

low_level_agent = MinigridBacktrackingAgent()
shape = env.observation_space.shape
shape = (-1, shape[-1], shape[0], shape[1])
v_model = VModel(shape, 32, 2, device=settings['device'])
q_model = QModel(shape, 32, 2, device=settings['device'])
planning_terminator = DepthPlanningTerminator(max_depth=settings['max_depth'])
evaluator = Evaluator(v_model, q_model, planning_terminator, settings, get_beta=lambda step: 3, gamma=0.99)
generator = SimpleMinigridGenerator()
memory = CompleteMemory(max_length=100000)
def goal_met(s, o):
Example #26
if __name__ == "__main__":
    a = Ana()
    a.test()
# ==================================================================================================
import gym
import gym_minigrid
from gym_minigrid.wrappers import RGBImgObsWrapper, FullyObsWrapper, RGBImgPartialObsWrapper
import numpy as np
import cv2

env_key = "MiniGrid-FourRooms-v0"
seed = 0

env = gym.make(env_key, agent_pos=(1, 1), goal_pos=None, doors=True)
env.max_steps = 400
env = FullyObsWrapper(env)
env.seed(seed)

obs = env.reset()["image"]

while True:
    act = np.random.randint(3)
    obs, r, done, info = env.step(act)

    img = obs["image"] * 15
    img = cv2.resize(img, (0, 0), fx=20, fy=20)
    cv2.imshow("test", img)
    cv2.waitKey(1)
    if done:
        env.reset()
        print("RESET")
Example #27
def run_consecutive_levels(level_goal_episodes,
                           domain_file,
                           render=False,
                           checkpoint_every=500,
                           log_every=100,
                           freeze_task=False,
                           ops_every=1):
    inherited_executor = None
    inherited_operators = None
    num_new_ops = 0
    run_id = uuid.uuid4().hex
    for level, goal, min_episodes in level_goal_episodes:
        env = gym.make(level)
        env.seed(seed=seed())
        env = FullyObsWrapper(env)
        env = NamedObjectWrapper(env)
        env = LastObsWrapper(env)
        env.reset()
        os.makedirs("results" + os.sep + "spotter" + os.sep + str(level) +
                    os.sep + "operators",
                    exist_ok=True)
        os.makedirs("results" + os.sep + "spotter" + os.sep + str(level) +
                    os.sep + "pickles",
                    exist_ok=True)
        if inherited_executor:
            inherited_executor.set_environment(env)
        results_filename = get_results_filename(level, run_id)
        pickle_filename = get_pickle_file(level, run_id)
        agent_render = 'HUMAN' if render else None
        agent = Solver(env,
                       domain_bias=domain_file,
                       goal=goal,
                       detector=MiniGridDetector,
                       executor_class=MiniGridExecutor,
                       state_hasher=MinigridStateHasher,
                       executor=inherited_executor,
                       operators=inherited_operators,
                       min_episodes=min_episodes,
                       results_filename=results_filename,
                       operator_filename=get_operator_filename(level, run_id),
                       pickle_filename=pickle_filename,
                       render=agent_render,
                       checkpoint_every=checkpoint_every,
                       log_every=log_every,
                       freeze_task=freeze_task,
                       ops_every=ops_every)
        agent.solve()
        agent.evaluate()
        # Final pickle of the agent's brain at the conclusion of the episode
        with open(pickle_filename, "wb") as file:
            pickle.dump((agent.episode_counter, agent.brain), file)
        inherited_executor = agent.brain.motor.executor
        inherited_executor.clear_learners()
        inherited_operators = [
            op for op in agent.brain.wm.task.operators
            if op.name.startswith("new_action")
        ]
        for op in inherited_operators:
            inherited_executor.rename_op(
                op, "transfer_" + str(num_new_ops).zfill(4))
            num_new_ops += 1
Example #28
File: dqn_oc.py Project: konichuvak/hrl
 def setup_env(env):
     # ReseedWrapper
     env = Torch(FullyObsWrapper(SimplifyActionSpace(env)))
     # env.step = partial(stochastic_step, env)
     return env
Example #29
def full_state_train(env):
    return FullyObsWrapper(env)
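A sketch of applying this one-liner (environment id illustrative):

env = full_state_train(gym.make('MiniGrid-Empty-5x5-v0'))
obs = env.reset()  # obs['image'] now spans the whole grid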