Пример #1
0
    def __init__(self):
        """Create the world, the cookbook and the task tables from hints.yaml.

        ``hints.yaml`` is read from the directory containing this source
        file.  Each entry maps a goal expression (parsed with
        ``util.parse_fexp``) to the sequence of subtask names that achieves
        it.  After construction:

        * ``self.tasks`` lists every ``Task(goal, steps)`` in file order,
        * ``self.tasks_by_subtask`` maps a subtask index to the tasks whose
          steps contain it,
        * ``self.task_index`` has an index registered for every task.
        """
        self.world = CraftWorld()
        self.cookbook = Cookbook()
        self.subtask_index = util.Index()
        self.task_index = util.Index()

        dir_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
            # safe_load instead of load: yaml.load() without an explicit
            # Loader raises TypeError on PyYAML >= 6 and can instantiate
            # arbitrary Python objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # NOTE: no dedicated RandomState is created here; random draws made
        # elsewhere go through the global numpy generator.

        # organize task and subtask indices
        self.tasks_by_subtask = defaultdict(list)
        self.tasks = []
        for hint_key, hint in self.hints.items():
            parsed = util.parse_fexp(hint_key)
            # The goal is stored as (subtask index, cookbook index of its
            # argument).  The goal name is registered before the steps, so
            # the order in which indices are handed out is unchanged.
            goal = (self.subtask_index.index(parsed[0]),
                    self.cookbook.index[parsed[1]])
            steps = tuple(self.subtask_index.index(a) for a in hint)
            task = Task(goal, steps)
            for subtask in steps:
                self.tasks_by_subtask[subtask].append(task)
            self.tasks.append(task)
            self.task_index.index(task)
Пример #2
0
def create_maps(num_maps=1000):
    """Sample ``num_maps`` initial states whose grids are pairwise distinct.

    A goal index is drawn uniformly from 7..20 (inclusive), a scenario is
    sampled for it from a fresh ``CraftWorld`` and the scenario's initial
    state is kept only if its ``grid`` differs from every grid collected so
    far.

    Args:
        num_maps: Number of distinct maps to collect.

    Returns:
        A list of ``num_maps`` initial states.

    Note:
        The loop only ends once enough distinct grids have been found, so it
        never terminates if the world cannot produce ``num_maps`` different
        grids.
    """
    cw = CraftWorld()
    map_set = []
    while len(map_set) < num_maps:
        goal = np.random.randint(14) + 7
        candidate = cw.sample_scenario_with_goal(goal).init()
        # any() stops at the first identical grid instead of comparing the
        # candidate against every remaining map after a match is known.
        is_duplicate = any(
            (candidate.grid == known.grid).all() for known in map_set)
        if not is_duplicate:
            map_set.append(candidate)
    return map_set
Пример #3
0
class EnvironmentHandler():
    """Factory for Craft states: random environments and probe state sets."""

    def __init__(self):
        """Create the ``CraftWorld`` used to sample and build scenarios."""
        self.cw = CraftWorld()

    def get_env(self):
        """Return the initial state of a scenario sampled for a random goal.

        The goal index is drawn uniformly from 7..20 (inclusive).  The
        scenario is initialised without arguments, i.e. no starting
        inventory is supplied.
        """
        goal = 7 + np.random.randint(14)
        return self.cw.sample_scenario_with_goal(goal).init()

    def get_full_state_set(self, event):
        """Build the states used to replay a demonstrated event.

        A grid surrounded by boundary cells is created with the object named
        by ``event["object_before"]`` placed at cell (5, 5); the scenario is
        constructed with ``(5, 6)`` as its second argument (presumably the
        agent position - confirm against ``CraftScenario``).

        Returns:
            A list of states sharing that grid, in this order: one state per
            single inventory slot 7..20 set to 1, one per unordered pair of
            those slots set to 1, then 100 states whose 21 inventory slots
            are each drawn uniformly from 0..3.
        """
        cookbook = self.cw.cookbook
        grid = np.zeros((WIDTH, HEIGHT, cookbook.n_kinds))

        # Wall off all four edges of the map.
        boundary = cookbook.index["boundary"]
        grid[0, :, boundary] = 1
        grid[WIDTH - 1:, :, boundary] = 1
        grid[:, 0, boundary] = 1
        grid[:, HEIGHT - 1:, boundary] = 1

        # Place the object the demonstration interacted with.
        target_name = num_string_dict[event["object_before"]]
        grid[5, 5, cookbook.index[target_name]] = 1
        scenario = CraftScenario(grid, (5, 6), self.cw)

        def state_holding(*slots):
            # State whose inventory has exactly one unit in each given slot.
            inventory = np.zeros(21, dtype=int)
            for slot in slots:
                inventory[slot] = 1
            return scenario.init(inventory)

        # "dataset"
        states = [state_holding(first) for first in range(7, 21)]
        states.extend(
            state_holding(first, second)
            for first in range(7, 21)
            for second in range(first + 1, 21))
        states.extend(
            scenario.init(np.random.randint(4, size=21))
            for _ in range(100))
        return states
Пример #4
0
 def __init__(self):
     """Create a ``CraftWorld`` instance and keep it on ``self.cw``."""
     self.cw = CraftWorld()
Пример #5
0
class CraftEnv(gym.Env):
    """Gym environment wrapping a ``CraftWorld`` and the tasks in hints.yaml.

    ``reset`` samples a scenario for a task, ``step`` advances the current
    state, and ``render`` draws the grid in the terminal with curses.
    """

    # Gym convention: advertises the render modes this environment supports.
    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Create the world, the cookbook and the task tables from hints.yaml.

        ``hints.yaml`` is read from the directory containing this source
        file.  Each entry maps a goal expression (parsed with
        ``util.parse_fexp``) to the sequence of subtask names achieving it.
        """
        self.world = CraftWorld()
        self.cookbook = Cookbook()
        self.subtask_index = util.Index()
        self.task_index = util.Index()

        dir_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
            # safe_load instead of load: yaml.load() without an explicit
            # Loader raises TypeError on PyYAML >= 6 and can instantiate
            # arbitrary Python objects on older releases.
            self.hints = yaml.safe_load(hints_f)

        # NOTE: no dedicated RandomState is created; all random draws in this
        # class go through the global numpy generator.

        # organize task and subtask indices
        self.tasks_by_subtask = defaultdict(list)
        self.tasks = []
        for hint_key, hint in self.hints.items():
            parsed = util.parse_fexp(hint_key)
            # Goal = (subtask index, cookbook index of its argument).  The
            # goal name is registered before the steps so the order in which
            # indices are handed out is unchanged.
            goal = (self.subtask_index.index(parsed[0]),
                    self.cookbook.index[parsed[1]])
            steps = tuple(self.subtask_index.index(a) for a in hint)
            task = Task(goal, steps)
            for subtask in steps:
                self.tasks_by_subtask[subtask].append(task)
            self.tasks.append(task)
            self.task_index.index(task)

    def reset(self, task=None, difficulty=3):
        """Start a new episode and return its initial state.

        Args:
            task: The task to solve.  When omitted (or falsy) one is sampled
                according to ``difficulty``.
            difficulty: 1, 2 or 3 pick uniformly from ``self.tasks[0:4]``,
                ``self.tasks[4:8]`` and ``self.tasks[8:10]`` respectively;
                any other value picks uniformly from all tasks.  These
                ranges are hard-coded and rely on the order of hints.yaml.

        Returns:
            The initial state of the sampled scenario (also stored in
            ``self.state``).
        """
        if not task:
            # difficulty -> (offset of the first task, number of tasks)
            task_ranges = {1: (0, 4), 2: (4, 4), 3: (8, 2)}
            if difficulty in task_ranges:
                offset, count = task_ranges[difficulty]
                task = self.tasks[offset + np.random.choice(count)]
            else:
                task = self.tasks[np.random.choice(len(self.tasks))]
        self.goal, _ = task
        _, goal_arg = self.goal
        scenario = self.world.sample_scenario_with_goal(goal_arg)
        self.state = scenario.init()

        return self.state  #, task

    def step(self, action):
        """Apply ``action`` to the current state.

        Prints ``Gathered <name>`` once for every inventory slot whose count
        increased.  Slots that only decreased are not reported, and several
        slots may change in the same step.

        Returns:
            ``(state, reward, done, info)`` where ``done`` is
            ``state.satisfies(self.goal)`` and ``info`` is an empty dict.
        """
        previous = self.state
        reward, state = previous.step(action)
        self.state = state

        # Iterating over the changed slots avoids truth-testing / int() on an
        # index array, which raises as soon as more than one slot changes.
        gained = np.where((state.inventory - previous.inventory) > 0)[0]
        for slot in gained:
            # ordered_contents is offset by one relative to the inventory
            # slot, as in the original lookup.
            print("Gathered {}".format(
                self.cookbook.index.ordered_contents[int(slot) - 1]))
        return state, reward, state.satisfies(self.goal), {}

    def render(self, mode="human", close=False):
        """Draw the current state with curses, then print the inventory.

        ``mode`` and ``close`` are accepted for Gym API compatibility but are
        not used: rendering always happens in the terminal.
        """
        def _visualize(win):
            curses.start_color()
            # Pairs 1..7: coloured text on black; pairs 11..17: black text on
            # a coloured background.
            for i in range(1, 8):
                curses.init_pair(i, i, curses.COLOR_BLACK)
                curses.init_pair(i + 10, curses.COLOR_BLACK, i)
            state = self.state
            win.clear()
            for y in range(HEIGHT):
                for x in range(WIDTH):
                    # Skip empty cells that do not hold the agent.
                    if not (state.grid[x, y, :].any() or (x, y) == state.pos):
                        continue
                    thing = state.grid[x, y, :].argmax()
                    if (x, y) == state.pos:
                        # Two-character glyph pointing in the facing direction.
                        if state.dir == LEFT:
                            ch1 = "<"
                            ch2 = "@"
                        elif state.dir == RIGHT:
                            ch1 = "@"
                            ch2 = ">"
                        elif state.dir == UP:
                            ch1 = "^"
                            ch2 = "@"
                        elif state.dir == DOWN:
                            ch1 = "@"
                            ch2 = "v"
                        else:
                            # Unknown direction: still draw the agent instead
                            # of failing on unbound glyph variables.
                            ch1 = ch2 = "@"
                        color = curses.color_pair(0)
                    elif thing == self.cookbook.index["boundary"]:
                        ch1 = ch2 = curses.ACS_BOARD
                        color = curses.color_pair(10 + thing)
                    else:
                        # Other objects show the first and last letter of
                        # their cookbook name.
                        name = self.cookbook.index.get(thing)
                        ch1 = name[0]
                        ch2 = name[-1]
                        color = curses.color_pair(10 + thing)

                    # Rows are flipped so that y grows upwards on screen;
                    # every cell is two characters wide.
                    win.addch(HEIGHT - y, x * 2, ch1, color)
                    win.addch(HEIGHT - y, x * 2 + 1, ch2, color)
            win.refresh()

        curses.wrapper(_visualize)
        inventory = self.state.inventory

        # Labels for inventory slots 7..20, in slot order.
        labels = ("Iron", "Grass", "Wood", "Gold", "Gem", "Plank", "Stick",
                  "Axe", "Rope", "Bed", "Shears", "Cloth", "Bridge", "Ladder")
        print("Inventory:\n" + "   ".join(
            "{}:{}".format(label, inventory[slot])
            for slot, label in enumerate(labels, start=7)))
Пример #6
0
class EnvironmentHandler():
    """Builds Craft environments and learns interaction rules from events."""

    def __init__(self):
        """Create the ``CraftWorld`` used to sample and build scenarios."""
        self.cw = CraftWorld()

    def get_env(self):
        """Return the initial state of a scenario sampled for a random goal.

        The goal index is drawn uniformly from 7..20 (inclusive).  The
        scenario is initialised without arguments, i.e. no starting
        inventory is supplied.
        """
        goal = np.random.randint(14) + 7
        scenario = self.cw.sample_scenario_with_goal(goal)
        return scenario.init()

    def _build_state_set(self, event):
        """Build probe states: the event's object at (5, 5), many inventories.

        Returns a list of states sharing one bounded grid, in this order: one
        state per single inventory slot 7..20 set to 1, one per unordered
        pair of those slots set to 1, then 100 states whose 21 slots are each
        drawn uniformly from 0..3.
        """
        cookbook = self.cw.cookbook
        grid = np.zeros((WIDTH, HEIGHT, cookbook.n_kinds))
        i_bd = cookbook.index["boundary"]
        grid[0, :, i_bd] = 1
        grid[WIDTH - 1:, :, i_bd] = 1
        grid[:, 0, i_bd] = 1
        grid[:, HEIGHT - 1:, i_bd] = 1
        grid[5, 5, cookbook.index[num_string_dict[event["object_before"]]]] = 1
        # (5, 6) is presumably the agent position, next to the object -
        # confirm against CraftScenario.
        scenario = CraftScenario(grid, (5, 6), self.cw)

        state_set = []
        for i in range(7, 21):
            inventory = np.zeros(21, dtype=int)
            inventory[i] = 1
            state_set.append(scenario.init(inventory))

        for i in range(7, 21):
            for j in range(i + 1, 21):
                inventory = np.zeros(21, dtype=int)
                inventory[i] = 1
                inventory[j] = 1
                state_set.append(scenario.init(inventory))

        for _ in range(100):
            inventory = np.random.randint(4, size=21)
            state_set.append(scenario.init(inventory))
        return state_set

    @staticmethod
    def _describe_transition(transition, event):
        """Return the human-readable description of one core transition.

        ``transition`` holds the per-slot inventory change followed by one
        trailing entry for the change of the object in front of the agent.
        """
        # Only inventory slots name items; the trailing grid entry is
        # excluded for both lists (the original excluded it only for the
        # used-up items, so a +1 there was looked up as an item).
        inventory_delta = transition[:-1]
        text_gathered = ", ".join(
            number_inventory[obj]
            for obj in np.where(inventory_delta == 1)[0]) or None
        text_used_up = ", ".join(
            number_inventory[obj]
            for obj in np.where(inventory_delta == -1)[0]) or None
        target = num_string_dict[event["object_before"]]

        if transition[-1] == -1:
            # The object in front of the agent changed.
            if text_gathered:
                return "Got: {}. Used up: {}".format(text_gathered,
                                                     text_used_up)
            return "Removed {} from the environment. Used up: {}".format(
                target, text_used_up)
        return "Used {} to make {} at {}".format(text_used_up, text_gathered,
                                                 target)

    def train(self, event, agent):
        """Learn the rules for interacting with ``event["object_before"]``.

        Every probe state is stepped once with action 4 (presumably the
        "use" action - confirm against the state class).  The distinct
        resulting changes are ordered by cost, and a linearly independent
        subset of them is kept as the core transitions.  For each one the
        element-wise minimum inventory over all probe states that produced
        it is recorded as its pre-requisite, together with a description.

        Args:
            event: Mapping with at least the key ``"object_before"``.
            agent: Object whose ``rule_dict`` receives the tuple
                ``(core_transitions, pre_requisite_set, desc_set)`` under
                the key ``event["object_before"]``.

        Returns:
            True if the rule was stored, False if storing it failed.
        """
        state_set = self._build_state_set(event)

        # Rows are collected in lists and stacked once, instead of growing
        # the arrays with np.append on every iteration.
        prev_rows = []
        diff_rows = []
        for ss in state_set:
            _, sss = ss.step(4)
            # object_in_front_difference should only be -1 or 0, or it is
            # disaster
            object_in_front_difference = np.clip(
                sss.grid[5, 5].argmax() - ss.grid[5, 5].argmax(), -1, 1)
            prev_rows.append(np.array(ss.inventory, dtype=float))
            diff_rows.append(
                np.append(sss.inventory - ss.inventory,
                          object_in_front_difference))
        prev_inventory_set = np.array(prev_rows, dtype=float)
        difference_set = np.array(diff_rows, dtype=float)

        unique_transitions = np.unique(difference_set, axis=0)
        # We want: the simplest core set of transitions, and the minimum
        # conditions required for them to occur.  Cost: changes in columns
        # 7..11 count once, changes from column 12 onwards (including the
        # trailing grid entry) count twice.
        costs = (np.abs(unique_transitions[:, 7:12]).sum(axis=1)
                 + 2 * np.abs(unique_transitions[:, 12:]).sum(axis=1))

        # Greedily keep the cheapest transitions that stay linearly
        # independent; core_transitions holds one transition per column.
        core_transitions = np.empty((unique_transitions.shape[1], 0),
                                    dtype=int)
        pre_requisites = []
        desc_set = []
        for ind in costs.argsort():
            transition = unique_transitions[ind]
            candidate = np.column_stack((core_transitions, transition))
            if np.linalg.matrix_rank(candidate) != candidate.shape[1]:
                continue
            core_transitions = candidate
            # Pre-requisite: element-wise minimum inventory over every probe
            # state that produced exactly this transition.
            produced_by = (difference_set == transition).all(axis=1)
            pre_requisites.append(
                prev_inventory_set[produced_by].min(axis=0))
            desc_set.append(self._describe_transition(transition, event))

        if pre_requisites:
            pre_requisite_set = np.array(pre_requisites)
        else:
            pre_requisite_set = np.empty((0, 21), dtype=int)

        try:
            agent.rule_dict[event["object_before"]] = (core_transitions.T,
                                                       pre_requisite_set,
                                                       desc_set)
        except Exception:
            # Best effort: report failure to the caller, but no longer
            # swallow KeyboardInterrupt / SystemExit like a bare except.
            return False
        return True