def __init__(self):
    """Build the world, the cookbook and the task/subtask indices.

    Hints are read from ``hints.yaml`` located next to this source file.
    Each hint maps a goal expression to the list of subtask names that
    achieve it; every hint becomes one ``Task(goal, steps)``.
    """
    self.world = CraftWorld()
    self.cookbook = Cookbook()
    self.subtask_index = util.Index()
    self.task_index = util.Index()

    dir_path = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
        # safe_load: yaml.load() without an explicit Loader raises a
        # TypeError on PyYAML >= 6 and can construct arbitrary objects on
        # older versions.
        self.hints = yaml.safe_load(hints_f)

    # TODO: consider a seeded np.random.RandomState(0) for reproducibility.

    # Organize task and subtask indices.
    self.tasks_by_subtask = defaultdict(list)
    self.tasks = []
    for hint_key, hint in self.hints.items():
        parsed = util.parse_fexp(hint_key)
        goal = (self.subtask_index.index(parsed[0]),
                self.cookbook.index[parsed[1]])
        steps = tuple(self.subtask_index.index(name) for name in hint)
        task = Task(goal, steps)
        # Reverse lookup: every subtask points at the tasks that use it.
        for subtask in steps:
            self.tasks_by_subtask[subtask].append(task)
        self.tasks.append(task)
        self.task_index.index(task)
def create_maps(num_maps=1000):
    """Collect ``num_maps`` initial states whose grids are pairwise distinct.

    Scenarios are sampled from a fresh ``CraftWorld`` for a random goal index
    in 7..20 (inclusive); a sampled state is kept only when its grid differs
    from the grid of every state kept so far.
    """
    world = CraftWorld()
    unique_maps = []
    while len(unique_maps) < num_maps:
        goal_index = 7 + np.random.randint(14)
        candidate = world.sample_scenario_with_goal(goal_index).init()
        already_seen = any(
            (candidate.grid == kept.grid).all() for kept in unique_maps)
        if not already_seen:
            unique_maps.append(candidate)
    return unique_maps
class EnvironmentHandler():
    """Samples Craft environments and builds sets of demonstration states."""

    def __init__(self):
        """Create the CraftWorld used to sample and build scenarios."""
        self.cw = CraftWorld()

    def get_env(self):
        """Return the initial state of a scenario sampled for a random goal.

        The goal index is drawn from 7..20 (inclusive); the scenario is
        initialised without an explicit starting inventory.
        """
        goal_index = 7 + np.random.randint(14)
        return self.cw.sample_scenario_with_goal(goal_index).init()

    def get_full_state_set(self, event):
        """Replicate a demonstrated event under many starting inventories.

        A bordered, otherwise empty grid is built with the object named by
        ``event["object_before"]`` placed at cell (5, 5). The returned list
        holds one initial state per inventory, in this order:

        * one unit in a single slot, for each slot 7..20;
        * one unit in each of two distinct slots, for each pair from 7..20;
        * 100 random inventories with every slot drawn from 0..3.
        """
        cookbook = self.cw.cookbook
        layout = np.zeros((WIDTH, HEIGHT, cookbook.n_kinds))

        # Surround the map with boundary cells.
        boundary = cookbook.index["boundary"]
        layout[0, :, boundary] = 1
        layout[WIDTH - 1:, :, boundary] = 1
        layout[:, 0, boundary] = 1
        layout[:, HEIGHT - 1:, boundary] = 1

        # The demonstrated object sits at (5, 5).
        target_kind = cookbook.index[num_string_dict[event["object_before"]]]
        layout[5, 5, target_kind] = 1

        # (5, 6) is handed to CraftScenario as its second argument --
        # presumably the agent's start cell next to the object; confirm.
        scenario = CraftScenario(layout, (5, 6), self.cw)

        def unit_inventory(*slots):
            inv = np.zeros(21, dtype=int)
            inv[list(slots)] = 1
            return inv

        def inventories():
            # Lazy on purpose: each inventory is produced right before the
            # matching scenario.init() call, as in a plain loop.
            for first in range(7, 21):
                yield unit_inventory(first)
            for first in range(7, 21):
                for second in range(first + 1, 21):
                    yield unit_inventory(first, second)
            for _ in range(100):
                yield np.random.randint(4, size=21)

        return [scenario.init(inv) for inv in inventories()]
def __init__(self):
    """Create a CraftWorld and keep it on ``self.cw``."""
    self.cw = CraftWorld()
class CraftEnv(gym.Env):
    """Gym environment wrapping a CraftWorld with tasks read from hints.yaml."""

    # Render modes accepted by render().
    metadata = {'render.modes': ['human']}

    def __init__(self):
        """Build the world, the cookbook and the task/subtask indices.

        Hints are read from ``hints.yaml`` located next to this source file.
        Each hint maps a goal expression to the list of subtask names that
        achieve it; every hint becomes one ``Task(goal, steps)``.
        """
        self.world = CraftWorld()
        self.cookbook = Cookbook()
        self.subtask_index = util.Index()
        self.task_index = util.Index()

        dir_path = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(dir_path, "hints.yaml")) as hints_f:
            # safe_load: yaml.load() without an explicit Loader raises a
            # TypeError on PyYAML >= 6 and can construct arbitrary objects
            # on older versions.
            self.hints = yaml.safe_load(hints_f)

        # TODO: consider a seeded np.random.RandomState(0) for
        # reproducibility.

        # Organize task and subtask indices.
        self.tasks_by_subtask = defaultdict(list)
        self.tasks = []
        for hint_key, hint in self.hints.items():
            parsed = util.parse_fexp(hint_key)
            goal = (self.subtask_index.index(parsed[0]),
                    self.cookbook.index[parsed[1]])
            steps = tuple(self.subtask_index.index(name) for name in hint)
            task = Task(goal, steps)
            # Reverse lookup: every subtask points at the tasks that use it.
            for subtask in steps:
                self.tasks_by_subtask[subtask].append(task)
            self.tasks.append(task)
            self.task_index.index(task)

    def reset(self, task=None, difficulty=3):
        """Start a new episode and return its initial state.

        Either give the task, or give the difficulty level (1, 2 or 3).
        When ``task`` is falsy one is drawn at random: difficulty 1 picks
        from ``self.tasks[0:4]``, 2 from ``self.tasks[4:8]``, 3 from
        ``self.tasks[8:10]``, and any other value picks from all tasks.
        ``difficulty`` is ignored when ``task`` is given.
        """
        if not task:
            # These difficulty levels are hard-coded slices of self.tasks;
            # they depend on the order of entries in hints.yaml.
            if difficulty == 1:
                task = self.tasks[np.random.choice(4)]
            elif difficulty == 2:
                task = self.tasks[4 + np.random.choice(4)]
            elif difficulty == 3:
                task = self.tasks[8 + np.random.choice(2)]
            else:
                task = self.tasks[np.random.choice(len(self.tasks))]

        self.goal, _ = task
        goal_name, goal_arg = self.goal
        scenario = self.world.sample_scenario_with_goal(goal_arg)
        self.state = scenario.init()
        return self.state

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done, info)``.

        ``done`` is ``state.satisfies(self.goal)`` and ``info`` is always an
        empty dict. One "Gathered ..." line is printed for every inventory
        slot whose count increased during this step.
        """
        reward, next_state = self.state.step(action)
        prev_state = self.state
        self.state = next_state

        # Several slots can change in one step, so the changed indices must
        # be iterated: truth-testing or int()-casting the index array fails
        # as soon as it holds more than one entry. Only increases are
        # reported, matching the "Gathered" wording.
        delta = next_state.inventory - prev_state.inventory
        for kind in np.where(delta > 0)[0]:
            # The "- 1" offset is kept from the original lookup into
            # ordered_contents.
            print("Gathered {}".format(
                self.cookbook.index.ordered_contents[int(kind) - 1]))

        return next_state, reward, next_state.satisfies(self.goal), {}

    def render(self, mode="human", close=False):
        """Draw the current state with curses, then print the inventory.

        ``mode`` and ``close`` are accepted but not used.
        """

        def _visualize(win):
            curses.start_color()
            for i in range(1, 8):
                curses.init_pair(i, i, curses.COLOR_BLACK)
                curses.init_pair(i + 10, curses.COLOR_BLACK, i)

            state = self.state
            win.clear()
            for y in range(HEIGHT):
                for x in range(WIDTH):
                    # Skip empty cells that do not hold the agent.
                    if not (state.grid[x, y, :].any()
                            or (x, y) == state.pos):
                        continue
                    thing = state.grid[x, y, :].argmax()
                    if (x, y) == state.pos:
                        # Two-character agent glyph showing its direction.
                        if state.dir == LEFT:
                            ch1 = "<"
                            ch2 = "@"
                        elif state.dir == RIGHT:
                            ch1 = "@"
                            ch2 = ">"
                        elif state.dir == UP:
                            ch1 = "^"
                            ch2 = "@"
                        elif state.dir == DOWN:
                            ch1 = "@"
                            ch2 = "v"
                        color = curses.color_pair(0)
                    elif thing == self.cookbook.index["boundary"]:
                        ch1 = ch2 = curses.ACS_BOARD
                        color = curses.color_pair(10 + thing)
                    else:
                        # Other objects show the first and last letter of
                        # their name.
                        name = self.cookbook.index.get(thing)
                        ch1 = name[0]
                        ch2 = name[-1]
                        color = curses.color_pair(10 + thing)

                    # Rows are flipped so that y grows upwards on screen.
                    win.addch(HEIGHT - y, x * 2, ch1, color)
                    win.addch(HEIGHT - y, x * 2 + 1, ch2, color)
            win.refresh()

        curses.wrapper(_visualize)

        inventory = self.state.inventory
        print("Inventory:\nIron:{} Grass:{} Wood:{} Gold:{} Gem:{} Plank:{} Stick:{} Axe:{} Rope:{} Bed:{} Shears:{} Cloth:{} Bridge:{} Ladder:{}"
              .format(inventory[7], inventory[8], inventory[9],
                      inventory[10], inventory[11], inventory[12],
                      inventory[13], inventory[14], inventory[15],
                      inventory[16], inventory[17], inventory[18],
                      inventory[19], inventory[20]))
class EnvironmentHandler():
    """Samples Craft environments and learns rules from demonstrated events."""

    def __init__(self):
        """Create the CraftWorld used to sample and build scenarios."""
        self.cw = CraftWorld()

    def get_env(self):
        """Return the initial state of a scenario sampled for a random goal.

        The goal index is drawn from 7..20 (inclusive); the scenario is
        initialised without an explicit starting inventory.
        """
        goal = np.random.randint(14) + 7
        scenario = self.cw.sample_scenario_with_goal(goal)
        return scenario.init()

    def _build_state_set(self, event):
        """Return initial states replicating ``event`` under many inventories."""
        cookbook = self.cw.cookbook
        grid = np.zeros((WIDTH, HEIGHT, cookbook.n_kinds))

        # Surround the map with boundary cells.
        i_bd = cookbook.index["boundary"]
        grid[0, :, i_bd] = 1
        grid[WIDTH - 1:, :, i_bd] = 1
        grid[:, 0, i_bd] = 1
        grid[:, HEIGHT - 1:, i_bd] = 1

        # The demonstrated object sits at (5, 5).
        grid[5, 5, cookbook.index[num_string_dict[event["object_before"]]]] = 1

        # (5, 6) is handed to CraftScenario as its second argument --
        # presumably the agent's start cell next to the object; confirm.
        scenario = CraftScenario(grid, (5, 6), self.cw)

        state_set = []
        # One unit of a single item, for each slot 7..20.
        for i in range(7, 21):
            inventory = np.zeros(21, dtype=int)
            inventory[i] = 1
            state_set.append(scenario.init(inventory))
        # One unit each of two distinct items, for each pair from 7..20.
        for i in range(7, 21):
            for j in range(i + 1, 21):
                inventory = np.zeros(21, dtype=int)
                inventory[i] = 1
                inventory[j] = 1
                state_set.append(scenario.init(inventory))
        # 100 random inventories with every slot drawn from 0..3.
        for _ in range(100):
            inventory = np.random.randint(4, size=21)
            state_set.append(scenario.init(inventory))
        return state_set

    @staticmethod
    def _join_names(indices):
        """Return the comma-separated item names for ``indices``, or None if empty."""
        if len(indices) == 0:
            return None
        return ", ".join(number_inventory[obj] for obj in indices)

    def _describe_transition(self, transition, object_name):
        """Return the human-readable description of one core transition."""
        # NOTE(review): the "gathered" lookup scans the whole vector,
        # including the trailing object-in-front entry, while "used up"
        # skips it. A +1 in that last entry would be looked up in
        # number_inventory -- confirm this is intended.
        text_gathered = self._join_names(np.where(transition == 1)[0])
        text_used_up = self._join_names(np.where(transition[:-1] == -1)[0])

        if transition[-1] == -1:
            if text_gathered:
                return "Got: {}. Used up: {}".format(
                    text_gathered, text_used_up)
            return "Removed {} from the environment. Used up: {}".format(
                object_name, text_used_up)
        return "Used {} to make {} at {}".format(
            text_used_up, text_gathered, object_name)

    def train(self, event, agent):
        """Learn the rules for ``event["object_before"]`` and store them on ``agent``.

        Action 4 is applied to every state of the replicated demonstration
        set and the resulting inventory / object-in-front changes are
        reduced to a linearly independent set of "core" transitions,
        preferring the cheapest ones. For each core transition the minimum
        prior inventory over all states that produced it is recorded,
        together with a text description.

        The tuple ``(core_transitions.T, pre_requisite_set, desc_set)`` is
        stored in ``agent.rule_dict[event["object_before"]]``.

        Returns:
            True when the rule was stored, False when storing it failed.
        """
        state_set = self._build_state_set(event)
        object_name = num_string_dict[event["object_before"]]

        prev_rows = []
        diff_rows = []
        for before in state_set:
            # 4 is presumably the "use" action -- confirm against the
            # action constants.
            _, after = before.step(4)
            # The object-in-front difference should only be -1 or 0;
            # clipping keeps any other value within [-1, 1].
            front_diff = np.clip(
                after.grid[5, 5].argmax() - before.grid[5, 5].argmax(),
                -1, 1)
            diff_rows.append(
                np.append(after.inventory - before.inventory, front_diff))
            prev_rows.append(before.inventory)
        # Float dtype, as produced by appending onto np.empty((0, k)).
        prev_inventory_set = np.array(prev_rows, dtype=float)
        difference_set = np.array(diff_rows, dtype=float)

        unique_transitions = np.unique(difference_set, axis=0)

        # We want the simplest core set of transitions and the minimum
        # conditions required for them to occur. Rank transitions by cost:
        # columns 7..11 count once, columns 12 onward count double.
        costs = (np.abs(unique_transitions[:, 7:12]).sum(axis=1)
                 + 2 * np.abs(unique_transitions[:, 12:]).sum(axis=1))
        sorted_indices = costs.argsort()

        core_transitions = np.empty(
            (unique_transitions[0].shape[0], 0), dtype=int)
        pre_requisite_set = np.empty((0, 21), dtype=int)
        desc_set = []
        for ind in sorted_indices:
            transition = unique_transitions[ind]
            candidate = np.append(
                core_transitions, transition[:, np.newaxis], axis=1)
            # Keep the transition only if it is linearly independent of the
            # ones already selected.
            if np.linalg.matrix_rank(candidate) != candidate.shape[1]:
                continue
            core_transitions = candidate

            # Pre-requisite: element-wise minimum of the prior inventories
            # of every state that produced exactly this transition.
            produced_it = (difference_set == transition).all(axis=1)
            pre_requisite = prev_inventory_set[produced_it].min(axis=0)
            pre_requisite_set = np.append(
                pre_requisite_set, pre_requisite[np.newaxis, :], axis=0)

            desc_set.append(
                self._describe_transition(transition, object_name))

        try:
            agent.rule_dict[event["object_before"]] = (
                core_transitions.T, pre_requisite_set, desc_set)
        except Exception:
            # Best effort: report failure to the caller, but let
            # KeyboardInterrupt / SystemExit propagate.
            return False
        return True