def _transition_func(self, state, action):
    '''
    Args:
        state (State): each peg is a string of disc chars, bottom-to-top;
            an empty peg is the single-space string " ".
        action (str): two digit chars "sd" — move top disc of peg s to peg d.

    Returns:
        (State)
    '''
    # Grab top discs on source and dest pegs.
    source_index = int(action[0])
    dest_index = int(action[1])
    source_top = state[source_index][-1]
    dest_top = state[dest_index][-1]

    # Make new state. The move is legal only if the dest's top disc is
    # larger (earlier letter, and " " < any letter so empty pegs accept all).
    new_state_ls = state.get_data()[:]
    if dest_top < source_top:
        # Pop the disc off the source peg.
        new_state_ls[source_index] = new_state_ls[source_index][:-1]
        if new_state_ls[source_index] == "":
            # An emptied peg is represented by a single space.
            new_state_ls[source_index] = " "
        # Push it onto the dest peg, dropping the empty-peg placeholder.
        new_state_ls[dest_index] += source_top
        new_state_ls[dest_index] = new_state_ls[dest_index].replace(" ", "")
    new_state = State(new_state_ls)

    # Set terminal. Fixed: check the *resulting* state rather than the
    # pre-transition state, so the goal state itself is marked terminal
    # (matches the intent of the commented-out new_state check).
    if self._is_goal_state(new_state):
        new_state.set_terminal(True)

    return new_state
def __init__(self, belief_distribution):
    '''
    Args:
        belief_distribution (defaultdict): maps outcomes to probability mass.
    '''
    # Materialize the belief values once; they become the State's data list.
    belief_values = list(belief_distribution.values())
    self.distribution = belief_distribution
    State.__init__(self, data=belief_values)
def __init__(self, x, x_dot, theta, theta_dot):
    # Discretize each component by rounding: cart position/velocities to
    # one decimal place, the pole angle to three.
    self.x, self.x_dot = round(x, 1), round(x_dot, 1)
    self.theta, self.theta_dot = round(theta, 3), round(theta_dot, 1)
    State.__init__(self, data=[self.x, self.x_dot, self.theta, self.theta_dot])
def __init__(self, position, velocity, done):
    # Keep the raw components, but expose the flat concatenation
    # [position..., velocity...] as the State's data vector.
    self.position = position
    self.velocity = velocity
    flat_features = np.concatenate((position, velocity), axis=0)
    State.__init__(self, flat_features, is_terminal=done)
def __init__(self, belief_distribution):
    '''
    Args:
        belief_distribution (defaultdict): maps outcomes to probability mass.
    '''
    self.distribution = belief_distribution
    # Fixed: materialize the values view into a list. On Python 3,
    # dict.values() is a lazy view that tracks later dict mutations and
    # does not support indexing; the sibling belief-state constructor
    # already wraps it in list().
    State.__init__(self, data=list(belief_distribution.values()))
def _transition_func(self, state, action):
    '''
    Args:
        state (State): each peg is a string of disc chars, bottom-to-top;
            an empty peg is the single-space string " ".
        action (str): two digit chars "sd" — move top disc of peg s to peg d.

    Returns:
        (State)
    '''
    # Grab top discs on source and dest pegs.
    source_index = int(action[0])
    dest_index = int(action[1])
    source_top = state[source_index][-1]
    dest_top = state[dest_index][-1]

    # Make new state. The move is legal only if the dest's top disc is
    # larger (earlier letter, and " " < any letter so empty pegs accept all).
    new_state_ls = state.get_data()[:]
    if dest_top < source_top:
        # Pop the disc off the source peg.
        new_state_ls[source_index] = new_state_ls[source_index][:-1]
        if new_state_ls[source_index] == "":
            # An emptied peg is represented by a single space.
            new_state_ls[source_index] = " "
        # Push it onto the dest peg, dropping the empty-peg placeholder.
        new_state_ls[dest_index] += source_top
        new_state_ls[dest_index] = new_state_ls[dest_index].replace(" ", "")
    new_state = State(new_state_ls)

    # Set terminal. Fixed: check the *resulting* state rather than the
    # pre-transition state, so the goal state itself is marked terminal
    # (matches the intent of the commented-out new_state check).
    if self._is_goal_state(new_state):
        new_state.set_terminal(True)

    return new_state
def __init__(self, observation, is_terminal=False):
    # Unpack the observation dict into named components, then flatten
    # [position, velocity, to_target] into one feature vector.
    self.position = observation["position"]
    self.velocity = observation["velocity"]
    self.to_target = observation["to_target"]
    features = np.concatenate(
        (self.position, self.velocity, self.to_target), axis=0)
    State.__init__(self, data=features, is_terminal=is_terminal)
def __init__(self, x, y, x_dot, y_dot, on_platform_type, is_terminal=False):
    # Planar position and velocity, plus which platform type (if any)
    # the agent is currently on.
    self.x, self.y = x, y
    self.x_dot, self.y_dot = x_dot, y_dot
    self.on_platform_type = on_platform_type
    features = [x, y, x_dot, y_dot, on_platform_type]
    State.__init__(self, data=features, is_terminal=is_terminal)
def __init__(self, x, y, xdot, ydot, is_terminal=False):
    # Planar position and velocity, packed into a numpy feature vector.
    self.x, self.y = x, y
    self.xdot, self.ydot = xdot, ydot
    State.__init__(self, data=np.asarray([x, y, xdot, ydot]),
                   is_terminal=is_terminal)
def __init__(self, objects):
    '''
    Args:
        objects (dict of OOMDPObject instances): {key=object class (str):val = object instances}
    '''
    self.objects = objects
    # NOTE(review): update() is presumably responsible for populating
    # self.data from self.objects before the base State is initialized
    # with it — confirm against the class's update() implementation.
    self.update()
    State.__init__(self, data=self.data)
def __init__(self, location, photo_block=None):
    """
    :param location: (x, y, z) coordinate tuple of the drone.
    :param photo_block: A DroneBlock, or None when no photo is attached.
    """
    # Expose the coordinates individually while keeping the raw tuple
    # (plus the photo block) as the State data.
    self.x, self.y, self.z = location[0], location[1], location[2]
    self.photo_block = photo_block
    State.__init__(self, data=[location, photo_block])
def __init__(self, xr, yr, u, r, d, l, xg, yg):
    # NOTE(review): the State data vector keeps the *unrounded* values,
    # while the named attributes below are rounded to 5 places — confirm
    # this asymmetry is intentional (it affects equality/hashing).
    State.__init__(self, data=[xr, yr, u, r, d, l, xg, yg])
    self.xr, self.yr = round(xr, 5), round(yr, 5)
    self.u, self.r, self.d, self.l = u, r, d, l
    self.xg, self.yg = round(xg, 5), round(yg, 5)
def __init__(self, x, y, color):
    '''
    Args:
        x (int)
        y (int)
        color (int)
    '''
    # The data list keeps the raw coordinates; the x/y attributes are
    # rounded to 3 decimal places.
    State.__init__(self, data=[x, y, color])
    self.x, self.y = round(x, 3), round(y, 3)
    self.color = color
def __init__(self, board):
    '''
    Construct a 2048 game state from a board.

    Parameters
    ----------
    board : nparray
        The numpy array representing the 2048 board.
    '''
    # The State's data is the board flattened into a plain Python list.
    State.__init__(self, data=board.flatten().tolist())
    self.board = board
def __init__(self, robot, doors, rooms, blocks):
    '''
    Args:
        robot (CleanupL1Robot)
        doors (list): all CleanupL1Door objects.
        rooms (list): all CleanupL1Room objects.
        blocks (list): all CleanupL1Block objects.
    '''
    self.robot, self.doors = robot, doors
    self.rooms, self.blocks = rooms, blocks
    State.__init__(self, data=[robot, doors, rooms, blocks])
def get_init_state(self):
    # Feature vector: the agent's initial (x, y), followed by each
    # initial rock's attributes cast to ints.
    features = [self.init_loc[0], self.init_loc[1]]
    for rock in self.init_rocks:
        features.extend(int(f) for f in rock)
    return State(data=features)
def __init__(self, task, x, y, blocks=None, doors=None, rooms=None):
    '''
    :param task: The given CleanUpTask
    :param x: Agent x coordinate
    :param y: Agent y coordinate
    :param blocks: List of blocks (default: new empty list)
    :param doors: List of doors (default: new empty list)
    :param rooms: List of rooms (default: new empty list)
    '''
    # Fixed: blocks/doors/rooms used mutable default arguments ([]),
    # which are created once and shared across every instance built with
    # the defaults — any later mutation would leak between states.
    self.x = x
    self.y = y
    self.blocks = [] if blocks is None else blocks
    self.doors = [] if doors is None else doors
    self.rooms = [] if rooms is None else rooms
    self.task = task
    State.__init__(self, data=[task, (x, y), self.blocks, self.doors, self.rooms])
def __init__(self, num_pegs=3, num_discs=3, gamma=0.95):
    '''
    Args:
        num_pegs (int)
        num_discs (int)
        gamma (float)
    '''
    self.num_pegs = num_pegs
    self.num_discs = num_discs

    # An action "sd" moves the top disc of peg s onto peg d (s != d).
    HanoiMDP.ACTIONS = [
        str(src) + str(dst)
        for src, dst in itertools.product(range(num_pegs), range(num_pegs))
        if src != dst
    ]

    # Initial state: all discs stacked on peg 0, encoded bottom-to-top as
    # "abc..."; empty pegs are represented by a single space.
    pegs = [" " for _ in range(num_pegs)]
    pegs[0] = "".join(chr(97 + i) for i in range(num_discs))
    init_state = State(data=pegs)

    MDP.__init__(self, HanoiMDP.ACTIONS, self._transition_func,
                 self._reward_func, init_state=init_state, gamma=gamma)
def make_state(self, x, y):
    # Feature vector: the given (x, y), followed by each rock's
    # attributes cast to ints.
    features = [x, y]
    for rock in self.rocks:
        features.extend(int(f) for f in rock)
    return State(data=features)
def __init__(self, position, theta, velocity, theta_dot, done):
    """
    Args:
        position (np.ndarray)
        theta (float)
        velocity (np.ndarray)
        theta_dot (float)
        done (bool)
    """
    self.position = position
    self.theta = theta
    self.velocity = velocity
    self.theta_dot = theta_dot
    # Flatten into [x, y, theta, vx, vy, theta_dot].
    feats = [position[0], position[1], theta,
             velocity[0], velocity[1], theta_dot]
    State.__init__(self, data=feats, is_terminal=done)
def phi(self, state):
    '''
    Args:
        state (State): a ground state.

    Returns:
        (State): the abstract state this ground state maps to; the
            terminal flag is copied from the ground state.
    '''
    # Lazily assign each newly-seen ground state the next integer id.
    # Idiom fixes: membership test on the dict directly (not .keys()),
    # and max(..., default=0) instead of materializing .values() twice.
    if state not in self._phi:
        self._phi[state] = max(self._phi.values(), default=0) + 1

    abstr_state = State(self._phi[state])
    abstr_state.set_terminal(state.is_terminal())
    return abstr_state
def _transition_func(self, state, action):
    '''
    Args:
        state (State): data = [x, y, rock_0, rock_1, ...].
        action (str): one of "sample", "up", "down", "left", "right".

    Returns:
        (State)
    '''
    # Terminal states are absorbing.
    if state.is_terminal():
        return state

    if action == "sample":
        # Sampling flips the rock at the agent's location (if any) to False.
        rock_index = self._get_rock_at_agent_loc(state)
        if rock_index is not None:  # idiom fix: was `!= None`
            new_data = state.data[:]
            new_data[rock_index] = False
            next_state = State(data=new_data)
        else:
            next_state = State(data=state.data)
    elif action == "up" and state.data[1] < self.height:
        next_state = State(data=[state.data[0], state.data[1] + 1] + state.data[2:])
    elif action == "down" and state.data[1] > 1:
        next_state = State(data=[state.data[0], state.data[1] - 1] + state.data[2:])
    elif action == "right" and state.data[0] < self.width:
        next_state = State(data=[state.data[0] + 1, state.data[1]] + state.data[2:])
    elif action == "left" and state.data[0] > 1:
        next_state = State(data=[state.data[0] - 1, state.data[1]] + state.data[2:])
    else:
        # Illegal move or boundary: stay put.
        next_state = State(data=state.data)

    # NOTE(review): the exit column appears hard-coded at 7 — presumably
    # the map's east boundary; confirm against self.width.
    if next_state[0] > 7:
        next_state.set_terminal(True)

    return next_state
def __init__(self, root_grounded_task, policy_generators, base_mdp):
    '''
    AbstractMDP solver class

    Args:
        root_grounded_task (RootTaskNode)
        policy_generators (list) of type objects (one for each level below the root)
        base_mdp (MDP): Lowest level environment MDP
    '''
    self.root_grounded_task = root_grounded_task
    self.policy_generators = policy_generators
    self.base_mdp = base_mdp

    # One (state, policy) slot per abstraction level.
    self.state_stack = [State() for _ in policy_generators]
    self.policy_stack = [defaultdict() for _ in policy_generators]
    self.max_level = len(self.policy_generators) - 1

    # Map each action name to the task node that grounds it.
    self.action_to_task_map = defaultdict()
    self._construct_action_to_node_map(root_grounded_task)
    self.max_iterate = 100  # YS
def _transition_func(self, state, action):
    '''
    Apply the agent's move to the 4x4 2048 board and return the
    resulting state.

    Parameters
    ----------
    state : state
        The current state; its data is the flattened board.
    action : str
        The move the rl agent makes.

    Returns
    ----------
    state : state
        The new state after the action is taken.
    '''
    board = Board(np.asarray(state.data).reshape((4, 4)))
    # Fixed: the original called State(self, <board list>), passing the
    # MDP itself as State's data argument and the flattened board into
    # the next positional slot (is_terminal). State takes the data
    # list directly.
    moved = board.moveAndUpdateBoard(action)
    return State(moved.board.flatten().tolist())
def __init__(self, num_arms=10, distr_family=np.random.normal, distr_params=None):
    '''
    Args:
        num_arms (int): Number of arms.
        distr_family (lambda): A function from numpy which, when given
            entities from @distr_params, samples from the distribution family.
        distr_params (dict): If None is given, default mu/sigma for normal
            distribution are initialized randomly.
    '''
    # Arms are named "1" .. "num_arms".
    BanditMDP.ACTIONS = [str(i) for i in range(1, num_arms + 1)]
    MDP.__init__(self, BanditMDP.ACTIONS, self._transition_func,
                 self._reward_func, init_state=State(1), gamma=1.0)
    self.num_arms = num_arms
    self.distr_family = distr_family
    if distr_params is None:
        distr_params = self.init_distr_params()
    self.distr_params = distr_params
def __init__(self, room_number, q, is_terminal=False):
    """State pairing the agent's room number with an automaton logic state q."""
    State.__init__(self, data=[room_number, q], is_terminal=is_terminal)
    self.agent_in_room_number = room_number  # which room the agent occupies
    self.q = q  # logic state
def __init__(self, x, y):
    # The data vector holds the raw coordinates; the named attributes
    # are rounded to 5 decimal places.
    State.__init__(self, data=[x, y])
    self.x, self.y = round(x, 5), round(y, 5)
def __init__(self, floor_number, q, is_terminal=False):
    """State pairing the agent's floor number with an automaton logic state q."""
    State.__init__(self, data=[floor_number, q], is_terminal=is_terminal)
    self.agent_on_floor_number = floor_number  # which floor the agent is on
    self.q = q  # logic state
def __init__(self):
    # Two-player matrix game: there is no meaningful world state, so the
    # game starts from an empty State.
    MarkovGameMDP.__init__(self,
                           RockPaperScissorsMDP.ACTIONS,
                           self._transition_func,
                           self._reward_func,
                           init_state=State())
def __init__(self, data=None, is_terminal=False):
    '''
    Args:
        data (list): the state's feature data (default: a new empty list).
        is_terminal (bool)
    '''
    # Fixed: `data=[]` was a mutable default argument — a single list
    # object shared by every instance constructed without explicit data.
    data = [] if data is None else data
    self.data = data
    State.__init__(self, data=data, is_terminal=is_terminal)
def __init__(self, models):
    """State wrapping a models object; data is the one-element list [models]."""
    State.__init__(self, data=[models])
    self.models = models
def __init__(self, a_x, a_y, b_x, b_y):
    # Joint state: (x, y) positions for agents a and b.
    State.__init__(self, data=[a_x, a_y, b_x, b_y])
    self.a_x, self.a_y = a_x, a_y
    self.b_x, self.b_y = b_x, b_y
def __init__(self, x, y, color):
    # The data list keeps the raw (x, y, color); the x/y attributes are
    # rounded to 3 decimal places.
    self.color = color
    State.__init__(self, data=[x, y, color])
    self.x, self.y = round(x, 3), round(y, 3)
def __init__(self, room_number, is_terminal=False, items=None):
    '''
    Args:
        room_number (int)
        is_terminal (bool)
        items (list): items in this state (default: a new empty list).
    '''
    # Fixed: `items=[]` was a mutable default argument, shared across
    # every instance constructed with the default; use a None sentinel.
    State.__init__(self, data=[room_number], is_terminal=is_terminal)
    self.agent_in_room_number = room_number
    self.items = [] if items is None else items
def __init__(self, x, y, phi=lambda state: [state.x, state.y]):
    """2D state with a feature map phi (default: the raw [x, y] features)."""
    State.__init__(self, data=[x, y])
    self.x, self.y = x, y
    self.phi = phi
def __init__(self, name):
    """Named state; terminal exactly when the name is 'goal'."""
    self.name = name
    State.__init__(self, data=name, is_terminal=(name == 'goal'))
def __init__(self, data, is_terminal=False):
    """Thin wrapper: forward the data and terminal flag to the base State."""
    State.__init__(self, data=data, is_terminal=is_terminal)