def __init__(self, name='Maximization Bias MDP Sim', width=12, Nb_choices=10,
             s_hash_rowL=None, row_tickL=None, col_tickL=None,
             x_axis_label='', y_axis_label=''):
    """
    A Black Box Interface to the Maximization Bias MDP Simulation.

    The state layout is a single fixed row: 'Lterm', 'B', 'A', 'Rterm'.
    'A' is the start state, 'A' and 'B' are the states with actions, and
    'Lterm' / 'Rterm' are terminal.

    Args:
        name: simulation name handed to Simulation.__init__.
        width: accepted for signature compatibility; not used by this block.
        Nb_choices: number of choices from B (stored as self.Nb_choices).
        s_hash_rowL: accepted for signature compatibility; always replaced
            by the fixed four-state layout, because the state sets below
            are hard-wired to that layout.
        row_tickL: forwarded unchanged to Simulation.__init__.
        col_tickL: forwarded unchanged to Simulation.__init__.
        x_axis_label: forwarded unchanged to Simulation.__init__.
        y_axis_label: forwarded unchanged to Simulation.__init__.
    """
    self.Nb_choices = Nb_choices  # number of choices from B

    # -------- make layout template for states ---------
    # layout rows for making 2D output (deliberately overrides the argument)
    s_hash_rowL = [['Lterm', 'B', 'A', 'Rterm']]

    # call parent object; tick and axis-label arguments used to be dropped
    # here, so caller-supplied values never reached the parent.
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    self.action_state_set = {'B', 'A'}  # state hashes that have actions
    self.terminal_set = {'Lterm', 'Rterm'}
    self.start_state_hash = 'A'
def __init__(self, name='Random Walk 1000 Sim', s_hash_rowL=None,
             row_tickL=None, col_tickL=None,
             x_axis_label='', y_axis_label=''):
    """
    A Black Box Interface to the 1000-state Random Walk Simulation.

    States are the integers 1..1000. States 1 and 1000 are terminal,
    states 2..999 have actions, and state 500 is the start state.

    Args:
        name: simulation name handed to Simulation.__init__.
        s_hash_rowL: accepted for signature compatibility; always replaced
            by the generated 40 x 25 layout.
        row_tickL: forwarded unchanged to Simulation.__init__.
        col_tickL: forwarded unchanged to Simulation.__init__.
        x_axis_label: forwarded unchanged to Simulation.__init__.
        y_axis_label: forwarded unchanged to Simulation.__init__.
    """
    # -------- make layout template for states ---------
    # break 1000 states into 40 rows of 25, numbered 1..1000 row by row
    n_rows, n_cols = 40, 25
    s_hash_rowL = [list(range(row * n_cols + 1, (row + 1) * n_cols + 1))
                   for row in range(n_rows)]

    # call parent object; tick and axis-label arguments used to be dropped
    # here, so caller-supplied values never reached the parent.
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    self.action_state_set = set(range(2, 1000))  # state hashes that have actions
    self.terminal_set = {1, 1000}
    self.start_state_hash = 500
def __init__(self, name='Sample Gridworld Sim', step_reward=-0.04, random_transition_prob=0.2):
    """
    A Simulation of a Sample Gridworld.

    Args:
        name: simulation name handed to Simulation.__init__.
        step_reward: stored as self.step_reward.
        random_transition_prob: probability of moving in a random direction
            (stored as self.random_transition_prob).

    The layout, tick lists and axis labels are not parameters; they are
    read from names defined outside this method, as are actionD and rewardD.
    """
    self.step_reward = step_reward
    self.random_transition_prob = random_transition_prob

    # Gather the externally defined layout settings, then initialise the parent.
    layout_kwargs = dict(s_hash_rowL=s_hash_rowL,
                         row_tickL=row_tickL,
                         col_tickL=col_tickL,
                         x_axis_label=x_axis_label,
                         y_axis_label=y_axis_label)
    Simulation.__init__(self, name=name, **layout_kwargs)

    # States with actions are the keys of actionD; terminal states are the
    # keys of rewardD.
    self.action_state_set = set(actionD)
    self.terminal_set = set(rewardD)

    # This simulation has a defined start state.
    self.start_state_hash = 'Start'
def __init__(self, name='BlackJack Simulation', s_hash_rowL=s_hash_rowL,
             row_tickL=row_tickL, col_tickL=col_tickL,
             x_axis_label='Dealer Showing',
             y_axis_label='Usable Ace Player Sum No Usable Ace'):
    """
    A Black Box Interface to the BlackJack Simulation.

    State hashes are tuples (player_sum, usable_ace, dealer_showing).
    Action states cover player sums 12..21 with a usable ace and 11..21
    without one, for dealer cards 1..10. Terminal states use 'Win', 'Draw'
    or 'Lose' in place of the player sum.

    Args:
        name: simulation name handed to Simulation.__init__.
        s_hash_rowL, row_tickL, col_tickL: forwarded to Simulation.__init__;
            defaults come from names defined outside this method.
        x_axis_label, y_axis_label: forwarded to Simulation.__init__.
    """
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    # Default policy: 'Hit' on every sum below 20, 'S' on 20 or 21.
    # NOTE(review): 'S' is presumably the stick action -- confirm it matches
    # the action names the rest of the simulation uses.
    self.default_policyD = {}
    self.action_state_set = set()  # state hashes that have actions

    for usable_ace in (True, False):
        lowest_sum = 12 if usable_ace else 11
        for player_sum in range(lowest_sum, 22):
            default_action = 'Hit' if player_sum < 20 else 'S'
            for dealer_showing in range(1, 11):
                state = (player_sum, usable_ace, dealer_showing)
                self.action_state_set.add(state)
                self.default_policyD[state] = default_action

    # Terminal state hashes: one per outcome / usable-ace flag / dealer card.
    self.terminal_set = {
        (outcome, usable_ace, dealer_showing)
        for usable_ace in (True, False)
        for outcome in ('Win', 'Draw', 'Lose')
        for dealer_showing in range(1, 11)
    }

    self.bj_hand = BlackJack()
def __init__(self, name='Car Rental Sim Variable Rtns', s_hash_rowL=s_hash_rowL,
             x_axis_label='#Cars at Second Location',
             y_axis_label='#Cars at First Location'):
    """
    A Black Box Interface to the Car Rental Simulation.

    State hash is (# cars at 1st site, # cars at 2nd site). Every pair with
    both counts in 0..MAX_CARS is an action state; there are no terminal
    states.

    Args:
        name: simulation name handed to Simulation.__init__.
        s_hash_rowL: layout rows handed to Simulation.__init__; the default
            comes from a name defined outside this method.
        x_axis_label: x-axis label forwarded to Simulation.__init__.
        y_axis_label: y-axis label forwarded to Simulation.__init__.
    """
    # The axis labels were previously accepted but never passed on, so the
    # defaults in the signature had no effect.
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    # state hash is (# cars at 1st site, # cars at 2nd site)
    car_counts = range(MAX_CARS + 1)  # 0..MAX_CARS inclusive
    self.action_state_set = {(s1, s2) for s1 in car_counts for s2 in car_counts}

    self.terminal_set = set()
def __init__(self, name='Cliff Walking Sim', width=12, height=4,
             s_hash_rowL=None, row_tickL=None, col_tickL=None,
             x_axis_label='', y_axis_label='', step_reward=-1):
    """
    A Black Box Interface to the Cliff Walking Simulation.

    The layout has (height - 1) rows of (row, col) tuple states followed by
    a last row made of 'S', (width - 2) '"Cliff"' cells and 'G'.
    'S' is the start state and 'G' is the only terminal state.

    Args:
        name: simulation name handed to Simulation.__init__.
        width: number of columns in the layout.
        height: number of rows in the layout, including the S/Cliff/G row.
        s_hash_rowL: accepted for signature compatibility; always replaced
            by the layout generated from width and height.
        row_tickL: forwarded unchanged to Simulation.__init__.
        col_tickL: forwarded unchanged to Simulation.__init__.
        x_axis_label: forwarded unchanged to Simulation.__init__.
        y_axis_label: forwarded unchanged to Simulation.__init__.
        step_reward: stored as self.step_reward.
    """
    self.step_reward = step_reward
    self.width = width
    self.height = height

    # -------- make layout template for states ---------
    # Rows are appended in increasing row index, so (0, 0) is at the upper left.
    s_hash_rowL = [[(i, j) for j in range(width)] for i in range(height - 1)]
    # Last row: start, the cliff cells, then goal.
    s_hash_rowL.append(['S'] + ['"Cliff"'] * (width - 2) + ['G'])

    # call parent object; tick and axis-label arguments used to be dropped
    # here, so caller-supplied values never reached the parent.
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    # States with actions: 'S' plus every (row, col) cell above the last row.
    # The cliff cells are not added to either state set.
    self.action_state_set = {'S'}
    self.action_state_set.update((i, j)
                                 for i in range(height - 1)
                                 for j in range(width))

    self.terminal_set = {'G'}
    self.start_state_hash = 'S'
def get_policy_score(self, policy=None, start_state_hash=None, step_limit=1000):
    """
    Score a policy by summing Simulation.get_policy_score over start states.

    Args:
        policy: passed through to Simulation.get_policy_score.
        start_state_hash: a single start state; when None, every entry of
            the module-level starting_lineL is scored instead.
        step_limit: passed through to Simulation.get_policy_score.

    Returns:
        (r_sum, n_steps, msg): the reward and step counts summed over all
        scored start states, and an empty message string. Messages returned
        by the individual runs are discarded.
    """
    if start_state_hash is None:
        start_hashL = starting_lineL
    else:
        start_hashL = [start_state_hash]

    total_reward = 0
    total_steps = 0
    for s_hash in start_hashL:
        reward, steps, _run_msg = Simulation.get_policy_score(
            self, policy, start_state_hash=s_hash, step_limit=step_limit)
        total_reward += reward
        total_steps += steps

    # No special message is reported for the aggregate score.
    return (total_reward, total_steps, '')
def __init__(self, name='RaceTrack_2 Simulation', s_hash_rowL=s_hash_rowL,
             enable_random_zero_deltav=True, row_tickL=row_tickL, col_tickL=col_tickL,
             x_axis_label='Starting Line', y_axis_label='Finish '):
    """
    A Black Box Interface to the RaceTrack_2 Simulation.

    State hashes are 4-tuples (i, j, vx, vy). A state becomes an action
    state when get_state_legal_action_list returns a non-empty list for it.
    The only terminal state is ('Done', 'Done', 0, 0).

    Args:
        name: simulation name handed to Simulation.__init__.
        s_hash_rowL, row_tickL, col_tickL: forwarded to Simulation.__init__;
            defaults come from names defined outside this method.
        enable_random_zero_deltav: stored on the instance; if enabled, with
            prob=0.1, deltav will be set to (0,0).
        x_axis_label, y_axis_label: forwarded to Simulation.__init__.
    """
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    # Track geometry comes from names defined outside this method.
    self.racetrack_area = racetrack_area    # set( [(i1,j1), (i2,j2), ...] )
    self.starting_lineL = starting_lineL    # [(x1,y1,0,0), (x2,y2,0,0), ...]
    self.finish_lineL = finish_lineL        # [(x1,y1), (x2,y2), ...]
    self.start_state_hash = starting_lineL[0]

    self.enable_random_zero_deltav = enable_random_zero_deltav

    self.default_policyD = {}
    self.action_state_set = set()  # state hashes that have actions

    # First pass: enumerate candidate states and keep those with legal actions.
    for row in range(30):
        for col in range(w_track):
            for vel_x in range(5):
                for vel_y in range(5):
                    state = (row, col, vel_x, vel_y)
                    if not self.get_state_legal_action_list(state):
                        continue
                    self.action_state_set.add(state)
                    self.default_policyD[state] = (4 - vel_x, 4 - vel_y)

    self.terminal_set = set([('Done', 'Done', 0, 0)])  # terminal state hashes

    # Second pass: make sure all default_policyD entries are legal.
    # Removals are collected and applied after the loop so the dict does
    # not change size while it is being iterated.
    stale_hashL = []
    for state, action in self.default_policyD.items():
        (x, y, _vx, _vy) = state
        if (x, y) not in self.racetrack_area:
            stale_hashL.append(state)
            continue

        legalL = self.get_state_legal_action_list(state)
        if action in legalL:
            continue
        if legalL:
            # fall back to the first legal action
            self.default_policyD[state] = legalL[0]
        else:
            stale_hashL.append(state)

    for state in stale_hashL:
        del self.default_policyD[state]
def __init__(self, name='Blocking Maze Sim', step_reward=0.0, width=9, height=6,
             goal=(0, 8), start=(5, 3), wall_row=3,
             row_tickL=None, col_tickL=None, x_axis_label='', y_axis_label=''):
    """
    A Black Box Interface to the Blocking Maze Simulation.

    Blocking Maze changes route to goal state as gates are opened and closed.
    Starts with Right Gate Open.

    Args:
        name: simulation name handed to Simulation.__init__.
        step_reward: stored as self.step_reward.
        width, height: number of columns and rows in the layout.
        goal: (row, col) of the goal cell, shown as 'Goal'.
        start: (row, col) of the start cell, shown as 'Start'.
        wall_row: row index holding the wall; its first column is 'Gate_L',
            its last column is 'Gate_R', everything between is '"Wall"'.
        row_tickL, col_tickL, x_axis_label, y_axis_label: forwarded to
            Simulation.__init__.
    """
    self.step_reward = step_reward
    self.width = width
    self.height = height
    self.goal = goal
    self.start = start
    self.wall_row = wall_row

    last_col = width - 1

    # far right open at time=0, far left is closed
    self.s_hash_gate_L = (wall_row, 0)
    self.s_hash_gate_R = (wall_row, last_col)
    self.Lgate_is_open = False
    self.Rgate_is_open = True

    def wall_label(col):
        # Label for a cell in the wall row.
        if col == 0:
            return 'Gate_L'
        if col == last_col:
            return 'Gate_R'
        return '"Wall"'

    def open_label(row, col):
        # Label for a cell outside the wall row; the goal check comes first.
        cell = (row, col)
        if cell == self.goal:
            return 'Goal'
        if cell == self.start:
            return 'Start'
        return cell

    # -------- make layout template for states ---------
    # Rows are appended in increasing row index, so (0, 0) is at the upper left.
    s_hash_rowL = []
    for row in range(height):
        if row == wall_row:
            rowL = [wall_label(col) for col in range(width)]
        else:
            rowL = [open_label(row, col) for col in range(width)]
        s_hash_rowL.append(rowL)

    # call parent object
    Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL,
                        row_tickL=row_tickL, col_tickL=col_tickL,
                        x_axis_label=x_axis_label, y_axis_label=y_axis_label)

    # state hashes of states with actions
    self.action_state_set = set()
    for row in range(height):
        for col in range(width):
            if row == wall_row:
                # Only the two gates are action states in the wall row.
                label = wall_label(col)
                if label != '"Wall"':
                    self.action_state_set.add(label)
                continue

            cell = (row, col)
            if cell == self.start:
                # Here the start check comes before the goal check.
                self.action_state_set.add('Start')
            elif cell != self.goal:
                self.action_state_set.add(cell)
            # the goal cell is terminal, so it is not an action state

    self.terminal_set = set(['Goal'])
    self.start_state_hash = 'Start'