Example #1
    def __init__(self,
                 name='Maximization Bias MDP Sim',
                 width=12,
                 Nb_choices=10,
                 s_hash_rowL=None,
                 row_tickL=None,
                 col_tickL=None,
                 x_axis_label='',
                 y_axis_label=''):
        """
        A Black Box Interface to a Simulation
        """
        self.Nb_choices = Nb_choices  # number of choices from B

        # -------- make layout template for states ---------
        s_hash_rowL = [['Lterm', 'B', 'A', 'Rterm']]  # layout rows for making 2D output

        # call parent object
        Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL)

        # state hash is a string label: 'A', 'B', 'Lterm' or 'Rterm'
        self.action_state_set = set(['B', 'A'])  # a set of state hashes

        self.terminal_set = set(['Lterm', 'Rterm'])

        self.start_state_hash = 'A'
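
A minimal usage sketch, assuming a hypothetical wrapper class MaximizationBiasMDP built around this __init__ (the class name is an assumption, not from the source):

# hypothetical class name; attribute values follow from the constructor above
sim = MaximizationBiasMDP(Nb_choices=5)
print(sim.start_state_hash)   # 'A'
print(sim.terminal_set)       # {'Lterm', 'Rterm'}
print(sim.action_state_set)   # {'A', 'B'}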
Example #2
    def __init__(self, name='Random Walk 1000 Sim', 
                 s_hash_rowL=None,
                 row_tickL=None, col_tickL=None, 
                 x_axis_label='', y_axis_label=''):


        # -------- make layout template for states ---------
        # break 1000 states into 40 rows of 25
        s_hash_rowL = []  # layout rows for making 2D output
        N = 1
        for i in range(40):
            rowL = []
            for j in range(25):
                rowL.append(N)
                N += 1
            s_hash_rowL.append(rowL)

        # call parent object
        Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL)
        
        # state hash is an integer from 1 to 1000
        self.action_state_set = set(range(2, 1000))  # a set of state hashes (non-terminal)
    
        self.terminal_set = set([1, 1000])
        
        self.start_state_hash = 500
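
The same 40 x 25 layout can also be built with a nested list comprehension; a sketch equivalent to the loop above:

# equivalent construction of the 40-row by 25-column layout (states numbered 1..1000)
s_hash_rowL = [[25 * i + j + 1 for j in range(25)] for i in range(40)]
assert s_hash_rowL[0][0] == 1 and s_hash_rowL[-1][-1] == 1000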
Example #3
    def __init__(self,
                 name='Sample Gridworld Sim',
                 step_reward=-0.04,
                 random_transition_prob=0.2):
        """A Simulation of a Sample Gridworld"""

        self.step_reward = step_reward

        # probability of moving in random direction.
        self.random_transition_prob = random_transition_prob

        # call parent object
        Simulation.__init__(self,
                            name=name,
                            s_hash_rowL=s_hash_rowL,
                            row_tickL=row_tickL,
                            col_tickL=col_tickL,
                            x_axis_label=x_axis_label,
                            y_axis_label=y_axis_label)

        # state hash is a key of the module-level actionD (a grid position or 'Start')
        self.action_state_set = set(actionD.keys())  # a set of state hashes

        self.terminal_set = set(rewardD.keys())

        # if there is a start state, define it.
        self.start_state_hash = 'Start'
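
This constructor leans on module-level names (s_hash_rowL, row_tickL, col_tickL, the axis labels, actionD and rewardD) that are not shown in the snippet. A minimal sketch of what such definitions might look like for a tiny gridworld; the contents are illustrative assumptions, not the original module's data:

# assumed module-level layout and tables (illustrative only)
s_hash_rowL = [[(0, 0), (0, 1), 'Goal'],
               ['Start', (1, 1), (1, 2)]]
row_tickL = [0, 1]
col_tickL = [0, 1, 2]
x_axis_label = 'col'
y_axis_label = 'row'
actionD = {'Start': ('U', 'R'), (0, 0): ('D', 'R'), (0, 1): ('L', 'R', 'D'),
           (1, 1): ('L', 'R', 'U'), (1, 2): ('L', 'U')}  # legal moves per action state
rewardD = {'Goal': 1.0}                                  # terminal states and their rewards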
Example #4
    def __init__(self,
                 name='BlackJack Simulation',
                 s_hash_rowL=s_hash_rowL,
                 row_tickL=row_tickL,
                 col_tickL=col_tickL,
                 x_axis_label='Dealer Showing',
                 y_axis_label='Usable Ace   Player Sum    No Usable Ace'):
        """
        A Black Box Interface to a Simulation
        """
        Simulation.__init__(self,
                            name=name,
                            s_hash_rowL=s_hash_rowL,
                            row_tickL=row_tickL,
                            x_axis_label=x_axis_label,
                            y_axis_label=y_axis_label,
                            col_tickL=col_tickL)

        self.default_policyD = {}  # will define as Hit on everything except 20 or 21

        # state hash is (player_sum, usable_ace, dealer_showing)
        self.action_state_set = set()  # a set of action state hashes
        for usable_ace in (True, False):
            if usable_ace:
                low_lim = 12
            else:
                low_lim = 11

            for player_sum in range(low_lim, 22, 1):
                for dealer_showing in range(1, 11, 1):
                    s_hash = (player_sum, usable_ace, dealer_showing)
                    self.action_state_set.add(s_hash)

                    if player_sum < 20:
                        self.default_policyD[s_hash] = 'Hit'
                    else:
                        self.default_policyD[s_hash] = 'S'

        terminalL = []  # terminal state hashes.
        for usable_ace in (True, False):
            for player_sum in ['Win', 'Draw', 'Lose']:
                for dealer_showing in range(1, 11, 1):
                    s_hash = (player_sum, usable_ace, dealer_showing)
                    terminalL.append(s_hash)

        self.terminal_set = set(terminalL)

        self.bj_hand = BlackJack()
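
A brief usage sketch of the default policy built above, assuming a hypothetical BlackJackSimulation class wrapping this __init__:

# hypothetical class name; state hashes are (player_sum, usable_ace, dealer_showing)
sim = BlackJackSimulation()
print(sim.default_policyD[(16, False, 10)])  # 'Hit' (sums below 20 default to Hit)
print(sim.default_policyD[(21, True, 5)])    # 'S'   (stand on 20 or 21)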
Example #5
    def __init__(self, name='Car Rental Sim Variable Rtns', s_hash_rowL=s_hash_rowL,
                 x_axis_label='#Cars at Second Location',
                 y_axis_label='#Cars at First Location'):
        """
        A Black Box Interface to a Simulation
        """
        Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL)

        # state hash is (# cars at 1st site, # cars at 2nd site)
        self.action_state_set = set()  # a set of state hashes
        for s1 in range(MAX_CARS + 1):  # 20 cars max
            for s2 in range(MAX_CARS + 1):  # 20 cars max
                self.action_state_set.add((s1, s2))

        self.terminal_set = set()
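
A quick usage sketch, assuming a hypothetical CarRentalSim class around this __init__ and MAX_CARS = 20 as the comments suggest:

# hypothetical class name; with MAX_CARS = 20 there are 21 * 21 = 441 action states
sim = CarRentalSim()
assert len(sim.action_state_set) == (MAX_CARS + 1) ** 2
assert (0, 0) in sim.action_state_set and (MAX_CARS, MAX_CARS) in sim.action_state_set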
Example #6
    def __init__(self,
                 name='Cliff Walking Sim',
                 width=12,
                 height=4,
                 s_hash_rowL=None,
                 row_tickL=None,
                 col_tickL=None,
                 x_axis_label='',
                 y_axis_label='',
                 step_reward=-1):
        """
        A Black Box Interface to a Simulation
        """
        self.step_reward = step_reward
        self.width = width
        self.height = height

        # -------- make layout template for states ---------
        s_hash_rowL = []  # layout rows for making 2D output
        for i in range(height - 1):
            rowL = []
            for j in range(width):
                s_hash = (i, j)
                rowL.append(s_hash)
            # use insert to put (0,0) at lower left, append for upper left
            s_hash_rowL.append(rowL)  # layout rows for making 2D output

        rowL = ['S']
        for j in range(width - 2):
            rowL.append('"Cliff"')
        rowL.append('G')
        s_hash_rowL.append(rowL)  # layout rows for making 2D output

        # call parent object
        Simulation.__init__(self, name=name, s_hash_rowL=s_hash_rowL)

        # state hash is (row, col), plus the 'S' start label
        self.action_state_set = set(['S'])  # a set of state hashes
        for s1 in range(height - 1):  # rows above the cliff row
            for s2 in range(width):  # all columns
                self.action_state_set.add((s1, s2))

        self.terminal_set = set(['G'])

        self.start_state_hash = 'S'
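
A short usage sketch, assuming a hypothetical CliffWalkingSim class built around this __init__:

# hypothetical class name; checks the attributes defined above
sim = CliffWalkingSim(width=12, height=4)
print(sim.start_state_hash)            # 'S'
print(sim.terminal_set)                # {'G'}
print((0, 0) in sim.action_state_set)  # True: cells above the cliff row are action states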
Example #7
    def get_policy_score(self,
                         policy=None,
                         start_state_hash=None,
                         step_limit=1000):
        """Score a policy by summing reward and steps over the chosen start state(s)."""

        r_sum, n_steps = 0, 0

        if start_state_hash is None:
            sL = starting_lineL  # no start given: score from every starting-line state
        else:
            sL = [start_state_hash]

        for ss in sL:
            (r, n, msg) = Simulation.get_policy_score(self,
                                                      policy,
                                                      start_state_hash=ss,
                                                      step_limit=step_limit)
            r_sum += r
            n_steps += n

        msg = ''  # any special message(s)
        return (r_sum, n_steps, msg)
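
A sketch of how this override might be called; sim and greedy_policy are assumed names, and the return tuple matches the method above:

# hypothetical instance and policy; scores over all starting-line states by default
r_sum, n_steps, msg = sim.get_policy_score(policy=greedy_policy, step_limit=1000)
print('total reward =', r_sum, '  total steps =', n_steps)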
Example #8
    def __init__(self,
                 name='RaceTrack_2 Simulation',
                 s_hash_rowL=s_hash_rowL,
                 enable_random_zero_deltav=True,
                 row_tickL=row_tickL,
                 col_tickL=col_tickL,
                 x_axis_label='Starting Line',
                 y_axis_label='Finish                         '):
        """
        A Black Box Interface to a Simulation
        """
        Simulation.__init__(self,
                            name=name,
                            s_hash_rowL=s_hash_rowL,
                            row_tickL=row_tickL,
                            x_axis_label=x_axis_label,
                            y_axis_label=y_axis_label,
                            col_tickL=col_tickL)

        self.racetrack_area = racetrack_area  # set( [(i1,j1), (i2,j2), ...] )
        self.starting_lineL = starting_lineL  # [(x1,y1,0,0), (x2,y2,0,0), ...]
        self.finish_lineL = finish_lineL  # [(x1,y1), (x2,y2), ...]

        self.start_state_hash = starting_lineL[0]

        # if enabled, with prob=0.1, deltav will be set to (0,0)
        self.enable_random_zero_deltav = enable_random_zero_deltav

        self.default_policyD = {}

        # state hash is (i, j, vx, vy): position plus velocity components
        self.action_state_set = set()  # a set of action state hashes
        terminalL = [('Done', 'Done', 0, 0)]  # terminal state hashes.

        for i in range(30):
            for j in range(w_track):
                for vx in range(5):
                    for vy in range(5):
                        s_hash = (i, j, vx, vy)
                        aL = self.get_state_legal_action_list(s_hash)
                        if aL:
                            self.action_state_set.add((i, j, vx, vy))

                            self.default_policyD[(i, j, vx, vy)] = (4 - vx,
                                                                    4 - vy)

        self.terminal_set = set(terminalL)

        # make sure all default_policyD entries are legal
        delete_s_hashL = []
        for s_hash, a_desc in self.default_policyD.items():
            (x, y, vx, vy) = s_hash
            if (x, y) not in self.racetrack_area:
                delete_s_hashL.append(s_hash)
            else:
                aL = self.get_state_legal_action_list(s_hash)
                if a_desc not in aL:
                    if aL:
                        self.default_policyD[s_hash] = aL[0]
                        #print('replaced default_policyD["%s"]'%str(s_hash),a_desc,' with ',aL[0])
                    else:
                        delete_s_hashL.append(s_hash)

        for s_hash in delete_s_hashL:
            del self.default_policyD[s_hash]
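
For reference, the default policy built above steers each state hash (i, j, vx, vy) toward the maximum velocity of 4 on both axes, unless the legality check replaced or removed that entry. A usage sketch with a hypothetical RaceTrackSim class name:

# hypothetical class name; starting-line states have zero velocity, e.g. (x1, y1, 0, 0)
sim = RaceTrackSim()
s_hash = sim.start_state_hash
print(sim.default_policyD.get(s_hash))  # (4, 4) if the start state kept its default action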
Example #9
    def __init__(self,
                 name='Blocking Maze Sim',
                 step_reward=0.0,
                 width=9,
                 height=6,
                 goal=(0, 8),
                 start=(5, 3),
                 wall_row=3,
                 row_tickL=None,
                 col_tickL=None,
                 x_axis_label='',
                 y_axis_label=''):
        """
        A Black Box Interface to a Simulation
        Blocking Maze changes route to goal state as gates are opened and closed.
        Starts with Right Gate Open
        """
        self.step_reward = step_reward

        self.width = width
        self.height = height
        self.goal = goal
        self.start = start
        self.wall_row = wall_row  # far right open at time=0, far left is closed

        self.s_hash_gate_L = (wall_row, 0)
        self.s_hash_gate_R = (wall_row, width - 1)

        self.Lgate_is_open = False
        self.Rgate_is_open = True

        # -------- make layout template for states ---------
        s_hash_rowL = []  # layout rows for making 2D output
        for i in range(height):
            rowL = []
            for j in range(width):
                if i == wall_row:
                    if j == 0:
                        s_hash = 'Gate_L'
                    elif j == width - 1:
                        s_hash = 'Gate_R'
                    else:
                        s_hash = '"Wall"'
                else:
                    if (i, j) == self.goal:
                        s_hash = 'Goal'
                    elif (i, j) == self.start:
                        s_hash = 'Start'
                    else:
                        s_hash = (i, j)
                rowL.append(s_hash)
            # use insert to put (0,0) at lower left, append for upper left
            s_hash_rowL.append(rowL)  # layout rows for making 2D output

        # call parent object
        Simulation.__init__(self,
                            name=name,
                            s_hash_rowL=s_hash_rowL,
                            row_tickL=row_tickL,
                            col_tickL=col_tickL,
                            x_axis_label=x_axis_label,
                            y_axis_label=y_axis_label)

        # state hash of states with actions.
        self.action_state_set = set()  # a set of state hashes
        for s1 in range(height):
            for s2 in range(width):
                if s1 == wall_row:
                    if s2 == 0:
                        s_hash = 'Gate_L'
                        self.action_state_set.add(s_hash)
                    elif s2 == width - 1:
                        s_hash = 'Gate_R'
                        self.action_state_set.add(s_hash)
                else:
                    s_hash = (s1, s2)
                    if s_hash == self.start:
                        s_hash = 'Start'
                    elif s_hash == self.goal:
                        s_hash = 'Goal'

                    if s_hash != 'Goal':
                        self.action_state_set.add(s_hash)

        self.terminal_set = set(['Goal'])

        self.start_state_hash = 'Start'
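
Per the docstring, the maze starts with the right gate open. A sketch of how the gate flags defined above might be flipped to block the original route, assuming a hypothetical BlockingMazeSim class (the original gate-switching method is not shown in this snippet):

# hypothetical class name; Lgate_is_open / Rgate_is_open are the flags set in __init__
sim = BlockingMazeSim()
print(sim.Rgate_is_open, sim.Lgate_is_open)          # True False  (right gate open at time=0)
sim.Rgate_is_open, sim.Lgate_is_open = False, True   # close the right gate, open the left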