示例#1
0
 def s_hash_print(self, none_str='*'):
     """Print the layout's grid of state hashes via print_string_rows.

     Every entry of self.s_hash_rowL becomes one display string:

     * a legal state (per self.environment.is_legal_state) shows its entry
       in self.named_s_hashD when it has one, otherwise str(s_hash);
     * an entry that is not a legal state shows its text without the first
       and last character when is_literal_str() accepts it, and none_str
       in every other case.

     Returns the value handed back by print_string_rows, or an empty list
     when self.s_hash_rowL holds no rows (nothing is printed in that case).
     """

     def cell_text(s_hash):
         # Legal states: prefer the configured display name.
         if self.environment.is_legal_state(s_hash):
             if s_hash in self.named_s_hashD:
                 return self.named_s_hashD[s_hash]
             return str(s_hash)
         # Not a state: literal text loses its two delimiter characters.
         if is_literal_str(s_hash):
             return s_hash[1:-1]
         return none_str

     grid = [[cell_text(s_hash) for s_hash in row]
             for row in self.s_hash_rowL]

     if not grid:
         return []

     return print_string_rows(grid,
                              row_tickL=self.row_tickL, const_col_w=True,
                              col_tickL=self.col_tickL,
                              header=self.environment.name,
                              y_axis_label=self.y_axis_label,
                              x_axis_label='State-Hash')
示例#2
0
 def summ_print(self, fmt_V='%g', none_str='*', show_states=True):
     """Print the state values stored in self.VsD.

     When self.environment.layout is set, the values are printed as a grid
     that follows layout.s_hash_rowL (optionally preceded by the grid of
     state hashes).  Without a layout, one line per state is printed in
     sorted state-hash order.

     fmt_V       -- printf-style format applied to each value.
     none_str    -- text shown for grid cells that are not in
                    self.environment.SC.stateD; also forwarded to
                    layout.s_hash_print.
     show_states -- when true (and a layout exists) print the state-hash
                    grid first.

     Returns None; all output goes to stdout.
     """
     print()
     print('___ "%s" State-Value Summary ___'%self.environment.name  )

     layout = self.environment.layout
     if layout is not None:
         # make summ_print using environment.layout
         if show_states:
             # Forward the caller's placeholder rather than a fixed '*'.
             layout.s_hash_print( none_str=none_str )

         rows_outL = []
         for row in layout.s_hash_rowL:
             outL = []
             for s_hash in row:
                 if s_hash not in self.environment.SC.stateD:
                     outL.append( none_str )
                 else:
                     outL.append( fmt_V%self.VsD[ s_hash ] )
             rows_outL.append( outL )

         print_string_rows( rows_outL, row_tickL=layout.row_tickL,
                            col_tickL=layout.col_tickL,
                            const_col_w=True,
                            line_chr='_', left_pad='    ',
                            header=self.environment.name + ' State-Value Summary, V(s)',
                            y_axis_label=layout.y_axis_label,
                            x_axis_label=layout.x_axis_label, justify='right')
         return

     # ------------------------- simple output w/o a layout ------------
     try:
         itemL = sorted( self.VsD.items() )
     except TypeError:
         # State hashes of different types (e.g. tuples and strings) cannot
         # be ordered against each other; fall back to their string form.
         itemL = sorted( self.VsD.items(), key=lambda item: str(item[0]) )

     # Hash column is at least 6 characters wide.
     lmax_hash = max( [6] + [len(str(s_hash)) for s_hash, _ in itemL] )
     fmt_hash = '%%%is'%lmax_hash

     for s_hash, V in itemL:
         print('    ', fmt_hash%str(s_hash), fmt_V%V )
示例#3
0
 def param_print(self, paramD,
                 row_tickL=None, const_col_w=True,
                 col_tickL=None,
                 header='',
                 x_axis_label='', y_axis_label='',
                 none_str='*'):
     """Print per-state parameter values arranged by the layout grid.

     paramD maps s_hash -> value; each value is shown as str(value).

     A cell shows its paramD value only when the hash is in paramD and is
     a legal state of self.environment.  Any other cell shows, in order of
     preference: the literal text (first and last character removed) when
     is_literal_str() accepts it, its entry in self.named_s_hashD, or
     none_str.

     row_tickL / col_tickL default to the layout's own ticks when None;
     empty axis labels default to the layout's own labels.

     Returns the value handed back by print_string_rows, or an empty list
     when self.s_hash_rowL holds no rows.
     """
     grid = []
     for row in self.s_hash_rowL:
         cells = []
         for s_hash in row:
             has_value = (s_hash in paramD) and self.environment.is_legal_state(s_hash)
             if has_value:
                 text = str(paramD[s_hash])
             elif is_literal_str(s_hash):
                 text = s_hash[1:-1]
             elif s_hash in self.named_s_hashD:
                 text = self.named_s_hashD[s_hash]
             else:
                 text = none_str
             cells.append(text)
         grid.append(cells)

     # Fall back to the layout's own ticks and labels where none were given.
     row_tickL = self.row_tickL if row_tickL is None else row_tickL
     col_tickL = self.col_tickL if col_tickL is None else col_tickL
     x_axis_label = x_axis_label or self.x_axis_label
     y_axis_label = y_axis_label or self.y_axis_label

     if not grid:
         return []

     return print_string_rows(grid, row_tickL=row_tickL,
                              const_col_w=const_col_w,
                              col_tickL=col_tickL,
                              header=header,
                              x_axis_label=x_axis_label,
                              y_axis_label=y_axis_label)
示例#4
0
    def summ_print(self,
                   fmt_V='%g',
                   none_str='*',
                   show_states=True,
                   show_last_change=True,
                   show_policy=True):
        """Print the state values in self.VsD and their last recorded change.

        With a layout (self.environment.layout is not None) the values are
        printed as a grid following layout.s_hash_rowL, optionally preceded
        by the state-hash grid and followed by a grid of the last change to
        each value (from self.last_delta_VsD) and by the summary of
        self.get_policy().  Without a layout, one line per state is printed
        in sorted state-hash order.

        fmt_V            -- printf-style format for values and deltas.
        none_str         -- text for grid cells that are neither legal
                            states nor literal strings; also forwarded to
                            layout.s_hash_print.
        show_states      -- print the state-hash grid first (layout only).
        show_last_change -- also show the last change of each value.
        show_policy      -- also print the policy summary (layout only).

        Returns None; all output goes to stdout.
        """
        print()
        print('___ "%s" Alpha-Based State-Value Summary ___' %
              self.environment.name)

        layout = self.environment.layout
        if layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # Forward the caller's placeholder rather than a fixed '*'.
                layout.s_hash_print(none_str=none_str)

            row_tickL = layout.row_tickL
            col_tickL = layout.col_tickL
            x_axis_label = layout.x_axis_label
            y_axis_label = layout.y_axis_label

            rows_outL = []
            last_delta_rows_outL = []  # only printed if show_last_change
            for row in layout.s_hash_rowL:
                outL = []
                # Starts empty so the last-change grid has exactly one cell
                # per layout column, like the value grid above it.
                ld_outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state(s_hash):
                        if is_literal_str(s_hash):
                            filler = s_hash[1:-1]
                        else:
                            filler = none_str
                        outL.append(filler)
                        ld_outL.append(filler)
                        continue

                    outL.append(fmt_V % self.VsD[s_hash])
                    delta = self.last_delta_VsD.get(s_hash, None)
                    if delta is None:
                        ld_outL.append('None')
                    else:
                        ld_outL.append(fmt_V % delta)

                rows_outL.append(outL)
                last_delta_rows_outL.append(ld_outL)

            print_string_rows(rows_outL,
                              row_tickL=row_tickL,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              col_tickL=col_tickL,
                              header=self.environment.name +
                              ' State-Value Summary, V(s)',
                              x_axis_label=x_axis_label,
                              y_axis_label=y_axis_label,
                              justify='right')
            if show_last_change:
                print_string_rows(last_delta_rows_outL,
                                  row_tickL=row_tickL,
                                  const_col_w=True,
                                  line_chr='_',
                                  left_pad='    ',
                                  col_tickL=col_tickL,
                                  header=self.environment.name +
                                  ' Last Change to V(s) Summary',
                                  x_axis_label=x_axis_label,
                                  y_axis_label=y_axis_label,
                                  justify='right')

            if show_policy:
                policy = self.get_policy()
                policy.summ_print(verbosity=0, environment=self.environment)
            return

        # ------------------------- simple output w/o a layout ------------
        try:
            itemL = sorted(self.VsD.items())
        except TypeError:
            # State hashes of different types (e.g. tuples and strings)
            # cannot be ordered against each other; use their string form.
            itemL = sorted(self.VsD.items(), key=lambda item: str(item[0]))

        # Both columns are at least 6 characters wide.
        lmax_hash = max([6] + [len(str(s_hash)) for s_hash, _ in itemL])
        lmax_V = max([6] + [len(fmt_V % V) for _, V in itemL])
        fmt_hash = '%%%is' % lmax_hash
        fmt_strV = '%%%is' % lmax_V

        for s_hash, V in itemL:
            print('    ', fmt_hash % str(s_hash), fmt_strV % (fmt_V % V),
                  end='')
            if show_last_change:
                print(' Last Delta = %s' %
                      self.last_delta_VsD.get(s_hash, None))
            else:
                print()
示例#5
0
 def summ_print(self, verbosity=2, environment=None,
                show_env_states=True, none_str='*'): # pragma: no cover
     """Show State objects in sorted state_hash order.

     verbosity       -- 2: print each state's action summary plus 16 sample
                        actions from get_single_action; 1: print one
                        "state action" line per state (with the candidate
                        actions when there are several); any other value
                        prints only the header lines.
     environment     -- when given and it has a layout, the policy is also
                        printed as a grid following layout.s_hash_rowL.
     show_env_states -- print the layout's state-hash grid before the
                        policy grid.
     none_str        -- text for grid cells that are neither legal states
                        nor literal strings; also forwarded to
                        layout.s_hash_print.

     Returns None; all output goes to stdout.
     """
     print('___ Policy Summary ___' )
     print('    Nstate-actions=%i'%len(self.state_actionsD) )

     if verbosity in (1, 2):
         # The sorted listing is only needed by the two verbose modes.
         sL = sorted( [(S.hash,S) for S in self.state_actionsD.keys()], key=NaturalOrStrKey )

         if verbosity==2:
             for s_hash,S in sL:
                 SA = self.state_actionsD[ S ]
                 SA.summ_print()
                 exL = [str(self.get_single_action(S.hash)) for _ in range(16) ]
                 print('        ex. actions:', ' '.join(exL))
         else:
             print('        State Action')
             for s_hash,S in sL:
                 SA = self.state_actionsD[ S ]

                 # force a single action
                 a_desc = self.get_single_action(S.hash)

                 print('%13s'%str(s_hash),' %s'%a_desc, end=' ')
                 if len(SA)>1:
                     optL = sorted( [ A.desc for A in SA.action_probD ], key=NaturalOrStrKey )
                     print('from:',', '.join(optL))
                 else:
                     print()

     if (environment is None) or (environment.layout is None):
         return

     layout = environment.layout

     # make summ_print using environment.layout
     if show_env_states:
         # Forward the caller's placeholder rather than a fixed '*'.
         layout.s_hash_print( none_str=none_str )

     rows_outL = []
     for row in layout.s_hash_rowL:
         outL = []
         for s_hash in row:
             if not environment.is_legal_state( s_hash ):
                 if is_literal_str( s_hash ):
                     outL.append( s_hash[1:-1] )
                 else:
                     outL.append( none_str )
             elif self.get_single_action(s_hash) is None:
                 # Legal state without an action: fixed marker text.
                 outL.append( '  *' )
             else:
                 outL.append( self.get_state_summ_str( s_hash, verbosity=verbosity ) )

         rows_outL.append( outL )

     # An empty layout label is replaced by the generic 'Actions'.
     x_axis_label = layout.x_axis_label or 'Actions'

     print_string_rows( rows_outL, row_tickL=layout.row_tickL, const_col_w=True,
                        line_chr='_', left_pad='    ',
                        header=environment.name + ' Policy Summary',
                        x_axis_label=x_axis_label, justify='right',
                        col_tickL=layout.col_tickL, y_axis_label=layout.y_axis_label)
示例#6
0
    def summ_print(self, fmt_Q='%.3f', none_str='*', show_states=True,
                   show_last_change=True, show_policy=True):
        """Print the action-value estimates Q(s,a) and their last changes.

        With a layout (self.environment.layout is not None) every legal
        state becomes a multi-line grid cell: the state hash followed by one
        "action=value" line per legal action, values taken from
        self.QsaEst.  A second grid can show each action's last change as an
        integer percentage of self.get_max_last_delta_overall(), and the
        summary of self.get_policy() can follow.  Without a layout, one
        sorted "(state, action)=value" line is printed per pair.

        fmt_Q            -- printf-style format for Q values.
        none_str         -- text for grid cells that are neither legal
                            states nor literal strings; also forwarded to
                            layout.s_hash_print.
        show_states      -- print the state-hash grid first (layout only).
        show_last_change -- also show the last change per (state, action).
        show_policy      -- also print the policy summary (layout only).

        Returns None; all output goes to stdout.
        """
        print()
        print('___ "%s" Action-Value Summary ___'%self.environment.name  )

        layout = self.environment.layout
        if layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # Forward the caller's placeholder rather than a fixed '*'.
                layout.s_hash_print( none_str=none_str )

            row_tickL = layout.row_tickL
            col_tickL = layout.col_tickL
            x_axis_label = layout.x_axis_label
            y_axis_label = layout.y_axis_label

            d_max = self.get_max_last_delta_overall()
            if d_max==0.0:
                d_max = 1.0E-10  # avoid dividing by zero below

            rows_outL = []
            last_delta_rows_outL = [] # only printed if show_last_change
            for row in layout.s_hash_rowL:
                outL = []
                ld_outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state( s_hash ):
                        if is_literal_str( s_hash ):
                            filler = s_hash[1:-1]
                        else:
                            filler = none_str
                        outL.append( filler )
                        ld_outL.append( filler )
                        continue

                    # s_hash is a legal state hash
                    sL = [str(s_hash)]
                    ld_sL = [str(s_hash)]
                    for a_desc in self.environment.get_state_legal_action_list( s_hash ):
                        s = fmt_Q%self.QsaEst( s_hash, a_desc )
                        sL.append( '%s='%str(a_desc) + s.strip()  )
                        try:
                            d_val = int(100.0*self.last_delta_QsaD[s_hash].get( a_desc )/d_max)
                        except (KeyError, TypeError, ValueError, OverflowError):
                            # No delta recorded for this state/action (missing
                            # key or None), or a delta that cannot be turned
                            # into an integer percentage (nan/inf).
                            ld_sL.append( '%s=None'%str(a_desc) )
                            continue

                        if d_val > 0:
                            ld_sL.append( '%s='%str(a_desc) + '%i%%'%d_val )
                        else:
                            ld_sL.append( '%s~0'%str(a_desc) )

                    outL.append(  '\n'.join(sL).strip()  )
                    ld_outL.append(  '\n'.join(ld_sL).strip()  )
                rows_outL.append( outL )
                last_delta_rows_outL.append( ld_outL )

            print_string_rows( rows_outL, row_tickL=row_tickL, const_col_w=True,
                               line_chr='_', left_pad='    ', col_tickL=col_tickL,
                               header=self.environment.name + ' Action-Value Summary, Q(s,a)',
                               x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                               justify='right')

            if show_last_change:
                print_string_rows( last_delta_rows_outL, row_tickL=row_tickL, const_col_w=True,
                                   line_chr='_', left_pad='    ', col_tickL=col_tickL,
                                   header=self.environment.name + ' Last %% of Max Change to Q(s,a) Summary, (max change=%g)'%d_max,
                                   x_axis_label=x_axis_label, y_axis_label=y_axis_label,
                                   justify='right')

            if show_policy:
                policy = self.get_policy()
                policy.summ_print(verbosity=0, environment=self.environment)
            return

        # ------------------------- simple output w/o a layout ------------
        outL = [] # list of strings "(s_hash, a_desc)=Q"
        for s_hash in self.environment.iter_all_states():
            for a_desc in self.environment.get_state_legal_action_list( s_hash ):
                q = fmt_Q%self.QsaEst( s_hash, a_desc )
                s = '(%s, %s)='%(str(s_hash),str(a_desc)) + q.strip()
                if show_last_change:
                    try:
                        last_delta = self.last_delta_QsaD[s_hash].get( a_desc, None)
                    except KeyError:
                        # State without any recorded change: report None,
                        # as the layout branch does.
                        last_delta = None
                    s = s + ' Last Delta = %s'%last_delta
                outL.append( s )

        outL.sort() # sort in-place
        for s in outL:
            print('    ', s )
    def summ_print(self,
                   fmt_Q='%g',
                   none_str='*',
                   show_states=True,
                   showRunningAve=True):
        """Print the running-average action values held in self.Qsa_RaveD.

        self.Qsa_RaveD is read as s_hash -> {a_desc: obj}, where each obj
        provides get_ave().  With a layout (self.environment.layout is not
        None) every legal state becomes a multi-line grid cell: the state
        hash followed by one "action=value" line per entry.  Without a
        layout, one sorted "(state, action)=value" line is printed per pair.

        fmt_Q          -- printf-style format for each average.
        none_str       -- text for grid cells that are neither legal states
                          nor literal strings; also forwarded to
                          layout.s_hash_print.
        show_states    -- print the state-hash grid first (layout only).
        showRunningAve -- accepted but not used by this method.

        Returns None; all output goes to stdout.
        """
        print()
        print('___ "%s" Action-Value Summary ___' % self.environment.name)

        layout = self.environment.layout
        if layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # Forward the caller's placeholder rather than a fixed '*'.
                layout.s_hash_print(none_str=none_str)

            rows_outL = []
            for row in layout.s_hash_rowL:
                outL = []
                for s_hash in row:
                    if not self.environment.is_legal_state(s_hash):
                        if is_literal_str(s_hash):
                            outL.append(s_hash[1:-1])
                        else:
                            outL.append(none_str)
                        continue

                    sL = [str(s_hash)]
                    for a_desc, Q in self.Qsa_RaveD[s_hash].items():
                        s = fmt_Q % Q.get_ave()
                        sL.append('%s=' % str(a_desc) + s.strip())
                    outL.append('\n'.join(sL).strip())
                rows_outL.append(outL)

            print_string_rows(rows_outL,
                              row_tickL=layout.row_tickL,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              col_tickL=layout.col_tickL,
                              header=self.environment.name +
                              ' Action-Value Summary, Q(s,a)',
                              x_axis_label=layout.x_axis_label,
                              y_axis_label=layout.y_axis_label,
                              justify='right')
            return

        # ------------------------- simple output w/o a layout ------------
        outL = []  # list of strings "(s_hash, a_desc)=Q"
        for s_hash, aD in self.Qsa_RaveD.items():
            for a_desc, Q in aD.items():
                q = fmt_Q % Q.get_ave()
                outL.append('(%s, %s)=' % (str(s_hash), str(a_desc)) + q.strip())

        # Sort once, after every line has been collected.
        outL.sort()
        for s in outL:
            print('    ', s)
    def summ_print(self,
                   fmt_V='%g',
                   none_str='*',
                   show_states=True,
                   showRunningAve=True):
        """Print the running-average state values held in self.Vs_RaveD.

        self.Vs_RaveD is read as s_hash -> obj, where each obj provides
        get_ave() and summ_print().  With a layout
        (self.environment.layout is not None) the averages are printed as a
        grid following layout.s_hash_rowL.  Without a layout, one line per
        state is printed in sorted state-hash order.

        fmt_V          -- printf-style format for each average.
        none_str       -- text for grid cells that are neither legal states
                          nor literal strings; also forwarded to
                          layout.s_hash_print.
        show_states    -- print the state-hash grid first (layout only).
        showRunningAve -- when true, finish by calling summ_print() on
                          every object in self.Vs_RaveD.

        Returns None; all output goes to stdout.
        """
        print()
        print('___ "%s" State-Value Summary ___' % self.environment.name)

        layout = self.environment.layout
        if layout is not None:
            # make summ_print using environment.layout
            if show_states:
                # Forward the caller's placeholder rather than a fixed '*'.
                layout.s_hash_print(none_str=none_str)

            rows_outL = []
            for row in layout.s_hash_rowL:
                outL = []
                for s_hash in row:
                    if self.environment.is_legal_state(s_hash):
                        outL.append(fmt_V % self.Vs_RaveD[s_hash].get_ave())
                    elif is_literal_str(s_hash):
                        outL.append(s_hash[1:-1])
                    else:
                        outL.append(none_str)
                rows_outL.append(outL)

            print_string_rows(rows_outL,
                              row_tickL=layout.row_tickL,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              col_tickL=layout.col_tickL,
                              header=self.environment.name +
                              ' State-Value Summary, V(s)',
                              x_axis_label=layout.x_axis_label,
                              y_axis_label=layout.y_axis_label,
                              justify='right')

        # ------------------------- simple output w/o a layout ------------
        else:
            try:
                s_hashL = sorted(self.Vs_RaveD)
            except TypeError:
                # State hashes of different types (e.g. tuples and strings)
                # cannot be ordered against each other; use string form.
                s_hashL = sorted(self.Vs_RaveD, key=str)

            # Hash column is at least 6 characters wide.
            lmax_hash = max([6] + [len(str(s_hash)) for s_hash in s_hashL])
            fmt_hash = '%%%is' % lmax_hash

            for s_hash in s_hashL:
                # Format the average itself, as the layout branch does; the
                # stored object is not a number.
                print('    ', fmt_hash % str(s_hash),
                      fmt_V % self.Vs_RaveD[s_hash].get_ave())

        if showRunningAve:
            # index=state_hash, value=running-average object
            for RA in self.Vs_RaveD.values():
                RA.summ_print()
示例#9
0
# ------------------------

# Usable-ace policy table: one row per player sum (21 down to 12), one
# column per dealer card (1..10), read from bj_policyD keyed by
# (player_sum, has_usable_ace, dealer_showing).
usable_aceL = []
for player_sum in range(21, 11, -1):
    rowL = []
    for dealer_showing in range(1, 11):
        rowL.append(bj_policyD[(player_sum, True, dealer_showing)])
    usable_aceL.append(rowL)

# Row ticks for the usable-ace table, built from the same range as its rows
# so there is exactly one tick per row.
usable_row_tickL = [i for i in range(21, 11, -1)]

# NOTE(review): the 21..11 tick list is still defined because code past this
# excerpt may use it for the 11-row no-usable-ace table -- confirm.
row_tickL = [i for i in range(21, 10, -1)]

print_string_rows(usable_aceL,
                  row_tickL=usable_row_tickL,
                  const_col_w=True,
                  line_chr='=',
                  left_pad='',
                  header='Usable Ace Policy',
                  x_axis_label='A  2   3   4   5   6   7   8   9  10 ',
                  justify='left')  # left, right, center
print('              Dealer Showing')
print()

# ------------------------

# No-usable-ace policy table: one row per player sum (21 down to 11), one
# column per dealer card (1..10).
no_usable_aceL = []
for player_sum in range(21, 10, -1):
    rowL = []
    for dealer_showing in range(1, 11):
        rowL.append(bj_policyD[(player_sum, False, dealer_showing)])
    no_usable_aceL.append(rowL)
示例#10
0
    def layout_print(self,
                     vname='reward',
                     fmt='',
                     show_env_states=True,
                     none_str='*'):
        """Print the per-state quantity named vname on the layout grid.

        Only vname == 'reward' is recognized; its values and per-state
        message suffixes come from self.get_estimated_rewards().  Any other
        vname produces a grid with no values.

        fmt             -- printf-style format for each value; str() is
                           used when it is empty.
        show_env_states -- print the layout's state-hash grid first.
        none_str        -- text for cells without a value.

        Prints an error line and returns when self.layout is None.
        Returns None in every case.
        """
        if self.layout is None:
            print(
                '...ERROR... "%s" tried to layout_print w/o a defined layout' %
                self.name)
            return

        if show_env_states:
            self.layout.s_hash_print(none_str=none_str)

        # index=s_hash: value to show, and optional text appended to a cell
        if vname == 'reward':
            valD, msgD = self.get_estimated_rewards()
        else:
            valD, msgD = {}, {}

        x_axis_label = self.layout.x_axis_label
        y_axis_label = self.layout.y_axis_label
        row_tickL = self.layout.row_tickL
        col_tickL = self.layout.col_tickL

        def cell_text(s_hash):
            # Known states show their value (or the placeholder); anything
            # else shows literal text or the placeholder.
            if s_hash in self.SC.stateD:
                val = valD.get(s_hash, None)
                if val is None:
                    text = none_str
                elif fmt:
                    text = fmt % val
                else:
                    text = str(val)
            elif is_literal_str(s_hash):
                text = s_hash[1:-1]
            else:
                text = none_str

            # A non-empty message for this hash is appended to the cell.
            suffix = msgD.get(s_hash, '')
            if suffix:
                text = text + suffix
            return text

        grid = [[cell_text(s_hash) for s_hash in row]
                for row in self.layout.s_hash_rowL]

        if grid:
            print_string_rows(grid,
                              const_col_w=True,
                              line_chr='_',
                              left_pad='    ',
                              y_axis_label=y_axis_label,
                              row_tickL=row_tickL,
                              col_tickL=col_tickL,
                              header=self.name + ' %s Summary' % vname.title(),
                              x_axis_label=x_axis_label,
                              justify='right')
示例#11
0
def epi_summ_print(episode,
                   policy,
                   environment,
                   show_rewards=False,
                   show_env_states=True,
                   none_str='*'):
    """Print the environment layout with the steps of an episode overlaid.

    Each (s_hash, a_desc, reward, sn_hash) step from
    episode.iter_all_sars() adds a "[N->action]" entry (with the reward
    appended when show_rewards is true) to the cell of its state, N being
    the 1-based step number.  The next-state hash of the final step gets a
    "T->hash" entry.  Only hashes returned by
    environment.get_all_action_state_hashes() are drawn; every other cell,
    and every cell that was never visited, shows none_str.

    policy          -- accepted but not used by this function.
    show_env_states -- print the layout's state-hash grid first.

    Prints an error line and returns when environment.layout is None.
    An episode without steps prints a grid of none_str cells.
    Returns None in every case.
    """
    layout = environment.layout
    if layout is None:
        print('...ERROR... "%s" tried to layout_print w/o a defined layout' %
              environment.name)
        return

    if show_env_states:
        layout.s_hash_print(none_str=none_str)

    # index=s_hash, value=list of "[N->action]" or "[N->action reward]" text
    state_visitD = {}

    has_steps = False
    sn_hash = None
    for Nvis, (s_hash, a_desc, reward, sn_hash) in enumerate(
            episode.iter_all_sars(), start=1):
        has_steps = True
        if show_rewards:
            entry = '[%i->%s %g]' % (Nvis, str(a_desc), reward)
        else:
            entry = '[%i->%s]' % (Nvis, str(a_desc))
        state_visitD.setdefault(s_hash, []).append(entry)

    # Mark the state the last step led to; an episode without steps has no
    # such state, so nothing is marked.
    if has_steps:
        state_visitD.setdefault(sn_hash, []).append('T->' + str(sn_hash))

    # start setting up output grid
    x_axis_label = layout.x_axis_label
    y_axis_label = layout.y_axis_label
    row_tickL = layout.row_tickL
    col_tickL = layout.col_tickL

    state_hash_set = set(environment.get_all_action_state_hashes())

    rows_outL = []
    for row in layout.s_hash_rowL:
        outL = []
        for s_hash in row:
            val = state_visitD.get(s_hash, None)
            if (s_hash not in state_hash_set) or (val is None):
                outL.append(none_str)
            else:
                outL.append('\n'.join(val))
        rows_outL.append(outL)

    if rows_outL:
        print_string_rows(rows_outL,
                          const_col_w=True,
                          line_chr='_',
                          left_pad='    ',
                          y_axis_label=y_axis_label,
                          row_tickL=row_tickL,
                          col_tickL=col_tickL,
                          header=environment.name + ' Episode Summary',
                          x_axis_label=x_axis_label,
                          justify='right')