def s_hash_print(self, none_str='*'):
    """Print the layout grid with one cell per entry of ``self.s_hash_rowL``.

    Cell text is chosen as follows:

    * legal state (per ``self.environment.is_legal_state``): its entry in
      ``self.named_s_hashD`` when present, otherwise ``str(s_hash)``;
    * not a legal state, but accepted by ``is_literal_str``: the entry with
      its first and last characters removed;
    * anything else: ``none_str``.

    Returns:
        The value returned by ``print_string_rows`` (described by the original
        author as the number of characters in each row), or ``[]`` when the
        layout has no rows and nothing is printed.
    """
    def cell_text(s_hash):
        # Legal states win; literals and placeholders only fill the gaps.
        if self.environment.is_legal_state(s_hash):
            if s_hash in self.named_s_hashD:
                return self.named_s_hashD[s_hash]
            return str(s_hash)
        if is_literal_str(s_hash):
            return s_hash[1:-1]
        return none_str

    grid = [[cell_text(s_hash) for s_hash in row] for row in self.s_hash_rowL]
    if not grid:
        return []

    return print_string_rows(
        grid,
        row_tickL=self.row_tickL,
        const_col_w=True,
        col_tickL=self.col_tickL,
        header=self.environment.name,
        y_axis_label=self.y_axis_label,
        x_axis_label='State-Hash')
def summ_print(self, fmt_V='%g', none_str='*', show_states=True):
    """Print a summary of the state values stored in ``self.VsD``.

    With an environment layout, the values are printed as a grid through
    ``print_string_rows``; layout cells that are not keys of
    ``self.environment.SC.stateD`` show ``none_str``.  Without a layout, one
    "state-hash value" line is printed per entry of ``self.VsD``.

    Args:
        fmt_V: printf-style format applied to each value.
        none_str: placeholder for cells that are not known states.  It is
            also forwarded to the layout's ``s_hash_print`` so both grids
            use the same placeholder.
        show_states: when true (and a layout exists) print the state-hash
            grid before the value grid.
    """
    print()
    print('___ "%s" State-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str=none_str)

        rows_outL = []
        for row in layout.s_hash_rowL:
            rows_outL.append([
                fmt_V % self.VsD[s_hash]
                if s_hash in self.environment.SC.stateD else none_str
                for s_hash in row
            ])

        print_string_rows(
            rows_outL,
            row_tickL=layout.row_tickL,
            col_tickL=layout.col_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            header=self.environment.name + ' State-Value Summary, V(s)',
            y_axis_label=layout.y_axis_label,
            x_axis_label=layout.x_axis_label,
            justify='right')
        return

    # ------------------------- simple output w/o a layout ------------
    outL = list(self.VsD.items())  # list of tuples = (s_hash, V)
    lmax_hash = 6  # minimum width of the state-hash column
    for s_hash, _ in outL:
        lmax_hash = max(lmax_hash, len(str(s_hash)))
    fmt_hash = '%' + '%is' % lmax_hash

    try:
        outL.sort()
    except TypeError:
        # State hashes of mixed types (e.g. tuples and strings) cannot be
        # compared directly; fall back to ordering by their text form.
        outL.sort(key=lambda pair: str(pair[0]))

    for s_hash, V in outL:
        print(' ', fmt_hash % str(s_hash), fmt_V % V)
def param_print(self, paramD, row_tickL=None, const_col_w=True, col_tickL=None,
                header='', x_axis_label='', y_axis_label='', none_str='*'):
    """Print the values of ``paramD`` arranged on the layout grid.

    ``paramD`` is indexed by state hash; each value is shown via ``str()``.

    Cell text is chosen as follows:

    * hash present in ``paramD`` and a legal state: ``str(paramD[s_hash])``;
    * otherwise, accepted by ``is_literal_str``: the entry with its first
      and last characters removed;
    * otherwise, present in ``self.named_s_hashD``: that name;
    * anything else: ``none_str``.

    Tick lists left as ``None`` and axis labels left empty fall back to the
    layout's own attributes.

    Returns:
        The value returned by ``print_string_rows`` (described by the original
        author as the number of characters in each row), or ``[]`` when the
        layout has no rows and nothing is printed.
    """
    def cell_text(s_hash):
        if (s_hash in paramD) and self.environment.is_legal_state(s_hash):
            return str(paramD[s_hash])
        if is_literal_str(s_hash):
            return s_hash[1:-1]
        if s_hash in self.named_s_hashD:
            return self.named_s_hashD[s_hash]
        return none_str

    grid = [[cell_text(s_hash) for s_hash in row] for row in self.s_hash_rowL]

    # Fall back to the layout's own ticks / labels where the caller gave none.
    row_tickL = self.row_tickL if row_tickL is None else row_tickL
    col_tickL = self.col_tickL if col_tickL is None else col_tickL
    x_axis_label = x_axis_label or self.x_axis_label
    y_axis_label = y_axis_label or self.y_axis_label

    if not grid:
        return []

    return print_string_rows(
        grid,
        row_tickL=row_tickL,
        const_col_w=const_col_w,
        col_tickL=col_tickL,
        header=header,
        x_axis_label=x_axis_label,
        y_axis_label=y_axis_label)
def summ_print(self, fmt_V='%g', none_str='*', show_states=True,
               show_last_change=True, show_policy=True):
    """Print a summary of the alpha-based state values in ``self.VsD``.

    With an environment layout, up to three grids are printed: the optional
    state-hash grid, the V(s) grid, and (optionally) the grid of the last
    change recorded for each state in ``self.last_delta_VsD``, followed by
    an optional policy summary.  Without a layout, one line per entry of
    ``self.VsD`` is printed.

    Args:
        fmt_V: printf-style format applied to values and deltas.
        none_str: placeholder for cells that are not legal states.  It is
            also forwarded to the layout's ``s_hash_print`` so every grid
            uses the same placeholder.
        show_states: print the state-hash grid first (layout mode only).
        show_last_change: also show the last change to each V(s).
        show_policy: print the summary of ``self.get_policy()`` (layout
            mode only).
    """
    print()
    print('___ "%s" Alpha-Based State-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str=none_str)

        row_tickL = layout.row_tickL
        col_tickL = layout.col_tickL
        x_axis_label = layout.x_axis_label
        y_axis_label = layout.y_axis_label

        rows_outL = []
        last_delta_rows_outL = []  # only printed if show_last_change
        for row in layout.s_hash_rowL:
            outL = []
            # The delta row starts empty so that it has exactly one cell per
            # layout column, matching the value row and the column ticks.
            ld_outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        filler = s_hash[1:-1]
                    else:
                        filler = none_str
                    outL.append(filler)
                    ld_outL.append(filler)
                else:
                    outL.append(fmt_V % self.VsD[s_hash])
                    delta = self.last_delta_VsD.get(s_hash, None)
                    ld_outL.append('None' if delta is None else fmt_V % delta)
            rows_outL.append(outL)
            last_delta_rows_outL.append(ld_outL)

        print_string_rows(
            rows_outL,
            row_tickL=row_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            col_tickL=col_tickL,
            header=self.environment.name + ' State-Value Summary, V(s)',
            x_axis_label=x_axis_label,
            y_axis_label=y_axis_label,
            justify='right')

        if show_last_change:
            print_string_rows(
                last_delta_rows_outL,
                row_tickL=row_tickL,
                const_col_w=True,
                line_chr='_',
                left_pad=' ',
                col_tickL=col_tickL,
                header=self.environment.name + ' Last Change to V(s) Summary',
                x_axis_label=x_axis_label,
                y_axis_label=y_axis_label,
                justify='right')

        if show_policy:
            policy = self.get_policy()
            policy.summ_print(verbosity=0, environment=self.environment)
        return

    # ------------------------- simple output w/o a layout ------------
    outL = list(self.VsD.items())  # list of tuples = (s_hash, V)
    lmax_hash = 6  # minimum width of the state-hash column
    lmax_V = 6     # minimum width of the value column
    for s_hash, V in outL:
        lmax_hash = max(lmax_hash, len(str(s_hash)))
        lmax_V = max(lmax_V, len(fmt_V % V))
    fmt_hash = '%' + '%is' % lmax_hash
    fmt_strV = '%' + '%is' % lmax_V

    try:
        outL.sort()
    except TypeError:
        # State hashes of mixed types (e.g. tuples and strings) cannot be
        # compared directly; fall back to ordering by their text form.
        outL.sort(key=lambda pair: str(pair[0]))

    for s_hash, V in outL:
        print(' ', fmt_hash % str(s_hash), fmt_strV % (fmt_V % V), end='')
        if show_last_change:
            print(' Last Delta = %s' % self.last_delta_VsD.get(s_hash, None))
        else:
            print()
def summ_print(self, verbosity=2, environment=None, show_env_states=True, none_str='*'): # pragma: no cover
    """Show State objects in sorted state_hash order.

    Args:
        verbosity: 2 prints each state's action summary plus 16 sampled
            actions; 1 prints a compact "state / action" table; any other
            value skips the per-state listing.
        environment: optional environment.  When it has a layout, the policy
            is additionally printed on the layout grid.
        show_env_states: print the layout's state-hash grid before the
            policy grid (layout mode only).
        none_str: placeholder for layout cells that are not legal states.
            It is also forwarded to the layout's ``s_hash_print`` so both
            grids use the same placeholder.
    """
    print('___ Policy Summary ___')
    print(' Nstate-actions=%i' % len(self.state_actionsD))

    sL = sorted([(S.hash, S) for S in self.state_actionsD], key=NaturalOrStrKey)

    if verbosity == 2:
        for _s_hash, S in sL:
            self.state_actionsD[S].summ_print()
            # Sample repeatedly so stochastic policies show their spread.
            exL = [str(self.get_single_action(S.hash)) for _ in range(16)]
            print(' ex. actions:', ' '.join(exL))

    elif verbosity == 1:
        print(' State Action')
        for s_hash, S in sL:
            SA = self.state_actionsD[S]
            # force a single action
            a_desc = self.get_single_action(S.hash)
            # str() first: a tuple-valued action description would otherwise
            # be unpacked by the % operator and raise TypeError.
            print('%13s' % str(s_hash), ' %s' % str(a_desc), end=' ')
            if len(SA) > 1:
                optL = sorted([A.desc for A, _prob in SA.action_probD.items()],
                              key=NaturalOrStrKey)
                # Descriptions are not guaranteed to be strings.
                print('from:', ', '.join(str(desc) for desc in optL))
            else:
                print()

    if (environment is not None) and (environment.layout is not None):
        # make summ_print using environment.layout
        layout = environment.layout
        if show_env_states:
            layout.s_hash_print(none_str=none_str)

        rows_outL = []
        for row in layout.s_hash_rowL:
            outL = []
            for s_hash in row:
                if not environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        outL.append(s_hash[1:-1])
                    else:
                        outL.append(none_str)
                elif self.get_single_action(s_hash) is None:
                    # Legal state for which the policy yields no action.
                    outL.append(' *')
                else:
                    outL.append(self.get_state_summ_str(s_hash, verbosity=verbosity))
            rows_outL.append(outL)

        # Default the x-axis label when the layout does not define one.
        x_axis_label = layout.x_axis_label if layout.x_axis_label else 'Actions'

        print_string_rows(
            rows_outL,
            row_tickL=layout.row_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            header=environment.name + ' Policy Summary',
            x_axis_label=x_axis_label,
            justify='right',
            col_tickL=layout.col_tickL,
            y_axis_label=layout.y_axis_label)
def summ_print(self, fmt_Q='%.3f', none_str='*', show_states=True,
               show_last_change=True, show_policy=True):
    """Print a summary of the action-value estimates Q(s,a).

    With an environment layout, each legal state's cell lists the state hash
    followed by one "action=value" line per legal action.  Optionally a
    second grid shows each action's last change as a percentage of the
    largest last change overall, and a policy summary follows.  Without a
    layout, one sorted "(state, action)=value" line is printed per pair.

    Args:
        fmt_Q: printf-style format applied to each Q(s,a) value.
        none_str: placeholder for cells that are not legal states.  It is
            also forwarded to the layout's ``s_hash_print`` so every grid
            uses the same placeholder.
        show_states: print the state-hash grid first (layout mode only).
        show_last_change: also show the last change to each Q(s,a).
        show_policy: print the summary of ``self.get_policy()`` (layout
            mode only).
    """
    print()
    print('___ "%s" Action-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str=none_str)

        row_tickL = layout.row_tickL
        col_tickL = layout.col_tickL
        x_axis_label = layout.x_axis_label
        y_axis_label = layout.y_axis_label

        d_max = self.get_max_last_delta_overall()
        if d_max == 0.0:
            d_max = 1.0E-10  # avoid dividing by zero below

        rows_outL = []
        last_delta_rows_outL = []  # only printed if show_last_change
        for row in layout.s_hash_rowL:
            outL = []
            ld_outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        filler = s_hash[1:-1]
                    else:
                        filler = none_str
                    outL.append(filler)
                    ld_outL.append(filler)
                    continue

                # s_hash is a legal state hash
                sL = [str(s_hash)]
                ld_sL = [str(s_hash)]
                for a_desc in self.environment.get_state_legal_action_list(s_hash):
                    q_text = fmt_Q % self.QsaEst(s_hash, a_desc)
                    sL.append('%s=' % str(a_desc) + q_text.strip())

                    try:
                        d_val = int(100.0 * self.last_delta_QsaD[s_hash].get(a_desc) / d_max)
                    except Exception:
                        # Best effort: a missing state/action record or a
                        # non-numeric delta is reported as "None".  Unlike a
                        # bare except, this lets KeyboardInterrupt and
                        # SystemExit propagate.
                        ld_sL.append('%s=None' % str(a_desc))
                    else:
                        if d_val > 0:
                            ld_sL.append('%s=' % str(a_desc) + '%i%%' % d_val)
                        else:
                            ld_sL.append('%s~0' % str(a_desc))

                outL.append('\n'.join(sL).strip())
                ld_outL.append('\n'.join(ld_sL).strip())
            rows_outL.append(outL)
            last_delta_rows_outL.append(ld_outL)

        print_string_rows(
            rows_outL,
            row_tickL=row_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            col_tickL=col_tickL,
            header=self.environment.name + ' Action-Value Summary, Q(s,a)',
            x_axis_label=x_axis_label,
            y_axis_label=y_axis_label,
            justify='right')

        if show_last_change:
            print_string_rows(
                last_delta_rows_outL,
                row_tickL=row_tickL,
                const_col_w=True,
                line_chr='_',
                left_pad=' ',
                col_tickL=col_tickL,
                header=self.environment.name + ' Last %% of Max Change to Q(s,a) Summary, (max change=%g)' % d_max,
                x_axis_label=x_axis_label,
                y_axis_label=y_axis_label,
                justify='right')

        if show_policy:
            policy = self.get_policy()
            policy.summ_print(verbosity=0, environment=self.environment)
        return

    # ------------------------- simple output w/o a layout ------------
    outL = []  # list of strings "(s_hash,a_desc)=Q"
    for s_hash in self.environment.iter_all_states():
        for a_desc in self.environment.get_state_legal_action_list(s_hash):
            q_text = fmt_Q % self.QsaEst(s_hash, a_desc)
            s = '(%s, %s)=' % (str(s_hash), str(a_desc)) + q_text.strip()
            if show_last_change:
                try:
                    delta = self.last_delta_QsaD[s_hash].get(a_desc, None)
                except KeyError:
                    # No change recorded for this state yet; the layout
                    # branch tolerates this too.
                    delta = None
                s = s + ' Last Delta = %s' % delta
            outL.append(s)

    outL.sort()  # sort in-place
    for s in outL:
        print(' ', s)
def summ_print(self, fmt_Q='%g', none_str='*', show_states=True, showRunningAve=True):
    """Print a summary of the running-average action values Q(s,a).

    Values come from ``self.Qsa_RaveD[s_hash][a_desc].get_ave()``.  With an
    environment layout, each legal state's cell lists the state hash
    followed by one "action=value" line per recorded action.  Without a
    layout, one sorted "(state, action)=value" line is printed per pair.

    Args:
        fmt_Q: printf-style format applied to each averaged value.
        none_str: placeholder for cells that are not legal states.  It is
            also forwarded to the layout's ``s_hash_print`` so both grids
            use the same placeholder.
        show_states: print the state-hash grid first (layout mode only).
        showRunningAve: accepted for interface compatibility; this method
            does not use it.
    """
    print()
    print('___ "%s" Action-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str=none_str)

        rows_outL = []
        for row in layout.s_hash_rowL:
            outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        outL.append(s_hash[1:-1])
                    else:
                        outL.append(none_str)
                else:
                    sL = [str(s_hash)]
                    for a_desc, Q in self.Qsa_RaveD[s_hash].items():
                        q_text = fmt_Q % Q.get_ave()
                        sL.append('%s=' % str(a_desc) + q_text.strip())
                    outL.append('\n'.join(sL).strip())
            rows_outL.append(outL)

        print_string_rows(
            rows_outL,
            row_tickL=layout.row_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            col_tickL=layout.col_tickL,
            header=self.environment.name + ' Action-Value Summary, Q(s,a)',
            x_axis_label=layout.x_axis_label,
            y_axis_label=layout.y_axis_label,
            justify='right')
        return

    # ------------------------- simple output w/o a layout ------------
    outL = []  # list of strings "(s_hash,a_desc)=Q"
    for s_hash, aD in self.Qsa_RaveD.items():
        for a_desc, Q in aD.items():
            q_text = fmt_Q % Q.get_ave()
            outL.append('(%s, %s)=' % (str(s_hash), str(a_desc)) + q_text.strip())

    outL.sort()  # sort in-place
    for s in outL:
        print(' ', s)
def summ_print(self, fmt_V='%g', none_str='*', show_states=True, showRunningAve=True):
    """Print a summary of the running-average state values V(s).

    Values come from ``self.Vs_RaveD[s_hash].get_ave()``.  With an
    environment layout the values are printed as a grid; without one, a
    sorted "state-hash value" line is printed per state.  In either case,
    when ``showRunningAve`` is true each running-average object's own
    ``summ_print`` is called afterwards.

    NOTE(review): the indentation of the ``showRunningAve`` block could not
    be recovered from the source this was reconstructed from; it is placed
    at function level here (applies with or without a layout) -- confirm.

    Args:
        fmt_V: printf-style format applied to each averaged value.
        none_str: placeholder for cells that are not legal states.  It is
            also forwarded to the layout's ``s_hash_print`` so both grids
            use the same placeholder.
        show_states: print the state-hash grid first (layout mode only).
        showRunningAve: also print every running-average object's summary.
    """
    print()
    print('___ "%s" State-Value Summary ___' % self.environment.name)

    layout = self.environment.layout
    if layout is not None:
        # make summ_print using environment.layout
        if show_states:
            layout.s_hash_print(none_str=none_str)

        rows_outL = []
        for row in layout.s_hash_rowL:
            outL = []
            for s_hash in row:
                if not self.environment.is_legal_state(s_hash):
                    if is_literal_str(s_hash):
                        outL.append(s_hash[1:-1])
                    else:
                        outL.append(none_str)
                else:
                    outL.append(fmt_V % self.Vs_RaveD[s_hash].get_ave())
            rows_outL.append(outL)

        print_string_rows(
            rows_outL,
            row_tickL=layout.row_tickL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            col_tickL=layout.col_tickL,
            header=self.environment.name + ' State-Value Summary, V(s)',
            x_axis_label=layout.x_axis_label,
            y_axis_label=layout.y_axis_label,
            justify='right')

    # ------------------------- simple output w/o a layout ------------
    else:
        outL = list(self.Vs_RaveD.items())  # list of tuples = (s_hash, RunningAve)
        lmax_hash = 6  # minimum width of the state-hash column
        for s_hash, _ in outL:
            lmax_hash = max(lmax_hash, len(str(s_hash)))
        fmt_hash = '%' + '%is' % lmax_hash

        try:
            outL.sort(key=lambda pair: pair[0])
        except TypeError:
            # State hashes of mixed types cannot be compared directly; fall
            # back to ordering by their text form.
            outL.sort(key=lambda pair: str(pair[0]))

        for s_hash, RA in outL:
            # Format the averaged number, as the layout branch does, rather
            # than the running-average object itself.
            print(' ', fmt_hash % str(s_hash), fmt_V % RA.get_ave())

    if showRunningAve:
        for s_hash, RA in self.Vs_RaveD.items():  # index=state_hash, value=RunningAve object
            RA.summ_print()
# ------------------------
# Usable-ace table: one row per player sum (21 down to 12), one column per
# dealer card value (1 through 10), looked up in bj_policyD by the key
# (player_sum, True, dealer_showing).
usable_aceL = []
for player_sum in range(21, 11, -1):
    rowL = [bj_policyD[(player_sum, True, dealer_showing)]
            for dealer_showing in range(1, 11)]
    usable_aceL.append(rowL)

# NOTE(review): this tick list has 11 entries (21..11) while the usable-ace
# table above has 10 rows (21..12) -- confirm how print_string_rows treats
# the surplus tick.
row_tickL = list(range(21, 10, -1))

print_string_rows(
    usable_aceL,
    row_tickL=row_tickL,
    const_col_w=True,
    line_chr='=',
    left_pad='',
    header='Usable Ace Policy',
    x_axis_label='A 2 3 4 5 6 7 8 9 10 ',
    justify='left')  # left, right, center
print(' Dealer Showing')
print()

# ------------------------
# No-usable-ace table: same columns, player sums 21 down to 11, looked up
# by the key (player_sum, False, dealer_showing).
no_usable_aceL = []
for player_sum in range(21, 10, -1):
    rowL = [bj_policyD[(player_sum, False, dealer_showing)]
            for dealer_showing in range(1, 11)]
    no_usable_aceL.append(rowL)
def layout_print(self, vname='reward', fmt='', show_env_states=True, none_str='*'):
    """Print the value named ``vname`` arranged on the environment layout.

    Only ``vname='reward'`` is recognised: values and per-state messages
    then come from ``self.get_estimated_rewards()``.  Any other name gives a
    grid with no values.  When the environment has no layout an error line
    is printed and nothing else happens.

    Args:
        vname: name of the value to show; also used (title-cased) in the
            grid header.
        fmt: printf-style format for each value; empty means ``str(val)``.
        show_env_states: print the layout's state-hash grid first.
        none_str: placeholder for unknown states and missing values.
    """
    layout = self.layout
    if layout is None:
        print('...ERROR... "%s" tried to layout_print w/o a defined layout' % self.name)
        return

    if show_env_states:
        layout.s_hash_print(none_str=none_str)

    if vname == 'reward':
        # index=s_hash, value=float reward estimate (plus special messages)
        valD, msgD = self.get_estimated_rewards()
    else:
        valD, msgD = {}, {}  # unrecognised vname: nothing to show

    x_axis_label = layout.x_axis_label
    y_axis_label = layout.y_axis_label
    row_tickL = layout.row_tickL
    col_tickL = layout.col_tickL

    def cell_text(s_hash):
        if s_hash not in self.SC.stateD:
            return s_hash[1:-1] if is_literal_str(s_hash) else none_str

        val = valD.get(s_hash, None)
        if val is None:
            text = none_str
        elif fmt:
            text = fmt % val
        else:
            text = str(val)

        # NOTE(review): the nesting of this suffix step could not be
        # recovered from the source this was reconstructed from; it is
        # applied to every known state, with or without a value -- confirm.
        if msgD.get(s_hash, ''):
            text = text + msgD.get(s_hash, '')
        return text

    rows_outL = [[cell_text(s_hash) for s_hash in row] for row in layout.s_hash_rowL]
    if not rows_outL:
        return

    print_string_rows(
        rows_outL,
        const_col_w=True,
        line_chr='_',
        left_pad=' ',
        y_axis_label=y_axis_label,
        row_tickL=row_tickL,
        col_tickL=col_tickL,
        header=self.name + ' %s Summary' % vname.title(),
        x_axis_label=x_axis_label,
        justify='right')
def epi_summ_print(episode, policy, environment, show_rewards=False,
                   show_env_states=True, none_str='*'):
    """Print the environment layout with the steps of ``episode`` marked.

    Each state visited in the episode gets one "[N->action]" entry per visit
    (or "[N->action reward]" with ``show_rewards``), where N is the 1-based
    step number.  The next-state of the final step additionally gets a
    "T->state" entry.  An episode with no steps adds no entries at all.
    When the environment has no layout an error line is printed and nothing
    else happens.

    Args:
        episode: object whose ``iter_all_sars()`` yields
            ``(s_hash, a_desc, reward, sn_hash)`` tuples.
        policy: not used by this function; kept for interface compatibility.
        environment: environment providing ``layout``, ``name`` and
            ``get_all_action_state_hashes()``.
        show_rewards: include each step's reward in its entry.
        show_env_states: print the layout's state-hash grid first.
        none_str: placeholder for cells outside the hashes returned by
            ``get_all_action_state_hashes()`` and for unvisited states.
    """
    layout = environment.layout
    if layout is None:
        print('...ERROR... "%s" tried to layout_print w/o a defined layout' % environment.name)
        return

    if show_env_states:
        layout.s_hash_print(none_str=none_str)

    # index=s_hash, value=list of "[N->action]" OR "[N->action reward]" strings
    state_visitD = {}
    saw_step = False
    final_sn_hash = None
    for Nvis, (s_hash, a_desc, reward, sn_hash) in enumerate(episode.iter_all_sars(), start=1):
        if show_rewards:
            entry = '[%i->%s %g]' % (Nvis, str(a_desc), reward)
        else:
            entry = '[%i->%s]' % (Nvis, str(a_desc))
        state_visitD.setdefault(s_hash, []).append(entry)
        final_sn_hash = sn_hash
        saw_step = True

    # Mark where the episode ended.  Guarded so that an episode with no
    # steps does not touch an undefined next-state.
    if saw_step:
        state_visitD.setdefault(final_sn_hash, []).append('T->' + str(final_sn_hash))

    # start setting up output grid
    x_axis_label = layout.x_axis_label
    y_axis_label = layout.y_axis_label
    row_tickL = layout.row_tickL
    col_tickL = layout.col_tickL

    state_hash_set = set(environment.get_all_action_state_hashes())

    rows_outL = []
    for row in layout.s_hash_rowL:
        outL = []
        for s_hash in row:
            visits = state_visitD.get(s_hash, None) if s_hash in state_hash_set else None
            outL.append(none_str if visits is None else '\n'.join(visits))
        rows_outL.append(outL)

    if rows_outL:
        print_string_rows(
            rows_outL,
            const_col_w=True,
            line_chr='_',
            left_pad=' ',
            y_axis_label=y_axis_label,
            row_tickL=row_tickL,
            col_tickL=col_tickL,
            header=environment.name + ' Episode Summary',
            x_axis_label=x_axis_label,
            justify='right')