v_f = value_iteration(gamma=0.9, theta=0.01) graph_vals(v1, 'Sweep 1') graph_vals(v10, 'Sweep 10') graph_vals(v_f, 'Optimal V*') # # Policy Iteration # values = [] V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values) for i, v in enumerate(values): graph_vals(v, 'Policy Evaluation %d' % (i + 1)) ''' #Gamma Variations print 'Generating Gamma Variations' for g in (0.9, 0.5, 0.3): values = [] V, pi = policy_iteration(theta=0.01, value_list=values, gamma=g) print 'gamma = %f:' % (g) for s in ((4, 7, 1), (1, 3, 6), (9, 2, 1)): print 'state %s => action %s' % (s, pi[s]) ''' for i, v in enumerate(values): graph_vals(v, 'Policy Evaluation %d. Gamma: %f' % (i + 1, g), 'gamma_%d_%d' % (int(g*10), i+1)) '''
'''
Test policy iteration

Runs policy_iteration once, checks that the last recorded value function
equals the returned one, and prints every (state, value) pair, ordered by
ascending value, next to the action the returned policy holds for that state.

Created on 24 Sep 2009

@author: joh
'''
from value_methods import policy_iteration

if __name__ == '__main__':
    # The list is handed to policy_iteration as value_list; afterwards its
    # last entry is expected to equal the returned value function and its
    # length is reported as the number of policy evaluation steps.
    values = []
    V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values)
    assert values[-1] == V

    # Order the (state, value) pairs by their value. A key function gives
    # the same stable ordering as the former cmp-based comparator and works
    # on both Python 2 and Python 3. V itself is left untouched.
    ranked = sorted(V.items(), key=lambda item: item[1])

    for entry in ranked:
        # Single pre-formatted string so the output is identical whether
        # print is a statement (Python 2) or a function (Python 3).
        print("%s => %s" % (entry, pi[entry[0]]))

    print("%d policy evaluation steps were required." % (len(values)))
graph_vals(v1, 'Sweep 1') graph_vals(v10, 'Sweep 10') graph_vals(v_f, 'Optimal V*') # # Policy Iteration # values = [] V, pi = policy_iteration(gamma=0.9, theta=0.01, value_list=values) for i, v in enumerate(values): graph_vals(v, 'Policy Evaluation %d' % (i + 1)) ''' #Gamma Variations print 'Generating Gamma Variations' for g in(0.9, 0.5, 0.3): values = [] V, pi = policy_iteration(theta=0.01, value_list=values, gamma=g) print 'gamma = %f:' % (g) for s in ((4,7,1), (1,3,6), (9,2,1)): print 'state %s => action %s' % (s, pi[s]) ''' for i, v in enumerate(values): graph_vals(v, 'Policy Evaluation %d. Gamma: %f' % (i + 1, g), 'gamma_%d_%d' % (int(g*10), i+1)) '''