def genstate(self):
    """Populate ``self.pi`` with every state reachable from the empty start.

    Starts from a 9-element all-``None`` state and sweeps over the known
    states repeatedly.  For each action of each state it records on the
    action object:

    * ``p``   -- uniform probability ``1 / number_of_actions``
    * ``ret`` -- result of ``init.evalS`` on the successor state
    * ``r``   -- ``Reward(state, successor)``
    * ``v``   -- initialised to ``r``
    * ``nextstate`` -- the ``self.pi`` entry of the successor, set only when
      ``ret`` is ``None``

    Successors with ``ret is None`` that are not yet in ``self.pi`` are
    added.  Sweeping stops after the first pass that adds no new state.
    The table size before and after each pass is printed.
    """
    start = [None] * 9
    self.pi[tuple(start)] = STATE(start)

    added = 1
    while added > 0:
        before = len(self.pi)
        # Snapshot the keys: new states are inserted into self.pi inside the
        # loop, and iterating a live key view while the dict grows raises
        # RuntimeError on Python 3.
        for s in list(self.pi.keys()):
            entry = self.pi[s]
            sgn = getturn(s)
            n = len(entry.Actions)
            for act, v1 in entry.Actions.items():
                state1 = init.newstate(s, act, sgn)
                # Computed inside the loop on purpose: with no actions the
                # loop body never runs, so n == 0 cannot divide by zero.
                v1.p = 1. / n
                v1.ret = init.evalS(state1)
                v1.r = Reward(s, state1)
                v1.v = v1.r
                if v1.ret is None:
                    key = tuple(state1)
                    if key not in self.pi:
                        self.pi[key] = STATE(key)
                    v1.nextstate = self.pi[key]
        after = len(self.pi)
        added = after - before
        # Formatted string prints "before after" identically on Python 2 and 3.
        print("%s %s" % (before, after))
def rSA(self, state, act, sgn):
    """Return the value of taking ``act`` in ``state`` for player ``sgn``.

    The value is the immediate ``self.reward(state, successor)``.  When
    ``init.evalS(successor)`` is ``None``, the discounted expectation
    ``self.gamma * sum(p * self.value(state2)[0])`` over the entries of
    ``self.pi[successor]`` is added, where ``state2`` is the state reached
    from the successor by each listed action.

    Args:
        state: current state, passed to ``newstate`` and ``self.reward``.
        act: action applied to ``state``.
        sgn: mark used for the first move.

    Returns:
        The immediate reward, plus the discounted expected follow-up value
        when ``init.evalS`` returns ``None`` for the successor.
    """
    # NOTE(review): the follow-up mark is derived from self.sgn, not from the
    # ``sgn`` argument -- presumably callers always pass sgn == self.sgn;
    # confirm against callers.
    sgn1 = 'X' if self.sgn == 'O' else 'O'

    state1 = newstate(state, act, sgn)
    v = self.reward(state, state1)
    if init.evalS(state1) is not None:
        return v

    # NOTE(review): state1 is used directly as a key of self.pi, so newstate
    # presumably returns something hashable -- TODO confirm.
    expected = 0
    for act1, p1 in self.pi[state1].items():
        state2 = newstate(state1, act1, sgn1)
        # self.value(...) is indexed, so only its first element is used here.
        expected += p1 * self.value(state2)[0]
    v += self.gamma * expected
    return v
def score(state):
    """Return a float score for ``state``; positive values favour 'O'.

    ``init.evalS(state)`` decides the branch:

    * ``'O'``  -> ``10.``
    * ``'X'``  -> ``-10.``
    * ``None`` -> sum over non-empty cells of ``+weight`` for ``'O'`` and
      ``-weight`` for any other mark (all nine weights are currently 1)
    * anything else -> ``0.``
    """
    outcome = init.evalS(state)
    if outcome is not None:
        if outcome == 'O':
            return 10.
        if outcome == 'X':
            return -10.
        return 0.

    # Per-cell weights, indexed by cell position.
    weights = [1] * 9
    total = 0.
    for idx, mark in enumerate(state):
        if not mark:
            continue
        if mark == 'O':
            total += weights[idx]
        else:
            total -= weights[idx]
    return total