class MCPlayer(AbstractPlayer):
    """Player that chooses each move by running ``search`` over its history.

    The player keeps a ``History`` of (action, observation) pairs and the
    action it returned last; both are reinitialised by ``reset``.
    """

    def __init__(self, max_iter, timeout, log=0, pref=True):
        """Configure the player and start with an empty history.

        Args:
            max_iter: Forwarded to ``search`` on every call to
                ``next_action``.
            timeout: Stored under ``params['timeout']``.
            log: Stored under ``params['log']``.
            pref: When falsy, ``params["prefs"]`` is set to ``False``; when
                truthy, ``params["prefs"]`` is left untouched.
        """
        self.max_iter = max_iter

        # ``params`` is defined outside this class, so these writes are
        # visible to every other user of that mapping.
        params['timeout'] = timeout
        params['log'] = log
        if not pref:
            params["prefs"] = False

        self.h = History()
        self.last_action = POMDPAction()
        self.first = True

    def next_action(self, state):
        """Record the current observation and return the next cell to play.

        Args:
            state: Object exposing ``board`` with attributes ``h``, ``w``,
                ``m`` and ``knowledge`` and a ``clone()`` method.

        Returns:
            The ``cell`` attribute of the action returned by ``search``.

        Raises:
            AssertionError: If ``search`` returns something that is not an
                ``Action`` (only when assertions are enabled).
        """
        board = state.board

        # The domain knowledge is built from the board seen on the first
        # call after construction or ``reset``.
        if self.first:
            self.dom_kno = Minesweeper(board.h, board.w, board.m)

        # Pair the previously returned action with what the board shows now.
        observation = Observation(board.clone().knowledge, board.m)
        self.h.add(self.last_action, observation)

        # Search works on a copy of the history; ``clean`` is true only on
        # the first call.
        chosen = search(
            self.h.clone(),
            self.dom_kno,
            self.max_iter,
            clean=self.first,
        )
        if self.first:
            self.first = False

        self.last_action = chosen
        assert isinstance(chosen, Action)
        return chosen.cell

    def reset(self):
        """Discard the history so the next call behaves like a first call."""
        self.h = History()
        self.last_action = POMDPAction()
        self.first = True
class TestHistory(unittest.TestCase):
    """Unit tests for ``History.add``, ``last_action``, ``last_obs`` and ``clone``."""

    def setUp(self):
        # Fresh board, state and empty history for every test.
        self.b = Board(4, 5, 3)
        self.s = State(self.b)
        self.h = History()

    def test_add(self):
        # After two additions the history must report the second pair as
        # its most recent action and observation.
        first_action = Action(0, 0)
        first_obs, _ = first_action.do_on(self.s)
        self.h.add(first_action, first_obs)

        second_action = Action(2, 1)
        second_obs, _ = second_action.do_on(self.s)
        self.h.add(second_action, second_obs)

        self.assertEqual(self.h.last_action(), second_action)
        self.assertEqual(self.h.last_obs(), second_obs)

    def test_clone(self):
        # A clone must compare equal to the history it was made from.
        action = Action(1, 0)
        obs, _ = action.do_on(self.s)
        self.h.add(action, obs)

        duplicate = self.h.clone()
        self.assertEqual(duplicate, self.h)