import xml.etree.ElementTree as ET

from MDP import State, Action


def parse(self):
    tree = ET.parse(self.file)
    root = tree.getroot()
    actiondict = {}
    statedict = {}
    # Collect states from <location> elements.
    for st in root.iter('location'):
        st_id = st.find('id').text  # assumes the id is stored as element text
        state = State(st_id)
        statedict[st_id] = state
    # Collect actions from <branchpoint> elements.
    for a in root.iter('branchpoint'):
        a_id = a.find('id').text
        action = Action(a_id)
        actiondict[a_id] = action
    # Transitions are not handled yet.
    for t in root.iter('transition'):
        pass
    actionlist = list(actiondict.keys())
    statelist = list(statedict.keys())
    return statelist, actionlist
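# A minimal, self-contained sketch of the XML shape parse() appears to expect:
# <location> and <branchpoint> elements, each carrying an <id> child. The
# sample document below is invented purely for illustration.
import xml.etree.ElementTree as ET

sample = """
<mdp>
  <location><id>s0</id></location>
  <location><id>s1</id></location>
  <branchpoint><id>a0</id></branchpoint>
</mdp>
"""

root = ET.fromstring(sample)
print([st.find('id').text for st in root.iter('location')])   # ['s0', 's1']
print([a.find('id').text for a in root.iter('branchpoint')])  # ['a0']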
from MDP import State, Action, Transition

import ValueIter

y = 0.2
e = 0.01
reward = 15

# Add states
states = {
    "A": State("A"),
    "B": State("B"),
    "C": State("C"),
    "R": State("R", reward, 1)
}

# Add actions
states["A"].add_action(Action("A", "rt"))
states["A"].add_action(Action("A", "up"))
states["B"].add_action(Action("B", "lt"))
states["B"].add_action(Action("B", "up"))
states["C"].add_action(Action("C", "rt"))
states["C"].add_action(Action("C", "dn"))

# Add transitions A
states["A"].actions["rt"].add_transition("B", 0.8, -1)
states["A"].actions["rt"].add_transition("A", 0.2, -1)
states["A"].actions["up"].add_transition("C", 0.8, -1)
states["A"].actions["up"].add_transition("A", 0.2, -1)

# Add transitions B
states["B"].actions["lt"].add_transition("A", 0.8, -1)
states["B"].actions["lt"].add_transition("B", 0.2, -1)
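# A worked illustration of one Bellman backup for state A, assuming y is the
# discount factor and each add_transition call takes (next state, probability,
# reward). This is only a sketch of the update; the actual sweep is presumably
# performed by ValueIter.
V = {"A": 0.0, "B": 0.0, "C": 0.0}
q_rt = 0.8 * (-1 + y * V["B"]) + 0.2 * (-1 + y * V["A"])
q_up = 0.8 * (-1 + y * V["C"]) + 0.2 * (-1 + y * V["A"])
print(max(q_rt, q_up))  # -1.0 on the first sweep from all-zero values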
                    ref[state] = idx
                    idx += 1

# Add actions: attach the legal actions to every (p, m, a, mm, h) state.
for p in POSITIONS:
    for m in MATERIALS:
        for a in ARROWS:
            for mm in MM:
                for h in HEALTH:
                    cur = (p, m, a, mm, h)
                    now = ref[cur]
                    if h == 0:
                        states[now].add_action(Action(now, "NONE"))
                        continue
                    states[now].add_action(Action(now, "STAY"))
                    if p == "W":
                        states[now].add_action(Action(now, "RIGHT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))
                    elif p == "E":
                        states[now].add_action(Action(now, "LEFT"))
                        states[now].add_action(Action(now, "HIT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))
                    ref[idx] = state
                    idx += 1

# Add actions
for p in POSITIONS:
    for m in MATERIALS:
        for a in ARROWS:
            for mm in MM:
                for h in HEALTH:
                    if h == 0:
                        continue
                    cur = (p, m, a, mm, h)
                    now = ref[cur]
                    states[now].add_action(Action(now, "STAY"))
                    if p == "W":
                        states[now].add_action(Action(now, "RIGHT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))
                    elif p == "E":
                        states[now].add_action(Action(now, "LEFT"))
                        states[now].add_action(Action(now, "HIT"))
                        if a > 0:
                            states[now].add_action(Action(now, "SHOOT"))
                    elif p == "S":
                        states[now].add_action(Action(now, "UP"))
import numpy as np

# State, MDP, and Action are taken here to come from the project's MDP module
# (see the `from MDP import ...` example above).

# ------------
# ---- States ----
states_list = [State(1, 5.), State(2, 10), State(3, -7.2)]

# ---- MDP ----
disc_fact = 0.5
chain = MDP(states_list, disc_fact)

# ---- Actions ----
# ----
transitions_a = np.array([[0.3, 0.6, 0.1],
                          [0.1, 0.6, 0.3],
                          [0.5, 0.0, 0.5]])
reward_a = np.array([5.0, 5.0, 1.0])
a = Action("a", chain, transitions_a, reward_a)
# ----

# ----
transitions_b = np.array([[0.0, 0.3, 0.7],
                          [1 / 3, 1 / 3, 1 / 3],
                          [0.0, 0.5, 0.5]])
reward_b = np.array([2.8, 0.0, 10.0])
b = Action("b", chain, transitions_b, reward_b)
# ----

# ----
transitions_c = np.array([[0.2, 0.4, 0.4],
                          [0.2, 0.6, 0.2],
                          [1 / 3, 1 / 3, 1 / 3]])
reward_c = np.array([-7.2, -7.2, 0.0])
c = Action("c", chain, transitions_c, reward_c)
# ----
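# A minimal value-iteration sketch over the three actions defined above,
# assuming reward_x[s] is the immediate reward for taking action x in state s
# (the MDP/Action classes may use a different reward convention). This is an
# illustration only, not the project's solver.
import numpy as np

P = {"a": transitions_a, "b": transitions_b, "c": transitions_c}
R = {"a": reward_a, "b": reward_b, "c": reward_c}

V = np.zeros(3)
for _ in range(1000):
    # One row per action: R(s, a) + gamma * sum over s' of P(s' | s, a) * V(s')
    Q = np.stack([R[k] + disc_fact * P[k] @ V for k in P])
    V_new = Q.max(axis=0)
    if np.max(np.abs(V_new - V)) < 1e-6:
        break
    V = V_new
print(V)  # approximately optimal state values under discount 0.5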