Example #1
    def doAction(self, chaser: Agent, target: Agent, grid: GridMap,
                 state: State):

        # Record the current state
        now_st = state.getState(chaser, target)
        self.st.append(now_st)

        # Choose an action (epsilon-greedy)
        action = -1
        p = random.random()
        if p < epsilon or (not np.any(
                self.q[now_st[0]][now_st[1]][now_st[2]])):
            while True:
                action = random.randint(0, 3)
                if state.canMoveDirection(target, grid, action):
                    break
        else:
            max_q = -100000000
            for i in range(4):
                if max_q < self.q[now_st[0]][now_st[1]][
                        now_st[2]][i] and state.canMoveDirection(
                            target, grid, i):
                    max_q = self.q[now_st[0]][now_st[1]][now_st[2]][i]
                    action = i

        # Execute and record the action
        target.Walk(action)
        self.act.append(action)

        # Predict the agents' next state
        tmp_chaser = copy.deepcopy(chaser)
        tmp_target = copy.deepcopy(target)
        tmp_state = copy.deepcopy(state)

        tmp_target.Walk(action)
        tmp_chaser.Walk(tmp_state.nextDirection(chaser, target, grid))

        next_st = state.getState(tmp_chaser, tmp_target)

        # Update the Q-value
        nextMax_q = -100000000
        p = random.random()
        if p < epsilon or (not np.any(
                self.q[next_st[0]][next_st[1]][next_st[2]])):
            while True:
                next_action = random.randint(0, 3)
                if state.canMoveDirection(target, grid, next_action):
                    nextMax_q = self.q[next_st[0]][next_st[1]][
                        next_st[2]][next_action]
                    break
        else:
            for i in range(4):
                if nextMax_q < self.q[next_st[0]][next_st[1]][
                        next_st[2]][i] and grid.canMove(
                            next_st[0] + dx[i], next_st[1] + dy[i]):
                    nextMax_q = self.q[next_st[0]][next_st[1]][next_st[2]][i]

        self.q[now_st[0]][now_st[1]][now_st[2]][action] = (1 - alpha) * self.q[
            now_st[0]][now_st[1]][now_st[2]][action] + alpha * (self.getReward(
                tmp_chaser, tmp_target, tmp_state) + ganma * nextMax_q)
Example #2
  def greedy_doAction(self, chaser:Agent, target:Agent, grid:GridMap, state:State):
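    # Pure greedy policy: pick the feasible action with the highest Q-value (no exploration)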
    now_st = state.getState(chaser, target)

    action = 0
    max_q = -100000000
    for i in range(4):
      if max_q < self.q[now_st[0]][now_st[1]][now_st[2]][i] and state.canMoveDirection(target, grid, i):
        max_q = self.q[now_st[0]][now_st[1]][now_st[2]][i]
        action = i
    
    target.Walk(action)
Example #3
 def doAction(self, chaser:Agent, target:Agent, grid:GridMap, state:State):
   
   # Record the current state
   now_st = state.getState(chaser, target)
   self.st.append(now_st)
   
   # Choose an action (epsilon-greedy)
   action = -1
   p = random.random()
   if p < epsilon or (not np.any(self.q[now_st[0]][now_st[1]][now_st[2]])):
     while True:
       action = random.randint(0,3)
       if state.canMoveDirection(target, grid, action):
         break
   else:
     max_q = -100000000
     for i in range(4):
       if max_q < self.q[now_st[0]][now_st[1]][now_st[2]][i] and state.canMoveDirection(target, grid, i):
         max_q = self.q[now_st[0]][now_st[1]][now_st[2]][i]
         action = i
   
   # Execute and record the action
   target.Walk(action)
   self.act.append(action)
Example #4
    def chaseTarget(self, chaser: Agent, target: Agent, grid: GridMap,
                    state: State):
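        # The chaser takes one step in the direction returned by
        # state.nextDirection (presumably its fixed pursuit direction toward the target)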

        dire = state.nextDirection(chaser, target, grid)
        chaser.Walk(dire)
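Taken together, the examples above implement an epsilon-greedy learning step (doAction), a greedy evaluation step (greedy_doAction) and a fixed pursuit step for the chaser (chaseTarget); the update at the end of Example #1 blends the old Q-value with the reward plus ganma times a next-state Q-value (the greedy maximum, or a random valid action's value when exploring). A minimal training loop around these methods could look like the sketch below; the class name QLearningAgent, the make_agents setup helper and the isCaught terminal check are assumptions for illustration, not part of the original code.

# Hypothetical training loop (QLearningAgent, make_agents and isCaught are
# assumed names; only doAction/greedy_doAction/chaseTarget come from the examples).
agent = QLearningAgent()                            # owns self.q, self.st, self.act
for episode in range(1000):
    chaser, target, grid, state = make_agents()     # assumed environment setup
    for step in range(200):
        agent.doAction(chaser, target, grid, state)     # target learns (epsilon-greedy)
        agent.chaseTarget(chaser, target, grid, state)  # chaser pursues with its fixed policy
        if state.isCaught(chaser, target):              # assumed terminal condition
            break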