Exemplo n.º 1
0
    def policy(self, state, turn, available, epsilon=0.08):
        """Choose a move epsilon-greedily using the values in ``self.value``.

        With probability ``epsilon`` a uniformly random element of
        ``available`` is returned. Otherwise each candidate move is tried on
        the board, the resulting position is looked up in ``self.value`` via
        ``encode``, and the best-scoring moves are kept: the highest value
        when ``turn == 1``, the lowest value for any other ``turn``. Ties are
        broken uniformly at random.

        Args:
            state: Mutable board indexed by move. Each candidate is written
                into it temporarily and then restored, so the board is
                unchanged when this method returns.
            turn: Marker of the player to move; written into the tried cell.
            available: Candidate moves (indices into ``state``).
            epsilon: Probability of picking a random move instead of a
                greedy one.

        Returns:
            One element of ``available``.

        Raises:
            IndexError: Raised by ``random.choice`` when no move could be
                selected (for example when ``available`` is empty).
        """
        # Exploration: ignore the value table entirely.
        if np.random.rand(1) < epsilon:
            return random.choice(available)

        maximizing = turn == 1
        # Same sentinels as before: any stored value is expected to beat them.
        best_value = -99999 if maximizing else 99999
        best_moves = []

        for move in available:
            # Try the move in place and always undo it, so the caller's board
            # is never left with a stray piece (the previous implementation
            # reset a decoded copy instead of the caller's board).
            previous = state[move]
            state[move] = turn
            try:
                candidate = self.value[encode(state)]
            finally:
                state[move] = previous

            if maximizing:
                improved = candidate > best_value
            else:
                improved = candidate < best_value

            if improved:
                best_value = candidate
                best_moves = [move]
            elif candidate == best_value:
                best_moves.append(move)

        return random.choice(best_moves)
Exemplo n.º 2
0
    def policy(self, state, turn, epsilon=0.08):
        """Choose a move epsilon-greedily using ``self.action_value``.

        The candidate moves come from ``available_actions(state)``. With
        probability ``epsilon`` one of them is returned at random. Otherwise
        each move is scored by ``self.action_value[encode(state) + str(move)]``
        and the best-scoring moves are kept: the highest score when
        ``turn == 1``, the lowest score for any other ``turn``. Ties are
        broken uniformly at random.

        Args:
            state: Board passed to ``encode`` and ``available_actions``.
            turn: Player to move; ``1`` maximizes, anything else minimizes.
            epsilon: Probability of picking a random move.

        Returns:
            One of the moves returned by ``available_actions(state)``.
        """
        key_prefix = encode(state)
        candidates = available_actions(state)

        # Exploration: any candidate is acceptable.
        if np.random.rand(1) < epsilon:
            return random.choice(candidates)

        maximizing = turn == 1
        best_score = -99999 if maximizing else 99999
        best_moves = []

        for move in candidates:
            score = self.action_value[key_prefix + str(move)]
            if maximizing:
                improved = score > best_score
            else:
                improved = score < best_score

            if improved:
                # Strictly better: discard the previous ties.
                best_score = score
                best_moves = [move]
            elif score == best_score:
                best_moves.append(move)

        return random.choice(best_moves)
Exemplo n.º 3
0
    def init_value(self):
        """Zero-initialize ``self.action_value`` for every board/move pair.

        Enumerates every 9-cell board whose cells take the values 0, 1 or 2
        and, for each move reported by ``available_actions``, stores 0 under
        the key ``encode(board) + str(move)``.
        """
        for cells in itertools.product([0, 1, 2], repeat=9):
            board = list(cells)
            key_prefix = encode(board)
            for move in available_actions(board):
                self.action_value[key_prefix + str(move)] = 0
Exemplo n.º 4
0
    def init_value(self):
        """Populate ``self._value`` and ``self._policy`` for every board.

        Enumerates every 9-cell board whose cells take the values 0, 1 or 2.
        Boards that ``is_finished`` reports as done get value 0 and no policy
        entry. Every other board gets a value drawn uniformly from
        [-0.5, 0.5] and a policy entry chosen at random from
        ``available_actions``.
        """
        for cells in itertools.product([0, 1, 2], repeat=9):
            board = list(cells)
            key = encode(board)
            done, _ = is_finished(board)

            if done:
                # Terminal state value.
                self._value[key] = 0
                continue

            self._value[key] = random.uniform(-0.5, 0.5)
            self._policy[key] = random.choice(available_actions(board))
Exemplo n.º 5
0
    def init_value(self):
        """Populate ``self.value`` for every board.

        Enumerates every 9-cell board whose cells take the values 0, 1 or 2.
        A finished board won by player 1 is worth 1, a finished board won by
        player 2 is worth -1, and every other board (unfinished, or finished
        with any other winner value) is worth 0.
        """
        for cells in itertools.product([0, 1, 2], repeat=9):
            board = list(cells)
            done, winner = is_finished(board)
            key = encode(board)

            if done and winner == 1:
                worth = 1
            elif done and winner == 2:
                worth = -1
            else:
                worth = 0

            self.value[key] = worth
Exemplo n.º 6
0
 def assign_policy(self, state, x):
     """Store ``x`` in ``self._policy`` under the key ``encode(state)``."""
     key = encode(state)
     self._policy[key] = x
Exemplo n.º 7
0
 def policy(self, state):
     """Return the ``self._policy`` entry stored under ``encode(state)``."""
     key = encode(state)
     chosen = self._policy[key]
     return chosen
Exemplo n.º 8
0
 def assign_value(self, state, x):
     """Store ``x`` in ``self._value`` under the key ``encode(state)``."""
     key = encode(state)
     self._value[key] = x
Exemplo n.º 9
0
 def value(self, state):
     """Return the ``self._value`` entry stored under ``encode(state)``."""
     key = encode(state)
     stored = self._value[key]
     return stored
Exemplo n.º 10
0
def update(agent, state, next_state, learning_rate=0.4):
    """Move the stored value of ``state`` toward the value of ``next_state``.

    Both states are converted to keys with ``encode``. The entry for
    ``state`` in ``agent.value`` is then shifted by ``learning_rate`` times
    the difference between the two stored values. No reward term is used.

    Args:
        agent: Object exposing a ``value`` mapping keyed by encoded states.
        state: State whose stored value is updated.
        next_state: State whose stored value serves as the target.
        learning_rate: Fraction of the difference applied to the old value.
    """
    current_key = encode(state)
    successor_key = encode(next_state)

    old_estimate = agent.value[current_key]
    difference = agent.value[successor_key] - agent.value[current_key]
    agent.value[current_key] = old_estimate + learning_rate * difference
Exemplo n.º 11
0
 def assign_q(self, state, action, x):
     """Store ``x`` in ``self.action_value`` for the pair (state, action).

     The key is ``encode(state)`` with ``str(action)`` appended.
     """
     key_prefix = encode(state)
     key = key_prefix + str(action)
     self.action_value[key] = x
Exemplo n.º 12
0
 def q(self, state, action):
     """Return the ``self.action_value`` entry for the pair (state, action).

     The key is ``encode(state)`` with ``str(action)`` appended.
     """
     key_prefix = encode(state)
     key = key_prefix + str(action)
     return self.action_value[key]