Example #1
0
File: main.py Project: nickparser/nnet
class Main(App):
    def build(self):
        self._init_view()
        main_l = BoxLayout(orientation='vertical')

        self.nnet = NNet(n_input=INPUT,
                         n_hidden=HIDDEN,
                         n_output=OUTPUT,
                         learning_rate=LR)
        self._prepare_nnet()

        main_l.add_widget(self._paint_w)
        main_l.add_widget(self._conf_l)
        return main_l

    def _init_view(self):
        self._conf_l = BoxLayout(size_hint=(None, None),
                                 width=WINDOW_WIDTH,
                                 height=CONFIG_HEIGHT)
        self._paint_w = PaintWidget(size_hint=(None, None),
                                    width=WINDOW_WIDTH,
                                    height=PAINT_HEIGHT)
        self._clear_b = Button(text='clear')
        self._clear_b.bind(on_press=self.clear)
        self._query_b = Button(text='query')
        self._query_b.bind(on_press=self.query)

        self._conf_l.add_widget(self._clear_b)
        self._conf_l.add_widget(self._query_b)

    def _prepare_nnet(self):
        try:
            self.nnet.restore(MODEL_PATH)
        except Exception:  # no saved model to restore, so train a new one on MNIST
            from tensorflow.examples.tutorials.mnist import input_data
            mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)

            for episode in range(EPISODES):
                batch_input, batch_target = mnist.train.next_batch(BATCH_SIZE)

                # Set the status flag every 100 episodes (e.g. for progress reporting).
                status = episode % 100 == 0

                self.nnet.train(batch_input, batch_target, status)

            self.nnet.save(MODEL_PATH)

    def clear(self, instance):
        self._paint_w.clear_canvas()

    def query(self, instance):
        # Flatten the 28x28 canvas image, scale it to [0, 1] and run a prediction.
        data = self._paint_w.get_prepared_data((28, 28)).reshape(1, INPUT) / 255
        predict = str(self.nnet.predict(data)[0])
        Popup(title='predict',
              content=Label(text=predict),
              size_hint=(None, None),
              size=(200, 200)).open()
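
Note: the tensorflow.examples.tutorials.mnist helper used in _prepare_nnet was removed in TensorFlow 2.x. A minimal sketch of an equivalent batch source built on tf.keras.datasets; the next_batch helper below is hypothetical and only illustrates the idea:

import numpy as np
import tensorflow as tf

# Load MNIST, flatten the images to 784-dimensional vectors and one-hot encode the labels.
(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
y_train = np.eye(10, dtype='float32')[y_train]

def next_batch(batch_size):
    # Sample a random mini-batch, mirroring mnist.train.next_batch(BATCH_SIZE).
    idx = np.random.choice(len(x_train), batch_size, replace=False)
    return x_train[idx], y_train[idx]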
Example #2

import idx2numpy
import numpy as np
import matplotlib.pyplot as plt

# Load the raw MNIST idx files (the training file names below are assumed to follow
# the same naming scheme as the test files).
tr_i_raw = idx2numpy.convert_from_file('mnist\\train-images.idx3-ubyte')
tr_l_raw = idx2numpy.convert_from_file('mnist\\train-labels.idx1-ubyte')
test_i_raw = idx2numpy.convert_from_file('mnist\\t10k-images.idx3-ubyte')
test_l_raw = idx2numpy.convert_from_file('mnist\\t10k-labels.idx1-ubyte')

# Flatten the 28x28 images to 784-dimensional vectors and scale pixel values to [0, 1].
tr_i = tr_i_raw.reshape([len(tr_i_raw), 784]) / 255
test_i = test_i_raw.reshape([len(test_i_raw), 784]) / 255

# One-hot encode the integer labels.
tr_l = np.full([len(tr_l_raw), 10], 0)
test_l = np.full([len(test_l_raw), 10], 0)
for i in range(len(tr_l_raw)):
    tr_l[i][tr_l_raw[i]] = 1
for i in range(len(test_l_raw)):
    test_l[i][test_l_raw[i]] = 1

# Train a network with two hidden layers (16 and 20 units) and plot the training statistics.
nn = NNet(784, 10, layers=[16, 20])
nn.fit(tr_i, tr_l, batch=32, timeout=60 * 0.5, epochs=3)
nn.plot_stats()

res = nn.predict(test_i)

# Build a misclassification matrix: acc_matrix[true, predicted] counts errors only
# (the diagonal stays zero), then normalize by the size of the test set.
acc_matrix = np.zeros((10, 10))
for i in range(len(test_i)):
    index_predicted = res[i].argmax()
    index_true = test_l[i].argmax()
    if index_predicted != index_true:
        acc_matrix[index_true, index_predicted] += 1

acc_matrix = acc_matrix / len(test_i)
plt.matshow(acc_matrix)
plt.xlabel('predicted values')
plt.ylabel('true values')
plt.xticks(np.arange(0, 10))
plt.yticks(np.arange(0, 10))
plt.colorbar()
plt.show()
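
The two label loops above can be replaced by a single vectorized indexing step; a minimal equivalent sketch in NumPy, assuming tr_l_raw and test_l_raw are the integer label arrays loaded above:

import numpy as np

# One-hot encode the labels in one step: row i of eye(10) is the one-hot vector for digit i.
tr_l = np.eye(10, dtype=int)[tr_l_raw]
test_l = np.eye(10, dtype=int)[test_l_raw]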
Example #3
0
class AlphaZero(AbstractModel):
    def __init__(self, state_encoder, exploration_rate,
                 number_of_mcts_simulations):
        super().__init__(state_encoder)
        self.exploration_rate = exploration_rate
        self.number_of_mcts_simulations = number_of_mcts_simulations
        self.visited_states = set()
        self.examples = []
        self.P = {}  # P[s][a] = policy value for move a in state s
        self.Q = {}  # Q[s][a] = q-value of taking action a from state s
        self.N = {}  # N[s][a] = number of times action a was played from state s
        self.is_learning = False
        self.net = NNet(self.input_nodes, self.output_nodes)

    def simulate_game(self, env, depth=0):
        # One MCTS simulation: descend the tree guided by the upper confidence bound,
        # expand unvisited states with the network's prediction and back up the value.
        s = self.state_encoder.state_to_vector(env.return_state(False))
        if s not in self.visited_states:
            self.Q[s] = [0] * self.output_nodes
            self.N[s] = [0] * self.output_nodes

        if env.end or depth >= 200:
            return self.get_score_at_end_pos(env.current_player, env.winner)

        if s not in self.visited_states:
            # Leaf expansion: store the network's policy prior and return its value estimate.
            self.visited_states.add(s)
            prediction = self.net.predict(s)
            self.P[s] = prediction[0][0]
            v = prediction[1][0][0]
            print(v)
            return v

        best_action = (-float("inf"), -1)

        # Pick the available action with the highest upper confidence bound (PUCT).
        for action, avail in enumerate(self.state_encoder.available_outputs(env)):
            if avail:
                upper_confidence_bound = (
                    self.Q[s][action] + self.exploration_rate * self.P[s][action] *
                    (sum(self.N[s]) ** 0.5) / (1 + self.N[s][action]))
                best_action = max(best_action,
                                  (upper_confidence_bound, action))

        action = best_action[1]
        cur_player = env.current_player
        #print("moving", self.output_to_move(action, env.return_state()))
        env.move(self.state_encoder.output_to_move(action, env.return_state()))
        player_after_move = env.current_player
        v = self.simulate_game(env, depth + 1)
        if cur_player != player_after_move:
            v *= -1

        # Back up the value: keep a running average of simulation results for (s, action).
        self.Q[s][action] = (self.N[s][action] * self.Q[s][action] + v) / (
            self.N[s][action] + 1)
        self.N[s][action] += 1
        return v

    def get_pi(self, state):
        ns = np.array(self.N[state])
        if sum(ns) == 0:
            ns = np.ones(self.output_nodes)
        return ns / sum(ns)

    def get_scores_for_each_move(self, env):
        for _ in range(self.number_of_mcts_simulations):
            ecp = env.copy()
            result = self.simulate_game(ecp, 0)
            #print('game_result = ',result)

        s = self.state_encoder.state_to_vector(env.return_state(False))
        pi = self.get_pi(s)
        if self.is_learning:
            self.examples.append((s, pi.copy(), None))
        return pi

    def get_best_move(self, env):
        state = env.return_state()
        aval_vector = self.state_encoder.available_outputs(env)
        raw_prediction = self.get_scores_for_each_move(env)
        prediction = raw_prediction * aval_vector
        assert sum(prediction) > 0
        prediction /= sum(prediction)
        #for i in range(len(prediction)):
        #    assert prediction[i] == raw_prediction[i]
        return self.state_encoder.output_to_move(
            np.random.choice(len(prediction), p=prediction), state)

    def update_model_after_game(self, env):
        if self.is_learning:
            # Fill in the final game outcome as the value target for every stored
            # example that does not have one yet.
            self.examples = [(ex[0], ex[1], ex[2] if ex[2] is not None else
                              self.get_score_at_end_pos(ex[0][0], env.winner))
                             for ex in self.examples]

    def produce_new_version(self):
        new_net = NNet(self.input_nodes, self.output_nodes)
        new_net.train(self.examples)

        az = AlphaZero(self.state_encoder, self.exploration_rate,
                       self.number_of_mcts_simulations)
        az.net = new_net
        return az

    def get_score_at_end_pos(self, current_player, winner):
        # +1 if the current player won, -1 if the opponent won, 0 otherwise (draw).
        if current_player == winner:
            return 1
        elif current_player == 1 - winner:
            return -1
        return 0
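
For reference, the selection rule in simulate_game is the PUCT upper confidence bound used by AlphaZero-style MCTS. A minimal standalone sketch of the same rule; the function names and arguments below are illustrative, not part of the project's API:

import math

def puct_score(q, p, n, n_total, c):
    # Q-value plus the exploration bonus: c * P(s, a) * sqrt(sum_b N(s, b)) / (1 + N(s, a)).
    return q + c * p * math.sqrt(n_total) / (1 + n)

def select_action(Q_s, P_s, N_s, available, c):
    # Pick the legal action with the highest upper confidence bound.
    total = sum(N_s)
    return max((a for a, ok in enumerate(available) if ok),
               key=lambda a: puct_score(Q_s[a], P_s[a], N_s[a], total, c))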