Example #1
            # Positive reward: reinforce the action just taken by fitting
            # the network toward that action's one-hot row of `identity`.
            model.train_on_batch(x=np.expand_dims(last_state, axis=0),
                                 y=identity[action:action + 1])
            print(
                f"                        training happens: {reward}\nx_pos:{info['x_pos']}"
            )
        elif reward < 0:
            # Negative reward: sample a random replacement action and train
            # toward it instead, nudging the policy away from the bad move.
            action = env.action_space.sample()
            model.train_on_batch(x=np.expand_dims(last_state, axis=0),
                                 y=identity[action:action + 1])
            print(
                f"                        training happens: {reward}\nx_pos:{info['x_pos']}"
            )

        env.render()

        # Track the best horizontal progress; checkpoint the model whenever
        # Mario sets a new record while still on his first life.
        x_pos = info["x_pos"]
        if x_pos > max_x_pos:
            max_x_pos = x_pos
            io.write_settings("max_x_pos", int(max_x_pos))
            if info["life"] == 2:
                failer_mode = False
                perfect_model = model
                model.save(model_file_path)
        # Reaching stage 2 counts as solving the level: save and stop.
        if info["stage"] == 2:
            io.write_settings("max_x_pos", int(max_x_pos))
            model.save(final_model_file_path)
            input("congratulations!")
            exit()

env.close()
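
The `identity[action:action + 1]` target above only works if `identity` is an identity matrix sized to the action space, so that row `action` is a one-hot label and the slice keeps the 2-D batch shape `train_on_batch` expects. A minimal, self-contained sketch of that pattern; the network architecture and the `identity` setup are assumptions, since neither appears in the excerpt:

import numpy as np
from tensorflow import keras

n_actions = 7                      # len(SIMPLE_MOVEMENT); assumed here
identity = np.identity(n_actions)  # row i is the one-hot label for action i

# Hypothetical stand-in for the policy network used in the example.
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(240, 256, 3)),  # raw NES frame size
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(n_actions, activation="softmax"),
])
model.compile(optimizer="adam", loss="categorical_crossentropy")

last_state = np.zeros((240, 256, 3))  # dummy frame in place of env output
action = 2
# One supervised gradient step toward predicting `action` for `last_state`.
model.train_on_batch(x=np.expand_dims(last_state, axis=0),
                     y=identity[action:action + 1])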
Example #2
            last_info = info

        # Force action 3 (['right', 'B'] in SIMPLE_MOVEMENT: run right).
        action = 3
        state, reward, done, info = env.step(action)

        # Keep rolling windows of the last 200 actions and positions.
        history_actions.append(action)
        history_actions = history_actions[-200:]
        history_x_pos.append(info['x_pos'])
        history_x_pos = history_x_pos[-200:]
        history_y_pos.append(info['y_pos'])
        history_y_pos = history_y_pos[-200:]

        if isinstance(last_state, (np.ndarray, np.generic)):
            if len(history_actions) >= 101 and len(history_x_pos) >= 101 and len(history_y_pos) >= 101:
                training_couting += 1
                io.write_settings("training_couting", int(training_couting))

                print(f"                                            learning happened with action: {SIMPLE_MOVEMENT[action]}")
                # Train on the window of 100 steps preceding the current one.
                train_once(last_state, history_actions[-101:-1], history_x_pos[-101:-1], history_y_pos[-101:-1], action, reward)

        temp = 15
        # Keep taking action 2 (['right', 'A'] in SIMPLE_MOVEMENT: run right
        # and jump), remembering the last valid state and info each step.
        while True:
            if isinstance(state, (np.ndarray, np.generic)) and info is not None:
                last_state = state
                last_info = info

            action = 2
            state, reward, done, info = env.step(action)

            history_actions.append(action)
            history_actions = history_actions[-200:]
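
Both excerpts rely on gym-super-mario-bros plumbing they never show. A minimal setup sketch, assuming the classic gym API (the 4-tuple `step` the snippets unpack) and the `SIMPLE_MOVEMENT` action set; the `deque` is a suggested alternative to the repeated `history = history[-200:]` slicing, not what the original code does:

from collections import deque

import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import JoypadSpace

# JoypadSpace restricts the NES controller to SIMPLE_MOVEMENT, so the
# integer `action` in the snippets is an index into that list.
env = gym_super_mario_bros.make('SuperMarioBros-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)

# maxlen gives the same "keep only the last 200 entries" behaviour as slicing.
history_actions = deque(maxlen=200)

state = env.reset()
for _ in range(10):
    action = env.action_space.sample()
    state, reward, done, info = env.step(action)  # old gym 4-tuple API
    history_actions.append(action)
    if done:
        state = env.reset()
env.close()

One caveat on the deque: it does not support slicing, so code that needs a window like `history_actions[-101:-1]` would first convert with `list(history_actions)`.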