Example #1
    def dqn_play_keras_rl(self):
        """Create 6 players, one of them a trained DQN"""
        env_name = 'neuron_poker-v0'
        stack = 500
        self.env = gym.make(env_name, initial_stacks=stack, render=self.render)
        self.env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=.5))
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=.8))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(PlayerShell(name='keras-rl', stack_size=stack))

        self.env.reset()

        dqn = DQNPlayer(load_model='dqn1', env=self.env)
        dqn.play(nb_episodes=self.num_episodes, render=self.render)
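Example #1 relies on names (EquityPlayer, RandomPlayer, PlayerShell, DQNPlayer) and on instance attributes (self.render, self.num_episodes) that the surrounding class must supply; Example #3 below shows the import paths used for the agents. A minimal standalone sketch of the same play flow, assuming the neuron_poker package is installed; the gym_env import used for environment registration and the location of PlayerShell are assumptions, not something these examples show:

# Sketch only: import paths for the agents follow Example #3 below; the
# gym_env module and the location of PlayerShell are assumptions.
import gym
import gym_env  # noqa: F401  (assumed to register 'neuron_poker-v0' with gym)

from agents.agent_consider_equity import Player as EquityPlayer
from agents.agent_keras_rl_dqn import Player as DQNPlayer
from agents.agent_random import Player as RandomPlayer
from gym_env.env import PlayerShell

stack = 500
env = gym.make('neuron_poker-v0', initial_stacks=stack, render=False)

# Same six-seat table as above: four equity bots, one random bot, and a shell
# seat that the keras-rl agent drives through callbacks.
env.add_player(EquityPlayer(name='equity/50/50', min_call_equity=.5, min_bet_equity=.5))
env.add_player(EquityPlayer(name='equity/50/80', min_call_equity=.8, min_bet_equity=.8))
env.add_player(EquityPlayer(name='equity/70/70', min_call_equity=.7, min_bet_equity=.7))
env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
env.add_player(RandomPlayer())
env.add_player(PlayerShell(name='keras-rl', stack_size=stack))
env.reset()

# Load the weights saved under the name 'dqn1' and let the trained agent play.
dqn = DQNPlayer(load_model='dqn1', env=env)
dqn.play(nb_episodes=5, render=False)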
Example #2
    def dqn_train_keras_rl(self):
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 100
        env = gym.make(env_name,
                       initial_stacks=stack,
                       funds_plot=self.funds_plot,
                       render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        # env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
        env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        env.add_player(PlayerShell(
            name='keras-rl',
            stack_size=stack))  # shell is used for callback to keras rl

        env.reset()

        dqn = DQNPlayer()
        dqn.initiate_agent(env)
        dqn.train(env_name='dqn1')
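The same standalone pattern works for training; this sketch reuses the imports from the sketch after Example #1 (plus numpy) and mirrors the two-opponent table of Example #2:

import numpy as np  # remaining imports as in the sketch after Example #1

stack = 100
env = gym.make('neuron_poker-v0', initial_stacks=stack, funds_plot=True,
               render=False, use_cpp_montecarlo=False)

np.random.seed(123)  # fixed seeds keep training runs repeatable
env.seed(123)

env.add_player(EquityPlayer(name='equity/20/30', min_call_equity=.2, min_bet_equity=.3))
env.add_player(RandomPlayer())
env.add_player(PlayerShell(name='keras-rl', stack_size=stack))  # seat driven by keras-rl callbacks
env.reset()

dqn = DQNPlayer()
dqn.initiate_agent(env)     # wire the keras-rl agent to this environment
dqn.train(env_name='dqn1')  # 'dqn1' is the name Example #1 later passes to load_model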
Example #3
    def dqn_play_keras_rl(self, model_name):
        """Create 6 players, one of them a trained DQN"""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        self.env = gym.make(env_name,
                            initial_stacks=self.stack,
                            render=self.render)
        self.env.add_player(
            EquityPlayer(name='equity/50/50',
                         min_call_equity=.5,
                         min_bet_equity=.5))
        self.env.add_player(
            EquityPlayer(name='equity/50/80',
                         min_call_equity=.8,
                         min_bet_equity=.8))
        self.env.add_player(
            EquityPlayer(name='equity/70/70',
                         min_call_equity=.7,
                         min_bet_equity=.7))
        self.env.add_player(
            EquityPlayer(name='equity/20/30',
                         min_call_equity=.2,
                         min_bet_equity=.3))
        self.env.add_player(RandomPlayer())
        self.env.add_player(PlayerShell(name='keras-rl',
                                        stack_size=self.stack))

        self.env.reset()

        dqn = DQNPlayer(load_model=model_name, env=self.env)
        dqn.play(nb_episodes=self.num_episodes, render=self.render)
Example #4
    def dqn_train_keras_rl(self, model_name):
        """Implementation of kreras-rl deep q learing."""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        env = gym.make(env_name,
                       initial_stacks=self.stack,
                       funds_plot=self.funds_plot,
                       render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        # env.add_player(EquityPlayer(name='equity/50/70',
        #                             min_call_equity=.5, min_bet_equity=.7))
        # env.add_player(EquityPlayer(name='equity/20/30',
        #                             min_call_equity=.2, min_bet_equity=.3))
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # shell is used for callback to keras rl
        env.add_player(
            EquityPlayer(name='equity_default',
                         min_call_equity=.4,
                         min_bet_equity=.5))
        env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))

        env.reset()

        dqn = DQNPlayer()
        dqn.initiate_agent(env)
        dqn.train(env_name=model_name)
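Examples #3 and #4 take the model name as a parameter, so the same string ties a training run to its later playback. A short sketch of that pairing, where runner stands for an instance of whatever class defines these methods (its construction is not shown in the examples and is assumed here):

# 'runner' is an assumption: an instance of the class that defines the methods in
# Examples #3 and #4, with self.stack, self.render etc. already set on it.
model_name = 'dqn_vs_equity_default'   # illustrative name only
runner.dqn_train_keras_rl(model_name)  # train against the 'equity_default' bot and save under this name
runner.dqn_play_keras_rl(model_name)   # reload the saved weights and play the six-player table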
Example #5
    def dqn_train_keras_rl(self):
        """Implementation of kreras-rl deep q learing."""
        env_name = 'neuron_poker-v0'
        stack = 2000
        env = gym.make(env_name, initial_stacks=stack, funds_plot=self.funds_plot, render=self.render,
                       use_cpp_montecarlo=self.use_cpp_montecarlo)

        np.random.seed(123)
        env.seed(123)
        # env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # env.add_player(RandomPlayer())
        # env.add_player(PlayerShell(name='keras-rl-1', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-2', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-3', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-4', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-5', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-6', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        # env.add_player(PlayerShell(name='keras-rl-7', stack_size=stack), range=0.9)  # shell is used for callback to keras rl
        env.add_player(PlayerShell(name='LJY', stack_size=stack, range=0.33))  # shell is used for callback to keras rl
        # dqn = DQNPlayer(name='DQN-1',stack_size=2000, range=0.9, env=env , load_model=None)
        # env.add_player(dqn)
        env.add_player(RandomPlayer(name='Random-1', range=1))
        # env.add_player(RandomPlayer(name='Random-2',range=1))
        # env.add_player(RandomPlayer(name='Random-3',range=1))
        # env.add_player(RandomPlayer(name='Random-4',range=1))
        # env.add_player(RandomPlayer(name='Random-5',range=1))
        # env.add_player(RandomPlayer(name='Random-6',range=1))
        # env.add_player(RandomPlayer(name='Random-7',range=1))
        # env.add_player(DQNPlayer(name='DQN-2',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-3',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-4',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-5',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-6',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-7',stack_size=2000, range=0.9, env=env , load_model=None))
        # env.add_player(DQNPlayer(name='DQN-8',stack_size=2000, range=0.9, env=env , load_model=None))
        env.reset()
        # print(env.players[0].range)
        # print(env.players[1].range)
        # print(env.players[2].range)
        # print(env.players[3].range)
        # print(env.players[4].range)
        # print(env.players[5].range)
        dqn = DQNPlayer()
        # dqn.initiate_agent(env,load_model='3dqn_vs_3rd')
        dqn.initiate_agent(env)
        dqn.train(ckpt_name='LJY')
Example #6
    def dqn_train_heads_up_keras_rl(self, model_name):
        """Implementation of kreras-rl deep q learing."""
        from agents.agent_consider_equity import Player as EquityPlayer
        from agents.agent_keras_rl_dqn import Player as DQNPlayer
        from agents.agent_random import Player as RandomPlayer
        env_name = 'neuron_poker-v0'
        env = gym.make(env_name, initial_stacks=self.stack, funds_plot=self.funds_plot, render=self.render,
                       use_nn_equity=self.use_nn_equity, use_cpp_montecarlo=self.use_cpp_montecarlo)

        # np.random.seed(123)
        # env.seed(123)
        env.add_player(EquityPlayer(name='equity/50/70', min_call_equity=.5, min_bet_equity=.7))
        env.add_player(PlayerShell(name='keras-rl', stack_size=self.stack))  # shell is used for callback to keras rl

        env.reset()

        dqn = DQNPlayer()
        # dqn.initiate_agent(env, load_memory=model_name, load_model=model_name, load_optimizer=model_name)
        # # dqn.initiate_agent(env, load_memory=None, load_model=None, load_optimizer=None)
        # dqn.initiate_agent(env, load_memory=None, load_model=None, load_optimizer=None, batch_size=128)
        # dqn.train(env_name=model_name, policy_epsilon=0.9)

        batch_sizes = [128, 128, 128, 128, 128, 128, 128, 128]
        policy_epsilon = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
        learn_rate = np.geomspace(1e-2, 1e-4, 5)

        # One training pass per available learning rate; bounding the loop by
        # len(learn_rate) keeps the index inside all three parameter lists.
        for x in range(len(learn_rate)):
            dqn = DQNPlayer()
            # dqn.initiate_agent(env, load_memory=model_name, load_model=model_name, load_optimizer=model_name, batch_size=128)
            dqn.initiate_agent(env, model_name=None, load_memory=None, load_model=None,
                               load_optimizer=None, batch_size=batch_sizes[x], learn_rate=learn_rate[x])

            dqn.train(env_name=model_name, policy_epsilon=policy_epsilon[x])
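The sweep at the end of Example #6 pairs each run with a batch size, an exploration epsilon and a learning rate from np.geomspace. An alternative sketch of the same loop using zip, which stops at the shortest of the three parameter lists instead of relying on an index bound; env, model_name, DQNPlayer and the keyword arguments are the ones from the example above:

# Each iteration trains one agent with its own batch size, epsilon and learning rate.
for batch_size, epsilon, lr in zip(batch_sizes, policy_epsilon, learn_rate):
    dqn = DQNPlayer()
    dqn.initiate_agent(env, model_name=None, load_memory=None, load_model=None,
                       load_optimizer=None, batch_size=batch_size, learn_rate=lr)
    dqn.train(env_name=model_name, policy_epsilon=epsilon)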