import copy
from statistics import mean

import numpy as np
import pytest

# NOTE: the project imports below are assumed from the names used in these
# tests; adjust the module paths to this repository's actual layout.
# from <package> import analyzer
# from <package>.analyzer import Analyzer
# from <package>.agents import AlwaysDefectAgent, DiffDQN, Qlearning
# from <package>.decisions import DecreasingEpsilonGreedy, EpsilonGreedy
# from <package>.demands import ConstantDemand, LogitDemand, PrisonersDilemmaDemand
# from <package>.environments import DiscreteSynchronEnvironment


def test_play_game():
    # With markup=0.0 the generated price grid spans from the lowest Nash
    # price to the highest monopoly price.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=10.0),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        markup=0.0,
        n_prices=10,
        n_periods=1,
    )
    env.play_game()
    assert len(env.possible_prices) == 10
    assert min(env.possible_prices) == min(env.nash_prices)
    assert max(env.possible_prices) == max(env.monopoly_prices)

    # An explicit price list takes precedence over n_prices.
    env = DiscreteSynchronEnvironment(
        demand=PrisonersDilemmaDemand(),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        possible_prices=[3, 4],
        markup=0.1,
        n_prices=10,
        n_periods=1,
    )
    env.play_game()
    assert len(env.possible_prices) == 2
    assert min(env.possible_prices) == min(env.nash_prices)
    assert max(env.possible_prices) == max(env.monopoly_prices)

def test_environment_prisoners():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
        ],
    )
    test_2 = DiscreteSynchronEnvironment(
        n_periods=10,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()),
            AlwaysDefectAgent(),
        ],
    )
    test_3 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()),
            Qlearning(discount=0.5, learning_rate=0.1, decision=DecreasingEpsilonGreedy()),
        ],
    )
    assert test_1.play_game()
    assert test_2.play_game()
    assert test_3.play_game()

def test_prepare_profit_calculation():
    env = DiscreteSynchronEnvironment(
        n_periods=1,
        agents=[Qlearning(), Qlearning(), Qlearning(), Qlearning()],
        demand=LogitDemand(),
    )
    env.play_game()
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    # One benchmark profit per agent, with Nash strictly below monopoly.
    assert len(nash_profits) == len(env.agents)
    assert len(monopoly_profits) == len(env.agents)
    assert (nash_profits < monopoly_profits).all()

def test_correct_init():
    # With eps=1.0 both agents pick prices uniformly at random, so the
    # chance of their price series coinciding in every one of the 100
    # periods is (1 / 100) ** 100, i.e. vanishingly small.
    env = DiscreteSynchronEnvironment(
        n_periods=100,
        n_prices=100,
        history_after=0,
        agents=[
            Qlearning(decision=EpsilonGreedy(eps=1.0)),
            Qlearning(decision=EpsilonGreedy(eps=1.0)),
        ],
    )
    env.play_game()
    prices = np.array(env.price_history)
    assert not np.all(prices[:, 1] == prices[:, 0])

def test_init():
    # Symmetric agents under logit demand: Nash prices never exceed
    # monopoly prices.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=0.0),
        agents=[Qlearning(quality=1.0, marginal_cost=0.0), Qlearning(quality=1.0, marginal_cost=0.0)],
    )
    assert max(env.monopoly_prices) > min(env.nash_prices)
    assert sum(np.greater(env.nash_prices, env.monopoly_prices)) == 0

    # The same ordering must hold with asymmetric marginal costs.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=10.0),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
    )
    assert max(env.monopoly_prices) > min(env.nash_prices)
    assert sum(np.greater(env.nash_prices, env.monopoly_prices)) == 0

    # Prisoner's dilemma: the low price is the Nash outcome, the high
    # price the monopoly outcome.
    env = DiscreteSynchronEnvironment(
        demand=PrisonersDilemmaDemand(),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        possible_prices=[2, 3],
    )
    assert (env.monopoly_prices == np.array([3, 3])).all()
    assert (env.nash_prices == np.array([2, 2])).all()

    # PrisonersDilemmaDemand requires an explicit price list.
    with pytest.raises(AssertionError):
        DiscreteSynchronEnvironment(
            demand=PrisonersDilemmaDemand(),
            agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        )

def test_environment_advanced_qlearning():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=LogitDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(
                discount=0.95,
                learning_rate=0.3,
                marginal_cost=4.0,
                quality=5.0,
                decision=EpsilonGreedy(eps=0.1),
            ),
            AlwaysDefectAgent(marginal_cost=0.1),
        ],
    )
    assert test_1.play_game()

def run():
    dqn_env = DiscreteSynchronEnvironment(
        markup=0.1,
        n_periods=100,
        possible_prices=[],
        n_prices=15,
        demand=LogitDemand(outside_quality=0.0, price_sensitivity=0.25),
        history_after=50,
        agents=[
            DiffDQN(
                discount=0.95,
                learning_rate=0.001,
                decision=DecreasingEpsilonGreedy(),
                marginal_cost=1.0,
                quality=2.0,
            ),
            Qlearning(
                discount=0.95,
                learning_rate=0.125,
                decision=DecreasingEpsilonGreedy(),
                marginal_cost=1.0,
                quality=2.0,
            ),
            AlwaysDefectAgent(marginal_cost=1.0, quality=2.0),
        ],
    )
    dqn_env.play_game()
    Analyzer.analyze(dqn_env)

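# Hypothetical entry point: lets the example above run as a standalone script.
if __name__ == "__main__":
    run()
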
def test_analyze():
    env = DiscreteSynchronEnvironment(
        agents=[
            Qlearning(marginal_cost=0.0),
            Qlearning(marginal_cost=0.0),
            Qlearning(marginal_cost=1.0),
            Qlearning(marginal_cost=1.0),
        ],
        demand=ConstantDemand(),
    )
    env.nash_prices = [1.0, 4.0, 1.0, 4.0]
    env.monopoly_prices = [2.0, 6.0, 2.0, 6.0]
    # All four agents see the same reward stream (average profit 2.0).
    for agent in env.agents:
        agent.rewards = [2.0, 2.0, 3.0, 3.0, 1.0, 1.0]
    average_profits = [mean(agent.rewards) for agent in env.agents]
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    collusion_profits = analyzer.get_collusion_for(average_profits, nash_profits, monopoly_profits)
    assert (collusion_profits == np.array([1.0, -1.0, 2.0, -0.5])).all()

    env = DiscreteSynchronEnvironment(
        agents=[Qlearning(marginal_cost=1.0), Qlearning(marginal_cost=0.0)],
        demand=ConstantDemand(),
    )
    env.nash_prices = [1.0, 1.0]
    env.monopoly_prices = [2.0, 4.0]
    env.agents[0].rewards = [1.5, 1.5, 1.5, 1.5, 1.5, 1.5]
    env.agents[1].rewards = [2.0, 2.0, 3.0, 3.0, 1.0, 1.0]
    average_profits = [mean(agent.rewards) for agent in env.agents]
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    collusion_profits = analyzer.get_collusion_for(average_profits, nash_profits, monopoly_profits)
    assert (collusion_profits == np.array([1.5, 1 / 3])).all()

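# A minimal sketch of the collusion index that the expected values above are
# consistent with (assumed form, not necessarily the library's exact code).
# It treats ConstantDemand as selling one unit per period, so each benchmark
# profit is simply price minus marginal cost: an index of 0 means competitive
# (Nash) profit, 1 means full monopoly profit.
def collusion_index(avg_profit, nash_profit, monopoly_profit):
    return (avg_profit - nash_profit) / (monopoly_profit - nash_profit)


# Worked checks against the first case above:
assert collusion_index(2.0, 1.0 - 0.0, 2.0 - 0.0) == 1.0   # agent 0
assert collusion_index(2.0, 4.0 - 1.0, 6.0 - 1.0) == -0.5  # agent 3
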
def test_play_price():
    # With eps=0.0 the choice is greedy, but with no learned values yet
    # either price in the action space may be returned.
    agent = Qlearning(decision=EpsilonGreedy(eps=0.0))
    p = agent.play_price((1.0, 1.0), [1.0, 2.0], 0, 0)
    assert p in (1.0, 2.0)

def test_learn():
    q_matrix = {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # No reward: the Q-matrix stays unchanged.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.95, learning_rate=0.1)
    agent.learn(
        reward=0.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 1.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix == q_matrix

    # Learning rate 0: nothing is learned even with a reward.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.95, learning_rate=0.0)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 1.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix == q_matrix

    q_matrix = {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # Discount 0: the future has no meaning, only the immediate reward counts.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.0, learning_rate=0.9)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 2.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix[(1.0, 1.0)][1.0] == 9.0
    assert agent.q_matrix == {
        (1.0, 1.0): {1.0: 9.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # Discount 1: the future has meaning, so the best next-state value (5.0)
    # enters the update.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=1.0, learning_rate=0.5)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 2.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix[(1.0, 1.0)][1.0] == 7.5
    assert agent.q_matrix == {
        (1.0, 1.0): {1.0: 7.5, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

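# A minimal sketch of the tabular update the expectations above imply, i.e.
# the textbook Q-learning rule (assumed here, not taken from the library):
#
#     Q(s, a) += lr * (reward + discount * max_a' Q(s', a') - Q(s, a))
#
def q_update(q, state, action, reward, next_state, lr, discount):
    target = reward + discount * max(q[next_state].values())
    q[state][action] += lr * (target - q[state][action])


# Reproducing the two learned values asserted above:
#   lr=0.9, discount=0.0: 0.0 + 0.9 * (10.0 + 0.0 * 5.0 - 0.0) = 9.0
#   lr=0.5, discount=1.0: 0.0 + 0.5 * (10.0 + 1.0 * 5.0 - 0.0) = 7.5
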
def test_initialize_q_matrix():
    # 1 action
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0])
    assert q_matrix == {(1.0,): {1.0: 0.0}}
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0])
    assert q_matrix == {(1.0, 1.0): {1.0: 0.0}}
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0])
    assert q_matrix == {(1.0, 1.0, 1.0): {1.0: 0.0}}

    # 2 actions
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0,): {1.0: 0.0, 2.0: 0.0},
        (2.0,): {1.0: 0.0, 2.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # 3 actions
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }

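# The expected matrices above are exactly the Cartesian-product construction:
# one zero-initialised action-value dict per joint price state. A minimal
# reference sketch (an assumed equivalent, not the library's code):
import itertools


def reference_q_matrix(n_agents, actions_space):
    return {
        state: {action: 0.0 for action in actions_space}
        for state in itertools.product(actions_space, repeat=n_agents)
    }


assert reference_q_matrix(2, [1.0, 2.0]) == {
    (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
    (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
    (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
}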