import copy
from statistics import mean

import numpy as np
import pytest

# NOTE: the project imports below are assumed from the names used in these
# tests; adjust the module paths to this repository's actual layout.
# from <package> import analyzer
# from <package>.analyzer import Analyzer
# from <package>.agents import AlwaysDefectAgent, DiffDQN, Qlearning
# from <package>.decisions import DecreasingEpsilonGreedy, EpsilonGreedy
# from <package>.demands import ConstantDemand, LogitDemand, PrisonersDilemmaDemand
# from <package>.environments import DiscreteSynchronEnvironment


def test_play_game():
    # With markup=0.0 the generated price grid spans from the lowest Nash
    # price to the highest monopoly price.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=10.0),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        markup=0.0,
        n_prices=10,
        n_periods=1,
    )
    env.play_game()
    assert len(env.possible_prices) == 10
    assert min(env.possible_prices) == min(env.nash_prices)
    assert max(env.possible_prices) == max(env.monopoly_prices)

    # An explicit price list takes precedence over n_prices.
    env = DiscreteSynchronEnvironment(
        demand=PrisonersDilemmaDemand(),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        possible_prices=[3, 4],
        markup=0.1,
        n_prices=10,
        n_periods=1,
    )
    env.play_game()
    assert len(env.possible_prices) == 2
    assert min(env.possible_prices) == min(env.nash_prices)
    assert max(env.possible_prices) == max(env.monopoly_prices)

def test_environment_prisoners():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
        ],
    )
    test_2 = DiscreteSynchronEnvironment(
        n_periods=10,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()),
            AlwaysDefectAgent(),
        ],
    )
    test_3 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()),
            Qlearning(discount=0.5, learning_rate=0.1, decision=DecreasingEpsilonGreedy()),
        ],
    )
    assert test_1.play_game()
    assert test_2.play_game()
    assert test_3.play_game()

def test_prepare_profit_calculation():
    env = DiscreteSynchronEnvironment(
        n_periods=1,
        agents=[Qlearning(), Qlearning(), Qlearning(), Qlearning()],
        demand=LogitDemand(),
    )
    env.play_game()
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    # One benchmark profit per agent, with Nash strictly below monopoly.
    assert len(nash_profits) == len(env.agents)
    assert len(monopoly_profits) == len(env.agents)
    assert (nash_profits < monopoly_profits).all()

def test_correct_init():
    # With eps=1.0 both agents pick prices uniformly at random, so the
    # chance of their price series coinciding in every one of the 100
    # periods is (1 / 100) ** 100, i.e. vanishingly small.
    env = DiscreteSynchronEnvironment(
        n_periods=100,
        n_prices=100,
        history_after=0,
        agents=[
            Qlearning(decision=EpsilonGreedy(eps=1.0)),
            Qlearning(decision=EpsilonGreedy(eps=1.0)),
        ],
    )
    env.play_game()
    prices = np.array(env.price_history)
    assert not np.all(prices[:, 1] == prices[:, 0])

def test_init():
    # Symmetric agents under logit demand: Nash prices never exceed
    # monopoly prices.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=0.0),
        agents=[Qlearning(quality=1.0, marginal_cost=0.0), Qlearning(quality=1.0, marginal_cost=0.0)],
    )
    assert max(env.monopoly_prices) > min(env.nash_prices)
    assert sum(np.greater(env.nash_prices, env.monopoly_prices)) == 0

    # The same ordering must hold with asymmetric marginal costs.
    env = DiscreteSynchronEnvironment(
        demand=LogitDemand(price_sensitivity=1.0, outside_quality=10.0),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
    )
    assert max(env.monopoly_prices) > min(env.nash_prices)
    assert sum(np.greater(env.nash_prices, env.monopoly_prices)) == 0

    # Prisoner's dilemma: the low price is the Nash outcome, the high
    # price the monopoly outcome.
    env = DiscreteSynchronEnvironment(
        demand=PrisonersDilemmaDemand(),
        agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        possible_prices=[2, 3],
    )
    assert (env.monopoly_prices == np.array([3, 3])).all()
    assert (env.nash_prices == np.array([2, 2])).all()

    # PrisonersDilemmaDemand requires an explicit price list.
    with pytest.raises(AssertionError):
        DiscreteSynchronEnvironment(
            demand=PrisonersDilemmaDemand(),
            agents=[Qlearning(quality=10.0, marginal_cost=5.0), Qlearning(quality=10.0, marginal_cost=1.0)],
        )

def test_environment_advanced_qlearning():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=LogitDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(
                discount=0.95,
                learning_rate=0.3,
                marginal_cost=4.0,
                quality=5.0,
                decision=EpsilonGreedy(eps=0.1),
            ),
            AlwaysDefectAgent(marginal_cost=0.1),
        ],
    )
    assert test_1.play_game()

def run():
    dqn_env = DiscreteSynchronEnvironment(
        markup=0.1,
        n_periods=100,
        possible_prices=[],
        n_prices=15,
        demand=LogitDemand(outside_quality=0.0, price_sensitivity=0.25),
        history_after=50,
        agents=[
            DiffDQN(
                discount=0.95,
                learning_rate=0.001,
                decision=DecreasingEpsilonGreedy(),
                marginal_cost=1.0,
                quality=2.0,
            ),
            Qlearning(
                discount=0.95,
                learning_rate=0.125,
                decision=DecreasingEpsilonGreedy(),
                marginal_cost=1.0,
                quality=2.0,
            ),
            AlwaysDefectAgent(marginal_cost=1.0, quality=2.0),
        ],
    )
    dqn_env.play_game()
    Analyzer.analyze(dqn_env)

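# Hypothetical entry point: lets the example above run as a standalone script.
if __name__ == "__main__":
    run()
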
def test_analyze():
    env = DiscreteSynchronEnvironment(
        agents=[
            Qlearning(marginal_cost=0.0),
            Qlearning(marginal_cost=0.0),
            Qlearning(marginal_cost=1.0),
            Qlearning(marginal_cost=1.0),
        ],
        demand=ConstantDemand(),
    )
    env.nash_prices = [1.0, 4.0, 1.0, 4.0]
    env.monopoly_prices = [2.0, 6.0, 2.0, 6.0]
    # All four agents see the same reward stream (average profit 2.0).
    for agent in env.agents:
        agent.rewards = [2.0, 2.0, 3.0, 3.0, 1.0, 1.0]
    average_profits = [mean(agent.rewards) for agent in env.agents]
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    collusion_profits = analyzer.get_collusion_for(average_profits, nash_profits, monopoly_profits)
    assert (collusion_profits == np.array([1.0, -1.0, 2.0, -0.5])).all()

    env = DiscreteSynchronEnvironment(
        agents=[Qlearning(marginal_cost=1.0), Qlearning(marginal_cost=0.0)],
        demand=ConstantDemand(),
    )
    env.nash_prices = [1.0, 1.0]
    env.monopoly_prices = [2.0, 4.0]
    env.agents[0].rewards = [1.5, 1.5, 1.5, 1.5, 1.5, 1.5]
    env.agents[1].rewards = [2.0, 2.0, 3.0, 3.0, 1.0, 1.0]
    average_profits = [mean(agent.rewards) for agent in env.agents]
    nash_profits, monopoly_profits = analyzer.prepare_profit_calculation(env)
    collusion_profits = analyzer.get_collusion_for(average_profits, nash_profits, monopoly_profits)
    assert (collusion_profits == np.array([1.5, 1 / 3])).all()

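# A minimal sketch of the collusion index that the expected values above are
# consistent with (assumed form, not necessarily the library's exact code).
# It treats ConstantDemand as selling one unit per period, so each benchmark
# profit is simply price minus marginal cost: an index of 0 means competitive
# (Nash) profit, 1 means full monopoly profit.
def collusion_index(avg_profit, nash_profit, monopoly_profit):
    return (avg_profit - nash_profit) / (monopoly_profit - nash_profit)


# Worked checks against the first case above:
assert collusion_index(2.0, 1.0 - 0.0, 2.0 - 0.0) == 1.0   # agent 0
assert collusion_index(2.0, 4.0 - 1.0, 6.0 - 1.0) == -0.5  # agent 3
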
def test_play_price():
    # With eps=0.0 the choice is greedy, but with no learned values yet
    # either price in the action space may be returned.
    agent = Qlearning(decision=EpsilonGreedy(eps=0.0))
    p = agent.play_price((1.0, 1.0), [1.0, 2.0], 0, 0)
    assert p in (1.0, 2.0)

def test_learn():
    q_matrix = {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # No reward: the Q-matrix stays unchanged.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.95, learning_rate=0.1)
    agent.learn(
        reward=0.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 1.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix == q_matrix

    # Learning rate 0: nothing is learned even with a reward.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.95, learning_rate=0.0)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 1.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix == q_matrix

    q_matrix = {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # Discount 0: the future has no meaning, only the immediate reward counts.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=0.0, learning_rate=0.9)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 2.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix[(1.0, 1.0)][1.0] == 9.0
    assert agent.q_matrix == {
        (1.0, 1.0): {1.0: 9.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # Discount 1: the future has meaning, so the best next-state value (5.0)
    # enters the update.
    agent = Qlearning(q_matrix=copy.deepcopy(q_matrix), discount=1.0, learning_rate=0.5)
    agent.learn(
        reward=10.0,
        state=(1.0, 1.0),
        action=1.0,
        next_state=(1.0, 2.0),
        action_space=[],
        previous_reward=0.0,
        previous_action=0.0,
        previous_state=(None,),
    )
    assert agent.q_matrix[(1.0, 1.0)][1.0] == 7.5
    assert agent.q_matrix == {
        (1.0, 1.0): {1.0: 7.5, 2.0: 0.0},
        (1.0, 2.0): {1.0: 5.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

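# A minimal sketch of the tabular update the expectations above imply, i.e.
# the textbook Q-learning rule (assumed here, not taken from the library):
#
#     Q(s, a) += lr * (reward + discount * max_a' Q(s', a') - Q(s, a))
#
def q_update(q, state, action, reward, next_state, lr, discount):
    target = reward + discount * max(q[next_state].values())
    q[state][action] += lr * (target - q[state][action])


# Reproducing the two learned values asserted above:
#   lr=0.9, discount=0.0: 0.0 + 0.9 * (10.0 + 0.0 * 5.0 - 0.0) = 9.0
#   lr=0.5, discount=1.0: 0.0 + 0.5 * (10.0 + 1.0 * 5.0 - 0.0) = 7.5
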
def test_initialize_q_matrix():
    # 1 action
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0])
    assert q_matrix == {(1.0,): {1.0: 0.0}}
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0])
    assert q_matrix == {(1.0, 1.0): {1.0: 0.0}}
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0])
    assert q_matrix == {(1.0, 1.0, 1.0): {1.0: 0.0}}

    # 2 actions
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0,): {1.0: 0.0, 2.0: 0.0},
        (2.0,): {1.0: 0.0, 2.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0, 2.0])
    assert q_matrix == {
        (1.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (1.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
        (2.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    }

    # 3 actions
    q_matrix = Qlearning().initialize_q_matrix(n_agents=1, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0,): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=2, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }
    q_matrix = Qlearning().initialize_q_matrix(n_agents=3, actions_space=[1.0, 2.0, 3.0])
    assert q_matrix == {
        (1.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 1.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 2.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 1.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 2.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (1.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (2.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
        (3.0, 3.0, 3.0): {1.0: 0.0, 2.0: 0.0, 3.0: 0.0},
    }

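# The expected matrices above are exactly the Cartesian-product construction:
# one zero-initialised action-value dict per joint price state. A minimal
# reference sketch (an assumed equivalent, not the library's code):
import itertools


def reference_q_matrix(n_agents, actions_space):
    return {
        state: {action: 0.0 for action in actions_space}
        for state in itertools.product(actions_space, repeat=n_agents)
    }


assert reference_q_matrix(2, [1.0, 2.0]) == {
    (1.0, 1.0): {1.0: 0.0, 2.0: 0.0},
    (1.0, 2.0): {1.0: 0.0, 2.0: 0.0},
    (2.0, 1.0): {1.0: 0.0, 2.0: 0.0},
    (2.0, 2.0): {1.0: 0.0, 2.0: 0.0},
}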