# Train an MDPBidderUAIAugS learner against an existing `bidders` population,
# solve its MDP for a fixed pair of valuations, and report whether it bids its
# valuation in the final round.
#
# NOTE(review): `bidders`, `num_mc`, `num_rounds`, `num_bidders`,
# `possible_types`, `type_dist` and `type_dist_disc` are not defined in this
# excerpt; they must already exist when this code runs. The two commented-out
# assignments below show candidate opponent populations (SimpleBidder or
# WeberBidder), both disabled here.
#bidders = [SimpleBidder(i, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc)
#           for i in range(num_bidders)]
# bidders = [WeberBidder(i, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc)
#           for i in range(num_bidders)]
# NOTE(review): the learner's first constructor argument is num_bidders, where
# the opponents above pass their own index i -- presumably the learner's id;
# confirm against the bidder class.
learner = MDPBidderUAIAugS(num_bidders, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc)
learner.learn_auction_parameters(bidders, num_mc)

# The bare string below is disabled debugging code. As written it is a no-op
# expression statement; if enabled it would print every key of learner.T whose
# stored value exceeds 1e-9.
"""
print('Transitions')
for k in learner.T.keys():
    if learner.T[k] > 0.000000001:
        print(k[0], '    ', k[1], '    ', k[2], '    ', learner.T[k])
"""

# Check that this runs: assign a fixed pair of valuations, recompute the
# expected rewards, solve the MDP, then print the result of the final-round
# check. The call order matters because each step mutates `learner`.
learner.valuations = [.1, .1]
learner.calc_expected_rewards()
learner.solve_mdp()
final_round_truthful = learner.is_bidding_valuation_in_final_round()
print('Bidding truthfully in final round =', final_round_truthful)

# Compare against a bidder
# reset() is called on the first bidder before the trial buffers are set up;
# presumably it clears that bidder's per-auction state -- confirm.
bidders[0].reset()
# Number of comparison trials, plus six independent zero-filled lists with one
# slot per trial. Presumably the trial loop that follows fills them in; its
# body is not visible in this excerpt.
num_trials = 100
b0, lb0, lb10, lb11, val0, val1 = ([0] * num_trials for _ in range(6))
for r in range(num_trials):
Пример #2
0
# Auction size: two rounds and two bidders.
num_rounds, num_bidders = 2, 2

# Valuations
# Presumably type_dist gives the probability of each entry in possible_types,
# and type_dist_disc marks the distribution as discrete -- confirm against the
# bidder classes that consume these values.
possible_types = [3, 10]
type_dist = [0.8, 0.2]
type_dist_disc = True

# Learn the MDP
# Build one SimpleBidder per seat, then let the learner estimate the auction
# parameters from that population. num_mc is passed straight through to
# learn_auction_parameters (presumably a Monte Carlo sample count -- confirm).
num_mc = 10000
bidders = [
    SimpleBidder(idx, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc)
    for idx in range(num_bidders)
]
# The learner is constructed with num_bidders as its first argument, where the
# opponents pass their own index. An earlier, disabled variant of this line
# built an MDPBidderUAI with the same argument list instead.
learner = MDPBidderUAIAugS(
    num_bidders, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc
)
learner.learn_auction_parameters(bidders, num_mc)

# Give the learner its own deep copy of the second bidder's valuations, so the
# two objects do not share a list, then recompute rewards and solve the MDP.
learner.valuations = copy.deepcopy(bidders[1].valuations)
learner.calc_expected_rewards()
learner.solve_mdp()

print('Test if the bidder bids truthfully')
final_round_truthful = learner.is_bidding_valuation_in_final_round()
print('Bidding truthfully in final round =', final_round_truthful)

# Display what was learned
# Print one row per key of learner.T, visiting the keys in sorted order. Each
# key is indexed at positions 0, 1 and 2, lining up with the state / action /
# next state columns of the header; the stored value fills the last column.
print('Transitions: state \t action \t next state \t probability')
sorted_keys = sorted(learner.T.keys())
for k in sorted_keys:
    print(k[0], '\t', k[1], '\t', k[2], '\t', learner.T[k])

# See how the learner performs
# Starts out as an empty dict; the code that fills it is past this excerpt.
policies = {}