def to_instance(game):
    """Build the sparse matrix of per-deviation utility differences.

    Args:
        game: A pair ``((index, U), demonstrated)``. ``index`` unpacks to
            ``(M, actions, _)``: ``M`` is used as the number of outcomes
            (matrix rows) and ``actions[i]`` as the number of actions
            available at position ``i`` (presumably one position per
            player). ``U[i]`` is indexed as ``U[i][outcome, k]`` and
            ``U[0].shape[1]`` supplies ``K``.

    Returns:
        A tuple ``(demonstrated, K, A)``. ``demonstrated`` is passed through
        untouched. ``A`` is a ``scipy.sparse.csr_matrix`` of shape
        ``(M, K * deviations)`` whose entry at row ``outcome`` and column
        ``K * deviation + k`` is
        ``U[i][outcomep, k] - U[i][outcome, k]``, with
        ``outcomep = indexing.reindex(index, outcome, i, y)``.
    """
    ((index, U), demonstrated) = game
    (M, actions, _) = index
    N = len(actions)
    K = U[0].shape[1]

    # Position i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for n_actions in actions:
        deviation_offset.append(deviations)
        deviations += n_actions * (n_actions - 1)

    A = []
    row = []
    col = []
    for outcome in range(M):
        # The decoded action tuple depends only on the outcome, so decode it
        # once instead of once per position.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)
                # Row-major numbering of (x, y) pairs with the diagonal
                # removed: entries past the diagonal shift down by one.
                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    deviation -= 1
                A.extend(
                    U[i][outcomep, k] - U[i][outcome, k] for k in range(K)
                )
                row.extend([outcome] * K)
                col.extend(range(K * deviation, K * (deviation + 1)))

    matrix = scipy.sparse.csr_matrix(
        (A, (row, col)), shape=(M, K * deviations)
    )
    return (demonstrated, K, matrix)
def solve(game, c, w, opt):
    """Optimize over deviations and return the resulting outcome distribution.

    Args:
        game: A pair ``(index, U)``. ``index`` unpacks to
            ``(M, actions, _)``: ``M`` is used as the number of outcomes and
            ``actions[i]`` as the number of actions at position ``i``
            (presumably one position per player). ``U[i][outcome]`` is
            combined with ``w`` through ``dot``.
        c: Indexable weights; ``c[i]`` scales position ``i``'s weighted
            utility when forming the per-outcome base score.
        w: Weight vector applied to ``U[i][outcome]`` via ``dot``.
        opt: Callable invoked as ``opt(deviations, gradient)``. Its return
            value is fed to ``predict`` as ``x`` -- presumably a vector with
            one entry per deviation; confirm against the optimizer in use.

    Returns:
        The result of ``predict`` at the point returned by ``opt``: a
        length-``M`` vector of normalized exponentials (entries sum to 1).
    """
    (index, U) = game
    (M, actions, _) = index
    N = len(actions)

    # Position i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for n_actions in actions:
        deviation_offset.append(deviations)
        deviations += n_actions * (n_actions - 1)

    # Uw[outcome, i]: position i's utility at `outcome` projected onto w.
    # cp[outcome]: the c-weighted sum of those projections.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    # R[outcome, deviation]: change in position i's projected utility when
    # the outcome is replaced by indexing.reindex(index, outcome, i, y).
    R = zeros((M, deviations))
    for outcome in range(M):
        # The decoded action tuple depends only on the outcome, so decode it
        # once instead of once per position.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)
                # Row-major numbering of (x, y) pairs with the diagonal
                # removed: entries past the diagonal shift down by one.
                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    deviation -= 1
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Return the normalized exponential of ``cp - R.x`` over outcomes."""
        regrets = cp - dot(R, x)
        # Subtract the maximum before exponentiating to avoid overflow; the
        # shift cancels in the normalization.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Return ``predict(x) . R``, the vector handed to ``opt``."""
        sigma = predict(x)
        return dot(sigma, R)

    return predict(opt(deviations, gradient))
def solve(game, c, w, opt):
    """Optimize over deviations and return the resulting outcome distribution.

    Args:
        game: A pair ``(index, U)``. ``index`` unpacks to
            ``(M, actions, _)``: ``M`` is used as the number of outcomes and
            ``actions[i]`` as the number of actions at position ``i``
            (presumably one position per player). ``U[i][outcome]`` is
            combined with ``w`` through ``dot``.
        c: Indexable weights; ``c[i]`` scales position ``i``'s weighted
            utility when forming the per-outcome base score.
        w: Weight vector applied to ``U[i][outcome]`` via ``dot``.
        opt: Callable invoked as ``opt(deviations, gradient)``. Its return
            value is fed to ``predict`` as ``x`` -- presumably a vector with
            one entry per deviation; confirm against the optimizer in use.

    Returns:
        The result of ``predict`` at the point returned by ``opt``: a
        length-``M`` vector of normalized exponentials (entries sum to 1).
    """
    (index, U) = game
    (M, actions, _) = index
    N = len(actions)

    # Position i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for n_actions in actions:
        deviation_offset.append(deviations)
        deviations += n_actions * (n_actions - 1)

    # Uw[outcome, i]: position i's utility at `outcome` projected onto w.
    # cp[outcome]: the c-weighted sum of those projections.
    Uw = zeros((M, N))
    cp = zeros(M)
    for outcome in range(M):
        for i in range(N):
            Uw[outcome, i] = dot(U[i][outcome], w)
            cp[outcome] += c[i] * Uw[outcome, i]

    # R[outcome, deviation]: change in position i's projected utility when
    # the outcome is replaced by indexing.reindex(index, outcome, i, y).
    R = zeros((M, deviations))
    for outcome in range(M):
        # The decoded action tuple depends only on the outcome, so decode it
        # once instead of once per position.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)
                # Row-major numbering of (x, y) pairs with the diagonal
                # removed: entries past the diagonal shift down by one.
                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    deviation -= 1
                R[outcome, deviation] = Uw[outcomep, i] - Uw[outcome, i]

    def predict(x):
        """Return the normalized exponential of ``cp - R.x`` over outcomes."""
        regrets = cp - dot(R, x)
        # Subtract the maximum before exponentiating to avoid overflow; the
        # shift cancels in the normalization.
        shift = regrets.max()
        sigma = exp(regrets - shift)
        Z = sum(sigma)
        return sigma / Z

    def gradient(x):
        """Return ``predict(x) . R``, the vector handed to ``opt``."""
        sigma = predict(x)
        return dot(sigma, R)

    return predict(opt(deviations, gradient))
def create(N):
    """Create a game with ``N`` positions, 4 actions each, 4 utility features.

    Each action ``a`` in ``range(4)`` is read as two binary choices:
    ``a % 2 == 0`` and ``a // 2 == 0``. For every outcome the function counts
    how many positions satisfy each test (``left`` and ``back``) and derives a
    4-component vector per position from those counts.

    Args:
        N: Number of positions (presumably players); must be greater than 1.

    Returns:
        A pair ``(idx, U)`` where ``idx`` is the value returned by
        ``indexing.create([4] * N)`` and ``U`` is a list of ``N`` arrays of
        shape ``(M, 4)``; ``U[i][outcome]`` holds the negated vector computed
        for position ``i`` at that outcome.
    """
    assert N > 1
    actions = [4] * N
    idx = indexing.create(actions)
    (M, _, _) = idx
    U = [zeros((M, 4)) for _ in range(N)]

    # Baseline term shared by every position and outcome. It depends only on
    # N, so it is built once rather than once per outcome.
    v = array([1.5 + .2 * N, 9.0, 1.0 / 8.0 + 0.04 * N, 7 + 0.4 * N])

    for outcome in range(M):
        a = indexing.unindex(idx, outcome)

        # Count how many positions picked an even action / an action with
        # x // 2 == 0 in this outcome.
        left = 0
        back = 0
        for x in a:
            if x % 2 == 0:
                left += 1
            if x // 2 == 0:
                back += 1

        for i in range(N):
            # NOTE(review): the value assigned by this first if/else is
            # unconditionally overwritten by the second if/else below, so the
            # `left`-dependent term never reaches U. Behaviour is preserved
            # as-is; confirm whether the two terms were meant to be combined.
            if a[i] % 2 == 0:
                u = v + array([1.0, 1.5, 1.0 / 20.0, 2.0])
            else:
                u = v + array([
                    1.0 + 2.0 * left,
                    1.0,
                    1.0 / 20.0 + 0.04 * left,
                    1.5 + .4 * left,
                ])

            if a[i] // 2 == 0:
                u = v + array([
                    6.0 + 0.5 * back,
                    12.0,
                    1.0 / 7.0 + 0.01 * back,
                    10.0 + 3.0 * back,
                ])
            else:
                u = v + array([2.0, 20.0, 1.0 / 8.0, 15.0])

            # The computed vector is stored negated.
            U[i][outcome, :] = -u

    return (idx, U)
def to_instance(game):
    """Build the sparse matrix of per-deviation utility differences.

    Args:
        game: A pair ``((index, U), demonstrated)``. ``index`` unpacks to
            ``(M, actions, _)``: ``M`` is used as the number of outcomes
            (matrix rows) and ``actions[i]`` as the number of actions
            available at position ``i`` (presumably one position per
            player). ``U[i]`` is indexed as ``U[i][outcome, k]`` and
            ``U[0].shape[1]`` supplies ``K``.

    Returns:
        A tuple ``(demonstrated, K, A)``. ``demonstrated`` is passed through
        untouched. ``A`` is a ``scipy.sparse.csr_matrix`` of shape
        ``(M, K * deviations)`` whose entry at row ``outcome`` and column
        ``K * deviation + k`` is
        ``U[i][outcomep, k] - U[i][outcome, k]``, with
        ``outcomep = indexing.reindex(index, outcome, i, y)``.
    """
    ((index, U), demonstrated) = game
    (M, actions, _) = index
    N = len(actions)
    K = U[0].shape[1]

    # Position i owns a contiguous run of actions[i] * (actions[i] - 1)
    # deviation slots, one per ordered pair (x, y) with x != y.
    deviation_offset = []
    deviations = 0
    for n_actions in actions:
        deviation_offset.append(deviations)
        deviations += n_actions * (n_actions - 1)

    A = []
    row = []
    col = []
    for outcome in range(M):
        # The decoded action tuple depends only on the outcome, so decode it
        # once instead of once per position.
        action = indexing.unindex(index, outcome)
        for i in range(N):
            x = action[i]
            for y in range(actions[i]):
                if x == y:
                    continue
                outcomep = indexing.reindex(index, outcome, i, y)
                # Row-major numbering of (x, y) pairs with the diagonal
                # removed: entries past the diagonal shift down by one.
                deviation = deviation_offset[i] + (actions[i] - 1) * x + y
                if x < y:
                    deviation -= 1
                A.extend(
                    U[i][outcomep, k] - U[i][outcome, k] for k in range(K)
                )
                row.extend([outcome] * K)
                col.extend(range(K * deviation, K * (deviation + 1)))

    matrix = scipy.sparse.csr_matrix(
        (A, (row, col)), shape=(M, K * deviations)
    )
    return (demonstrated, K, matrix)