def dict_loop(D, env, w, damping=0.001):
    """
    Speedy and memory efficient.

    Accumulate a sparse linear system (A, b) from the samples in D using
    plain dictionaries. For every sample, with phi = env.phi(s, a) and
    phi' = env.phi(ns, env.linear_policy(w, ns)):

        A += phi * (phi - gamma * phi')^T
        b += phi * r

    Args:
        D: iterable of (s, a, r, ns, na) tuples; na is not used.
        env: object providing phi(s, a, sparse=True, format='rawdict')
            (used here as a dict of feature index -> value),
            linear_policy(w, ns) and the attribute gamma.
        w: weight vector; len(w) is the dimension k of the system and w is
            passed to env.linear_policy.
        damping: initial value placed on every diagonal entry of A.

    Returns:
        The pair (A, b) as built by sp_create_dict with format='csr',
        requested with shapes (k, k) and (k, 1).
    """
    k = len(w)
    gamma = env.gamma

    # range instead of xrange so this also runs on Python 3.
    A = {(i, i): damping for i in range(k)}
    b = {}

    for (s, a, r, ns, _na) in D:
        features = env.phi(s, a, sparse=True, format='rawdict')
        # Named next_action so the builtin next() is not shadowed.
        next_action = env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # for 1-dim array on vals, rows matter
        # nf = phi - gamma * phi', built on a copy so features stays intact.
        nf = features.copy()
        for i, v in newfeatures.items():
            nf[i] = nf.get(i, 0) - gamma * v

        for i, v1 in features.items():
            for j, v2 in nf.items():
                A[i, j] = A.get((i, j), 0) + v1 * v2
            # One reward contribution per row i (not per (i, j) pair).
            b[i] = b.get(i, 0) + v1 * r

    # convert to sparse matrices since these could be large
    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
def dict_loop(D, env, w, damping=0.001):
    """
    Speedy and memory efficient.

    Build the sparse system (A, b) from the samples in D with dictionary
    accumulators. Each sample (s, a, r, ns, na) contributes

        A += phi(s, a) * (phi(s, a) - gamma * phi(ns, a'))^T
        b += phi(s, a) * r

    where a' = env.linear_policy(w, ns) and gamma = env.gamma.

    Args:
        D: iterable of (s, a, r, ns, na) tuples; na is ignored.
        env: provides phi(s, a, sparse=True, format='rawdict') (consumed
            here as a dict mapping feature index to value),
            linear_policy(w, ns) and gamma.
        w: weight vector; its length k fixes the size of the system and it
            is handed to env.linear_policy.
        damping: value used to seed each diagonal entry of A.

    Returns:
        (A, b), both produced by sp_create_dict(..., format='csr') with
        requested shapes (k, k) and (k, 1).
    """
    k = len(w)
    gamma = env.gamma

    # range works on both Python 2 and Python 3; xrange does not.
    A = {(i, i): damping for i in range(k)}
    b = {}

    for (s, a, r, ns, _na) in D:
        features = env.phi(s, a, sparse=True, format='rawdict')
        # Avoid calling this "next": that would hide the builtin.
        next_action = env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # for 1-dim array on vals, rows matter
        # Sparse difference phi - gamma * phi', computed on a copy.
        nf = features.copy()
        for i, v in newfeatures.items():
            nf[i] = nf.get(i, 0) - gamma * v

        for i, v1 in features.items():
            for j, v2 in nf.items():
                A[i, j] = A.get((i, j), 0) + v1 * v2
            # The reward term is added once per row index i.
            b[i] = b.get(i, 0) + v1 * r

    # convert to sparse matrices since these could be large
    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
def drmax_loop(D, env, w, track, damping=0.001, rmax=1.0):
    """
    Dictionary rmax loop.

    Accumulate the sparse system (A, b) from the samples in D, substituting
    the optimistic value grmax = rmax / (1 - env.gamma) wherever `track`
    reports that part of a sample is unknown. With phi = env.phi(s, a):

    * pair (s, a) known and state ns known:
          A += phi * (phi - gamma * phi')^T,   b += phi * r
      where phi' = env.phi(ns, env.linear_policy(w, ns)).
    * pair (s, a) known, ns not known:
          A += phi * phi^T,   b += phi * (r + gamma * grmax)
    * pair (s, a) not known:
          A += phi * phi^T,   b += phi * grmax

    In addition, every action returned by track.unknown(s) contributes
    A += phi_u * phi_u^T and b += phi_u * grmax with phi_u = env.phi(s, una).

    Args:
        D: iterable of (s, a, r, ns, na) tuples; na is not used.
        env: provides phi(s, a, sparse=True, format='rawdict') (used as a
            dict of feature index -> value), linear_policy(w, ns) and gamma.
        w: weight vector; len(w) is the system dimension k.
        track: provides known_pair(s, a), known_state(ns) and unknown(s).
        damping: initial value of each diagonal entry of A.
        rmax: reward bound used to derive grmax.

    Returns:
        (A, b) as built by sp_create_dict with format='csr', requested with
        shapes (k, k) and (k, 1).
    """
    k = len(w)
    gamma = env.gamma
    # range instead of xrange so this also runs on Python 3.
    A = {(i, i): damping for i in range(k)}
    b = {}
    grmax = rmax / (1.0 - gamma)

    def accumulate(left, right, target):
        # A += left * right^T and b += left * target on the sparse dicts.
        for i, v1 in left.items():
            for j, v2 in right.items():
                A[i, j] = A.get((i, j), 0) + v1 * v2
            b[i] = b.get(i, 0) + v1 * target

    for (s, a, r, ns, _na) in D:
        # Every branch needs phi(s, a), so compute it once per sample.
        features = env.phi(s, a, sparse=True, format='rawdict')

        if track.known_pair(s, a):
            if track.known_state(ns):
                # Fully known transition: ordinary temporal-difference row.
                next_action = env.linear_policy(w, ns)
                newfeatures = env.phi(ns, next_action,
                                      sparse=True, format='rawdict')
                nf = features.copy()
                for i, v in newfeatures.items():
                    nf[i] = nf.get(i, 0) - gamma * v
                accumulate(features, nf, r)
            else:
                # Successor state unknown: bootstrap from grmax instead.
                accumulate(features, features, r + gamma * grmax)
        else:
            # Unknown pair: target is the optimistic value itself.
            accumulate(features, features, grmax)

        # NOTE(review): this loop is taken to run for every sample, not only
        # in the unknown-pair branch -- confirm against the intended algorithm.
        for una in track.unknown(s):
            ufeatures = env.phi(s, una, sparse=True, format='rawdict')
            accumulate(ufeatures, ufeatures, grmax)

    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
def drmax_loop(D, env, w, damping=0.001, rmax=1.0):
    """
    Dictionary rmax loop.

    Accumulate the sparse system (A, b) from the samples in D, using the
    optimistic value grmax = rmax / (1 - env.gamma) wherever D itself reports
    that part of a sample is unknown. With phi = env.phi(s, a):

    * D.known_pair(s, a) and D.known_state(ns):
          A += phi * (phi - gamma * phi')^T,   b += phi * r
      where phi' = env.phi(ns, env.linear_policy(w, ns)).
    * D.known_pair(s, a) only:
          A += phi * phi^T,   b += phi * (r + gamma * grmax)
    * otherwise:
          A += phi * phi^T,   b += phi * grmax

    In addition, every action returned by D.unknown(s) contributes
    A += phi_u * phi_u^T and b += phi_u * grmax with phi_u = env.phi(s, una).

    Args:
        D: iterable of (s, a, r, ns, na) tuples (na is not used) that also
            provides known_pair(s, a), known_state(ns) and unknown(s).
        env: provides phi(s, a, sparse=True, format='rawdict') (used as a
            dict of feature index -> value), linear_policy(w, ns) and gamma.
        w: weight vector; len(w) is the system dimension k.
        damping: initial value of each diagonal entry of A.
        rmax: reward bound used to derive grmax.

    Returns:
        (A, b) as built by sp_create_dict with format='csr', requested with
        shapes (k, k) and (k, 1).
    """
    k = len(w)
    gamma = env.gamma
    # range instead of xrange so this also runs on Python 3.
    A = {(i, i): damping for i in range(k)}
    b = {}
    grmax = rmax / (1.0 - gamma)

    def accumulate(left, right, target):
        # A += left * right^T and b += left * target on the sparse dicts.
        for i, v1 in left.items():
            for j, v2 in right.items():
                A[i, j] = A.get((i, j), 0) + v1 * v2
            b[i] = b.get(i, 0) + v1 * target

    for (s, a, r, ns, _na) in D:
        # Every branch needs phi(s, a), so compute it once per sample.
        features = env.phi(s, a, sparse=True, format='rawdict')

        if D.known_pair(s, a):
            if D.known_state(ns):
                # Fully known transition: ordinary temporal-difference row.
                next_action = env.linear_policy(w, ns)
                newfeatures = env.phi(ns, next_action,
                                      sparse=True, format='rawdict')
                nf = features.copy()
                for i, v in newfeatures.items():
                    nf[i] = nf.get(i, 0) - gamma * v
                accumulate(features, nf, r)
            else:
                # Successor state unknown: bootstrap from grmax instead.
                accumulate(features, features, r + gamma * grmax)
        else:
            # Unknown pair: target is the optimistic value itself.
            accumulate(features, features, grmax)

        # NOTE(review): this loop is taken to run for every sample, not only
        # in the unknown-pair branch -- confirm against the intended algorithm.
        for una in D.unknown(s):
            ufeatures = env.phi(s, una, sparse=True, format='rawdict')
            accumulate(ufeatures, ufeatures, grmax)

    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
def dict_loop_d(data, env, w, delta_w, damping=0.001):
    """
    Speedy and memory efficient.

    For every sample, form the sparse outer product

        A = bf * (phi - gamma * phi')^T

    with bf = env.env.phi(s, a), phi = env.phi(s, a) and
    phi' = env.phi(ns, env.env.linear_policy(w, ns)), and add A . delta_w
    into a running column vector. Only rows below len(w) and columns below
    len(delta_w) take part in the product.

    Args:
        data: iterable of (s, a, r, ns, na) tuples; r and na are not used.
        env: provides phi(...) and gamma, plus an inner object env.env that
            provides its own phi(...) and linear_policy(w, ns). Both phi
            calls are made with sparse=True, format='rawdict' and their
            results are used as dicts of feature index -> value.
        w: weight vector; len(w) is the length of the result and w is passed
            to env.env.linear_policy.
        delta_w: indexable sequence that A is multiplied with.
        damping: accepted for signature compatibility; not used here.

    Returns:
        Column 0 of sp_create_dict(D, len(w), 1, format='csr').toarray().
    """
    n_rows = len(w)
    n_cols = len(delta_w)
    gamma = env.gamma
    D = {}

    for (s, a, _r, ns, _na) in data:
        features = env.phi(s, a, sparse=True, format='rawdict')
        bf = env.env.phi(s, a, sparse=True, format='rawdict')
        # Named next_action so the builtin next() is not shadowed.
        next_action = env.env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # for 1-dim array on vals, rows matter
        nf = features.copy()
        for i, v in newfeatures.items():
            nf[i] = nf.get(i, 0) - gamma * v

        # Contract A with delta_w over non-zero entries only, instead of
        # scanning the dense len(w) x len(delta_w) grid for every sample.
        # Columns are visited in ascending order so each row is summed in the
        # same order as a dense scan would.
        # Assumes feature indices are non-negative integers -- TODO confirm.
        cols = sorted(j for j in nf if 0 <= j < n_cols)
        for i, v1 in bf.items():
            if not 0 <= i < n_rows:
                continue
            acc = D.get((i, 0), 0.0)
            for j in cols:
                acc = acc + (v1 * nf[j]) * delta_w[j]
            D[i, 0] = acc

    # convert to sparse matrices since these could be large
    D = sp_create_dict(D, n_rows, 1, format='csr')
    return D.toarray()[:, 0]