Пример #1
0
def dict_loop(D,env,w,damping=0.001):
    """
    Accumulate the linear system (A, b) from samples using plain dicts.

    For every sample (s, a, r, ns, na) in D, with phi = env.phi(s, a) and
    phi' = env.phi(ns, env.linear_policy(w, ns)), this adds

        A += phi * (phi - env.gamma * phi')^T
        b += phi * r

    on top of an initial A = damping * I.  (This matches the usual LSTD-Q
    accumulation; confirm against the caller if that matters.)

    Parameters:
        D       -- iterable of (s, a, r, ns, na) tuples; na is unpacked
                   but not used.
        env     -- object providing phi(state, action, sparse=True,
                   format='rawdict') returning a dict (feature index ->
                   value), linear_policy(w, state) and the attribute gamma.
        w       -- weight vector; its length k sets the system size and it
                   is passed to env.linear_policy.
        damping -- value placed on the diagonal of A before accumulation.

    Returns:
        (A, b) after conversion by sp_create_dict(..., format='csr') with
        sizes (k, k) and (k, 1).
    """
    k = len(w)
    # range instead of xrange: identical result here, and xrange does not
    # exist on Python 3.
    A = {(i,i) : damping for i in range(k)}
    b = {}

    for (s,a,r,ns,_na) in D:
        features = env.phi(s, a, sparse=True, format='rawdict')
        # named next_action so the builtin next() is not shadowed
        next_action = env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # nf = phi(s, a) - gamma * phi(ns, next_action), kept sparse
        nf = features.copy()
        for i,v in newfeatures.items():
            nf[i] = nf.get(i,0) - env.gamma * v

        # outer product features x nf goes into A, reward-weighted
        # features go into b
        for i,v1 in features.items():
            for j,v2 in nf.items():
                A[i,j] = A.get((i,j), 0) + v1 * v2
            b[i] = b.get(i,0) + v1 * r

    # convert to sparse matrices since these could be large
    A = sp_create_dict(A,k,k,format='csr')
    b = sp_create_dict(b,k,1,format='csr')
    return A,b
Пример #2
0
def dict_loop(D, env, w, damping=0.001):
    """
    Build the sparse linear system (A, b) from a batch of samples.

    Starting from A = damping * I (size k = len(w)) and an empty b, each
    sample (s, a, r, ns, na) contributes

        A += phi(s, a) * (phi(s, a) - env.gamma * phi(ns, a'))^T
        b += phi(s, a) * r

    where a' = env.linear_policy(w, ns) and phi is
    env.phi(..., sparse=True, format='rawdict'), used here as a dict of
    feature index -> value.  The na element of each sample is ignored.

    Both accumulators are kept as dicts while looping and are handed to
    sp_create_dict(..., format='csr') at the end; the converted pair
    (A, b) is returned.
    """
    k = len(w)
    # xrange is Python 2 only; range gives the same indices on 2 and 3.
    A = {(i, i): damping for i in range(k)}
    b = {}

    for (s, a, r, ns, _na) in D:
        features = env.phi(s, a, sparse=True, format='rawdict')
        # avoid shadowing the builtin next()
        next_action = env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # sparse difference: features - gamma * newfeatures
        nf = features.copy()
        for i, v in newfeatures.items():
            nf[i] = nf.get(i, 0) - env.gamma * v

        for i, v1 in features.items():
            for j, v2 in nf.items():
                A[i, j] = A.get((i, j), 0) + v1 * v2
            b[i] = b.get(i, 0) + v1 * r

    # convert to sparse matrices since these could be large
    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
Пример #3
0
def _drmax_accumulate(A, b, rows, cols, target):
    """Add the outer product rows x cols to A and target * rows to b (in place)."""
    for i,v1 in rows.items():
        for j,v2 in cols.items():
            A[i,j] = A.get((i,j), 0) + v1 * v2
        b[i] = b.get(i,0) + v1 * target


def drmax_loop(D, env, w, track, damping=0.001, rmax=1.0):
    """
    Dictionary rmax loop.

    Builds the same kind of (A, b) system as the plain dictionary loop, but
    substitutes an optimistic value wherever `track` reports something as
    unknown.  With grmax = rmax / (1 - env.gamma) and phi = env.phi(s, a),
    each sample (s, a, r, ns, na) is handled as follows:

      * track.known_pair(s, a) and track.known_state(ns):
            A += phi * (phi - gamma * phi(ns, a'))^T,  b += phi * r
            with a' = env.linear_policy(w, ns)
      * track.known_pair(s, a) only:
            A += phi * phi^T,  b += phi * (r + gamma * grmax)
      * otherwise:
            A += phi * phi^T,  b += phi * grmax

    In addition, for every action returned by track.unknown(s):
            A += phi_u * phi_u^T,  b += phi_u * grmax
    where phi_u = env.phi(s, that_action).

    Parameters:
        D       -- iterable of (s, a, r, ns, na) tuples; na is not used.
        env     -- provides phi(..., sparse=True, format='rawdict'),
                   linear_policy(w, state) and gamma.
        w       -- weight vector; len(w) is the system size.
        track   -- provides known_pair(s, a), known_state(ns), unknown(s).
        damping -- initial value on the diagonal of A.
        rmax    -- reward bound used for the optimistic value.

    Returns:
        (A, b) converted by sp_create_dict(..., format='csr').

    Raises:
        ZeroDivisionError if env.gamma == 1.0 (from the grmax division).
    """
    k = len(w)
    # range works on both Python 2 and 3 (xrange is Python 2 only)
    A = {(i,i) : damping for i in range(k)}
    b = {}
    grmax = rmax / (1.0 - env.gamma)

    for (s,a,r,ns,_na) in D:
        # query once; the original asked track.known_pair twice per sample
        pair_known = track.known_pair(s,a)
        next_known = pair_known and track.known_state(ns)
        features = env.phi(s, a, sparse=True, format='rawdict')

        if next_known:
            next_action = env.linear_policy(w, ns)
            newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

            # nf = features - gamma * newfeatures, kept sparse
            nf = features.copy()
            for i,v in newfeatures.items():
                nf[i] = nf.get(i,0) - env.gamma * v

            _drmax_accumulate(A, b, features, nf, r)

        elif pair_known:
            # next state unknown: observed reward plus optimistic continuation
            _drmax_accumulate(A, b, features, features, r + env.gamma * grmax)

        else:
            # pair itself unknown: fully optimistic target
            _drmax_accumulate(A, b, features, features, grmax)

        # every action still unknown in s also gets the optimistic target
        for una in track.unknown(s):
            una_features = env.phi(s, una, sparse=True, format='rawdict')
            _drmax_accumulate(A, b, una_features, una_features, grmax)

    A = sp_create_dict(A,k,k,format='csr')
    b = sp_create_dict(b,k,1,format='csr')
    return A,b
Пример #4
0
def _drmax_accumulate(A, b, rows, cols, target):
    """Add the outer product rows x cols to A and target * rows to b (in place)."""
    for i, v1 in rows.items():
        for j, v2 in cols.items():
            A[i, j] = A.get((i, j), 0) + v1 * v2
        b[i] = b.get(i, 0) + v1 * target


def drmax_loop(D, env, w, damping=0.001, rmax=1.0):
    """
    Dictionary rmax loop.

    Accumulates a damped linear system (A, b) over the samples in D, using
    an optimistic value grmax = rmax / (1 - env.gamma) wherever D itself
    reports a state-action pair or next state as unknown.  With
    phi = env.phi(s, a), each sample (s, a, r, ns, na) contributes:

      * D.known_pair(s, a) and D.known_state(ns):
            A += phi * (phi - gamma * phi(ns, a'))^T,  b += phi * r
            with a' = env.linear_policy(w, ns)
      * D.known_pair(s, a) only:
            A += phi * phi^T,  b += phi * (r + gamma * grmax)
      * otherwise:
            A += phi * phi^T,  b += phi * grmax

    and, for every action yielded by D.unknown(s), an extra
            A += phi_u * phi_u^T,  b += phi_u * grmax.

    Parameters:
        D       -- iterable of (s, a, r, ns, na) tuples that also provides
                   known_pair(s, a), known_state(ns) and unknown(s);
                   na is not used.
        env     -- provides phi(..., sparse=True, format='rawdict'),
                   linear_policy(w, state) and gamma.
        w       -- weight vector; len(w) is the system size.
        damping -- initial value on the diagonal of A.
        rmax    -- reward bound used for the optimistic value.

    Returns:
        (A, b) converted by sp_create_dict(..., format='csr').

    Raises:
        ZeroDivisionError if env.gamma == 1.0 (from the grmax division).
    """
    k = len(w)
    # xrange only exists on Python 2; range is equivalent here
    A = {(i, i): damping for i in range(k)}
    b = {}
    grmax = rmax / (1.0 - env.gamma)

    for (s, a, r, ns, _na) in D:
        # ask once per sample instead of once per branch test
        pair_known = D.known_pair(s, a)
        next_known = pair_known and D.known_state(ns)
        features = env.phi(s, a, sparse=True, format='rawdict')

        if next_known:
            next_action = env.linear_policy(w, ns)
            newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

            # sparse difference: features - gamma * newfeatures
            nf = features.copy()
            for i, v in newfeatures.items():
                nf[i] = nf.get(i, 0) - env.gamma * v

            _drmax_accumulate(A, b, features, nf, r)

        elif pair_known:
            # unknown next state: reward plus optimistic continuation
            _drmax_accumulate(A, b, features, features, r + env.gamma * grmax)

        else:
            # unknown pair: fully optimistic target
            _drmax_accumulate(A, b, features, features, grmax)

        # actions still unknown in s are pushed towards the optimistic value
        for una in D.unknown(s):
            una_features = env.phi(s, una, sparse=True, format='rawdict')
            _drmax_accumulate(A, b, una_features, una_features, grmax)

    A = sp_create_dict(A, k, k, format='csr')
    b = sp_create_dict(b, k, 1, format='csr')
    return A, b
Пример #5
0
def dict_loop_d(data, env, w, delta_w, damping=0.001):
    """
    Accumulate sum_over_samples(A_sample . delta_w) without storing A.

    For each sample (s, a, r, ns, na), with

        bf = env.env.phi(s, a)
        nf = env.phi(s, a) - env.gamma * env.phi(ns, a')
        a' = env.env.linear_policy(w, ns)

    the per-sample matrix is the outer product A = bf * nf^T.  Only its
    first len(w) rows and first len(delta_w) columns are used; the result
    accumulates A[k, :] . delta_w for every row k.

    Parameters:
        data    -- iterable of (s, a, r, ns, na) tuples; r and na are
                   not used.
        env     -- provides phi(..., sparse=True, format='rawdict') and
                   gamma, plus a nested env.env that provides phi and
                   linear_policy.
        w       -- weight vector; len(w) is the length of the result.
        delta_w -- indexable sequence multiplied against each row.
        damping -- accepted for signature compatibility; not used.

    Returns:
        sp_create_dict(D, len(w), 1, format='csr').toarray()[:, 0]

    Notes:
        Feature keys are assumed to be integer indices (they are compared
        against len(w) / len(delta_w) and used to index delta_w) -- TODO
        confirm against env.phi.
        The original walked all len(w) * len(delta_w) cells per sample;
        this version touches only non-zero cells, in the same column
        order, so finite inputs give the same sums.
    """
    n_rows = len(w)
    n_cols = len(delta_w)
    D = {}

    for (s, a, _r, ns, _na) in data:
        features = env.phi(s, a, sparse=True, format='rawdict')
        bf = env.env.phi(s, a, sparse=True, format='rawdict')
        # named next_action so the builtin next() is not shadowed
        next_action = env.env.linear_policy(w, ns)
        newfeatures = env.phi(ns, next_action, sparse=True, format='rawdict')

        # nf = features - gamma * newfeatures, kept sparse
        nf = features.copy()
        for i, v in newfeatures.items():
            nf[i] = nf.get(i, 0) - env.gamma * v

        # As before, once any sample has been seen every row has an
        # explicit entry, so sp_create_dict receives the same keys.
        if not D:
            for k in range(n_rows):
                D[k, 0] = 0.0

        # Columns that can contribute, in ascending order to keep the
        # original summation order.
        cols = sorted(j for j in nf if 0 <= j < n_cols)

        for i, v1 in bf.items():
            if not 0 <= i < n_rows:
                continue  # row outside the len(w) window was ignored before too
            acc = D[i, 0]
            for j in cols:
                acc += (v1 * nf[j]) * delta_w[j]
            D[i, 0] = acc

    # convert to sparse matrices since these could be large
    D = sp_create_dict(D, n_rows, 1, format='csr')
    return D.toarray()[:, 0]