Example #1
def runTest(self):
     # test cases are taken from Particle Data Group table:
     # http://pdg.lbl.gov/2015/reviews/rpp2014-rev-clebsch-gordan-coefs.pdf
     test_cases = (
         (1/2, 1/2, 1/2, -1/2, 0, 0, sqrt(1/2)),
         (2, 1/2, 1, -1/2, 3/2, 1/2, sqrt(3/5)),
         (2, 1/2, -2, 1/2, 3/2, -3/2, -sqrt(4/5)),
         (1, 1/2, 0, -1/2, 3/2, -1/2, sqrt(2/3)),
         (3/2, 1/2, 3/2, -1/2, 2, 1, sqrt(1/4)),
         (3/2, 1, 3/2, 0, 5/2, 3/2, sqrt(2/5)),
         (2, 1, 2, -1, 3, 1, sqrt(1/15)),
         (1, 1, 0, 0, 0, 0, -sqrt(1/3)),
         (3/2, 1, -3/2, -1, 5/2, -5/2, 1),
         (2, 3/2, 2, -3/2, 5/2, 1/2, sqrt(6/35)),
         (3/2, 3/2, -1/2, 1/2, 0, 0, sqrt(1/4)),
         (3/2, 3/2, -3/2, 1/2, 1, -1, sqrt(3/10)),
         (2, 3/2, 0, -1/2, 1/2, -1/2, -sqrt(1/5)),
         (2, 2, 2, -2, 4, 0, sqrt(1/70)),
         (2, 2, 1, -2, 4, -1, sqrt(1/14)),
         (2, 2, -2, 1, 3, -1, -sqrt(3/10)),
         (2, 2, -1, -1, 3, -2, 0),
         )
         
     for case in test_cases:
         j1, j2, m1, m2, j, m, test_cg = case
         my_cg = cg.cg(j1,j2,m1,m2,j,m)
         print("< %g %g ; %g %g | %g %g > = " % case[:-1])
         print("    (calc), (expected) %.16g %.16g" % 
               (my_cg, test_cg))
         numpy.testing.assert_approx_equal(test_cg, my_cg, 14)
         # test permutation symmetry:
         perm_cg = cg.cg(j2,j1,m2,m1,j,m)
         numpy.testing.assert_approx_equal(
             (-1)**(j-j1-j2)*test_cg,
             perm_cg, 14)
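
The last assertion above exercises the symmetry <j2 j1; m2 m1 | j m> = (-1)^(j - j1 - j2) <j1 j2; m1 m2 | j m>. The PDG values themselves can be cross-checked independently; here is a minimal sketch using SymPy's CG class (assuming SymPy is available; the cg.cg module under test is not shown on this page):

# Hedged cross-check of the first PDG test value with SymPy; illustrative,
# not part of the cg module under test.
from sympy import S
from sympy.physics.quantum.cg import CG

half = S(1) / 2
# argument order is CG(j1, m1, j2, m2, j3, m3);
# <1/2 1/2; 1/2 -1/2 | 0 0> should equal sqrt(1/2)
val = CG(half, half, half, -half, 0, 0).doit()
print(val)  # sqrt(2)/2, i.e. sqrt(1/2)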
Example #2
def test5():
    x = 'A[Mastering complex systems]\
    B[Enhance requirements engineering methods] B->A\
    C[Methods evolutivity] C->B\
    D[Extended enterprise] D->B\
    E[User-friendliness] E->B\
    F[Take into account certain NFRs] F->B\
    G[Enhance architectural design methods] G->A\
    H[Take zigzags into account (?)] H->G H->B\
    FLOSS->C\
    Extendability->C\
    K[Collaboration support] K->D K->E\
    L[Viewpoint-based language] L->E L->F\
    [Requirements evolution]->B\
    [Validate requirements as early as possible]->B\
    [Safe method]->B'
    #x = 'A B A->B'
    x = 'A B C D A->B C->D A->C B->D'
    mygraph = cg.cg(x,{})
    mygraph.layout(10,100)
    mygraph = cg.cg(x,{})
    print('k=%s' % mygraph.get_k())
    for i in range(2, 2):  # note: empty range, so this benchmark loop never runs
        n, sum = 0, 0
        for j in range(10):
            n += 1
            mygraph = cg.cg(x, {})
            sum += mygraph.layout(i, 40)
        print('%s %s' % (i, sum / n))
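
The strings assigned to x form a small graph DSL: A[label] declares a node with a label and A->B declares an edge. A minimal regex-based sketch of that syntax follows (illustrative only; the real cg parser is not shown and also supports anonymous [label] nodes):

# Illustrative parser for the node/edge syntax used above; not the actual cg code.
import re

def parse_graph(src):
    nodes = {}  # name -> label
    edges = []  # (source, target) pairs
    for name, label in re.findall(r'(\w+)\[([^\]]*)\]', src):
        nodes[name] = label
    for a, b in re.findall(r'(\w+)->(\w+)', src):
        edges.append((a, b))
        nodes.setdefault(a, a)
        nodes.setdefault(b, b)
    return nodes, edges

nodes, edges = parse_graph('A B C D A->B C->D A->C B->D')
print(edges)  # [('A', 'B'), ('C', 'D'), ('A', 'C'), ('B', 'D')]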
Example #3
    def solve(self, prev_positions=None):
        Csize = 2 * self.i_indices.size
        # add damping
        self.rows.append(self.base_row_idx + np.arange(Csize))
        self.cols.append(np.arange(Csize))
        self.lvals.append(np.ones(Csize) * 0.001)
        self.rvals.append(np.zeros(Csize))

        x0 = np.hstack([v.ravel() for v in prev_positions])  # hstack needs a sequence, not a generator

        # build matrix
        rows = np.hstack([r.ravel() for r in self.rows])
        cols = np.hstack([c.ravel() for c in self.cols])
        lvals = np.hstack([l.ravel() for l in self.lvals])
        rvals = np.hstack([r.ravel() for r in self.rvals])
        Rsize = rows.max() + 1
        M = sparse.coo_matrix((lvals, (rows, cols)),
                              (Rsize, Csize)).tocsr()
        t0 = time.time()
        # V = linalg.lsqr(M.T * M, M.T * rvals.reshape((-1, 1)), show=False, damp=0.001)
        V = cg.cg(M.T * M, M.T * rvals.reshape((-1, 1)), x0=None)  # note: the x0 computed above is not passed here
        # print "   solved in", time.time() - t0, "NNZ", M.nnz, (M.T * M).nnz
        new_ivals = V[0][:self.i_indices.size].reshape(self.i_indices.shape)
        new_jvals = V[0][self.i_indices.size:].reshape(self.i_indices.shape)
        return new_ivals, new_jvals
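
The rows of 0.001 appended above implement Tikhonov damping: augmenting M with lambda*I rows and zero right-hand sides turns the normal equations into (M^T M + lambda^2 I) x = M^T b. SciPy's lsqr exposes the same regularization directly through its damp argument; a toy sketch (matrix and names illustrative):

# Sketch: damped least squares via lsqr's damp argument, equivalent to
# appending lambda*I rows as done above. Toy data, illustrative only.
import numpy as np
from scipy.sparse import random as sparse_random
from scipy.sparse.linalg import lsqr

rng = np.random.default_rng(0)
M = sparse_random(50, 20, density=0.2, random_state=0).tocsr()
b = rng.standard_normal(50)

x = lsqr(M, b, damp=0.001)[0]  # minimizes ||Mx - b||^2 + damp^2 ||x||^2
print(x.shape)  # (20,)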
Example #4
def test_connectors():
    n,ok=0,0
    for i in set_connectors.keys():
        item = cg.cg(i)
        n+=1
        if item.get_connectors() != set_connectors[i]:
            ok += 1
            print('Item %d %s Computed:%s|Expected:%s|' % (n, i, item.get_connectors(), set_connectors[i]))
    print ('Test OK (%d cases)'%n if ok==0 else 'Test KO on %d tests'%ok)
    return n
Example #5
def test_syntax():
    n,ok=0,0
    for i in set_syntax.keys():
        item = cg.cg(i)
        n+=1
        value = item.check(i)
        if value != set_syntax[i]:
            ok += 1
            print('Item %d %s Computed:%s|Expected:%s|' % (n, i, value, set_syntax[i]))
    print ('Test OK (%d cases)'%n if ok==0 else 'Test KO on %d tests'%ok)
    return n
Example #6
def main():
	if len(sys.argv) < 3:
		print "Usage python proc_scoreboard.py cg_file rsa_file"
		return

	AAlist = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y']
	cgfile = sys.argv[1]
	nafile = sys.argv[2]
	outfile = cgfile+'.nascore'

	print('loading %s, %s' % (cgfile, nafile))

	na = naccess(nafile)
	alphabet = expandVars([AAlist, na.alphabet])
	#print repr(alphabet)

	cgs = [cg(line.strip(), alphabet) for line in open(cgfile)]

	fo = open(outfile, 'w')
	for c in cgs:
		if len(c.AAgroup) > 1:
			fo.write(c.nascore(na)+'\n')
	fo.close()
Example #7
def main():
	if len(sys.argv) < 3:
		print "Usage python proc_nvboard.py cg_file rsa_file"
		print "python proc_nvboard.py 1k2p.tip.hcg 1k2p.rsa"
		print "output: 1k2p.tip.hcg.nvscore"
		return

	AAlist = ['A','C','D','E','F','G','H','I','K','L','M','N','P','Q','R','S','T','V','W','Y']
	cgfile = sys.argv[1]
	nafile = sys.argv[2]
	outfile = cgfile+'.nvscore'

	#print 'loading %s, %s' % (cgfile, nafile)

	na = naccess(nafile)

	cgs = [cg(line.strip(), '') for line in open(cgfile)]

	fo = open(outfile, 'w')
	for c in cgs:
		if len(c.AAgroup) > 1:
			fo.write(c.nvscore(na)+'\n')
	fo.close()
	print('finish writing %s.' % outfile)
Example #8
def poisson(f_rhs,f_dist,h0,pts,tri,*args,**kwargs):
    """Solve Poisson's equation on a domain D by the FE method:
         - Laplacian u = f_rhs
    on D and
         u = 0
    on boundary of D.   The right-hand side is  f = f_rhs(pts,*args).
    We use a triangulation described by points pts, triangles tri,
    a mesh scale h0, and a signed distance function f_dist(pts,*args);
    see distmesh2d.py.  Returns
       uh     = approximate solution value at pts
       inside = index of interior point (or -1 if not interior)
    See fem_examples.py for examples.
    """
    announce = kwargs.get('announce',False)
    geps = 0.001 * h0
    ii = (f_dist(pts, *args) < -geps)      # boolean array for interior nodes
    Npts = np.shape(pts)[0]     # = number of nodes
    N = ii.sum()                # = number of *interior* nodes
    if announce:
        print "  poisson: assembling on mesh with  %d  nodes and  %d  interior nodes" \
            % (Npts,N)
    inside = np.zeros(Npts,dtype=np.int32) # index only the interior nodes
    count = 0
    for j in range(Npts):
        if ii[j]:
            inside[j] = count
            count = count + 1
        else:
            inside[j] = -1
    # eval f_rhs once for each node
    ff = np.zeros(Npts)
    for j in range(Npts):
        ff[j] = f_rhs(pts[j], *args)
    # loop over triangles to set up stiffness matrix A and load vector b
    # NOTE: not using sparse matrices at all
    A = np.zeros((N,N))
    b = np.zeros(N)
    for n in range(np.shape(tri)[0]):        # loop over triangles
        # indices, coordinates, and Jacobian of triangle
        j, k, l = tri[n,:]
        vj = inside[j]
        vk = inside[k]
        vl = inside[l]
        Jac = np.array([[ pts[k,0] - pts[j,0], pts[l,0] - pts[j,0] ],
                        [ pts[k,1] - pts[j,1], pts[l,1] - pts[j,1] ]])
        ar = abs(det2(Jac))/2.0
        C = ar/12.0
        Q = inv2(np.dot(Jac.transpose(),Jac))
        fT = np.array([ff[j], ff[k], ff[l]])
        # add triangle's contribution to linear system  A x = b
        if ii[j]:
            A[vj,vj] += ar * np.sum(Q)
            b[vj]    += C * np.dot(fT, np.array([2,1,1]))
        if ii[k]:
            A[vk,vk] += ar * Q[0,0]
            b[vk]    += C * np.dot(fT, np.array([1,2,1]))
        if ii[l]:
            A[vl,vl] += ar * Q[1,1]
            b[vl]    += C * np.dot(fT, np.array([1,1,2]))
        if ii[j] & ii[k]:
            A[vj,vk] -= ar * np.sum(Q[:,0])
            A[vk,vj] = A[vj,vk]
        if ii[j] & ii[l]:
            A[vj,vl] -= ar * np.sum(Q[:,1])
            A[vl,vj] = A[vj,vl]
        if ii[k] & ii[l]:
            A[vk,vl] += ar * Q[0,1]
            A[vl,vk] = A[vk,vl]
    if announce:
        print "  poisson: solving linear system  A uh = b  using cg (N=%d unknowns)" % N
    uh = np.zeros(Npts)
    # solve by cg including (weak) test of positive definiteness
    uh[ii], iters, r = cg(A,b,np.zeros(np.shape(b)),tol=1.0e-4,h=h0,test=True)
    #from numpy.linalg import solve
    #uh[ii] = solve(A,b)
    if (announce & (not(kwargs.get('getsys',False)))):
        if np.dot(b,b) > 0.0:
          err = np.sqrt(np.dot(r,r) / np.dot(b,b))
          print "  poisson: cg did  %d  iterations to get |r|/|b| = %.4e" % (iters,err)
        else:
          print "  poisson: cg did  %d  iterations" % iters
    if kwargs.get('getsys',False):
        return uh, inside, A, b
    else:
        return uh, inside
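
The cg called here comes from an accompanying module that is not shown; judging by the call site it returns (solution, iteration count, final residual) and accepts extra h/test keywords. A minimal conjugate-gradient sketch with that return shape (an assumption about the interface, not the original implementation; the h/test positive-definiteness check is omitted):

# Minimal CG sketch matching the (x, iters, r) shape unpacked by poisson();
# illustrative only, not the module's actual cg.
import numpy as np

def cg_sketch(A, b, x0, tol=1.0e-4, maxiter=1000):
    x = x0.copy()
    r = b - np.dot(A, x)   # initial residual
    p = r.copy()           # initial search direction
    rs = np.dot(r, r)
    bnorm = np.linalg.norm(b)
    for k in range(1, maxiter + 1):
        Ap = np.dot(A, p)
        alpha = rs / np.dot(p, Ap)      # step length along p
        x += alpha * p
        r -= alpha * Ap
        rs_new = np.dot(r, r)
        if np.sqrt(rs_new) <= tol * bnorm:
            return x, k, r
        p = r + (rs_new / rs) * p       # next A-conjugate direction
        rs = rs_new
    return x, maxiter, r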
Example #9
def compute_flow(im1, im2, previous_flow=None,
                 average_derivs=True,
                 flow_iters=10,
                 alpha=1.0):
    # See Ce Liu's thesis, appendix A for notation

    assert im1.shape == im2.shape, "mismatch" + str(im1.shape)  + " " + str(im2.shape)

    # compute image derivatives
    Ix, Iy = derivs(im2)

    # warp im2 and derivs by existing flow
    if previous_flow is not None:
        cur_flow = previous_flow.resize(im1.shape)
        I2 = warp(im2, cur_flow)
        Ix = warp(Ix, cur_flow)
        Iy = warp(Iy, cur_flow)
    else:
        cur_flow = Flow(im1.shape)
        I2 = im2

    # Average derivatives
    if average_derivs:
        temp_Ix, temp_Iy = derivs(im1)
        Ix = (Ix + temp_Ix) / 2.0
        Iy = (Iy + temp_Iy) / 2.0

    # temporal derivative
    Iz = I2 - im1
    print "Median Abs error", np.median(np.abs(Iz))

    # mask nonoverlapping areas
    Iz[np.isnan(Iz)] = 0
    Ix[np.isnan(Ix)] = 0
    Iy[np.isnan(Iy)] = 0

    # setup
    Dx, Dy = deriv_operators(im1.shape)
    Ix = vectorize(Ix)
    Iy = vectorize(Iy)
    Iz = vectorize(Iz)
    U = vectorize(cur_flow.u)
    V = vectorize(cur_flow.v)
    dU = np.zeros_like(U)
    dV = np.zeros_like(V)
    prev_x = None
    for i in range(flow_iters):
        # Compute data and spatial weighting terms
        g = (Dx * (U + dU)) ** 2 + (Dy * (U + dU)) ** 2 + (Dx * (V + dV)) ** 2 + (Dy * (V + dV)) ** 2
        f = (Iz + Ix * dU + Iy * dV) ** 2

        Phi = phi_prime(g)
        Psi = psi_prime(f)

        L = Dx.T * diag(Phi) * Dx + Dy.T * diag(Phi) * Dy

        UL = diag(Psi * (Ix ** 2)) + alpha * L
        UR = LL = diag(Psi * Ix * Iy)
        LR = diag(Psi * (Iy ** 2)) + alpha * L
        A = s_vstack((s_hstack((UL, UR)),
                      s_hstack((LL, LR)))).tocsc()
        di = A[range(A.shape[0]), range(A.shape[0])].A.ravel()
        di[di == 0] = 1.0
        preA = sparse.diags(1.0 / di, 0)
        bU = Psi * Ix * Iz + alpha * L * U
        bL = Psi * Iy * Iz + alpha * L * V
        b = - np.vstack((bU, bL))

        x, st = cg(A, b, x0=prev_x, M=preA, tol=0.05 / np.linalg.norm(b))
        print(i, np.median(np.abs(x)), st)
        dU = x[:dU.shape[0]].reshape(dU.shape)
        dV = x[dU.shape[0]:].reshape(dU.shape)
        prev_x = x
        if st <= 3 and i > 1:
            break
    cur_flow.u += dU.reshape(cur_flow.u.shape)
    cur_flow.v += dV.reshape(cur_flow.v.shape)
    cur_flow.u = cv2.medianBlur(cur_flow.u, 5)
    cur_flow.v = cv2.medianBlur(cur_flow.v, 5)
    return cur_flow
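
The preA built in the loop is a Jacobi (diagonal) preconditioner: the reciprocal of A's diagonal, with zeros replaced by 1.0 so it stays invertible. The same construction against SciPy's cg on a toy SPD system (illustrative):

# Sketch: Jacobi preconditioning for scipy.sparse.linalg.cg, mirroring the
# preA construction above. Toy tridiagonal SPD system.
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import cg

n = 100
A = sparse.diags([-1.0, 4.0, -1.0], [-1, 0, 1], shape=(n, n)).tocsr()
b = np.ones(n)

di = A.diagonal()
di[di == 0] = 1.0              # guard against zero diagonal entries
M = sparse.diags(1.0 / di, 0)  # cheap approximation of A^{-1}

x, info = cg(A, b, M=M)
print(info)  # 0 means converged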
Example #10
        # (definition of b_dict truncated in this excerpt)
        }
    }

    # for every vector ...
    for pair in b_dict:

        # ... extract values from dict ...
        b_value = b_dict[pair]['b_value']
        plot_title = b_dict[pair]['title']
        save_name = b_dict[pair]['save_name']

        print(plot_title)

        # ... and do some magic
        t1 = time.monotonic()
        x_0, R = cg(A, b_value)
        t2 = time.monotonic()
        print('{:.4f} seconds with own cg'.format(t2 - t1))

        # get the lengths of the residuum and the error
        R_norm = calc_residuum(R)
        E = calc_errors(R, A)

        # plot the results
        plot_residuum(R_norm, title=plot_title, save_name=save_name)
        plot_errors(E, title=plot_title, save_name=save_name)
        plot_residuum_and_errors(R_norm,
                                 E,
                                 title=plot_title,
                                 save_name=save_name)
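
Here the custom cg returns both the solution x_0 and a history R of residual vectors. The helpers are not shown; plausible sketches, assuming R holds one residual r_k = b - A x_k per iteration: residual norms are ||r_k||, and since the error e_k = x* - x_k satisfies A e_k = r_k, error norms follow from a solve per stored residual.

# Hypothetical versions of the helpers used above; the originals are not shown.
import numpy as np

def calc_residuum(R):
    # Euclidean norm of each stored residual r_k
    return [np.linalg.norm(r) for r in R]

def calc_errors(R, A):
    # A e_k = r_k, so recover each error norm with a dense solve
    return [np.linalg.norm(np.linalg.solve(A, r)) for r in R]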
Example #11
def learn(env,
          policy_func,
          reward_giver,
          reward_guidance,
          expert_dataset,
          rank,
          pretrained,
          pretrained_weight,
          *,
          g_step,
          d_step,
          entcoeff,
          save_per_iter,
          ckpt_dir,
          log_dir,
          timesteps_per_batch,
          task_name,
          gamma,
          lam,
          algo,
          max_kl,
          cg_iters,
          cg_damping=1e-2,
          vf_stepsize=3e-4,
          d_stepsize=1e-4,
          vf_iters=3,
          max_timesteps=0,
          max_episodes=0,
          max_iters=0,
          loss_percent=0.0,
          callback=None):

    nworkers = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    np.set_printoptions(precision=3)
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = env.observation_space
    ac_space = env.action_space
    policy = build_policy(env, 'mlp', value_network='copy')

    ob = observation_placeholder(ob_space)
    with tf.variable_scope('pi'):
        pi = policy(observ_placeholder=ob)
    with tf.variable_scope('oldpi'):
        oldpi = policy(observ_placeholder=ob)

    atarg = tf.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    ret = tf.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    ac = pi.pdtype.sample_placeholder([None])

    kloldnew = oldpi.pd.kl(pi.pd)
    ent = pi.pd.entropy()
    meankl = tf.reduce_mean(kloldnew)
    meanent = tf.reduce_mean(ent)
    entbonus = entcoeff * meanent

    vferr = tf.reduce_mean(tf.square(pi.vf - ret))

    ratio = tf.exp(pi.pd.logp(ac) -
                   oldpi.pd.logp(ac))  # advantage * pnew / pold
    surrgain = tf.reduce_mean(ratio * atarg)

    optimgain = surrgain + entbonus
    losses = [optimgain, meankl, entbonus, surrgain, meanent]
    loss_names = ["optimgain", "meankl", "entloss", "surrgain", "entropy"]

    dist = meankl

    all_var_list = get_trainable_variables('pi')
    # var_list = [v for v in all_var_list if v.name.startswith("pi/pol") or v.name.startswith("pi/logstd")]
    # vf_var_list = [v for v in all_var_list if v.name.startswith("pi/vff")]
    var_list = get_pi_trainable_variables("pi")
    vf_var_list = get_vf_trainable_variables("pi")
    # assert len(var_list) == len(vf_var_list) + 1
    d_adam = MpiAdam(reward_giver.get_trainable_variables())
    guidance_adam = MpiAdam(reward_guidance.get_trainable_variables())

    vfadam = MpiAdam(vf_var_list)

    get_flat = U.GetFlat(var_list)
    set_from_flat = U.SetFromFlat(var_list)
    klgrads = tf.gradients(dist, var_list)
    flat_tangent = tf.placeholder(dtype=tf.float32,
                                  shape=[None],
                                  name="flat_tan")
    shapes = [var.get_shape().as_list() for var in var_list]
    start = 0
    tangents = []
    for shape in shapes:
        sz = U.intprod(shape)
        tangents.append(tf.reshape(flat_tangent[start:start + sz], shape))
        start += sz
    gvp = tf.add_n([
        tf.reduce_sum(g * tangent)
        for (g, tangent) in zipsame(klgrads, tangents)
    ])  # pylint: disable=E1111
    fvp = U.flatgrad(gvp, var_list)

    assign_old_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(oldv, newv)
            for (oldv,
                 newv) in zipsame(get_variables('oldpi'), get_variables('pi'))
        ])
    compute_losses = U.function([ob, ac, atarg], losses)
    compute_lossandgrad = U.function([ob, ac, atarg], losses +
                                     [U.flatgrad(optimgain, var_list)])
    compute_fvp = U.function([flat_tangent, ob, ac, atarg], fvp)
    compute_vflossandgrad = U.function([ob, ret],
                                       U.flatgrad(vferr, vf_var_list))

    @contextmanager
    def timed(msg):
        if rank == 0:
            print(colorize(msg, color='magenta'))
            tstart = time.time()
            yield
            print(
                colorize("done in %.3f seconds" % (time.time() - tstart),
                         color='magenta'))
        else:
            yield

    def allmean(x):
        assert isinstance(x, np.ndarray)
        out = np.empty_like(x)
        MPI.COMM_WORLD.Allreduce(x, out, op=MPI.SUM)
        out /= nworkers
        return out

    U.initialize()
    th_init = get_flat()
    MPI.COMM_WORLD.Bcast(th_init, root=0)
    set_from_flat(th_init)
    d_adam.sync()
    guidance_adam.sync()
    vfadam.sync()
    if rank == 0:
        print("Init param sum", th_init.sum(), flush=True)

    # Prepare for rollouts
    # ----------------------------------------
    seg_gen = traj_segment_generator(pi,
                                     env,
                                     reward_giver,
                                     reward_guidance,
                                     timesteps_per_batch,
                                     stochastic=True,
                                     algo=algo,
                                     loss_percent=loss_percent)

    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=40)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=40)  # rolling buffer for episode rewards
    true_rewbuffer = deque(maxlen=40)

    assert sum([max_iters > 0, max_timesteps > 0, max_episodes > 0]) == 1

    g_loss_stats = stats(loss_names)
    d_loss_stats = stats(reward_giver.loss_name)
    ep_stats = stats(["True_rewards", "Rewards", "Episode_length"])
    # if provide pretrained weight
    if pretrained_weight is not None:
        U.load_state(pretrained_weight, var_list=pi.get_variables())

    while True:
        if callback: callback(locals(), globals())
        if max_timesteps and timesteps_so_far >= max_timesteps:
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            break
        elif max_iters and iters_so_far >= max_iters:
            break

        # Save model
        # if rank == 0 and iters_so_far % save_per_iter == 0 and ckpt_dir is not None:
        #     fname = os.path.join(ckpt_dir, task_name)
        #     os.makedirs(os.path.dirname(fname), exist_ok=True)
        #     saver = tf.train.Saver()
        #     saver.save(tf.get_default_session(), fname)

        logger.log("********** Iteration %i ************" % iters_so_far)

        # global flag_render
        # if iters_so_far > 0 and iters_so_far % 10 ==0:
        #     flag_render = True
        # else:
        #     flag_render = False

        def fisher_vector_product(p):
            return allmean(compute_fvp(p, *fvpargs)) + cg_damping * p

        # ------------------ Update G ------------------
        logger.log("Optimizing Policy...")
        for _ in range(g_step):
            with timed("sampling"):
                seg = seg_gen.__next__()
            print('rewards', seg['rew'])
            add_vtarg_and_adv(seg, gamma, lam)
            # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets))
            ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg[
                "tdlamret"]
            vpredbefore = seg[
                "vpred"]  # predicted value function before update
            atarg = (atarg - atarg.mean()) / atarg.std(
            )  # standardized advantage function estimate

            if hasattr(pi, "ob_rms"):
                pi.ob_rms.update(ob)  # update running mean/std for policy

            args = seg["ob"], seg["ac"], atarg
            fvpargs = [arr[::5] for arr in args]

            assign_old_eq_new(
            )  # set old parameter values to new parameter values
            with timed("computegrad"):
                *lossbefore, g = compute_lossandgrad(*args)
            lossbefore = allmean(np.array(lossbefore))
            g = allmean(g)
            if np.allclose(g, 0):
                logger.log("Got zero gradient. not updating")
            else:
                with timed("cg"):
                    stepdir = cg(fisher_vector_product,
                                 g,
                                 cg_iters=cg_iters,
                                 verbose=rank == 0)
                assert np.isfinite(stepdir).all()
                shs = .5 * stepdir.dot(fisher_vector_product(stepdir))
                lm = np.sqrt(shs / max_kl)
                # logger.log("lagrange multiplier:", lm, "gnorm:", np.linalg.norm(g))
                fullstep = stepdir / lm
                expectedimprove = g.dot(fullstep)
                surrbefore = lossbefore[0]
                stepsize = 1.0
                thbefore = get_flat()
                for _ in range(10):
                    thnew = thbefore + fullstep * stepsize
                    set_from_flat(thnew)
                    meanlosses = surr, kl, *_ = allmean(
                        np.array(compute_losses(*args)))
                    improve = surr - surrbefore
                    logger.log("Expected: %.3f Actual: %.3f" %
                               (expectedimprove, improve))
                    if not np.isfinite(meanlosses).all():
                        logger.log("Got non-finite value of losses -- bad!")
                    elif kl > max_kl * 1.5:
                        logger.log("violated KL constraint. shrinking step.")
                    elif improve < 0:
                        logger.log("surrogate didn't improve. shrinking step.")
                    else:
                        logger.log("Stepsize OK!")
                        break
                    stepsize *= .5
                else:
                    logger.log("couldn't compute a good step")
                    set_from_flat(thbefore)
                if nworkers > 1 and iters_so_far % 20 == 0:
                    paramsums = MPI.COMM_WORLD.allgather(
                        (thnew.sum(),
                         vfadam.getflat().sum()))  # list of tuples
                    assert all(
                        np.allclose(ps, paramsums[0]) for ps in paramsums[1:])
            with timed("vf"):
                for _ in range(vf_iters):
                    for (mbob, mbret) in dataset.iterbatches(
                        (seg["ob"], seg["tdlamret"]),
                            include_final_partial_batch=False,
                            batch_size=128):
                        if hasattr(pi, "ob_rms"):
                            pi.ob_rms.update(
                                mbob)  # update running mean/std for policy
                        g = allmean(compute_vflossandgrad(mbob, mbret))
                        vfadam.update(g, vf_stepsize)

        g_losses = meanlosses
        for (lossname, lossval) in zip(loss_names, meanlosses):
            logger.record_tabular(lossname, lossval)
        logger.record_tabular("ev_tdlam_before",
                              explained_variance(vpredbefore, tdlamret))

        # ------------------ Update D ------------------
        logger.log("Optimizing Discriminator...")
        logger.log(fmt_row(13, reward_giver.loss_name))
        ob_expert, ac_expert = expert_dataset.get_next_batch(
            batch_size=len(ob))
        batch_size = 128
        d_losses = [
        ]  # list of tuples, each of which gives the loss for a minibatch
        with timed("Discriminator"):
            for (ob_batch, ac_batch) in dataset.iterbatches(
                (ob, ac),
                    include_final_partial_batch=False,
                    batch_size=batch_size):
                ob_expert, ac_expert = expert_dataset.get_next_batch(
                    batch_size=batch_size)
                # update running mean/std for reward_giver
                if hasattr(reward_giver, "obs_rms"):
                    reward_giver.obs_rms.update(
                        np.concatenate((ob_batch, ob_expert), 0))
                *newlosses, g = reward_giver.lossandgrad(ob_batch, ob_expert)
                d_adam.update(allmean(g), d_stepsize)
                d_losses.append(newlosses)
        logger.log(fmt_row(13, np.mean(d_losses, axis=0)))

        # ------------------ Update Guidance ------------
        logger.log("Optimizing Guidance...")

        logger.log(fmt_row(13, reward_guidance.loss_name))
        batch_size = 128
        guidance_losses = [
        ]  # list of tuples, each of which gives the loss for a minibatch
        with timed("Guidance"):
            for ob_batch, ac_batch in dataset.iterbatches(
                (ob, ac),
                    include_final_partial_batch=False,
                    batch_size=batch_size):
                ob_expert, ac_expert = expert_dataset.get_next_batch(
                    batch_size=batch_size)

                idx_condition = process_expert(ob_expert, ac_expert)
                pick_idx = (idx_condition >= loss_percent)
                # pick_idx = idx_condition

                ob_expert_p = ob_expert[pick_idx]
                ac_expert_p = ac_expert[pick_idx]

                ac_batch_p = []
                for each_ob in ob_expert_p:
                    tmp_ac, _, _, _ = pi.step(each_ob, stochastic=True)
                    ac_batch_p.append(tmp_ac)

                # update running mean/std for reward_giver
                if hasattr(reward_guidance, "obs_rms"):
                    reward_guidance.obs_rms.update(ob_expert_p)
                # reward_guidance.train(expert_s=ob_batch_p, agent_a=ac_batch_p, expert_a=ac_expert_p)
                *newlosses, g = reward_guidance.lossandgrad(
                    ob_expert_p, ac_batch_p, ac_expert_p)
                guidance_adam.update(allmean(g), d_stepsize)
                guidance_losses.append(newlosses)
        logger.log(fmt_row(13, np.mean(guidance_losses, axis=0)))

        lrlocal = (seg["ep_lens"], seg["ep_rets"], seg["ep_true_rets"]
                   )  # local values
        listoflrpairs = MPI.COMM_WORLD.allgather(lrlocal)  # list of tuples
        lens, rews, true_rets = map(flatten_lists, zip(*listoflrpairs))
        true_rewbuffer.extend(true_rets)
        lenbuffer.extend(lens)
        rewbuffer.extend(rews)

        logger.record_tabular("EpLenMean", np.mean(lenbuffer))
        logger.record_tabular("EpRewMean", np.mean(rewbuffer))
        logger.record_tabular("EpTrueRewMean", np.mean(true_rewbuffer))
        logger.record_tabular("EpThisIter", len(lens))
        episodes_so_far += len(lens)
        timesteps_so_far += sum(lens) * g_step
        iters_so_far += 1

        logger.record_tabular("EpisodesSoFar", episodes_so_far)
        logger.record_tabular("TimestepsSoFar", timesteps_so_far)
        logger.record_tabular("TimeElapsed", time.time() - tstart)

        if rank == 0:
            logger.dump_tabular()

    if ckpt_dir is not None:
        print('saving...')
        fname = os.path.join(ckpt_dir, task_name)
        os.makedirs(os.path.dirname(fname), exist_ok=True)
        pi.save(fname)
        print('save completely and the path:', fname)
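
Note that the cg used for stepdir never forms the Fisher matrix: fisher_vector_product is a callable, and conjugate gradient only needs matrix-vector products to solve F x = g approximately. A minimal matrix-free sketch in that style (modeled on the usual baselines-style interface; illustrative, not the imported implementation):

# Matrix-free CG sketch: f_Ax is a callable computing A @ p, in the same way
# fisher_vector_product is passed above. Illustrative only.
import numpy as np

def cg_matfree(f_Ax, b, cg_iters=10, residual_tol=1e-10):
    x = np.zeros_like(b)
    r = b.copy()   # residual of A x = b at x = 0
    p = b.copy()   # initial search direction
    rdotr = r.dot(r)
    for _ in range(cg_iters):
        z = f_Ax(p)
        alpha = rdotr / p.dot(z)
        x += alpha * p
        r -= alpha * z
        new_rdotr = r.dot(r)
        p = r + (new_rdotr / rdotr) * p
        rdotr = new_rdotr
        if rdotr < residual_tol:
            break
    return x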
Example #12
# (constructor call truncated in this excerpt)
           Actor_Learning_rate,
           Critic_Learning_rate,
           Tau,
           trajectory_number=100,
           update_epoach=50)

if args.obj == 'quadratic':
    obj = Quadratic(dim)
elif args.obj == 'logistic':
    obj = Logistic(dim, X, Y)
elif args.obj == 'ackley':
    obj = Ackley(dim)
elif args.obj == 'neural':
    obj = NeuralNet(dim, X, Y, **kwargs)

cg_x, cg_y, _, cg_iter, _ = cg(obj, x0=init_point, maxiter=max_iter)
print('CG method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(cg_x, cg_y, cg_iter))
sd_x, sd_y, _, sd_iter, _ = sd(obj, x0=init_point, maxiter=max_iter)
print('SD method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(sd_x, sd_y, sd_iter))
bfgs_x, bfgs_y, _, bfgs_iter, _ = quasiNewton(obj,
                                              x0=init_point,
                                              maxiter=max_iter)
print('bfgs method:\n optimal point: {0}, optimal value: {1}, iterations {2}'.
      format(bfgs_x, bfgs_y, bfgs_iter))

if args.agent == 'naf':
    agent = train(naf, max_epoch, max_iter)
elif args.agent == 'ddpg':
    agent = train(ddpg, max_epoch, max_iter)
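
The custom cg/sd/quasiNewton optimizers and objective classes above are not shown. The same CG-versus-BFGS comparison can be reproduced with SciPy's generic optimizer on a toy quadratic; an illustrative stand-in:

# Illustrative stand-in: comparing CG and BFGS on a convex quadratic.
import numpy as np
from scipy.optimize import minimize

Q = np.array([[3.0, 0.5], [0.5, 1.0]])  # symmetric positive definite

def f(x):
    return 0.5 * x @ Q @ x

def grad(x):
    return Q @ x

x0 = np.array([5.0, -3.0])
for method in ('CG', 'BFGS'):
    res = minimize(f, x0, jac=grad, method=method)
    print(method, res.x, res.fun, res.nit)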
Example #13
def poisson(f_rhs, f_dist, h0, pts, tri, *args, **kwargs):
    """Solve Poisson's equation on a domain D by the FE method:
         - Laplacian u = f_rhs
    on D and
         u = 0
    on boundary of D.   The right-hand side is  f = f_rhs(pts,*args).
    We use a triangulation described by points pts, triangles tri,
    a mesh scale h0, and a signed distance function f_dist(pts,*args);
    see py_distmesh2d.py.  Returns
       uh     = approximate solution value at pts
       inside = index of interior point (or -1 if not interior)
    See fem_examples.py for examples.
    """
    announce = kwargs.get('announce', False)
    geps = 0.001 * h0
    ii = (f_dist(pts, *args) < -geps)  # boolean array for interior nodes
    Npts = np.shape(pts)[0]  # = number of nodes
    N = ii.sum()  # = number of *interior* nodes
    if announce:
        print ("  poisson: assembling on mesh with  %d  nodes and  %d  interior nodes" \
            % (Npts,N))
    inside = np.zeros(Npts, dtype=np.int32)  # index only the interior nodes
    count = 0
    for j in range(Npts):
        if ii[j]:
            inside[j] = count
            count = count + 1
        else:
            inside[j] = -1
    # eval f_rhs once for each node
    ff = np.zeros(Npts)
    for j in range(Npts):
        ff[j] = f_rhs(pts[j], *args)
    # loop over triangles to set up stiffness matrix A and load vector b
    # NOTE: not using sparse matrices at all
    A = np.zeros((N, N))
    b = np.zeros(N)
    for n in range(np.shape(tri)[0]):  # loop over triangles
        # indices, coordinates, and Jacobian of triangle
        j, k, l = tri[n, :]
        vj = inside[j]
        vk = inside[k]
        vl = inside[l]
        Jac = np.array([[pts[k, 0] - pts[j, 0], pts[l, 0] - pts[j, 0]],
                        [pts[k, 1] - pts[j, 1], pts[l, 1] - pts[j, 1]]])
        ar = abs(det2(Jac)) / 2.0
        C = ar / 12.0
        Q = inv2(np.dot(Jac.transpose(), Jac))
        fT = np.array([ff[j], ff[k], ff[l]])
        # add triangle's contribution to linear system  A x = b
        if ii[j]:
            A[vj, vj] += ar * np.sum(Q)
            b[vj] += C * np.dot(fT, np.array([2, 1, 1]))
        if ii[k]:
            A[vk, vk] += ar * Q[0, 0]
            b[vk] += C * np.dot(fT, np.array([1, 2, 1]))
        if ii[l]:
            A[vl, vl] += ar * Q[1, 1]
            b[vl] += C * np.dot(fT, np.array([1, 1, 2]))
        if ii[j] & ii[k]:
            A[vj, vk] -= ar * np.sum(Q[:, 0])
            A[vk, vj] = A[vj, vk]
        if ii[j] & ii[l]:
            A[vj, vl] -= ar * np.sum(Q[:, 1])
            A[vl, vj] = A[vj, vl]
        if ii[k] & ii[l]:
            A[vk, vl] += ar * Q[0, 1]
            A[vl, vk] = A[vk, vl]
    if announce:
        print(
            "  poisson: solving linear system  A uh = b  using cg (N=%d unknowns)"
            % N)
    uh = np.zeros(Npts)
    # solve by cg including (weak) test of positive definiteness
    uh[ii], iters, r = cg(A,
                          b,
                          np.zeros(np.shape(b)),
                          tol=1.0e-4,
                          h=h0,
                          test=True)
    #from numpy.linalg import solve
    #uh[ii] = solve(A,b)
    if (announce & (not (kwargs.get('getsys', False)))):
        if np.dot(b, b) > 0.0:
            err = np.sqrt(np.dot(r, r) / np.dot(b, b))
            print("  poisson: cg did  %d  iterations to get |r|/|b| = %.4e" %
                  (iters, err))
        else:
            print("  poisson: cg did  %d  iterations" % iters)
    if kwargs.get('getsys', False):
        return uh, inside, A, b
    else:
        return uh, inside