Example #1
import jax
import jax.numpy as jnp
import numpy as onp


# Squarer is assumed to be defined elsewhere; from the usage below,
# Squarer(a).square(x) appears to compute a * x**2.
def f(sq: Squarer, x):
    return sq.square(x)


f_jitted = jax.jit(f)


def f_no_class(x):
    """Implements the same math as Squarer.square, but without the class."""
    return 2 * x**2


f_no_class_jitted = jax.jit(f_no_class)


x_test = jnp.array([0., 1., 2., 3.])
x_test_onp = onp.array([0., 1., 2., 3.])
sq = Squarer(2.)
# Make jit compile
f_jitted(sq, x_test)
f_no_class_jitted(x_test)


def print_times(times, n_runs, n_repeats):
    print('{:d} loops, best of {:d}: {:.1f} usec per loop'.format(
        n_runs, n_repeats, 1e6 * min(times) / n_runs))


n_runs = 1000
n_repeats = 5
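
A minimal sketch of how the timing loop might look with timeit (an assumption; the original snippet is cut off before the measurement), reusing print_times from above:

import timeit

times = timeit.repeat(lambda: f_jitted(sq, x_test).block_until_ready(),
                      number=n_runs, repeat=n_repeats)
print_times(times, n_runs, n_repeats)

times = timeit.repeat(lambda: f_no_class_jitted(x_test).block_until_ready(),
                      number=n_runs, repeat=n_repeats)
print_times(times, n_runs, n_repeats)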
Example #2
Tmax = 500

#power spectrum resolution and range
fnums = 30
freq_range = [15, 100]

#SSN parameters
n = 2
k = 0.04
tauE = 20  # in ms
tauI = 10  # in ms
psi = 0.774

t_scale = 1
tau_s = np.array([
    3, 5, 100
]) * t_scale  #in ms, AMPA, GABA, NMDA current decay time constants

contrasts = np.array([0, 25, 50, 100])

gridsizedeg = 4
dradius = gridsizedeg / 8
gridperdeg = 5
gridsize = round(gridsizedeg * gridperdeg) + 1
magnFactor = 2  #mm/deg
# biological hyper_col length is ~750 um; magnFactor is typically 2 mm/deg in macaque V1
# hyper_col = 0.75/magnFactor
hyper_col = 8
Lx = gridsizedeg
Ly = gridsizedeg
# r_cent = np.array([0.3, 0.6, 0.9, 1.2, 1.5])
Example #3
 def as_array(self):
     return tree_util.tree_multimap(lambda *args: np.array(list(args)),
                                    *self.data)
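     # Note: tree_util.tree_multimap was later deprecated in JAX in favor of
     # tree_util.tree_map, which accepts multiple trees in the same way.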
Example #4
 def testOneHotNonArrayInput(self):
   actual = nn.one_hot([0, 1, 2], 3)
   expected = jnp.array([[1., 0., 0.],
                        [0., 1., 0.],
                        [0., 0., 1.]])
   self.assertAllClose(actual, expected)
Example #5
 def testGluValue(self):
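   # nn.glu splits the input into halves a and b along the given axis and returns
   # a * sigmoid(b); here that is 1.0 * sigmoid(0.0) = 0.5.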
   val = nn.glu(jnp.array([1.0, 0.0]), axis=0)
   self.assertAllClose(val, jnp.array([0.5]))
Example #6
def invm(Pbc):
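    # Minkowski inner product E**2 - px**2 - py**2 - pz**2 (squared invariant mass),
    # assuming each row of Pbc is a four-momentum ordered as (px, py, pz, E).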
    _Pbc = Pbc * np.array([-1,-1,-1,1])
    return np.sum(Pbc * _Pbc,axis=1)
Example #7
from jax.tree_util import tree_flatten
import jax.numpy as jnp


# reg is assumed to be defined in the enclosing scope (it is used as the default value).
def l2_regularizer(params, reg=reg):
    """ Return the L2 regularization loss. """
    leaves, _ = tree_flatten(params)
    return reg * jnp.sum(jnp.array([jnp.vdot(x, x) for x in leaves]))
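
A small usage sketch (the params pytree is an illustrative assumption; reg must be defined before the function, e.g. reg = 1e-3):

params = {'w': jnp.ones((3, 2)), 'b': jnp.zeros(2)}
print(l2_regularizer(params))  # 1e-3 * (6 * 1.0**2 + 2 * 0.0**2) = 0.006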
Example #8
    def test_jit_or_pmap_broadcast(self):
        def kernel_fn(x1,
                      x2,
                      do_flip,
                      keys,
                      do_square,
                      params,
                      _unused=None,
                      p=0.65):
            res = np.abs(np.matmul(x1, x2))
            if do_square:
                res *= res
            if do_flip:
                res = -res

            res *= random.uniform(keys) * p
            return [res, params]

        params = (np.array([1., 0.3]), (np.array([1.2]), np.array([0.5])))
        x2 = np.arange(0, 10).reshape((10, ))
        keys = random.PRNGKey(1)

        kernel_fn_pmapped = batch._jit_or_pmap_broadcast(kernel_fn,
                                                         device_count=0)
        x1 = np.arange(0, 10).reshape((1, 10))
        for do_flip in [True, False]:
            for do_square in [True, False]:
                with self.subTest(do_flip=do_flip,
                                  do_square=do_square,
                                  device_count=0):
                    res_1 = kernel_fn(x1,
                                      x2,
                                      do_flip,
                                      keys,
                                      do_square,
                                      params,
                                      _unused=True,
                                      p=0.65)
                    res_2 = kernel_fn_pmapped(x1,
                                              x2,
                                              do_flip,
                                              keys,
                                              do_square,
                                              params,
                                              _unused=True)
                    self.assertAllClose(res_1, res_2, True)

        test_utils.stub_out_pmap(batch, 1)
        x1 = np.arange(0, 10).reshape((1, 10))
        kernel_fn_pmapped = batch._jit_or_pmap_broadcast(kernel_fn,
                                                         device_count=1)
        for do_flip in [True, False]:
            for do_square in [True, False]:
                with self.subTest(do_flip=do_flip,
                                  do_square=do_square,
                                  device_count=1):
                    res_1 = kernel_fn(x1,
                                      x2,
                                      do_flip,
                                      keys,
                                      do_square,
                                      params,
                                      _unused=False,
                                      p=0.65)
                    res_2 = kernel_fn_pmapped(x1,
                                              x2,
                                              do_flip,
                                              keys,
                                              do_square,
                                              params,
                                              _unused=None)
                    self.assertAllClose(res_1[0], res_2[0], True)
                    self.assertAllClose(
                        tree_map(partial(np.expand_dims, axis=0), res_1[1]),
                        res_2[1], True)

        kernel_fn_pmapped = batch._jit_or_pmap_broadcast(kernel_fn,
                                                         device_count=2)
        x1 = np.arange(0, 20).reshape((2, 10))
        test_utils.stub_out_pmap(batch, 2)

        def broadcast(arg):
            return np.broadcast_to(arg, (2, ) + arg.shape)

        for do_flip in [True, False]:
            for do_square in [True, False]:
                with self.subTest(do_flip=do_flip,
                                  do_square=do_square,
                                  device_count=2):
                    res_1 = kernel_fn(x1,
                                      x2,
                                      do_flip,
                                      keys,
                                      do_square,
                                      params,
                                      p=0.2)
                    res_2 = kernel_fn_pmapped(x1,
                                              x2,
                                              do_flip,
                                              keys,
                                              do_square,
                                              params,
                                              _unused=None,
                                              p=0.2)
                    self.assertAllClose(res_1[0][0], res_2[0][0], True)
                    self.assertAllClose(res_1[0][1], res_2[0][1], True)
                    self.assertAllClose(tree_map(broadcast, res_1[1]),
                                        res_2[1], True)
Example #9
def integrate_tke(u, v, w, maskU, maskV, maskW, dxt, dxu, dyt, dyu, dzt, dzw, cost, cosu, kbot, kappaM, mxl, forc, forc_tke_surface, tke, dtke):
    tau = 0
    taup1 = 1
    taum1 = 2

    dt_tracer = 1.
    dt_mom = 1.
    AB_eps = 0.1
    alpha_tke = 1.
    c_eps = 0.7
    K_h_tke = 2000.

    flux_east = np.zeros_like(maskU)
    flux_north = np.zeros_like(maskU)
    flux_top = np.zeros_like(maskU)

    sqrttke = np.sqrt(np.maximum(0., tke[:, :, :, tau]))

    """
    integrate Tke equation on W grid with surface flux boundary condition
    """
    dt_tke = dt_mom  # use momentum time step to prevent spurious oscillations

    """
    vertical mixing and dissipation of TKE
    """
    ks = kbot - 1 # [2:-2, 2:-2]

    print("Init empty")
    a_tri = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2]) shapes match better if we ignore slicing
    b_tri = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2])
    c_tri = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2])
    d_tri = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2])
    delta = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2])
    b_tri_edge = np.zeros((maskU.shape[0], maskU.shape[1], maskU.shape[2])) # [2:-2, 2:-2])
    

    # delta = jax.ops.index_update(
    #     delta, jax.ops.index[:, :, :-1],
    #     dt_tke / dzt[np.newaxis, np.newaxis, 1:] * alpha_tke * 0.5 \
    #     * (kappaM[2:-2, 2:-2, :-1] + kappaM[2:-2, 2:-2, 1:])
    # )
    print("Init delta")
    for x in range(delta.shape[0]):
        for y in range(delta.shape[1]):
            for z in range(delta.shape[2]):
                if x >= 2 and x < delta.shape[0] - 2 and y >= 2 and y < delta.shape[1] - 2 and z < delta.shape[2]-1:
                    delta[x,y,z] = dt_tke / dzt[z+1] * alpha_tke * 0.5 \
                        * (kappaM[x, y, z] + kappaM[x, y, z+1])
                # else:
                #     # not necessary if we assume 0 init
                #     delta[x,y,z] = 0
        


    # a_tri = jax.ops.index_update(
    #     a_tri, jax.ops.index[:, :, 1:-1],
    #     -delta[:, :, :-2] /
    #     dzw[np.newaxis, np.newaxis, 1:-1]
    # )
    # a_tri = jax.ops.index_update(
    #     a_tri, jax.ops.index[:, :, -1],
    #     -delta[:, :, -2] / (0.5 * dzw[-1])
    # )
    print("Init attri")
    for x in range(a_tri.shape[0]):
        for y in range(a_tri.shape[1]):
            for z in range(a_tri.shape[2]):
                if x >= 2 and x < a_tri.shape[0] - 2 and y >= 2 and y < a_tri.shape[1] - 2:
                    if z > 0 and z < a_tri.shape[2]-1:
                        a_tri[x,y,z] = -delta[x,y,z-1] / dzw[z]
                    elif z == a_tri.shape[2] - 1:
                        a_tri[x,y,z] = -delta[x, y, z-1] / (0.5 * dzw[z])

    # b_tri = jax.ops.index_update(
    #     b_tri, jax.ops.index[:, :, 1:-1],
    #     1 + (delta[:, :, 1:-1] + delta[:, :, :-2]) / dzw[np.newaxis, np.newaxis, 1:-1] \
    #     + dt_tke * c_eps \
    #     * sqrttke[2:-2, 2:-2, 1:-1] / mxl[2:-2, 2:-2, 1:-1]
    # )
    # b_tri = jax.ops.index_update(
    #     b_tri, jax.ops.index[:, :, -1],
    #      1 + delta[:, :, -2] / (0.5 * dzw[-1]) \
    #     + dt_tke * c_eps / mxl[2:-2, 2:-2, -1] * sqrttke[2:-2, 2:-2, -1]
    # )
    print("Init b_tri")
    for x in range(b_tri.shape[0]):
        for y in range(b_tri.shape[1]):
            for z in range(b_tri.shape[2]):
                if x >= 2 and x < b_tri.shape[0] - 2 and y >= 2 and y < b_tri.shape[1] - 2:
                    if z > 0 and z < b_tri.shape[2]-1:
                        b_tri[x,y,z] = 1 + (delta[x, y, z] + delta[x, y, z-1]) / dzw[z] \
                            + dt_tke * c_eps \
                            * sqrttke[x, y, z] / mxl[x, y, z]
                    elif z == b_tri.shape[2]-1:
                        b_tri[x,y,z] =  1 + delta[x, y, z-1] / (0.5 * dzw[z]) \
                            + dt_tke * c_eps / mxl[x,y,z] * sqrttke[x,y,z]
                    else:
                        # not necessary if we assume 0 init
                        b_tri[x,y,z] = 0
                else:
                    # not necessary if we assume 0 init
                    b_tri[x,y,z] = 0

#   b_tri_edge = 1 + delta / dzw[np.newaxis, np.newaxis, :] \
#         + dt_tke * c_eps / mxl[2:-2, 2:-2, :] * sqrttke[2:-2, 2:-2, :]
    print("Init b_tri_edge")
    for x in range(b_tri_edge.shape[0]):
        for y in range(b_tri_edge.shape[1]):
            for z in range(b_tri_edge.shape[2]):
                if x >= 2 and x < b_tri_edge.shape[0] - 2 and y >= 2 and y < b_tri_edge.shape[1] - 2:
                    b_tri_edge[x,y,z] = 1 + delta[x,y,z] / dzw[z] \
                        + dt_tke * c_eps / mxl[x, y, z] * sqrttke[x, y, z] #mxl and sqrttke
                else:
                    # not necessary if we assume 0 init
                    b_tri_edge[x,y,z] = 0


    # c_tri = jax.ops.index_update(
    #     c_tri, jax.ops.index[:, :, :-1],
    #      -delta[:, :, :-1] / dzw[np.newaxis, np.newaxis, :-1]
    # )
    print("Init c_tri")
    for x in range(c_tri.shape[0]):
        for y in range(c_tri.shape[1]):
            for z in range(c_tri.shape[2]):
                if x >= 2 and x < c_tri.shape[0] - 2 and y >= 2 and y < c_tri.shape[1] - 2:
                    if z < c_tri.shape[2]-1:
                        c_tri[x,y,z] = -delta[x,y,z] / dzw[z]

    # d_tri = tke[2:-2, 2:-2, :, tau] + dt_tke * forc[2:-2, 2:-2, :]
    # d_tri = jax.ops.index_add(
    #     d_tri, jax.ops.index[:, :, -1],
    #     dt_tke * forc_tke_surface[2:-2, 2:-2] / (0.5 * dzw[-1])
    # )
    print("Init d_tri")
    for x in range(d_tri.shape[0]):
        for y in range(d_tri.shape[1]):
            for z in range(d_tri.shape[2]):
                if x >= 2 and x < d_tri.shape[0] - 2 and y >= 2 and y < d_tri.shape[1] - 2:
                    d_tri[x,y,z] = tke[x,y,z,tau] + dt_tke * forc[x,y,z]
                    if z == d_tri.shape[2]-1:
                        d_tri[x,y,z] += dt_tke * forc_tke_surface[x,y] / (0.5 * dzw[z])
    
    # so far so good
    print("Init masks and edge")
    # edge_mask = np.zeros(a_tri.shape)
    # water_mask = np.zeros(a_tri.shape)


    for x in range(a_tri.shape[0]):
        for y in range(a_tri.shape[1]):
            land_mask = ks[x,y] >= 0
            for z in range(a_tri.shape[2]):
                if x >= 2 and x < a_tri.shape[0] - 2 and y >= 2 and y < a_tri.shape[1] - 2:
                    edge_mask = land_mask and (z == ks[x, y])
                    water_mask = land_mask and (z >= ks[x, y])
                    if edge_mask:
                        a_tri[x,y,z] = 0 #water_mask * a_tri[x,y,z] * np.logical_not(edge_mask)
                    if not water_mask:
                        a_tri[x,y,z] = 0
                        b_tri[x,y,z] = 1.
                        c_tri[x,y,z] = 0
                        d_tri[x,y,z] = 0
                    if b_tri_edge is not None:
                        if edge_mask:
                            b_tri[x,y,z] = b_tri_edge[x,y,z]
    

                # if d_edge is not None:
                #     if edge_mask:
                #         d_tri[x,y,z] = d_edge[x,y,z]

    # if d_edge is not None:
    #     d_tri = where(edge_mask, d_edge, d_tri)
    print("solve tridiag")
    a_tri_jax = jnp.array(a_tri)
    b_tri_jax = jnp.array(b_tri)
    c_tri_jax = jnp.array(c_tri)
    d_tri_jax = jnp.array(d_tri)
    a_tri_jax.block_until_ready()
    b_tri_jax.block_until_ready()
    c_tri_jax.block_until_ready()
    d_tri_jax.block_until_ready()
    sol = solve_tridiag(a_tri_jax, b_tri_jax, c_tri_jax, d_tri_jax)
    print("solve tridiag done")
    sol.block_until_ready()
    sol = np.array(sol)
    # tke = jax.ops.index_update(
    #     tke, jax.ops.index[2:-2, 2:-2, :, taup1],
    #     where(water_mask, sol, tke[2:-2, 2:-2, :, taup1])
    # )
    print("integrate tridiag sol")
    for x in range(a_tri.shape[0]):
        for y in range(a_tri.shape[1]):
            for z in range(a_tri.shape[2]):
                water_mask = (ks[x,y] >= 0) and (z >= ks[x, y])
                if x >= 2 and x < c_tri.shape[0] - 2 and y >= 2 and y < c_tri.shape[1] - 2:
                    if water_mask:
                        tke[x,y,z,taup1] = sol[x,y,z]


    """
    Add TKE if surface density flux drains TKE in uppermost box
    """
    #mask = tke[2:-2, 2:-2, -1, taup1] < 0.0

        # tke_surf_corr = jax.ops.index_update(
        # tke_surf_corr, jax.ops.index[2:-2, 2:-2],
        # where(mask,
        #       -tke[2:-2, 2:-2, -1, taup1] * 0.5 * dzw[-1] / dt_tke,
        #       0.)
        #   )

        # tke = jax.ops.index_update(
        #     tke, jax.ops.index[2:-2, 2:-2, -1, taup1],
        #     np.maximum(0., tke[2:-2, 2:-2, -1, taup1])
        # )
    print("correct surf")
    tke_surf_corr = np.zeros((maskU.shape[0], maskU.shape[1]))
    for x in range(tke_surf_corr.shape[0]):
        for y in range(tke_surf_corr.shape[1]):
            if x >= 2 and x < tke_surf_corr.shape[0] - 2 and y >= 2 and y < tke_surf_corr.shape[1] - 2:
                tke_val = tke[x, y, tke.shape[2]-1, taup1]
                if tke_val < 0.0:
                    tke_surf_corr[x,y] = -tke_val * 0.5 * dzw[dzw.shape[0]-1] / dt_tke
                    tke[x, y,tke.shape[2]-1, taup1] = 0   
                else:
                    tke_surf_corr[x,y] = 0
  
    
    # """
    # add tendency due to lateral diffusion
    # """
    # flux_east = jax.ops.index_update(
    #     flux_east, jax.ops.index[:-1, :, :],
    #     K_h_tke * (tke[1:, :, :, tau] - tke[:-1, :, :, tau])
    #     / (cost[np.newaxis, :, np.newaxis] * dxu[:-1, np.newaxis, np.newaxis]) * maskU[:-1, :, :]
    # )
    print("lateral diffusion east")
    for x in range(flux_east.shape[0]):
        for y in range(flux_east.shape[1]):
            for z in range(flux_east.shape[2]):
                if x < flux_east.shape[0]-1:
                    flux_east[x,y,z] = K_h_tke * (tke[x+1, y, z, tau] - tke[x, y, z, tau]) \
                        / (cost[y] * dxu[x]) * maskU[x, y, z]


    # flux_north = jax.ops.index_update(
    #     flux_north, jax.ops.index[:, :-1, :],
    #     K_h_tke * (tke[:, 1:, :, tau] - tke[:, :-1, :, tau]) \
    #     / dyu[np.newaxis, :-1, np.newaxis] * maskV[:, :-1, :] * cosu[np.newaxis, :-1, np.newaxis]
    # )
    print("lateral diffusion north")
    for x in range(flux_north.shape[0]):
        for y in range(flux_north.shape[1]):
            for z in range(flux_north.shape[2]):
                if y < flux_north.shape[1]-1:
                    flux_north[x,y,z] = K_h_tke * (tke[x, y+1, z, tau] - tke[x, y, z, tau]) \
                        / dyu[y] * maskV[x, y, z] * cosu[y]


    # tke = jax.ops.index_add(
    #     tke, jax.ops.index[2:-2, 2:-2, :, taup1],
    #     dt_tke * maskW[2:-2, 2:-2, :] *
    #     ((flux_east[2:-2, 2:-2, :] - flux_east[1:-3, 2:-2, :])
    #         / (cost[np.newaxis, 2:-2, np.newaxis] * dxt[2:-2, np.newaxis, np.newaxis])
    #         + (flux_north[2:-2, 2:-2, :] - flux_north[2:-2, 1:-3, :])
    #         / (cost[np.newaxis, 2:-2, np.newaxis] * dyt[np.newaxis, 2:-2, np.newaxis]))
    # )
    print("add lateral diffusion")
    for x in range(tke.shape[0]):
        for y in range(tke.shape[1]):
            for z in range(tke.shape[2]):
                if x >= 2 and x < tke.shape[0] - 2 and y >= 2 and y < tke.shape[1] - 2:
                    tke[x,y,z,taup1] += dt_tke * maskW[x, y, z] * \
                        ((flux_east[x,y,z] - flux_east[x-1, y, z])
                        / (cost[y] * dxt[x])
                        + (flux_north[x,y,z] - flux_north[x, y-1, z])
                        / (cost[y] * dyt[y]))

    """
    add tendency due to advection
    """
    flux_east, flux_north, flux_top = adv_flux_superbee_wgrid(
        tke[:, :, :, tau], u[..., tau], v[..., tau], w[..., tau],
        maskW, dxt, dyt, dzw,
        cost, cosu, dt_tracer
    )

    # dtke = jax.ops.index_update(
    #     dtke, jax.ops.index[2:-2, 2:-2, :, tau],
    #     maskW[2:-2, 2:-2, :] * (-(flux_east[2:-2, 2:-2, :] - flux_east[1:-3, 2:-2, :])
    #     / (cost[jnp.newaxis, 2:-2, jnp.newaxis] * dxt[2:-2, jnp.newaxis, jnp.newaxis])
    #     - (flux_north[2:-2, 2:-2, :] - flux_north[2:-2, 1:-3, :])
    #     / (cost[jnp.newaxis, 2:-2, jnp.newaxis] * dyt[jnp.newaxis, 2:-2, jnp.newaxis]))
    # )

    print("Adding to dtke")
    for x in range(dtke.shape[0]):
        for y in range(dtke.shape[1]):
            for z in range(dtke.shape[2]):
                if x >= 2 and x < dtke.shape[0] - 2 and y >= 2 and y < dtke.shape[1] - 2:
                    dtke[x,y,z,tau] = maskW[x,y,z] * (-(flux_east[x,y,z] - flux_east[x-1, y, z]) \
                        / (cost[y] * dxt[x]) \
                        - (flux_north[x,y,z] - flux_north[x, y-1, z])                      \
                        / (cost[y] * dyt[y]))
                if z == 0:
                    dtke[x,y,z,tau] -= flux_top[x, y, 0] / dzw[0]
                if z >= 1 and z < dtke.shape[2]-1:
                    dtke[x,y,z,tau] -= (flux_top[x, y, z] - flux_top[x, y, z-1]) / dzw[z]
                if  z == dtke.shape[2]-1:
                    dtke[x,y,z,tau] -= (flux_top[x, y, z] - flux_top[x, y, z-1]) / \
                                            (0.5 * dzw[z])

                tke[x,y,z, taup1] += dt_tracer * ((1.5 + AB_eps) * dtke[x, y, z, tau] - (0.5 + AB_eps) * dtke[x, y, z, taum1])
    

    # dtke = jax.ops.index_add(
    #     dtke, jax.ops.index[:, :, 0, tau],
    #     -flux_top[:, :, 0] / dzw[0]
    # )
    # dtke = jax.ops.index_add(
    #     dtke, jax.ops.index[:, :, 1:-1, tau],
    #     -(flux_top[:, :, 1:-1] - flux_top[:, :, :-2]) / dzw[1:-1]
    # )
    # dtke = jax.ops.index_add(
    #     dtke, jax.ops.index[:, :, -1, tau],
    #     -(flux_top[:, :, -1] - flux_top[:, :, -2]) / \
    #     (0.5 * dzw[-1])
    # )

    # """
    # Adams-Bashforth time stepping
    # """
    # tke = jax.ops.index_add(
    #     tke, jax.ops.index[:, :, :, taup1],
    #     dt_tracer * ((1.5 + AB_eps) * dtke[:, :, :, tau] - (0.5 + AB_eps) * dtke[:, :, :, taum1])
    # )

    return tke, dtke, tke_surf_corr
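
The commented-out jax.ops.index_update / index_add calls in this function use an API that was later removed from JAX. As a minimal sketch (assuming jnp is jax.numpy and the arrays above are jnp arrays), the delta initialization can be written with the functional .at indexing instead of the explicit loops:

delta = jnp.zeros_like(kappaM)
delta = delta.at[2:-2, 2:-2, :-1].set(
    dt_tke / dzt[np.newaxis, np.newaxis, 1:] * alpha_tke * 0.5
    * (kappaM[2:-2, 2:-2, :-1] + kappaM[2:-2, 2:-2, 1:])
)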
Example #10
from typing import Any, Callable, Sequence, Optional

import jax.numpy as jnp
from jax import random

import flax
from flax.core import freeze, unfreeze
from flax import linen as nn

from jax.config import config
config.enable_omnistaging()  # Linen requires enabling omnistaging
# config.update("jax_enable_x64", True)# Enable complex128

key = random.PRNGKey(42)

N = 1  # single qubit
N1 = 10  # parameter space size

sz = jnp.array([[1, 0], [0, -1]], dtype=jnp.float32)

sx = jnp.array([[0, 1], [1, 0]], dtype=jnp.float32)


class ExplicitMLP(nn.Module):
    '''
    MLP class
    '''
    features: Sequence[int]

    def setup(self):
        self.layers = [nn.Dense(feat) for feat in self.features]

    def __call__(self, inputs):
        x = inputs
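
The __call__ body above is cut off. A plausible completion for this standard Linen pattern (an assumption, not the original code) applies each Dense layer in turn with a relu between hidden layers:

    def __call__(self, inputs):
        x = inputs
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i != len(self.layers) - 1:
                x = nn.relu(x)
        return x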
Example #11
    def func(y, t, *args):
        omega, params, = args

        return -1.0j * (omega * sz + A.apply(params, jnp.array([t, omega])) *
                        sx) @ y  #Using DNN as control field
Example #12
def load_prob_method_to_result(problem_ids=all_problems,
                               method_ids=all_methods,
                               problem_to_methods=None,
                               metrics=['mse']):
    '''
    Description: Loads precomputed results for the given problems, methods, and metrics.

    Args:
        problem_ids (list): ids of problems to evaluate on
        method_ids (list): ids of methods to use
        problem_to_methods (dict): map of the form problem_id -> list of method_id. If None,
                                  then we assume that the user wants to test every method
                                  in method_to_params against every problem in problem_to_params
        metrics (list): metrics to load

     Returns:
        prob_method_to_result (dict): Dictionary containing results for all specified metrics and
                                     performance (time and memory usage) for all problem-method
                                     associations.
    '''

    if (problem_to_methods is None):
        problem_to_methods = create_full_problem_to_methods(
            problem_ids, method_ids)

    prob_method_to_result = {}
    ''' Get loss series '''
    for metric in metrics:
        for problem_id in problem_ids:
            # datapath for current metric and problem
            tigerforecast_dir = get_tigerforecast_dir()
            datapath = 'data/precomputed_results/' + metric + '_' + problem_id[:
                                                                               -3] + '.csv'
            datapath = os.path.join(tigerforecast_dir, datapath)

            with open(datapath) as csvfile:
                reader = csv.reader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
                method_no = 0
                for row in reader:
                    if (all_methods[method_no] in method_ids):
                        prob_method_to_result[(
                            metric, problem_id,
                            all_methods[method_no])] = np.array(row)
                    method_no += 1
    ''' Get time and memory usage '''
    for problem_id in problem_ids:
        # datapath for current metric and problem
        tigerforecast_dir = get_tigerforecast_dir()
        datapath = 'data/precomputed_results/time_memory' + '_' + problem_id[:
                                                                             -3] + '.csv'
        datapath = os.path.join(tigerforecast_dir, datapath)

        with open(datapath) as csvfile:
            reader = csv.reader(csvfile, quoting=csv.QUOTE_NONNUMERIC)
            method_no = 0
            for row in reader:
                if (all_methods[method_no] in method_ids):
                    prob_method_to_result[('time', problem_id,
                                           all_methods[method_no])] = row[0]
                    prob_method_to_result[('memory', problem_id,
                                           all_methods[method_no])] = row[1]
                method_no += 1

    return prob_method_to_result
Example #13
 def setup(self):
     if self.goal_state is None:
         self.goal_state = jnp.array([0., -1., 0.])
Example #14
 def jnp_fun(x, unpacked_indexer):
   indexer = pack_indexer(unpacked_indexer)
   return jnp.array(x)[indexer]
Example #15
def debug_fft():
    from jax.config import config

    config.update("jax_enable_x64", True)

    import time
    import numpy as np

    import jax
    from jax import numpy as jnp

    np.random.seed(0)

    signal = np.random.randn(2 ** 20)
    signal_jax = jnp.array(signal)

    jfft = jax.jit(jnp.fft.fft)

    import tensorflow as tf
    signal_tf = tf.constant(signal, dtype=tf.complex128)

    def tffft(x):
        return tf.signal.fft(x).numpy()

    X_np = np.fft.fft(signal)
    X_jax = jfft(signal_jax)
    X_tf = tffft(signal_tf)


    print(np.mean(np.abs(X_np)))
    print("With JAX:")
    print('max:\t', jnp.max(jnp.abs(X_np - X_jax)))
    print('mean:\t', jnp.mean(jnp.abs(X_np - X_jax)))
    print('min:\t', jnp.min(jnp.abs(X_np - X_jax)))

    print("With Tensorflow:")
    print('max:\t', jnp.max(jnp.abs(X_np - X_tf)))
    print('mean:\t', jnp.mean(jnp.abs(X_np - X_tf)))
    print('min:\t', jnp.min(jnp.abs(X_np - X_tf)))

    ### CPU
    # 907.3490574884647
    # max:	 2.8773885332210747
    # mean:	 0.3903197564919141
    # min:	 2.4697454729898156e-05

    ### GPU
    # 907.3490574884647
    # max:	 0.001166179716824765
    # mean:	 0.00020841654559267488
    # min:	 2.741492442122853e-07

    R = 100
    ts = time.time()
    for i in range(R):
        _ = np.fft.fft(signal)
    print('numpy fft execution time [ms]:\t', (time.time() - ts) / R * 1000)

    # Compile
    _ = jfft(signal_jax).block_until_ready()

    ts = time.time()
    for i in range(R):
        _ = jfft(signal_jax).block_until_ready()
    print('jax fft execution time [ms]:\t', (time.time() - ts) / R * 1000)

    ts = time.time()
    for i in range(R):
        _ = tffft(signal_tf)
    print('tensorflow fft execution time [ms]:\t', (time.time() - ts) / R * 1000)
Example #16
def line_search(f,
                xk,
                pk,
                old_fval=None,
                old_old_fval=None,
                gfk=None,
                c1=1e-4,
                c2=0.9,
                maxiter=20):
    """Inexact line search that satisfies strong Wolfe conditions.

  Algorithm 3.5 from Wright and Nocedal, 'Numerical Optimization', 1999, pg. 59-61

  Args:
    f: function of the form f(x) where x is a flat ndarray and returns a real
      scalar. The function should be composed of operations with vjp defined.
    xk: initial guess.
    pk: direction to search in. Assumes the direction is a descent direction.
    old_fval, gfk: initial value of value_and_gradient at position xk.
    old_old_fval: unused argument, only for scipy API compliance.
    maxiter: maximum number of iterations to search
    c1, c2: Wolfe criteria constant, see ref.

  Returns: LineSearchResults
  """
    def restricted_func_and_grad(t):
        phi, g = jax.value_and_grad(f)(xk + t * pk)
        dphi = jnp.dot(g, pk)
        return phi, dphi, g

    if old_fval is None or gfk is None:
        phi_0, dphi_0, gfk = restricted_func_and_grad(0.)
    else:
        phi_0 = old_fval
        dphi_0 = jnp.dot(gfk, pk)

    def wolfe_one(a_i, phi_i):
        # actually negation of W1
        return phi_i > phi_0 + c1 * a_i * dphi_0

    def wolfe_two(dphi_i):
        return jnp.abs(dphi_i) <= -c2 * dphi_0

    state = _LineSearchState(
        done=False,
        failed=False,
        # algorithm begins at 1 as per Wright and Nocedal, however Scipy has a
        # bug and starts at 0. See https://github.com/scipy/scipy/issues/12157
        i=1,
        a_i1=0.,
        phi_i1=phi_0,
        dphi_i1=dphi_0,
        nfev=1 if (old_fval is None or gfk is None) else 0,
        ngev=1 if (old_fval is None or gfk is None) else 0,
        a_star=0.,
        phi_star=phi_0,
        dphi_star=dphi_0,
        g_star=gfk,
        saddle_point=False,
    )

    def body(state):
        # no amax in this version, we just double as in scipy.
        # unlike original algorithm we do our next choice at the start of this loop
        a_i = jnp.where(state.i == 1, 1., state.a_i1 * 2.)
        # if a_i <= 0 then something went wrong. In practice any really small step
        # length is a failure. Likely means the search pk is not good, perhaps we
        # are at a saddle point.
        saddle_point = a_i < 1e-5
        state = state._replace(failed=saddle_point, saddle_point=saddle_point)

        phi_i, dphi_i, g_i = restricted_func_and_grad(a_i)
        state = state._replace(nfev=state.nfev + 1, ngev=state.ngev + 1)

        star_to_zoom1 = wolfe_one(a_i, phi_i) | ((phi_i >= state.phi_i1) &
                                                 (state.i > 1))
        star_to_i = wolfe_two(dphi_i) & (~star_to_zoom1)
        star_to_zoom2 = (dphi_i >= 0.) & (~star_to_zoom1) & (~star_to_i)

        zoom1 = _zoom(restricted_func_and_grad, wolfe_one, wolfe_two,
                      state.a_i1, state.phi_i1, state.dphi_i1, a_i, phi_i,
                      dphi_i, gfk, ~star_to_zoom1)

        state = state._replace(nfev=state.nfev + zoom1.nfev,
                               ngev=state.ngev + zoom1.ngev)

        zoom2 = _zoom(restricted_func_and_grad, wolfe_one, wolfe_two, a_i,
                      phi_i, dphi_i, state.a_i1, state.phi_i1, state.dphi_i1,
                      gfk, ~star_to_zoom2)

        state = state._replace(nfev=state.nfev + zoom2.nfev,
                               ngev=state.ngev + zoom2.ngev)

        state = state._replace(
            done=star_to_zoom1 | state.done,
            failed=(star_to_zoom1 & zoom1.failed) | state.failed,
            **_binary_replace(
                star_to_zoom1,
                state._asdict(),
                zoom1._asdict(),
                keys=['a_star', 'phi_star', 'dphi_star', 'g_star'],
            ),
        )
        state = state._replace(
            done=star_to_i | state.done,
            **_binary_replace(
                star_to_i,
                state._asdict(),
                dict(
                    a_star=a_i,
                    phi_star=phi_i,
                    dphi_star=dphi_i,
                    g_star=g_i,
                ),
            ),
        )
        state = state._replace(
            done=star_to_zoom2 | state.done,
            failed=(star_to_zoom2 & zoom2.failed) | state.failed,
            **_binary_replace(
                star_to_zoom2,
                state._asdict(),
                zoom2._asdict(),
                keys=['a_star', 'phi_star', 'dphi_star', 'g_star'],
            ),
        )
        state = state._replace(i=state.i + 1,
                               a_i1=a_i,
                               phi_i1=phi_i,
                               dphi_i1=dphi_i)
        return state

    state = while_loop(
        lambda state: (~state.done) & (state.i <= maxiter) & (~state.failed),
        body, state)

    status = jnp.where(
        state.failed & (~state.saddle_point),
        jnp.array(1),  # zoom failed
        jnp.where(
            state.failed & state.saddle_point,
            jnp.array(2),  # saddle point reached,
            jnp.where(
                state.i > maxiter,
                jnp.array(3),  # maxiter reached
                jnp.array(0),  # passed (should be)
            ),
        ),
    )
    results = _LineSearchResults(
        failed=state.failed | (~state.done),
        nit=state.i - 1,  # because iterations started at 1
        nfev=state.nfev,
        ngev=state.ngev,
        k=state.i,
        a_k=state.a_star,
        f_k=state.phi_star,
        g_k=state.g_star,
        status=status,
    )
    return results
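
A small usage sketch on a simple quadratic objective (illustrative only; it assumes the helper types used above, such as _LineSearchState and _zoom, are available from the surrounding module):

f = lambda x: jnp.sum((x - 1.0) ** 2)
xk = jnp.zeros(3)
pk = -jax.grad(f)(xk)  # steepest-descent direction
res = line_search(f, xk, pk)
print(res.a_k, res.f_k)  # accepted step length and objective value at the new point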
Example #17
def invm_plus(Pb,Pc):
    Pbc = Pb + Pc
    _Pbc = Pbc * np.array([-1,-1,-1,1])
    return np.sum(Pbc * _Pbc,axis=1)
Example #18
    def supervised_optimization(self,
                                sup_density_list,
                                wiring_str,
                                save_supervised_result_bool,
                                dataset_str,
                                EXPLOITATION_NUM_EPOCHS,
                                EXPLOITATION_BATCH_SIZE,
                                OPTIMIZER_STR,
                                STEP_SIZE,
                                REG,
                                W_initializers_str='glorot_normal()',
                                b_initializers_str='normal()',
                                init_weight_rescale_bool=False,
                                EXPLOITATION_VALIDATION_FRACTION=0.1,
                                EXPLOIT_TRAIN_DATASET_FRACTION=1.0,
                                RECORD_ACC_FREQ=100,
                                DROPOUT_LAYER_POS=[],
                                **kwargs):
        """ 
        Train a neural network with loaded wiring from scratch.

        Args: 
            sup_density_list: a list of network density levels
            wiring_str: a string that represents the network wiring, e.g., trans, rand, snip
            dataset_str: a string used to retrieve the dataset
            EXPLOITATION_NUM_EPOCHS: the number of epochs used in supervised training
            EXPLOITATION_BATCH_SIZE: the batch size used in supervised training
            OPTIMIZER_STR: a string used to retrieve the optimizer
            STEP_SIZE: step size of the optimizer
            REG: l2 regularization constant
            EXPLOITATION_VALIDATION_FRACTION: the fraction of training data held out for validation purpose
            EXPLOIT_TRAIN_DATASET_FRACTION: the fraction of training data used in evaluation. 
            RECORD_ACC_FREQ: the frequency for recording train and test results

        Returns:
            train_acc_list_runs: a list of training accuracy
            test_acc_list_runs: a list of testing accuracy
        """

        for density in sup_density_list:
            if density not in self.ntt_setup_dict['NN_DENSITY_LEVEL_LIST']:
                raise ValueError(
                    'The desired density level for supervised training is not used in NTT.'
                )

        dataset_info = Dataset(
            datasource=dataset_str,
            VALIDATION_FRACTION=EXPLOITATION_VALIDATION_FRACTION)

        dataset = dataset_info.dataset

        # configure the dataset
        gen_batches = dataset_info.data_stream(EXPLOITATION_BATCH_SIZE)

        batch_input_shape = [-1] + self.ntt_setup_dict['instance_input_shape']

        nr_training_samples = len(dataset['train']['input'])

        nr_training_samples_subset = int(nr_training_samples *
                                         EXPLOIT_TRAIN_DATASET_FRACTION)

        train_input = dataset['train'][
            'input'][:nr_training_samples_subset].reshape(batch_input_shape)
        train_label = dataset['train']['label'][:nr_training_samples_subset]

        test_input = dataset['test']['input'].reshape(batch_input_shape)
        test_label = dataset['test']['label']

        num_complete_batches, leftover = divmod(nr_training_samples,
                                                EXPLOITATION_BATCH_SIZE)

        num_mini_batches_per_epochs = num_complete_batches + bool(leftover)

        total_batch = EXPLOITATION_NUM_EPOCHS * num_mini_batches_per_epochs

        if len(DROPOUT_LAYER_POS) == 0:
            # in this case, dropout is NOT used
            init_fun_no_dropout, f_train = model_dict[self.model_str](
                W_initializers_str=W_initializers_str,
                b_initializers_str=b_initializers_str)
            f_test = f_train
            f_no_dropout = f_train
            key_dropout = None
            subkey_dropout = None

        else:
            # in this case, dropout is used
            _, f_train = model_dict[self.model_str + '_dropout'](
                mode='train',
                W_initializers_str=W_initializers_str,
                b_initializers_str=b_initializers_str)
            _, f_test = model_dict[self.model_str + '_dropout'](
                mode='test',
                W_initializers_str=W_initializers_str,
                b_initializers_str=b_initializers_str)

            init_fun_no_dropout, f_no_dropout = model_dict[self.model_str](
                W_initializers_str=W_initializers_str,
                b_initializers_str=b_initializers_str)

            key_dropout = random.PRNGKey(0)

        @jit
        def step(i, opt_state, x, y, masks, key):
            this_step_params = get_params(opt_state)
            masked_g = grad(softmax_cross_entropy_with_logits_l2_reg)(
                this_step_params,
                f_train,
                x,
                y,
                masks,
                L2_REG_COEFF=REG,
                key=key)
            return opt_update(i, masked_g, opt_state)

        train_results_dict = {}
        test_results_dict = {}
        trained_masked_dict = {}

        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)

        time.sleep(orig_random.uniform(1, 5))
        now_str = '__' + str(datetime.now().strftime("%D:%H:%M:%S")).replace(
            '/', ':')

        supervised_model_info = '[u]' + self.ntt_file_name + '_[s]' + dataset_str

        supervised_model_wiring_info = supervised_model_info + '_' + wiring_str

        supervised_model_wiring_dir = self.supervised_result_path + supervised_model_info + '/' + supervised_model_wiring_info + now_str

        if save_supervised_result_bool:

            while os.path.exists(supervised_model_wiring_dir):
                temp = supervised_model_wiring_dir + '_0'
                supervised_model_wiring_dir = temp
            # print(supervised_model_wiring_dir)
            os.makedirs(supervised_model_wiring_dir)

            logging.basicConfig(filename=supervised_model_wiring_dir +
                                "/supervised_learning_log.log",
                                format='%(asctime)s %(message)s',
                                filemode='w',
                                level=logging.DEBUG)
        else:
            logging.basicConfig(filename="supervised_learning_log.log",
                                format='%(asctime)s %(message)s',
                                filemode='w',
                                level=logging.DEBUG)

        for nn_density_level in sup_density_list:

            nn_density_level = onp.round(nn_density_level, 2)
            train_acc_list_runs = []
            test_acc_list_runs = []
            trained_masked_params_runs = []

            for run_index in range(1, self.ntt_setup_dict['NUM_RUNS'] + 1):

                if wiring_str == 'trans':
                    # load ntt masks and parameters
                    density_run_dir = '/' + 'density_' + str(
                        nn_density_level) + '/' + 'run_' + str(run_index)

                    transferred_masks_fileName = '/transferred_masks_' + self.model_str + density_run_dir.replace(
                        '/', '_') + '.npy'

                    transferred_param_fileName = '/transferred_params_' + self.model_str + density_run_dir.replace(
                        '/', '_') + '.npy'

                    masks = list(
                        np.load(self.ntt_result_path + density_run_dir +
                                transferred_masks_fileName,
                                allow_pickle=True))

                    masked_params = list(
                        np.load(self.ntt_result_path + density_run_dir +
                                transferred_param_fileName,
                                allow_pickle=True))

                elif wiring_str == 'rand':
                    # randomly initialize masks and parameters

                    _, params = init_fun_no_dropout(random.PRNGKey(run_index),
                                                    tuple(batch_input_shape))

                    masks = get_masks_from_jax_params(
                        params,
                        nn_density_level,
                        global_bool=self.ntt_setup_dict['GLOBAL_PRUNE_BOOL'],
                        magnitude_base_bool=False,
                        reshuffle_seed=run_index)

                    masked_params = get_sparse_params_filtered_by_masks(
                        params, masks)

                elif wiring_str == 'dense':
                    # randomly initialize masks and parameters

                    _, params = init_fun_no_dropout(random.PRNGKey(run_index),
                                                    tuple(batch_input_shape))

                    #                     masks = get_masks_from_jax_params(params, nn_density_level, global_bool = self.ntt_setup_dict['GLOBAL_PRUNE_BOOL'], magnitude_base_bool = False, reshuffle_seed = run_index)
                    logger.info("Dense net!!")

                    masks = None
                    masked_params = params

                elif wiring_str == 'snip':
                    # randomly initialize masks and parameters
                    if dataset_str == 'cifar-10':
                        num_examples_snip = 128
                    else:
                        num_examples_snip = 100

                    snip_input = dataset['train']['input'][:num_examples_snip]

                    snip_label = dataset['train']['label'][:num_examples_snip]

                    snip_batch = (snip_input, snip_label)

                    _, params = init_fun_no_dropout(random.PRNGKey(run_index),
                                                    tuple(batch_input_shape))

                    if not self.ntt_setup_dict['GLOBAL_PRUNE_BOOL']:
                        logger.info("Use layerwise snip")

                    masks = get_snip_masks(
                        params, nn_density_level, f_no_dropout, snip_batch,
                        batch_input_shape,
                        self.ntt_setup_dict['GLOBAL_PRUNE_BOOL'])

                    masked_params = get_sparse_params_filtered_by_masks(
                        params, masks)

                elif wiring_str == 'logit_snip':
                    # randomly initialize masks and parameters
                    if dataset_str == 'cifar-10':
                        num_examples_snip = 128
                    else:
                        num_examples_snip = 100

                    snip_input = dataset['train']['input'][:num_examples_snip]

                    _, params = init_fun_no_dropout(random.PRNGKey(run_index),
                                                    tuple(batch_input_shape))

                    masks = get_logit_snip_masks(
                        params, nn_density_level, f_no_dropout, snip_input,
                        batch_input_shape,
                        self.ntt_setup_dict['GLOBAL_PRUNE_BOOL'])
                    #                     get_snip_masks(params, nn_density_level, f_no_dropout, snip_batch, batch_input_shape)

                    masked_params = get_sparse_params_filtered_by_masks(
                        params, masks)

                else:
                    raise ValueError('The wiring string is undefined.')

            # optionally, add dropout layers #Test without dropout masks
                if len(DROPOUT_LAYER_POS) > 100:
                    dropout_masked_params = [
                        ()
                    ] * (len(masked_params) + len(DROPOUT_LAYER_POS))

                    dropout_masks = [[]] * (len(masked_params) +
                                            len(DROPOUT_LAYER_POS))

                    print(len(masked_params))  #check dropout position
                    #pprint(masked_params) # check

                    num_inserted = 0
                    for i in range(len(dropout_masked_params)):
                        if i in DROPOUT_LAYER_POS:
                            num_inserted += 1
                        else:
                            dropout_masked_params[i] = masked_params[
                                i - num_inserted]
                            dropout_masks[i] = masks[i - num_inserted]

                    masks = dropout_masks
                    masked_params = dropout_masked_params

                if init_weight_rescale_bool == True:
                    logger.info(
                        "Init weight rescaled: W_scaled = W/sqrt(nn_density_level)"
                    )
                    scaled_params = []

                    for i in range(len(masked_params)):
                        if len(masked_params[i]) == 2:
                            scaled_params.append(
                                (masked_params[i][0] *
                                 np.sqrt(1 / nn_density_level),
                                 masked_params[i][1]))
                        else:
                            scaled_params.append(masked_params[i])

                    masked_params = scaled_params

                optimizer_with_params = optimizer_dict[OPTIMIZER_STR](
                    step_size=STEP_SIZE)

                opt_init, opt_update, get_params = optimizer_with_params

                opt_state = opt_init(masked_params)

                train_acc_list = []

                test_acc_list = []

                itercount = itertools.count()

                for iteration in range(total_batch):

                    batch_xs, batch_ys = next(gen_batches)

                    batch_xs = batch_xs.reshape(batch_input_shape)

                    if key_dropout is not None:
                        key_dropout, subkey_dropout = random.split(key_dropout)

                    opt_state = step(next(itercount),
                                     opt_state,
                                     batch_xs,
                                     batch_ys,
                                     masks=masks,
                                     key=subkey_dropout)

                    if iteration % RECORD_ACC_FREQ == 0:

                        masked_trans_params = get_params(opt_state)

                        train_acc = accuracy(masked_trans_params, f_test,
                                             train_input, train_label,
                                             key_dropout)
                        test_acc = accuracy(masked_trans_params, f_test,
                                            test_input, test_label,
                                            key_dropout)

                        train_acc_list.append(train_acc)
                        test_acc_list.append(test_acc)

                        logger.info(
                            "NN density %.2f | Run %03d/%03d | Iteration %03d/%03d | Train acc %.2f%% | Test acc %.2f%%",
                            nn_density_level, run_index,
                            self.ntt_setup_dict['NUM_RUNS'], iteration + 1,
                            total_batch, train_acc * 100, test_acc * 100)

                trained_masked_trans_params = get_params(opt_state)

                train_acc_list_runs.append(train_acc_list)
                test_acc_list_runs.append(test_acc_list)
                trained_masked_params_runs.append(trained_masked_trans_params)

            train_acc_list_runs = np.array(train_acc_list_runs)
            test_acc_list_runs = np.array(test_acc_list_runs)

            train_results_dict[str(nn_density_level)] = train_acc_list_runs
            test_results_dict[str(nn_density_level)] = test_acc_list_runs
            trained_masked_dict[str(
                nn_density_level)] = trained_masked_params_runs

            if save_supervised_result_bool:

                supervised_model_wiring_dir_run = supervised_model_wiring_dir + '/density_' + str(
                    round(nn_density_level, 2)) + '/'

                while os.path.exists(supervised_model_wiring_dir_run):
                    temp = supervised_model_wiring_dir_run + '_0'
                    supervised_model_wiring_dir_run = temp

                os.makedirs(supervised_model_wiring_dir_run)

                model_summary_str = '[u]' + self.ntt_file_name + '_[s]' + dataset_str + '_density_' + str(
                    round(nn_density_level, 2))

                np.save(
                    supervised_model_wiring_dir_run + '/' +
                    'supervised_trained_' + model_summary_str, [
                        nn_density_level, train_acc_list_runs,
                        test_acc_list_runs, trained_masked_params_runs
                    ])

        output = dict(train_results=train_results_dict,
                      test_results=test_results_dict,
                      trained_params=trained_masked_dict)

        return output
Example #19
import numpy as np
import jax.numpy as jnp

chi = 0.3
# uB associated parameter
B = 2
# constant cost 
c_h = 0.5
# social welfare after the unemployment
welfare = 5
# tax rate before and after retirement
tau_L = 0.2
tau_R = 0.1
# number of states S
nS = 27


# probability of survival
Pa = jnp.array(np.load("constant/prob.npy"))
# deterministic income
detEarning = jnp.array(np.load("constant/detEarningHigh.npy"))
# Define transition matrix of economical states S
Ps = np.genfromtxt('constant/Ps.csv',delimiter=',')
fix = (np.sum(Ps, axis = 1) - 1)
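# Subtract each row's excess probability mass from the first entry that can absorb it,
# so that every row of the transition matrix Ps sums to exactly 1.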
for i in range(nS):
    for j in range(nS):
        if Ps[i,j] - fix[i] > 0:
            Ps[i,j] = Ps[i,j] - fix[i]
            break
Ps = jnp.array(Ps)
# The possible GDP growth, stock return, bond return
gkfe = np.genfromtxt('constant/gkfe.csv',delimiter=',')
gkfe = jnp.array(gkfe)
# GDP growth depending on current S state
Example #20
    def test_sample_loss_fn(self):
        example = self._make_example()
        example = dataclasses.replace(
            example,
            edges=sparse_operator.SparseCoordOperator(
                input_indices=jnp.array([[0], [0], [0], [0], [1], [2], [0],
                                         [0]]),
                output_indices=jnp.array([[1, 2], [2, 3], [2, 2], [3, 0],
                                          [0, 2], [0, 3], [0, 0], [0, 0]]),
                values=jnp.array([1, 1, 1, 1, 1, 1, 0, 0])))

        @flax.nn.module
        def mock_model_def(example):
            del example
            side_outputs.SideOutput(
                -jnp.arange(5).astype("float32").reshape((1, 5)),
                name="one_sample_log_prob_per_edge_per_node")
            side_outputs.SideOutput(0.3, name="one_sample_reward_baseline")

            return model_util.safe_logit(
                jnp.array([
                    [0.0, 0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 1.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 1.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0, 0.0],
                    [0.0, 0.0, 0.0, 0.0, 0.0],
                ]))

        _, params = mock_model_def.init(jax.random.PRNGKey(0), example)
        mock_model = flax.nn.Model(mock_model_def, params)

        _, _, _, loss, metrics = train_edge_supervision_lib.sample_loss_fn(
            mock_model, (example, jax.random.PRNGKey(0)),
            target_edge_index=0,
            num_edge_types=3,
            num_rollouts=1,
            leave_one_out_baseline=False)

        np.testing.assert_allclose(metrics["reward"], 0.75, rtol=1e-5)
        np.testing.assert_allclose(metrics["shifted_reward"],
                                   0.75 - 0.3,
                                   rtol=1e-5)
        np.testing.assert_allclose(metrics["policy_log_prob"], -1.5, rtol=1e-5)
        np.testing.assert_allclose(metrics["learned_baseline"], 0.3, rtol=1e-5)
        np.testing.assert_allclose(metrics["baseline_penalty"],
                                   0.001 * (0.75 * (0.7 * 0.7) + 0.25 *
                                            (0.3 * 0.3)),
                                   rtol=1e-5)
        np.testing.assert_allclose(metrics["reinforce_term"],
                                   (0 * 0.7 + 1 * 0.7 + 2 * 0.7 + 3 * -0.3) /
                                   4,
                                   rtol=1e-5)

        np.testing.assert_allclose(loss,
                                   metrics["reinforce_term"] +
                                   metrics["baseline_penalty"],
                                   rtol=1e-5)

        (output_logits, targets, valid_mask, loss,
         metrics) = train_edge_supervision_lib.sample_loss_fn(
             mock_model, (example, jax.random.PRNGKey(0)),
             target_edge_index=0,
             num_edge_types=3,
             num_rollouts=20,
             leave_one_out_baseline=True)

        self.assertEqual(output_logits.shape, (5, 5))
        self.assertEqual(targets.shape, (5, 5))
        self.assertEqual(valid_mask.shape, (5, 5))

        np.testing.assert_allclose(metrics["reward"], 0.75, rtol=1e-5)
        np.testing.assert_allclose(metrics["shifted_reward"], 0, rtol=1e-5)
        np.testing.assert_allclose(metrics["learned_baseline"], 0.3, rtol=1e-5)
        np.testing.assert_allclose(metrics["baseline_penalty"], 0.0, rtol=1e-5)
Example #21
 def testOneHotOutOfBound(self):
   actual = nn.one_hot(jnp.array([-1, 3]), 3)
   expected = jnp.array([[0., 0., 0.],
                        [0., 0., 0.]])
   self.assertAllClose(actual, expected)
Example #22
import neos.transforms as transforms
import jax.numpy as jnp
import neos.models as models
import jax
import scipy.optimize
import neos.fit as fit
from neos.cls import cls_maker
import pyhf
import funnyscipy

bounds = jnp.array([[0, 10], [0, 20]])

# check that we map to inf space (i.e. -pi/2 to pi/2)
w = jnp.linspace(0, 10)
x = transforms.toinf(w, bounds[0])
print(x.min(), x.max())

# check that we can map very large values to bounded space
w = jnp.linspace(-1000, 1000, 1001)
x = transforms.to_bounded(w, bounds[0])
print(x.min(), x.max())

# define NLL functions in both parameter spaces


def make_nll_boundspace(hyperpars):
    s, b, db = hyperpars

    def nll_boundspace(pars):
        truth_pars = [0, 1]
        m = models.hepdata_like([s], [b], [db])
Example #23
 def testOneHotCustomDtype(self):
   actual = nn.one_hot(jnp.array([0, 1, 2]), 3, dtype=jnp.bool_)
   expected = jnp.array([[True, False, False],
                        [False, True, False],
                        [False, False, True]])
   self.assertAllClose(actual, expected)
Example #24
 def test_new(self):
     stat = metrics.MeanStat.new(jnp.array([2, 3, 1]), jnp.array([1, 0, 1]))
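     # Entries with zero weight are zeroed out of the accumulator, as the assertions below check.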
     npt.assert_array_equal(stat.accum, [2, 0, 1])
     npt.assert_array_equal(stat.weight, [1, 0, 1])
Example #25
 def value(P, r, pis):
     return np.array([
         utils.value_functional(P, r, pi, discount) for pi in pis
     ])  # jax doesn't seem to like me changing the batch size to a vmap?!?
Example #26
 def test_reduce(self):
     stat = metrics.MeanStat.new(jnp.array([1, 2, 4]), jnp.array([1, 1, 0]))
     reduced_stat = stat.reduce()
     self.assertEqual(reduced_stat.accum, 3)
     self.assertEqual(reduced_stat.weight, 2)
Example #27
 def setUp(self):
   super().setUp()
   self.init_params = (jnp.array([1., 2.]), jnp.array([3., 4.]))
   self.per_step_updates = (jnp.array([500., 5.]), jnp.array([300., 3.]))
Example #28
 def test_reduce(self):
     stat = metrics.SumStat.new(jnp.array([1, 2, 1]))
     reduced_stat = stat.reduce()
     self.assertEqual(reduced_stat.accum, 4)
Example #29
def load_pretrained(*, pretrained_path, init_params, model_config, logger):
  """Loads/converts a pretrained checkpoint for fine tuning.
  
  Args:
    logger: Logger to use to output diagnostic messages.
    init_params: Parameters from model. Will be used for the head of the model
      and to verify that the model is compatible with the stored checkpoint.
    pretrained_path: File pointing to pretrained checkpoint.
    model_config: Configuration of the model. Will be used to configure the
      head and rescale the position embeddings.

  Returns:
    Parameters like `init_params`, but loaded with pretrained weights from
    `init_file` and adapted accordingly.
  """

  restored_params = inspect_params(
      params=load(pretrained_path),
      expected=init_params,
      logger=logger,
      fail_if_extra=False,
      fail_if_missing=False)

  # The following allows implementing fine-tuning head variants depending on the
  # value of `representation_size` in the fine-tuning job:
  # - `None` : drop the whole head and attach a nn.Linear.
  # - same number as in pre-training means : keep the head but reset the last
  #    layer (logits) for the new task.
  if model_config.representation_size is None:
    if 'pre_logits' in restored_params:
      logger.info('load_pretrained: drop-head variant')
      restored_params['pre_logits'] = {}
  restored_params['head']['kernel'] = init_params['head']['kernel']
  restored_params['head']['bias'] = init_params['head']['bias']

  if 'posembed_input' in restored_params.get('Transformer', {}):
    # Rescale the grid of position embeddings. Param shape is (1,N,1024)
    posemb = restored_params['Transformer']['posembed_input']['pos_embedding']
    posemb_new = init_params['Transformer']['posembed_input']['pos_embedding']
    if posemb.shape != posemb_new.shape:
      logger.info('load_pretrained: resized variant: %s to %s', posemb.shape,
                  posemb_new.shape)
      ntok_new = posemb_new.shape[1]

      if model_config.classifier == 'token':
        posemb_tok, posemb_grid = posemb[:, :1], posemb[0, 1:]
        ntok_new -= 1
      else:
        posemb_tok, posemb_grid = posemb[:, :0], posemb[0]

      gs_old = int(np.sqrt(len(posemb_grid)))
      gs_new = int(np.sqrt(ntok_new))
      logger.info('load_pretrained: grid-size from %s to %s', gs_old, gs_new)
      posemb_grid = posemb_grid.reshape(gs_old, gs_old, -1)

      zoom = (gs_new / gs_old, gs_new / gs_old, 1)
      posemb_grid = scipy.ndimage.zoom(posemb_grid, zoom, order=1)
      posemb_grid = posemb_grid.reshape(1, gs_new * gs_new, -1)
      posemb = jnp.array(np.concatenate([posemb_tok, posemb_grid], axis=1))
      restored_params['Transformer']['posembed_input']['pos_embedding'] = posemb

  return restored_params
Example #30
  def testNTK_NTKNNGPAgreement(self, train_shape, test_shape, network,
                               out_logits):
    _, x_test, x_train, y_train = self._get_inputs(out_logits, test_shape,
                                                   train_shape)
    _, _, ker_fun = _build_network(train_shape[1:], network, out_logits)

    reg = 1e-7
    predictor = predict.gradient_descent_mse_ensemble(ker_fun,
                                                      x_train,
                                                      y_train,
                                                      diag_reg=reg)

    ts = np.logspace(-2, 8, 10).reshape((5, 2))

    for t in (None, 'ts'):
      for x in (None, 'x_test'):
        with self.subTest(t=t, x=x):
          x = x if x is None else x_test
          t = t if t is None else ts

          ntk = predictor(t=t, get='ntk', x_test=x)

          # Test time broadcasting
          if t is not None:
            ntk_ind = np.array([predictor(t=t, get='ntk', x_test=x)
                                for t in t.ravel()]).reshape(
                                    t.shape + ntk.shape[2:])
            self.assertAllClose(ntk_ind, ntk)

          # Create a hacked kernel function that always returns the ntk kernel
          def always_ntk(x1, x2, get=('nngp', 'ntk')):
            out = ker_fun(x1, x2, get=('nngp', 'ntk'))
            if get == 'nngp' or get == 'ntk':
              return out.ntk
            else:
              return out._replace(nngp=out.ntk)

          predictor_ntk = predict.gradient_descent_mse_ensemble(always_ntk,
                                                                x_train,
                                                                y_train,
                                                                diag_reg=reg)

          ntk_nngp = predictor_ntk(t=t, get='nngp', x_test=x)

          # Test if you use nngp equations with ntk, you get the same mean
          self.assertAllClose(ntk, ntk_nngp)

          # Next test that if you go through the NTK code path, but with only
          # the NNGP kernel, we recreate the NNGP dynamics.
          # Create a hacked kernel function that always returns the nngp kernel
          def always_nngp(x1, x2, get=('nngp', 'ntk')):
            out = ker_fun(x1, x2, get=('nngp', 'ntk'))
            if get == 'nngp' or get == 'ntk':
              return out.nngp
            else:
              return out._replace(ntk=out.nngp)

          predictor_nngp = predict.gradient_descent_mse_ensemble(always_nngp,
                                                                 x_train,
                                                                 y_train,
                                                                 diag_reg=reg)

          nngp_cov = predictor(t=t,
                               get='nngp',
                               x_test=x,
                               compute_cov=True).covariance

          # test time broadcasting for covariance
          nngp_ntk_cov = predictor_nngp(t=t,
                                        get='ntk',
                                        x_test=x,
                                        compute_cov=True).covariance
          if t is not None:
            nngp_ntk_cov_ind = np.array(
                [predictor_nngp(t=t,
                                get='ntk',
                                x_test=x,
                                compute_cov=True).covariance for
                 t in t.ravel()]).reshape(t.shape + nngp_cov.shape[2:])
            self.assertAllClose(nngp_ntk_cov_ind, nngp_ntk_cov)

          # Test if you use ntk equations with nngp, you get the same cov
          # Although, due to accumulation of numerical errors, only roughly.
          self.assertAllClose(nngp_cov, nngp_ntk_cov)