Example #1
    def __init__(self, kern, Z, num_outputs, mean_function, **kwargs):
        """
        A sparse variational GP layer with a Gaussian likelihood, where the 
        GP is integrated out

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs
        :param mean_function: The mean function
        :return:
        """

        Collapsed_Layer.__init__(self, **kwargs)
        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function
        self.num_outputs = num_outputs
Example #2
 def __init__(self, Z, mean_function, kern, num_latent=1, whiten=True, name=None):
     super(Latent, self).__init__(name=name)
     self.mean_function = mean_function
     self.kern = kern
     self.num_latent = num_latent
     M = Z.shape[0]
     # M = tf.print(M,[M,'any thing i want'],message='Debug message:',summarize=100)
     
     self.feature = InducingPoints(Z)
     num_inducing = len(self.feature)
     self.whiten = whiten
     
     self.q_mu = Parameter(np.zeros((num_inducing, self.num_latent), dtype=settings.float_type))
     
     q_sqrt = np.tile(np.eye(M)[None, :, :], [self.num_latent, 1, 1])
     transform = transforms.LowerTriangular(M, num_matrices=self.num_latent)
     self.q_sqrt = Parameter(q_sqrt, transform=transform)
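A standalone numpy sketch of the variational-parameter shapes initialized above (no GPflow needed): q_mu is an (M, num_latent) matrix of zeros, and q_sqrt stacks num_latent identity Cholesky factors, one lower-triangular M x M matrix per latent GP.

import numpy as np

M, num_latent = 5, 3
q_mu = np.zeros((M, num_latent))                             # (M, num_latent) variational means
q_sqrt = np.tile(np.eye(M)[None, :, :], [num_latent, 1, 1])  # (num_latent, M, M) Cholesky factors
assert q_sqrt.shape == (num_latent, M, M)
# each slice is lower-triangular, so q_sqrt[l] @ q_sqrt[l].T is a valid covariance
assert all(np.allclose(q_sqrt[l], np.tril(q_sqrt[l])) for l in range(num_latent))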
Example #3
    def __init__(self, kern, Z, num_outputs, mean_function,
                 white=False, input_prop_dim=None, **kwargs):
        """
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v \sim N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs (q_mu is shape (M, num_outputs))
        :param mean_function: The mean function
        :return:
        """
        Layer.__init__(self, input_prop_dim, **kwargs)
        self.num_inducing = Z.shape[0]

        q_mu = np.zeros((self.num_inducing, num_outputs))
        self.q_mu = Parameter(q_mu)

        q_sqrt = np.tile(np.eye(self.num_inducing)[None, :, :], [num_outputs, 1, 1])
        transform = transforms.LowerTriangular(self.num_inducing, num_matrices=num_outputs)
        self.q_sqrt = Parameter(q_sqrt, transform=transform)

        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function

        self.num_outputs = num_outputs
        self.white = white

        if not self.white:  # initialize to prior
            Ku = self.kern.compute_K_symm(Z)
            Lu = np.linalg.cholesky(Ku + np.eye(Z.shape[0])*settings.jitter)
            self.q_sqrt = np.tile(Lu[None, :, :], [num_outputs, 1, 1])

        self.needs_build_cholesky = True
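A numpy-only sketch of the whitened parameterization described in the docstring above: a sample v from q(v) = N(q_mu, q_sqrt q_sqrt^T) is mapped through the Cholesky factor of Kuu, u = L v + mean. The hand-rolled RBF Gram matrix below is only a stand-in for kern.compute_K_symm(Z), and the jitter mirrors the non-whitened initialization in the code.

import numpy as np

M, jitter = 10, 1e-6
Z = np.random.randn(M, 2)
sqdist = np.sum((Z[:, None, :] - Z[None, :, :]) ** 2, axis=-1)
Kuu = np.exp(-0.5 * sqdist)                        # unit-variance RBF kernel matrix (stand-in)
L = np.linalg.cholesky(Kuu + jitter * np.eye(M))   # L L^T = Kuu, as in the non-white branch

q_mu, q_sqrt = np.zeros(M), np.eye(M)              # whitened variational parameters
v = q_mu + q_sqrt @ np.random.randn(M)             # one sample from q(v)
u = L @ v                                          # un-whitened inducing outputs (zero mean function)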
Example #4
def test_sample_conditional(session_tf, whiten):
    q_mu = np.random.randn(Data.M, Data.P)  # M x P
    q_sqrt = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    Z = Data.X[:Data.M, ...]  # M x D
    Xs = np.ones((int(10e5), Data.D), dtype=float_type)

    feature = InducingPoints(Z.copy())
    kernel = RBF(Data.D)

    values = {"Z": Z, "Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt}
    placeholders = _create_placeholder_dict(values)
    feed_dict = _create_feed_dict(placeholders, values)

    # Path 1
    sample = sample_conditional(placeholders["Xnew"],
                                placeholders["Z"],
                                kernel,
                                placeholders["q_mu"],
                                q_sqrt=placeholders["q_sqrt"],
                                white=whiten)
    value = session_tf.run(sample, feed_dict=feed_dict)

    # Path 2
    sample2 = sample_conditional(placeholders["Xnew"],
                                 feature,
                                 kernel,
                                 placeholders["q_mu"],
                                 q_sqrt=placeholders["q_sqrt"],
                                 white=whiten)
    value2 = session_tf.run(sample2, feed_dict=feed_dict)

    # check that the mean and covariance of the samples are similar
    np.testing.assert_array_almost_equal(np.mean(value, axis=0),
                                         np.mean(value2, axis=0),
                                         decimal=1)
    np.testing.assert_array_almost_equal(np.cov(value, rowvar=False),
                                         np.cov(value2, rowvar=False),
                                         decimal=1)
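For reference, a standalone numpy sketch of the statistics compared above: with samples of shape (S, P), np.mean(..., axis=0) gives the per-output sample mean and np.cov(..., rowvar=False) the P x P sample covariance.

import numpy as np

samples = np.random.randn(100000, 3)          # S x P matrix of samples
print(np.mean(samples, axis=0).shape)         # (3,)   per-output mean
print(np.cov(samples, rowvar=False).shape)    # (3, 3) sample covariance across outputs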
Example #5
    def __init__(self, kern, Z, num_outputs, mean_function, dropout):
        """
        A sparse variational GP layer in whitened representation. This layer holds the kernel,
        variational parameters, inducing points and mean function.

        The underlying model at inputs X is
        f = Lv + mean_function(X), where v \sim N(0, I) and LL^T = kern.K(X)

        The variational distribution over the inducing points is
        q(v) = N(q_mu, q_sqrt q_sqrt^T)

        The layer holds D_out independent GPs with the same kernel and inducing points.

        :param kern: The kernel for the layer (input_dim = D_in)
        :param Z: Inducing points (M, D_in)
        :param num_outputs: The number of GP outputs; q_mu is initialized to zeros of shape
            (M, num_outputs) and q_sqrt to stacked identities of shape (num_outputs, M, M)
        :param mean_function: The mean function
        :param dropout: The dropout setting stored on the layer
        :return:
        """
        Parameterized.__init__(self)
        M = Z.shape[0]

        q_mu = np.zeros((M, num_outputs))
        q_mu = q_mu.astype(np.float64, copy=False)
        self.q_mu = Parameter(q_mu)

        q_sqrt = np.tile(np.eye(M)[None, :, :], [num_outputs, 1, 1])
        q_sqrt = q_sqrt.astype(np.float64, copy=False)
        transform = transforms.LowerTriangular(M, num_matrices=num_outputs)
        self.q_sqrt = Parameter(q_sqrt, transform=transform)

        self.feature = InducingPoints(Z)
        self.kern = kern
        self.mean_function = mean_function
        self.dropout = dropout
        self.q_mu_temp = q_mu
        self.q_sqrt_temp = q_sqrt
Example #6
def test_separate_independent_mof(session_tf):
    """
    Same test as above but we use different (i.e. separate) inducing features
    for each of the output dimensions.
    """
    np.random.seed(0)

    # Model 1 (inefficient)
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X,
              Data.Y,
              kernel_1,
              Gaussian(),
              feature_1,
              q_mu=q_mu_1,
              q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    m1.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2 (efficient)
    q_mu_2 = np.random.randn(Data.M, Data.P)
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feat_list_2 = [
        InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)
    ]
    feature_2 = mf.SeparateIndependentMof(feat_list_2)
    m2 = SVGP(Data.X,
              Data.Y,
              kernel_2,
              Gaussian(),
              feature_2,
              q_mu=q_mu_2,
              q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    m2.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3 (inefficient): an identical feature is used P times,
    # and treated as a separate feature.
    q_mu_3 = np.random.randn(Data.M, Data.P)
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kern_list = [
        RBF(Data.D, variance=0.5, lengthscales=1.2) for _ in range(Data.P)
    ]
    kernel_3 = mk.SeparateIndependentMok(kern_list)
    feat_list_3 = [
        InducingPoints(Data.X[:Data.M, ...].copy()) for _ in range(Data.P)
    ]
    feature_3 = mf.SeparateIndependentMof(feat_list_3)
    m3 = SVGP(Data.X,
              Data.Y,
              kernel_3,
              Gaussian(),
              feature_3,
              q_mu=q_mu_3,
              q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    m3.q_mu.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
Example #7
def test_shared_independent_mok(session_tf):
    """
    In this test we use the same kernel and the same inducing features
    for each of the outputs. The outputs are considered to be uncorrelated.
    This is how GPflow handled multiple outputs before the multioutput framework was added.
    We compare three models here:
        1) an inefficient one, where we use a SharedIndependentMok with InducingPoints.
           This combination uses a Kff of size N x P x N x P and a Kfu of size N x P x M x P,
           which is extremely inefficient as most of the elements are zero.
        2) efficient: SharedIndependentMok and SharedIndependentMof.
           This combination uses the most efficient form of the matrices.
        3) the old, efficient way: using a plain Kernel with InducingPoints.
        Models 2) and 3) follow more or less the same code path.
    """
    # Model 1
    q_mu_1 = np.random.randn(Data.M * Data.P, 1)  # MP x 1
    q_sqrt_1 = np.tril(np.random.randn(Data.M * Data.P,
                                       Data.M * Data.P))[None,
                                                         ...]  # 1 x MP x MP
    kernel_1 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_1 = InducingPoints(Data.X[:Data.M, ...].copy())
    m1 = SVGP(Data.X,
              Data.Y,
              kernel_1,
              Gaussian(),
              feature_1,
              q_mu=q_mu_1,
              q_sqrt=q_sqrt_1)
    m1.set_trainable(False)
    m1.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m1, maxiter=Data.MAXITER)

    # Model 2
    q_mu_2 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_2 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_2 = RBF(Data.D, variance=0.5, lengthscales=1.2)
    feature_2 = InducingPoints(Data.X[:Data.M, ...].copy())
    m2 = SVGP(Data.X,
              Data.Y,
              kernel_2,
              Gaussian(),
              feature_2,
              q_mu=q_mu_2,
              q_sqrt=q_sqrt_2)
    m2.set_trainable(False)
    m2.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m2, maxiter=Data.MAXITER)

    # Model 3
    q_mu_3 = np.reshape(q_mu_1, [Data.M, Data.P])  # M x P
    q_sqrt_3 = np.array([
        np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.P)
    ])  # P x M x M
    kernel_3 = mk.SharedIndependentMok(
        RBF(Data.D, variance=0.5, lengthscales=1.2), Data.P)
    feature_3 = mf.SharedIndependentMof(
        InducingPoints(Data.X[:Data.M, ...].copy()))
    m3 = SVGP(Data.X,
              Data.Y,
              kernel_3,
              Gaussian(),
              feature_3,
              q_mu=q_mu_3,
              q_sqrt=q_sqrt_3)
    m3.set_trainable(False)
    m3.q_sqrt.set_trainable(True)
    gpflow.training.ScipyOptimizer().minimize(m3, maxiter=Data.MAXITER)

    check_equality_predictions(session_tf, [m1, m2, m3])
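A standalone numpy sketch of the shape bookkeeping behind Models 1 and 2 above: the fully-coupled q_mu of shape (MP, 1) and the independent-output q_mu of shape (M, P) are related by a C-order reshape, so output p's column corresponds to every P-th entry of the flat vector.

import numpy as np

M, P = 4, 3
q_mu_full = np.random.randn(M * P, 1)        # fully-coupled parameterization: MP x 1
q_mu_indep = q_mu_full.reshape(M, P)         # independent-output parameterization: M x P
assert np.allclose(q_mu_indep[:, 0], q_mu_full[0::P, 0])   # column p <-> strided slice p::P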
Example #8
def build_model(ARGS, X, Y, apply_name=True):

    if ARGS.mode == 'CVAE':

        layers = []
        for l in ARGS.configuration.split('_'):
            try:
                layers.append(int(l))
            except ValueError:
                pass

        with defer_build():
            name = 'CVAE' if apply_name else None
            model = CVAE(X, Y, 1, layers, batch_size=ARGS.minibatch_size, name=name)

        model.compile()

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000, 0.98, staircase=True), dtype=tf.float64)
        op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

        model.train_op = lambda s: s.run([op_adam, op_increment])
        model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
        model.global_step = global_step

        model.compile()

    else:
        N, D = X.shape

        # first layer inducing points
        if N > ARGS.M:
            Z = kmeans2(X, ARGS.M, minit='points')[0]
        else:
            M_pad = ARGS.M - N
            Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

        #################################### layers
        P = np.linalg.svd(X, full_matrices=False)[2]
        # PX = P.copy()

        layers = []
        # quad_layers = []

        DX = D
        DY = 1

        D_in = D
        D_out = D
        with defer_build():
            lik = Gaussian()
            lik.variance = ARGS.likelihood_variance

            if len(ARGS.configuration) > 0:
                for c, d in ARGS.configuration.split('_'):
                    if c == 'G':
                        num_gps = int(d)
                        A = np.zeros((D_in, D_out))
                        D_min = min(D_in, D_out)
                        A[:D_min, :D_min] = np.eye(D_min)
                        mf = Linear(A=A)
                        mf.b.set_trainable(False)

                        def make_kern():
                            k = RBF(D_in, lengthscales=float(D_in) ** 0.5, variance=1., ARD=True)
                            k.variance.set_trainable(False)
                            return k

                        PP = np.zeros((D_out, num_gps))
                        PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                        ZZ = np.random.randn(ARGS.M, D_in)
                        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                        kern = SharedMixedMok(make_kern(), W=PP)
                        inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                        l = GPLayer(kern, inducing, num_gps, mean_function=mf)
                        if ARGS.fix_linear is True:
                            kern.W.set_trainable(False)
                            mf.set_trainable(False)

                        layers.append(l)

                        D_in = D_out

                    elif c == 'L':
                        d = int(d)
                        D_in += d
                        layers.append(LatentVariableLayer(d, XY_dim=DX+1))

            kern = RBF(D_in, lengthscales=float(D_in)**0.5, variance=1., ARD=True)
            ZZ = np.random.randn(ARGS.M, D_in)
            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
            layers.append(GPLayer(kern, InducingPoints(ZZ), DY))


            #################################### model
            name = 'Model' if apply_name else None

            if ARGS.mode == 'VI':
                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'SGHMC':
                for layer in layers:
                    if hasattr(layer, 'q_sqrt'):
                        del layer.q_sqrt
                        layer.q_sqrt = None
                        layer.q_mu.set_trainable(False)

                model = DGP_VI(X, Y, layers, lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)


            elif ARGS.mode == 'IWAE':
                model = DGP_IWVI(X, Y, layers, lik,
                                 minibatch_size=ARGS.minibatch_size,
                                 num_samples=ARGS.num_IW_samples,
                                 name=name)



        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        if ARGS.mode != 'SGHMC':
            for layer in model.layers[:-1]:
                if isinstance(layer, GPLayer):
                    layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

            model.compile()

            #################################### optimization

            var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]

            model.layers[-1].q_mu.set_trainable(False)
            model.layers[-1].q_sqrt.set_trainable(False)

            gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma, global_step, 1000, ARGS.gamma_decay, staircase=True),
                            dtype=tf.float64)
            lr = tf.cast(tf.train.exponential_decay(ARGS.lr, global_step, 1000, ARGS.lr_decay, staircase=True), dtype=tf.float64)

            op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(model, var_list=var_list)

            op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

            def train(s):
                s.run(op_increment)
                s.run(op_ng)
                s.run(op_adam)

            model.train_op = train
            model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
            model.global_step = global_step

        else:
            model.compile()

            hmc_vars = []
            for layer in layers:
                if hasattr(layer, 'q_mu'):
                    hmc_vars.append(layer.q_mu.unconstrained_tensor)

            hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)

            sghmc_optimizer = SGHMC(model, hmc_vars, hyper_train_op, 100)

            def train_op(s):
                s.run(op_increment)
                sghmc_optimizer.sghmc_step(s)
                sghmc_optimizer.train_hypers(s)

            model.train_op = train_op
            model.sghmc_optimizer = sghmc_optimizer
            def init_op(s):
                epsilon = 0.01
                mdecay = 0.05
                with tf.variable_scope('hmc'):
                    sghmc_optimizer.generate_update_step(epsilon, mdecay)
                v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='hmc')
                s.run(tf.variables_initializer(v))
                s.run(tf.variables_initializer([global_step]))

            model.init_op = init_op
            model.global_step = global_step

    return model
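A standalone sketch of how the configuration string is consumed above: each '_'-separated token is unpacked as a (layer-type, size) character pair, so a hypothetical ARGS.configuration of 'G5_L2_G3' gives a 5-GP layer, a 2-dimensional latent-variable layer, then a 3-GP layer. The token format is implied by the unpacking in the loop above, and it only works for two-character tokens, i.e. single-digit sizes.

configuration = 'G5_L2_G3'          # hypothetical value of ARGS.configuration
for c, d in configuration.split('_'):
    print(c, int(d))                # prints "G 5", then "L 2", then "G 3"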
Example #9
def build_model(ARGS, X, Y, apply_name=True):
    N, D = X.shape

    # first layer inducing points
    if N > ARGS.M:
        Z = kmeans2(X, ARGS.M, minit="points")[0]
    else:
        M_pad = ARGS.M - N
        Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

    #################################### layers
    P = np.linalg.svd(X, full_matrices=False)[2]

    layers = []

    DX = D
    DY = 1

    D_in = D
    D_out = D
    with defer_build():
        lik = Gaussian()
        lik.variance = ARGS.likelihood_variance

        if len(ARGS.configuration) > 0:
            for c, d in ARGS.configuration.split("_"):
                if c == "G":
                    num_gps = int(d)
                    A = np.zeros((D_in, D_out))
                    D_min = min(D_in, D_out)
                    A[:D_min, :D_min] = np.eye(D_min)
                    mf = Linear(A=A)
                    mf.b.set_trainable(False)

                    def make_kern():
                        k = RBF(D_in,
                                lengthscales=float(D_in)**0.5,
                                variance=1.0,
                                ARD=True)
                        k.variance.set_trainable(False)
                        return k

                    PP = np.zeros((D_out, num_gps))
                    PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                    ZZ = np.random.randn(ARGS.M, D_in)
                    ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                    kern = SharedMixedMok(make_kern(), W=PP)
                    inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                    l = GPLayer(kern,
                                inducing,
                                num_gps,
                                layer_num=len(layers),
                                mean_function=mf)
                    if ARGS.fix_linear is True:
                        kern.W.set_trainable(False)
                        mf.set_trainable(False)

                    layers.append(l)

                    D_in = D_out

                elif c == "L":
                    d = int(d)
                    D_in += d
                    encoder_dims = [
                        int(dim.strip())
                        for dim in ARGS.encoder_dims.split(",")
                    ]
                    layers.append(
                        LatentVariableLayer(d,
                                            XY_dim=DX + 1,
                                            encoder_dims=encoder_dims,
                                            qz_mode=ARGS.qz_mode))

        kern = RBF(D_in, lengthscales=float(D_in)**0.5, variance=1.0, ARD=True)
        ZZ = np.random.randn(ARGS.M, D_in)
        ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
        layers.append(GPLayer(kern, InducingPoints(ZZ), DY))

        #################################### model
        name = "Model" if apply_name else None

        if ARGS.mode == "VI":
            model = DGP_VI(X,
                           Y,
                           layers,
                           lik,
                           minibatch_size=ARGS.minibatch_size,
                           name=name)

        elif ARGS.mode == "IWAE":
            model = DGP_IWVI(
                X=X,
                Y=Y,
                layers=layers,
                likelihood=lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                encoder_minibatch_size=ARGS.encoder_minibatch_size,
            )

        elif ARGS.mode == "CIWAE":
            model = DGP_CIWAE(
                X,
                Y,
                layers,
                lik,
                minibatch_size=ARGS.minibatch_size,
                num_samples=ARGS.num_IW_samples,
                name=name,
                beta=ARGS.beta,
            )

        else:
            raise ValueError(f"Unknown mode {ARGS.mode}.")

    global_step = tf.Variable(0, dtype=tf.int32)
    op_increment = tf.assign_add(global_step, 1)

    for layer in model.layers[:-1]:
        if isinstance(layer, GPLayer):
            layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

    model.compile()

    #################################### optimization

    # Whether to train the final layer with the other parameters, using Adam, or by itself, using natural
    # gradients.
    if ARGS.use_nat_grad_for_final_layer:
        # Turn off training so the parameters are not optimised by Adam. We pass them directly to the natgrad
        # optimiser, which bypasses this flag.
        model.layers[-1].q_mu.set_trainable(False)
        model.layers[-1].q_sqrt.set_trainable(False)

        gamma = tf.cast(
            tf.train.exponential_decay(ARGS.gamma,
                                       global_step,
                                       1000,
                                       ARGS.gamma_decay,
                                       staircase=True),
            dtype=tf.float64,
        )
        final_layer_vars = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]
        final_layer_opt_op = NatGradOptimizer(
            gamma=gamma).make_optimize_tensor(model, var_list=final_layer_vars)
    else:
        final_layer_opt_op = NoOp()

    lr = tf.cast(
        tf.train.exponential_decay(ARGS.lr,
                                   global_step,
                                   decay_steps=1000,
                                   decay_rate=ARGS.lr_decay,
                                   staircase=True),
        dtype=tf.float64,
    )

    encoder_lr = tf.cast(
        tf.train.exponential_decay(
            ARGS.encoder_lr,
            global_step,
            decay_steps=1000,
            decay_rate=ARGS.encoder_lr_decay,
            staircase=True,
        ),
        dtype=tf.float64,
    )

    dreg_optimizer = DregOptimizer(
        enable_dreg=ARGS.use_dreg,
        optimizer=ARGS.optimizer,
        encoder_optimizer=ARGS.encoder_optimizer,
        learning_rate=lr,
        encoder_learning_rate=encoder_lr,
        assert_no_nans=ARGS.assert_no_nans,
        encoder_grad_clip_value=ARGS.clip_encoder_grads,
    )
    other_layers_opt_op = dreg_optimizer.make_optimize_tensor(model)

    model.lr = lr
    model.train_op = tf.group(op_increment, final_layer_opt_op,
                              other_layers_opt_op)
    model.init_op = lambda s: s.run(tf.variables_initializer([global_step]))
    model.global_step = global_step

    return model
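The learning-rate and gamma schedules above use tf.train.exponential_decay with staircase=True, which is equivalent to lr * decay_rate ** floor(step / decay_steps); a standalone numpy sketch of that schedule:

import numpy as np

lr0, decay_rate, decay_steps = 1e-2, 0.98, 1000
for step in [0, 999, 1000, 2500]:
    lr = lr0 * decay_rate ** np.floor(step / decay_steps)
    print(step, lr)                 # 0.01, 0.01, 0.0098, 0.009604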
Example #10
    def _make_part_model(self,
                         X,
                         Y,
                         weights,
                         Z,
                         q_mu,
                         q_sqrt,
                         W,
                         freqs,
                         minibatch_size=None,
                         priors=None):
        """
        Create a gpflow model for a selection of data
        X: array (N, Din)
        Y: array (N, P, Nf)
        weights: array like Y the statistical weights of each datapoint
        minibatch_size : int 
        Z: list of array (M, Din)
            The inducing points mean locations.
        q_mu: list of array (M, L)
        q_sqrt: list of array (L, M, M)
        W: array [P,L]
        freqs: array [Nf,] the freqs
        priors : dict of priors for the global model
        Returns:
        model : gpflow.models.Model 
        """
        N, P, Nf = Y.shape
        _, Din = X.shape

        assert priors is not None
        likelihood_var = priors['likelihood_var']
        tec_kern_time_ls = priors['tec_kern_time_ls']
        tec_kern_dir_ls = priors['tec_kern_dir_ls']
        tec_kern_var = priors['tec_kern_var']
        tec_mean = priors['tec_mean']
        Z_var = priors['Z_var']

        P, L = W.shape

        with defer_build():

            # Define the likelihood
            likelihood = WrappedPhaseGaussianMulti(
                tec_scale=priors['tec_scale'], freqs=freqs)
            likelihood.variance = np.exp(likelihood_var[0])  # median as initial value
            likelihood.variance.prior = LogNormal(likelihood_var[0],
                                                  likelihood_var[1]**2)
            likelihood.variance.set_trainable(True)

            def _kern():
                kern_thin_layer = ThinLayer(np.array([0., 0., 0.]),
                                            priors['tec_scale'],
                                            active_dims=slice(2, 6, 1))
                kern_time = Matern32(1, active_dims=slice(6, 7, 1))
                kern_dir = Matern32(2, active_dims=slice(0, 2, 1))

                ###
                # time kern
                kern_time.lengthscales = np.exp(tec_kern_time_ls[0])
                kern_time.lengthscales.prior = LogNormal(
                    tec_kern_time_ls[0], tec_kern_time_ls[1]**2)
                kern_time.lengthscales.set_trainable(True)

                kern_time.variance = 1.  #np.exp(tec_kern_var[0])
                #kern_time.variance.prior = LogNormal(tec_kern_var[0],tec_kern_var[1]**2)
                kern_time.variance.set_trainable(False)  #

                ###
                # directional kern
                kern_dir.variance = np.exp(tec_kern_var[0])
                kern_dir.variance.prior = LogNormal(tec_kern_var[0],
                                                    tec_kern_var[1]**2)
                kern_dir.variance.set_trainable(True)

                kern_dir.lengthscales = np.exp(tec_kern_dir_ls[0])
                kern_dir.lengthscales.prior = LogNormal(
                    tec_kern_dir_ls[0], tec_kern_dir_ls[1]**2)
                kern_dir.lengthscales.set_trainable(True)

                kern = kern_dir * kern_time  #(kern_thin_layer + kern_dir)*kern_time
                return kern

            kern = mk.SeparateMixedMok([_kern() for _ in range(L)], W)

            feature_list = []
            for _ in range(L):
                feat = InducingPoints(Z)
                #feat.Z.prior = Gaussian(Z,Z_var)
                feature_list.append(feat)
            feature = mf.MixedKernelSeparateMof(feature_list)

            mean = Zero()

            model = HomoscedasticPhaseOnlySVGP(weights,
                                               X,
                                               Y,
                                               kern,
                                               likelihood,
                                               feat=feature,
                                               mean_function=mean,
                                               minibatch_size=minibatch_size,
                                               num_latent=P,
                                               num_data=N,
                                               whiten=False,
                                               q_mu=q_mu,
                                               q_sqrt=q_sqrt)
            model.compile()
        return model
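A numpy-only shape sketch of the linear mixing implied by the W argument above (shape (P, L)): with the mixed multi-output kernel construction, the P observed outputs are linear combinations of L latent GPs, f(x) = W g(x).

import numpy as np

P, L, N = 4, 2, 6
W = np.random.randn(P, L)            # mixing matrix, one row per output
g = np.random.randn(N, L)            # N evaluations of the L latent GPs
f = g @ W.T                          # (N, P) mixed outputs: f_p(x) = sum_l W[p, l] g_l(x)
assert f.shape == (N, P)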
Example #11
    def build_model(self,
                    ARGS,
                    X,
                    Y,
                    conditioning=False,
                    apply_name=True,
                    noise_var=None,
                    mean_function=None):

        if not conditioning:
            N, D = X.shape

            # first layer inducing points
            if N > ARGS.M:
                Z = kmeans2(X, ARGS.M, minit='points')[0]
            else:
                # This is the old way of initializing Zs
                # M_pad = ARGS.M - N
                # Z = np.concatenate([X.copy(), np.random.randn(M_pad, D)], 0)

                # This is the new way of initializing Zs
                min_x, max_x = self.bounds[0]
                min_x = (min_x - self.x_mean) / self.x_std
                max_x = (max_x - self.x_mean) / self.x_std

                Z = np.linspace(min_x, max_x, num=ARGS.M)  # * X.shape[1])
                Z = Z.reshape((-1, X.shape[1]))
                #print(min_x)
                #print(max_x)
                #print(Z)

            #################################### layers
            P = np.linalg.svd(X, full_matrices=False)[2]
            # PX = P.copy()

            layers = []
            # quad_layers = []

            DX = D
            DY = 1

            D_in = D
            D_out = D

            with defer_build():

                # variance initialization
                lik = Gaussian()
                lik.variance = ARGS.likelihood_variance

                if len(ARGS.configuration) > 0:
                    for c, d in ARGS.configuration.split('_'):
                        if c == 'G':
                            num_gps = int(d)
                            A = np.zeros((D_in, D_out))
                            D_min = min(D_in, D_out)
                            A[:D_min, :D_min] = np.eye(D_min)
                            mf = Linear(A=A)
                            mf.b.set_trainable(False)

                            def make_kern():
                                k = RBF(D_in,
                                        lengthscales=float(D_in)**0.5,
                                        variance=1.,
                                        ARD=True)
                                k.variance.set_trainable(False)
                                return k

                            PP = np.zeros((D_out, num_gps))
                            PP[:, :min(num_gps, DX)] = P[:, :min(num_gps, DX)]
                            ZZ = np.random.randn(ARGS.M, D_in)
                            # print(Z.shape)
                            # print(ZZ.shape)
                            ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]

                            kern = SharedMixedMok(make_kern(), W=PP)
                            inducing = MixedKernelSharedMof(InducingPoints(ZZ))

                            l = GPLayer(kern,
                                        inducing,
                                        num_gps,
                                        mean_function=mf)
                            if ARGS.fix_linear is True:
                                kern.W.set_trainable(False)
                                mf.set_trainable(False)

                            layers.append(l)

                            D_in = D_out

                        elif c == 'L':
                            d = int(d)
                            D_in += d
                            layers.append(LatentVariableLayer(d,
                                                              XY_dim=DX + 1))

                # kernel initialization
                kern = RBF(D_in,
                           lengthscales=float(D_in)**0.5,
                           variance=1.,
                           ARD=True)
                ZZ = np.random.randn(ARGS.M, D_in)
                ZZ[:, :min(D_in, DX)] = Z[:, :min(D_in, DX)]
                layers.append(GPLayer(kern, InducingPoints(ZZ), DY))
                self.layers = layers
                self.lik = lik

            # global_step = tf.Variable(0, dtype=tf.int32)
            # self.global_step = global_step
        else:
            lik = self._gp.likelihood
            layers = self._gp.layers._list
            # val = self.session.run(self.global_step)
            # global_step = tf.Variable(val, dtype=tf.int32)
            # self.global_step = global_step
            self._gp.clear()

        with defer_build():

            #################################### model
            name = 'Model' if apply_name else None

            if ARGS.mode == 'VI':
                model = DGP_VI(X,
                               Y,
                               layers,
                               lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'SGHMC':
                for layer in layers:
                    if hasattr(layer, 'q_sqrt'):
                        del layer.q_sqrt
                        layer.q_sqrt = None
                        layer.q_mu.set_trainable(False)

                model = DGP_VI(X,
                               Y,
                               layers,
                               lik,
                               minibatch_size=ARGS.minibatch_size,
                               name=name)

            elif ARGS.mode == 'IWAE':
                model = DGP_IWVI(X,
                                 Y,
                                 layers,
                                 lik,
                                 minibatch_size=ARGS.minibatch_size,
                                 num_samples=ARGS.num_IW_samples,
                                 name=name)

        global_step = tf.Variable(0, dtype=tf.int32)
        op_increment = tf.assign_add(global_step, 1)

        if ARGS.mode != 'SGHMC':
            for layer in model.layers[:-1]:
                if isinstance(layer, GPLayer):
                    layer.q_sqrt = layer.q_sqrt.read_value() * 1e-5

            model.compile()

            #################################### optimization

            var_list = [[model.layers[-1].q_mu, model.layers[-1].q_sqrt]]

            model.layers[-1].q_mu.set_trainable(False)
            model.layers[-1].q_sqrt.set_trainable(False)

            gamma = tf.cast(tf.train.exponential_decay(ARGS.gamma,
                                                       global_step,
                                                       1000,
                                                       ARGS.gamma_decay,
                                                       staircase=True),
                            dtype=tf.float64)
            lr = tf.cast(tf.train.exponential_decay(ARGS.lr,
                                                    global_step,
                                                    1000,
                                                    ARGS.lr_decay,
                                                    staircase=True),
                         dtype=tf.float64)

            op_ng = NatGradOptimizer(gamma=gamma).make_optimize_tensor(
                model, var_list=var_list)

            op_adam = AdamOptimizer(lr).make_optimize_tensor(model)

            def train(s):
                s.run(op_increment)
                s.run(op_ng)
                s.run(op_adam)

            model.train_op = train
            model.init_op = lambda s: s.run(
                tf.variables_initializer([global_step]))
            model.global_step = global_step

        else:
            model.compile()

            sghmc_vars = []
            for layer in layers:
                if hasattr(layer, 'q_mu'):
                    sghmc_vars.append(layer.q_mu.unconstrained_tensor)

            hyper_train_op = AdamOptimizer(ARGS.lr).make_optimize_tensor(model)

            self.sghmc_optimizer = SGHMC(model, sghmc_vars, hyper_train_op,
                                         100)

            def train_op(s):
                s.run(op_increment)
                self.sghmc_optimizer.sghmc_step(s)
                self.sghmc_optimizer.train_hypers(s)

            model.train_op = train_op
            model.sghmc_optimizer = self.sghmc_optimizer

            def init_op(s):
                epsilon = 0.01
                mdecay = 0.05
                with tf.variable_scope('sghmc'):
                    self.sghmc_optimizer.generate_update_step(epsilon, mdecay)
                v = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='sghmc')
                s.run(tf.variables_initializer(v))
                s.run(tf.variables_initializer([global_step]))

            # Added jitter due to input matrix invertibility problems
            custom_config = gpflow.settings.get_settings()
            custom_config.numerics.jitter_level = 1e-8

            model.init_op = init_op
            model.global_step = global_step

        # build the computation graph for the gradient
        self.X_placeholder = tf.placeholder(tf.float64,
                                            shape=[None, X.shape[1]])
        self.Fs, Fmu, Fvar = model._build_predict(self.X_placeholder)
        self.mean_grad = tf.gradients(Fmu, self.X_placeholder)
        self.var_grad = tf.gradients(Fvar, self.X_placeholder)

        # calculate the gradient of the mean for the quantile-filtered distribution
        # print(Fs)
        # q = np.quantile(Fs, self.quantile, axis=0)
        # qFs = [f for f in Fs if f < q]
        # q_mean = np.mean(qFs, axis=0)
        # q_var = np.var(qFs, axis=0)
        # self.qmean_grad = tf.gradients(q_mean, self.X_placeholder)
        # self.qvar_grad = tf.gradients(q_var, self.X_placeholder)

        return model
Example #12
def init_layers(graph_adj, node_feature, kernels, n_layers, all_layers_dim, num_inducing,
                gc_kernel=True, mean_function="linear", white=False, q_diag=False):

    assert mean_function in ["linear", "zero"]  # mean function must be linear or zero

    layers = []

    # get initial Z
    sparse_adj = tuple_to_sparse_matrix(graph_adj[0], graph_adj[1], graph_adj[2])
    X_running = node_feature.copy()

    for i in range(n_layers):

        tf.logging.info("initializing layer {}".format(i + 1))

        dim_in = all_layers_dim[i]
        dim_out = all_layers_dim[i + 1]

        conv_X = sparse_adj.dot(X_running)
        Z_running = kmeans2(conv_X, num_inducing[i], minit="points")[0]

        kernel = kernels[i]

        if gc_kernel and kernel.gc_weight:
            # Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the output dimension
            X_dim = X_running.shape[1]
            kernel_input_dim = kernel.base_kernel.input_dim
            if X_dim > kernel_input_dim:
                Z_running = pca(Z_running, kernel.base_kernel.input_dim)  # reduce the dimensionality to match the output dimension
            elif X_dim < kernel_input_dim:
                Z_running = np.concatenate([Z_running, np.zeros((Z_running.shape[0], kernel_input_dim - X_dim))], axis=1)

        # print(type(Z_running))
        # print(Z_running)

        if dim_in > dim_out:
            _, _, V = np.linalg.svd(X_running, full_matrices=False)
            W = V[:dim_out, :].T
        elif dim_in < dim_out:
            W = np.concatenate([np.eye(dim_in), np.zeros((dim_in, dim_out - dim_in))], 1)

        if mean_function == "zero":
            mf = Zero()
        else:

            if dim_in == dim_out:
                mf = Identity()
            else:
                mf = Linear(W)
                mf.set_trainable(False)

        # self.Ku = Kuu(GraphConvolutionInducingpoints(Z_running), kernel, jitter=settings.jitter)
        # print("successfully calculate Ku")
        if gc_kernel:
            feature = GraphConvolutionInducingpoints(Z_running)
        else:
            feature = InducingPoints(Z_running)

        layers.append(svgp_layer(kernel, Z_running, feature, dim_out, mf, gc_kernel, white=white, q_diag=q_diag))

        if dim_in != dim_out:
            # Z_running = Z_running.dot(W)
            X_running = X_running.dot(W)

    return layers
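A standalone numpy sketch of the dimension matching used for the linear mean function above: when dim_in > dim_out, W is built from the top right-singular vectors of X_running, so X_running.dot(W) is a PCA-style projection onto dim_out directions, which then becomes the next layer's running input.

import numpy as np

X_running = np.random.randn(100, 5)              # current layer inputs, dim_in = 5
dim_out = 2
_, _, V = np.linalg.svd(X_running, full_matrices=False)
W = V[:dim_out, :].T                             # (5, 2) projection used by Linear(W)
X_next = X_running.dot(W)                        # (100, 2) input for the next layer
assert X_next.shape == (100, dim_out)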