Example #1
0
def test_mlgk_diag(caseitem):
    '''diagonal similarities'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        kernel = MarginalizedGraphKernel(knode, kedge, q=q)
        R = kernel(G)

        # whole-graph diagonal must match the diagonal of the Gram matrix
        D = kernel.diag(G)
        assert len(D) == 2
        assert D[0] == pytest.approx(R[0, 0], 1e-7)
        assert D[1] == pytest.approx(R[1, 1], 1e-7)

        # nodal similarities: cosine-normalize the per-node Gram matrix
        R_nodal = kernel(G, nodal=True)
        scale = np.diag(R_nodal)**-0.5
        K_nodal = np.diag(scale).dot(R_nodal).dot(np.diag(scale))

        # check submatrices against the reference MLGK implementation
        sizes = np.array([len(g.nodes) for g in G])
        end = np.cumsum(sizes)
        start = end - sizes
        assert R_nodal.shape == (end[-1], end[-1])
        assert np.count_nonzero(R_nodal - R_nodal.T) == 0
        for k, (i, j) in enumerate(zip(start, end)):
            gnd = MLGK(G[k], knode, kedge, q, q, nodal=True).ravel()
            sub = R_nodal[i:j, :][:, i:j].ravel()
            for r1, r2 in zip(sub, gnd):
                assert r1 == pytest.approx(r2, 1e-5)
        # normalized matrix has a unit diagonal by construction
        for i in range(end[-1]):
            assert K_nodal[i, i] == pytest.approx(1, 1e-7)

        # nodal diag() must match the diagonal blocks of the full matrix
        D_nodal = kernel.diag(G, nodal=True)
        assert len(D_nodal) == end[-1]
        for i, j in zip(start[:2], end[:2]):
            sub = D_nodal[i:j]
            gnd = np.diag(R_nodal[i:j, :][:, i:j])
            for r1, r2 in zip(sub, gnd):
                assert r1 == pytest.approx(r2, 1e-7)
Example #2
0
def test_mlgk_diag_gradient(caseitem, nodal):
    '''derivative w.r.t. hyperparameters'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        kernel = MarginalizedGraphKernel(knode, kedge, q=q)

        R, dR = kernel.diag(G, nodal=nodal, eval_gradient=True)

        # gradient: one row per diagonal entry, one column per hyperparameter
        assert len(dR.shape) == 2
        assert R.shape[0] == dR.shape[0]
        assert dR.shape[1] >= 1

        theta0 = kernel.theta
        eps = 1e-3
        for i in range(len(theta0)):

            # central finite difference in log-hyperparameter space
            t_hi = np.copy(theta0)
            t_hi[i] += eps
            kernel.theta = t_hi
            R_hi = kernel.diag(G, nodal=nodal, eval_gradient=False)

            t_lo = np.copy(theta0)
            t_lo[i] -= eps
            kernel.theta = t_lo
            R_lo = kernel.diag(G, nodal=nodal, eval_gradient=False)

            kernel.theta = theta0

            # chain rule: theta is log-scale, so d/dt = (d/dlog t) / t
            dR_dLogt = (R_hi - R_lo) / (2 * eps)
            dR_dt = dR_dLogt / np.exp(theta0)[i]

            for a, b in zip(dR[:, i].ravel(), dR_dt.ravel()):
                assert a == pytest.approx(b, rel=0.05, abs=0.05)
Example #3
0
class Tang2019MolecularKernel:
    """A marginalized graph kernel for **3D molecular structures** as in:
    Tang, Y. H., & de Jong, W. A. (2019). Prediction of atomization energy
    using graph kernel and active learning. *The Journal of chemical physics*,
    150(4), 044107.
    The kernel can be directly used together with Graph.from_ase() to operate
    on molecular structures.

    Parameters
    ----------
    stopping_probability: float in (0, 1)
        The probability for the random walk to stop during each step.
    starting_probability: float
        The probability for the random walk to start from any node. See the `p`
        kwarg of :class:`graphdot.kernel.marginalized.MarginalizedGraphKernel`
    element_prior: float in (0, 1)
        The baseline similarity between distinct elements --- an element
        always has a similarity 1 to itself.
    edge_length_scale: float in (0, inf)
        length scale of the Gaussian kernel on edge length. A rule of thumb is
        that the similarity decays smoothly from 1 to nearly 0 around three
        times of the length scale.
    """
    def __init__(self,
                 stopping_probability=0.01,
                 starting_probability='uniform',
                 element_prior=0.2,
                 edge_length_scale=0.05,
                 **kwargs):
        self.stopping_probability = stopping_probability
        self.starting_probability = starting_probability
        self.element_prior = element_prior
        self.edge_length_scale = edge_length_scale
        self._makekernel(**kwargs)

    def _makekernel(self, **kwargs):
        # Build the underlying marginalized graph kernel from the stored
        # hyperparameters.
        # Node similarity: Kronecker delta on the chemical element.
        node_kernel = TensorProduct(
            element=KroneckerDelta(self.element_prior, 1.0)
        )
        # Edge similarity: square-exponential on the bond length.
        edge_kernel = TensorProduct(
            length=SquareExponential(self.edge_length_scale)
        )
        self.kernel = MarginalizedGraphKernel(
            node_kernel,
            edge_kernel,
            q=self.stopping_probability,
            p=self.starting_probability,
            **kwargs
        )

    def __call__(self, X, Y=None, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.__call__`
        """
        # Delegate directly to the wrapped kernel.
        return self.kernel(X, Y, **kwargs)

    def diag(self, X, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.diag`
        """
        # Delegate directly to the wrapped kernel.
        return self.kernel.diag(X, **kwargs)
Example #4
0
def test_mlgk_dtype():
    """The kernel must honor the requested output dtype for both the full
    Gram matrix and its diagonal."""
    # build a random undirected graph on n nodes; all nodes share one type
    g = nx.Graph()
    n = 8
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        # only the strictly-lower triangle is used, so each pair is
        # considered once and self-loops are excluded
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    # NOTE: `np.float` was merely a deprecated alias of the builtin `float`
    # and was removed in NumPy 1.24 — use the builtin directly so this test
    # keeps running on modern NumPy while covering the same three dtypes.
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel,
                                       edge_kernel,
                                       q=q,
                                       dtype=dtype)

        assert (mlgk([dfg]).dtype == dtype)
        assert (mlgk.diag([dfg]).dtype == dtype)