Пример #1
0
 def _makekernel(self, **kwargs):
     """Build ``self.kernel`` from the hyperparameters stored on the instance.

     Any extra keyword arguments are forwarded unchanged to
     ``MarginalizedGraphKernel``.
     """
     # Node base kernel: Kronecker delta on the 'element' attribute.
     node_kernel = TensorProduct(element=KroneckerDelta(self.element_prior))
     # Edge base kernel: square exponential on the 'length' attribute.
     edge_kernel = TensorProduct(
         length=SquareExponential(self.edge_length_scale))
     self.kernel = MarginalizedGraphKernel(
         node_kernel,
         edge_kernel,
         q=self.stopping_probability,
         p=self.starting_probability,
         **kwargs)
Пример #2
0
def test_mlgk_starting_probability(caseitem):
    '''Check kernel values when a custom starting probability is supplied.'''

    _, case = caseitem

    graphs = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:
        # Every node gets a constant starting probability of 2.0.
        mlgk = MarginalizedGraphKernel(knode, kedge, q=q, p=lambda node: 2.0)
        R = mlgk(graphs)
        R_nodal = mlgk(graphs, nodal=True)

        # Reference self-similarities, scaled by 2.0**2 to match p = 2.0.
        expected_diag = [
            MLGK(graphs[k], knode, kedge, q, q) * 2.0**2 for k in (0, 1)
        ]
        for k in (0, 1):
            assert (R[k, k] == pytest.approx(expected_diag[k], 1e-5))

        # Row/column offsets of each graph inside the stacked nodal matrix.
        sizes = np.array([len(g.nodes) for g in graphs])
        stops = np.cumsum(sizes)
        starts = stops - sizes
        for lo1, hi1, g1 in zip(starts, stops, graphs):
            for lo2, hi2, g2 in zip(starts, stops, graphs):
                block = R_nodal[lo1:hi1, lo2:hi2]
                pairwise = mlgk([g1], [g2], nodal=True)
                # Compare row by row: pairwise evaluation vs. sub-block.
                for row_pair, row_block in zip(pairwise, block):
                    assert (row_pair == pytest.approx(row_block, 1e-5))
Пример #3
0
def test_mlgk_on_permuted_graph():
    '''The kernel value must be unchanged when a graph's nodes are permuted.

    A benzene graph is compared against 10 randomly permuted copies of
    itself; each cross similarity must match the graph's self similarity.
    '''
    g = Graph.from_ase(molecule('C6H6'))
    # Neither the kernel nor the self similarity of `g` depends on the
    # permutation, so construct/evaluate them once rather than per iteration.
    kernel = MarginalizedGraphKernel(
        TensorProduct(element=KroneckerDelta(0.5)),
        TensorProduct(length=SquareExponential(0.1)))
    reference = kernel([g]).item()
    for _ in range(10):
        h = g.permute(np.random.permutation(len(g.nodes)))
        assert (kernel([g], [h]).item() == pytest.approx(reference))
Пример #4
0
def test_mlgk_diag(caseitem):
    '''Check graph-level and nodal diagonals against the full matrices.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R = mlgk(G)

        # Graph-level diagonal must agree with the diagonal of R.
        D = mlgk.diag(G)
        assert (len(D) == 2)
        for k in (0, 1):
            assert (D[k] == pytest.approx(R[k, k], 1e-7))

        # Nodal similarity matrix and its normalized counterpart.
        R_nodal = mlgk(G, nodal=True)
        inv_sqrt = np.diag(R_nodal)**-0.5
        scaling = np.diag(inv_sqrt)
        K_nodal = scaling.dot(R_nodal).dot(scaling)

        # Offsets of each graph's nodes inside the stacked nodal matrix.
        sizes = np.array([len(g.nodes) for g in G])
        stops = np.cumsum(sizes)
        starts = stops - sizes
        total = stops[-1]
        assert (R_nodal.shape == (total, total))
        assert (np.count_nonzero(R_nodal - R_nodal.T) == 0)

        # Each diagonal block must match the reference nodal result.
        for k, (lo, hi) in enumerate(zip(starts, stops)):
            expected = MLGK(G[k], knode, kedge, q, q, nodal=True).ravel()
            actual = R_nodal[lo:hi, lo:hi].ravel()
            for r1, r2 in zip(actual, expected):
                assert (r1 == pytest.approx(r2, 1e-5))
        for i in range(total):
            assert (K_nodal[i, i] == pytest.approx(1, 1e-7))

        # Nodal diagonal must match the diagonal of every block.
        D_nodal = mlgk.diag(G, nodal=True)
        assert (len(D_nodal) == total)
        for k in range(2):
            lo, hi = starts[k], stops[k]
            block_diag = np.diag(R_nodal[lo:hi, lo:hi])
            for r1, r2 in zip(D_nodal[lo:hi], block_diag):
                assert (r1 == pytest.approx(r2, 1e-7))
Пример #5
0
def test_mlgk_kernel_range_check():
    '''Exercise kernel construction with various base-kernel combinations.

    The first two constructions run outside any ``pytest.warns`` context.
    Each of the remaining five must emit a ``DeprecationWarning``
    (presumably because the base kernel's range is not accepted -- confirm
    against the MarginalizedGraphKernel implementation).
    '''

    def attribute_se():
        return TensorProduct(attribute=SquareExponential(1.0))

    def feature_delta():
        return TensorProduct(feature=KroneckerDelta(0.5))

    MarginalizedGraphKernel(node_kernel=KroneckerDelta(1e-7),
                            edge_kernel=attribute_se())
    MarginalizedGraphKernel(node_kernel=feature_delta(),
                            edge_kernel=attribute_se())

    # Factories are used so that the base kernels themselves are also
    # constructed inside the `pytest.warns` context.
    warning_cases = [
        lambda: dict(node_kernel=KroneckerDelta(0),
                     edge_kernel=attribute_se()),
        lambda: dict(node_kernel=feature_delta() + 1,
                     edge_kernel=SquareExponential(1.0)),
        lambda: dict(node_kernel=feature_delta(),
                     edge_kernel=attribute_se() + 1),
        lambda: dict(node_kernel=KroneckerDelta(0.5) * 2,
                     edge_kernel=attribute_se()),
        lambda: dict(node_kernel=feature_delta(),
                     edge_kernel=attribute_se() * 2),
    ]
    for make_kwargs in warning_cases:
        with pytest.warns(DeprecationWarning):
            MarginalizedGraphKernel(**make_kwargs())
Пример #6
0
def test_mlgk_gradient(caseitem, nodal):
    '''Compare returned hyperparameter gradients with central differences.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

        np.set_printoptions(precision=4, linewidth=999, suppress=True)

        R, dR = mlgk(G, nodal=nodal, eval_gradient=True)

        # dR stacks one gradient slice per hyperparameter on the last axis.
        assert (len(dR.shape) == 3)
        for axis in (0, 1):
            assert (R.shape[axis] == dR.shape[axis])
        assert (dR.shape[2] >= 1)

        eps = 1e-3
        for i in range(len(mlgk.theta)):

            theta = mlgk.theta

            def evaluate_at(offset):
                # Evaluate the kernel with theta[i] shifted by `offset`.
                shifted = np.copy(theta)
                shifted[i] += offset
                mlgk.theta = shifted
                return mlgk(G, nodal=nodal)

            Rr = evaluate_at(eps)
            Rl = evaluate_at(-eps)

            # Restore the unperturbed hyperparameters.
            mlgk.theta = theta

            # Chain rule from theta to exp(theta); presumably theta holds
            # log-scale hyperparameters -- confirm against the kernel class.
            dR_dLogt = (Rr - Rl) / (2 * eps)
            dLogt_dt = 1 / np.exp(theta)[i]
            dR_dt = dR_dLogt * dLogt_dt

            assert np.allclose(dR[:, :, i], dR_dt, rtol=0.05, atol=0.05)
Пример #7
0
class Tang2019MolecularKernel:
    """A marginalized graph kernel for **3D molecular structures** as in:
    Tang, Y. H., & de Jong, W. A. (2019). Prediction of atomization energy
    using graph kernel and active learning. *The Journal of chemical physics*,
    150(4), 044107.
    The kernel can be directly used together with Graph.from_ase() to operate
    on molecular structures.

    Parameters
    ----------
    stopping_probability: float in (0, 1)
        The probability for the random walk to stop during each step.
    starting_probability: float
        The probability for the random walk to start from any node. See the `p`
        kwarg of :class:`graphdot.kernel.marginalized.MarginalizedGraphKernel`
    element_prior: float in (0, 1)
        The baseline similarity between distinct elements --- an element
        always has a similarity of 1 to itself.
    edge_length_scale: float in (0, inf)
        Length scale of the Gaussian kernel on edge length. A rule of thumb is
        that the similarity decays smoothly from 1 to nearly 0 around three
        times the length scale.
    **kwargs
        Forwarded unchanged to ``MarginalizedGraphKernel``.
    """
    def __init__(self,
                 stopping_probability=0.01,
                 starting_probability='uniform',
                 element_prior=0.2,
                 edge_length_scale=0.05,
                 **kwargs):
        # Random-walk hyperparameters.
        self.stopping_probability = stopping_probability
        self.starting_probability = starting_probability
        # Base-kernel hyperparameters.
        self.element_prior = element_prior
        self.edge_length_scale = edge_length_scale
        self._makekernel(**kwargs)

    def _makekernel(self, **kwargs):
        """Build ``self.kernel`` from the stored hyperparameters."""
        node_kernel = TensorProduct(
            element=KroneckerDelta(self.element_prior, 1.0))
        edge_kernel = TensorProduct(
            length=SquareExponential(self.edge_length_scale))
        self.kernel = MarginalizedGraphKernel(
            node_kernel,
            edge_kernel,
            q=self.stopping_probability,
            p=self.starting_probability,
            **kwargs)

    def __call__(self, X, Y=None, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.__call__`
        """
        wrapped = self.kernel
        return wrapped(X, Y, **kwargs)

    def diag(self, X, **kwargs):
        """Same call signature as
        :py:meth:`graphdot.kernel.marginalized.MarginalizedGraphKernel.diag`
        """
        wrapped = self.kernel
        return wrapped.diag(X, **kwargs)
Пример #8
0
def test_mlgk_fixed_hyperparameters():
    '''Fixed hyperparameters must be excluded from theta, bounds and gradients.

    A kernel with all hyperparameters variable is compared with three
    variants in which the node and/or edge base kernel has fixed bounds.
    '''

    g = nx.Graph()
    for node, feature in enumerate([0, 1, 0]):
        g.add_node(node, feature=feature)
    for neighbor, attribute in [(1, 1.0), (2, 2.0)]:
        g.add_edge(0, neighbor, attribute=attribute)

    G = [Graph.from_networkx(g)]
    knodeV = TensorProduct(feature=KroneckerDelta(0.5))
    knodeF = TensorProduct(feature=KroneckerDelta(0.5, h_bounds='fixed'))
    kedgeV = TensorProduct(attribute=SquareExponential(1.0))
    kedgeF = TensorProduct(
        attribute=SquareExponential(1.0, length_scale_bounds='fixed'))

    full = MarginalizedGraphKernel(knodeV, kedgeV)
    # (kernel, number of hyperparameters fixed relative to `full`)
    reduced = [
        (MarginalizedGraphKernel(knodeV, kedgeF), 1),
        (MarginalizedGraphKernel(knodeF, kedgeV), 1),
        (MarginalizedGraphKernel(knodeF, kedgeF), 2),
    ]
    for kernel, n_fixed in reduced:
        assert (len(full.theta) == len(kernel.theta) + n_fixed)
    for kernel, n_fixed in reduced:
        assert (len(full.bounds) == len(kernel.bounds) + n_fixed)

    # Evaluate every kernel before checking any of the results.
    R_full, dR_full = full(G, eval_gradient=True)
    results = [kernel(G, eval_gradient=True) for kernel, _ in reduced]

    # Kernel values are unaffected by which hyperparameters are fixed.
    for R, _ in results:
        assert (R_full == pytest.approx(R))
    # The gradient loses one slice per fixed hyperparameter ...
    for (_, n_fixed), (_, dR) in zip(reduced, results):
        assert (dR_full.shape[2] == dR.shape[2] + n_fixed)
    # ... and the remaining slices match those of the full gradient.
    for (kernel, _), (_, dR) in zip(reduced, results):
        assert (dR_full[:, :, kernel.active_theta_mask] == pytest.approx(dR))
Пример #9
0
def test_marginalized_graph_kernel_2nd_launch(benchmark, batch):
    '''Benchmark evaluating one pre-built kernel on a batch of graphs.'''

    graphs = []
    for nx_graph in make_graphs(batch, 48):
        graphs.append(Graph.from_networkx(nx_graph, weight='weight'))

    # Node and edge base kernels both compare the 'label' attribute.
    knode = TensorProduct(label=KroneckerDelta(0.5))
    kedge = TensorProduct(label=KroneckerDelta(0.5))
    kernel = MarginalizedGraphKernel(knode, kedge)

    def launch():
        # Only the evaluation is timed; the kernel is built once above.
        kernel(graphs, nodal=False)

    benchmark.pedantic(launch, iterations=3, rounds=3, warmup_rounds=0)
Пример #10
0
def test_mlgk_dtype():
    '''Kernel outputs must carry the dtype requested at construction.

    Builds a random 8-node graph, then checks the dtype of both the full
    kernel matrix and its diagonal for each requested dtype.
    '''
    g = nx.Graph()
    n = 8
    # Random lower-triangular adjacency: connect node i to earlier nodes j
    # wherever the random bit is set.
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    # `np.float` was merely an alias of the builtin `float`; it was deprecated
    # in NumPy 1.20 and removed in NumPy 1.24, so use the builtin directly.
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel,
                                       edge_kernel,
                                       q=q,
                                       dtype=dtype)

        assert (mlgk([dfg]).dtype == dtype)
        assert (mlgk.diag([dfg]).dtype == dtype)
Пример #11
0
def test_mlgk_diag_gradient(caseitem, nodal):
    '''Compare gradients of the kernel diagonal with central differences.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

        R, dR = mlgk.diag(G, nodal=nodal, eval_gradient=True)

        # dR holds one gradient column per hyperparameter.
        assert (len(dR.shape) == 2)
        assert (R.shape[0] == dR.shape[0])
        assert (dR.shape[1] >= 1)

        eps = 1e-3
        for i in range(len(mlgk.theta)):

            theta = mlgk.theta

            def diag_at(offset):
                # Evaluate the diagonal with theta[i] shifted by `offset`.
                shifted = np.copy(theta)
                shifted[i] += offset
                mlgk.theta = shifted
                return mlgk.diag(G, nodal=nodal, eval_gradient=False)

            Rr = diag_at(eps)
            Rl = diag_at(-eps)

            # Restore the unperturbed hyperparameters.
            mlgk.theta = theta

            # Chain rule from theta to exp(theta); presumably theta holds
            # log-scale hyperparameters -- confirm against the kernel class.
            dR_dLogt = (Rr - Rl) / (2 * eps)
            dLogt_dt = 1 / np.exp(theta)[i]
            dR_dt = dR_dLogt * dLogt_dt

            for analytic, numeric in zip(dR[:, i].ravel(), dR_dt.ravel()):
                assert (analytic == pytest.approx(numeric, rel=0.05,
                                                  abs=0.05))
Пример #12
0
def test_mlgk_lmin(caseitem):
    '''Nodal result with lmin=0 must equal lmin=1 plus the node kernel.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

        first = G[0]
        with_first_step = mlgk([first], nodal=True, lmin=0)
        without_first_step = mlgk([first], nodal=True, lmin=1)
        for i, n1 in first.nodes.iterrows():
            for j, n2 in first.nodes.iterrows():
                # Dropping the first step removes exactly knode(n1, n2).
                expected = without_first_step[i, j] + knode(n1, n2)
                assert (with_first_step[i, j] == pytest.approx(expected,
                                                               abs=1e-7))
Пример #13
0
def test_mlgk_self_loops():
    '''Compare kernel and reference on random symmetric weighted graphs.'''

    kedge = Constant(1.0)
    knode = Constant(1.0)
    q = 0.1
    mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

    np.random.seed(2)
    for _ in range(10):
        size = np.random.randint(4, 20)
        # Symmetrize a random matrix; its diagonal is kept, so the graph
        # built from it has weighted self loops.
        half = np.random.randn(size, size)
        adjacency = half + half.T

        graph = Graph.from_networkx(nx.from_numpy_array(adjacency),
                                    weight='weight')

        actual = mlgk([graph]).item()
        expected = MLGK(graph, knode, kedge, q, q, nodal=False)

        assert (actual == pytest.approx(expected, 5e-4))
Пример #14
0
def test_mlgk_large():
    '''Compare kernel and reference on one random 24-node graph.'''
    n = 24
    g = nx.Graph()
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        # Connect node i to each earlier node j whose random bit is set.
        for j in range(i):
            if row[j]:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q)

    actual = mlgk([dfg])
    expected = MLGK(dfg, node_kernel, edge_kernel, q, q)

    # A single input graph yields a 1x1 similarity matrix.
    assert (actual.shape == (1, 1))
    assert (actual.item() == pytest.approx(expected))
Пример #15
0
def test_mlgk_self_similarity(caseitem):
    '''Check the pairwise similarity matrix computed within one graph list.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

        R = mlgk(G)
        # Normalize R so that its diagonal becomes 1.
        inv_sqrt = np.diag(R)**-0.5
        scaling = np.diag(inv_sqrt)
        K = scaling.dot(R).dot(scaling)

        count = len(G)
        assert (R.shape == (count, count))
        assert (np.count_nonzero(R - R.T) == 0)
        for k in (0, 1):
            reference = MLGK(G[k], knode, kedge, q, q)
            assert (R[k, k] == pytest.approx(reference, 1e-5))
        for k in (0, 1):
            assert (K[k, k] == pytest.approx(1, 1e-7))
Пример #16
0
def test_mlgk_typecheck():
    '''Mixing graphs built from differently attributed inputs must raise.'''
    node_kernel = Constant(1.0)
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=0.5)
    unlabeled = Graph.from_networkx(unlabeled_graph1)
    labeled = Graph.from_networkx(labeled_graph1)
    weighted = Graph.from_networkx(weighted_graph1, weight='w')

    # Every ordered pair of distinct graphs must be rejected.
    mixed_pairs = [
        (unlabeled, labeled),
        (unlabeled, weighted),
        (labeled, weighted),
        (labeled, unlabeled),
        (weighted, unlabeled),
        (weighted, labeled),
    ]
    for first, second in mixed_pairs:
        with pytest.raises(TypeError):
            mlgk([first, second])
Пример #17
0
def test_mlgk_cross_similarity(caseitem):
    '''Cross similarities mlgk(X, Y) must match slices of the full matrix.'''

    _, case = caseitem

    G = case['graphs']
    knode = case['knode']
    kedge = case['kedge']
    for q in case['q']:

        mlgk = MarginalizedGraphKernel(knode, kedge, q=q)
        R = mlgk(G)

        # (X, Y, matching sub-matrix of R)
        comparisons = [
            (G[:1], G, R[:1, :]),
            (G[1:], G, R[1:, :]),
            (G, G[:1], R[:, :1]),
            (G, G[1:], R[:, 1:]),
        ]
        for X, Y, expected in comparisons:
            for x, y in zip(mlgk(X, Y).ravel(), expected.ravel()):
                assert (x == pytest.approx(y, 1e-6))
Пример #18
0
# Turn every SMILES string into a graph via RDKit.
graphs = [Graph.from_rdkit(MolFromSmiles(smi)) for smi in smiles]

# Even-indexed samples are used for training, odd-indexed ones for testing;
# `core` is every other training graph.
train_X, train_y = graphs[::2], energy[::2]
test_X, test_y = graphs[1::2], energy[1::2]
core = train_X[::2]

# Node and edge kernels are normalized sums of per-attribute kernels, each a
# constant factor times a base kernel. The tuples are presumably
# hyperparameter bounds -- confirm against the microkernel signatures.
kernel = MarginalizedGraphKernel(
    node_kernel=Additive(
        aromatic=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        atomic_number=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        charge=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        chiral=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        hcount=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        hybridization=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        ring_list=kC(0.5, (0.01, 1.0)) * kConv(kDelta(0.5, (0.1, 0.9))),
    ).normalized,
    edge_kernel=Additive(
        aromatic=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        conjugated=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        order=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        ring_stereo=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        stereo=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
    ).normalized,
    p=Uniform(1.0, (0.1, 40.0)),
    q=0.05,
)

# Fit the low-rank GPR using `core` alongside the training set, then predict
# on the held-out graphs.
gpr = LowRankApproximateGPR(kernel=kernel, alpha=1.0, optimizer=True)
gpr.fit(core, train_X, train_y, verbose=True)
predict_y = gpr.predict(test_X)

print('Prediction:', predict_y)
print('Ground truth:', test_y)
Пример #19
0
 def fun():
     """Construct and return a kernel from the surrounding ``knode``/``kedge``."""
     kernel = MarginalizedGraphKernel(knode, kedge)
     return kernel
Пример #20
0
 def fun():
     """Build a kernel and immediately evaluate it on ``graphs``."""
     MarginalizedGraphKernel(knode, kedge)(graphs, nodal=False)
Пример #21
0
# Small set of ASE molecules used as inputs.
molecules = [molecule(formula) for formula in ('CH4', 'NH3', 'CH3OH', 'H2O')]

graphs = list(map(Graph.from_ase, molecules))

# The metric and the normalized kernel are configured with the same base
# kernels and stopping probability.
metric = MaxiMin(
    node_kernel=TensorProduct(element=KroneckerDelta(0.5)),
    edge_kernel=TensorProduct(length=SquareExponential(0.1)),
    q=0.01,
)
kernel = Normalization(
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(element=KroneckerDelta(0.5)),
        edge_kernel=TensorProduct(length=SquareExponential(0.1)),
        q=0.01,
    )
)


def check_hausdorff(X, Y=None):
    # GPU direct computation
    D = metric(X, Y)
    # Manual approach
    K = kernel(X, Y, nodal=True)
    d = np.sqrt(np.maximum(0, 2 - 2 * K))
    starts1 = np.cumsum([0] + [len(g.nodes) for g in X])[:-1]
    starts2 = np.cumsum([0] + [len(g.nodes) for g in Y])[:-1] if Y else starts1
    d1 = np.maximum.reduceat(np.minimum.reduceat(d, starts2, axis=1),
                             starts1,
                             axis=0)
    d2 = np.maximum.reduceat(np.minimum.reduceat(d, starts1, axis=0),
Пример #22
0
g2.add_node(2)
g2.add_edge(0, 1)
g2.add_edge(1, 2)

# Triangle graph:
# 0 --- 1
#  \  /
#   2
g3 = nx.Graph()
g3.add_nodes_from([0, 1, 2])
g3.add_edges_from([(0, 1), (0, 2), (1, 2)])

# Trivial base kernels: every pair of nodes/edges has similarity 1.
knode = Constant(1.0)
kedge = Constant(1.0)

# Compose the marginalized graph kernel and compute pairwise similarities.
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)

R = mlgk([Graph.from_networkx(graph) for graph in (g1, g2, g3)])

# Normalize the similarity matrix: K = diag(d) . R . diag(d).
d = np.diag(R)**-0.5
K = np.diag(d).dot(R)
K = K.dot(np.diag(d))

# All entries should be approximately 1, up to round-off error.
print(K)
Пример #23
0
from graphdot import Graph
from graphdot.kernel.marginalized import MarginalizedGraphKernel
from graphdot.kernel.fix import Normalization
from graphdot.microkernel import (TensorProduct, DotProduct, Constant)

# Each node carries a 4-component 'soap' feature vector and each edge a
# scalar weight 'w'.
g1 = nx.Graph()
g1.add_nodes_from([
    (0, dict(soap=[0.5, 1.5, 2.5, 0.5])),
    (1, dict(soap=[0.5, 1.5, 2.5, 0.5])),
])
g1.add_edge(0, 1, w=1.0)

g2 = nx.Graph()
g2.add_nodes_from([
    (0, dict(soap=[0.5, 1.5, 2.5, 3.5])),
    (1, dict(soap=[1.5, 1.5, 0.5, 3.5])),
    (2, dict(soap=[0.5, 2.5, 2.5, 0.5])),
])
g2.add_weighted_edges_from([(0, 1, 2.0), (0, 2, 0.5), (1, 2, 0.5)],
                           weight='w')

# Compose a normalized marginalized graph kernel: nodes are compared through
# a normalized dot product of their 'soap' vectors, edges through a constant.
mlgk = Normalization(
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(soap=DotProduct().normalized),
        edge_kernel=Constant(1),
        q=0.05,
    )
)

G = [Graph.from_networkx(graph, weight='w') for graph in (g1, g2)]
print(f'Whole-graph similarity\n{mlgk(G)}')
print(f'Nodal similarity\n{mlgk(G, nodal=True)}')