Пример #1
0
    def from_reaction_template(cls, template_smarts):
        """Assemble a single graph from all molecules of a reaction template.

        The SMARTS string is parsed into a ``ReactionTemplate``. Every
        reactant, followed by every product, is converted with
        ``Graph.from_rdkit(...).to_networkx()`` and merged, in that order,
        through ``nx.disjoint_union``. The merged networkx graph is then
        passed to ``_from_networkx``.

        Parameters
        ----------
        template_smarts:
            Reaction template, handed unchanged to ``ReactionTemplate``.

        Returns
        -------
        The object returned by ``_from_networkx(cls, ...)``.

        Raises
        ------
        RuntimeError
            If the maximum of the ``ReactingCenter`` node column is ``<= 0``
            or if its minimum is ``>= 0``.
        """
        template = ReactionTemplate(template_smarts)

        def _config_for(role):
            # Only the reactant/product role differs between the two configs.
            return rdkit_config(reaction_center=template.ReactingAtomsMN,
                                reactant_or_product=role,
                                IsSanitized=False,
                                set_morgan_identifier=False)

        def _as_networkx(mol, config):
            return Graph.from_rdkit(mol, config).to_networkx()

        # Reactants: the first one seeds the union, the rest are appended.
        reactant_config = _config_for('reactant')
        merged = _as_networkx(template.reactants[0], reactant_config)
        for reactant in template.reactants[1:]:
            merged = nx.disjoint_union(
                merged, _as_networkx(reactant, reactant_config))

        # Products are appended after all reactants.
        product_config = _config_for('product')
        for product in template.products:
            merged = nx.disjoint_union(
                merged, _as_networkx(product, product_config))

        graph = _from_networkx(cls, merged)
        if graph.nodes.to_pandas()['ReactingCenter'].max() <= 0:
            raise RuntimeError(f'No reacting atoms are found in reactants: '
                               f'{template_smarts}')
        if graph.nodes.to_pandas()['ReactingCenter'].min() >= 0:
            raise RuntimeError(f'No reacting atoms are found in products: '
                               f'{template_smarts}')
        return graph
Пример #2
0
def test_molecular_kernel():
    """Check Tang2019MolecularKernel on H2, O2 and CH4 built with ASE.

    Three things are verified: the graph-level matrix has a unit diagonal
    after normalisation, the nodal matrix does too, and a kernel whose
    starting probability is zero for ``element == 6`` yields all-zero rows
    and columns for carbon atoms.
    """
    mols = [molecule('H2'), molecule('O2'), molecule('CH4')]
    graphs = [Graph.from_ase(mol) for mol in mols]

    kernel = Tang2019MolecularKernel(starting_probability='uniform')

    # Graph-level matrix, rescaled by the inverse square root of its diagonal.
    R = kernel(graphs)
    scale = np.diag(np.diag(R)**-0.5)
    K = scale.dot(R).dot(scale)

    assert R.shape == (3, 3)
    for idx, _ in enumerate(mols):
        assert K[idx, idx] == pytest.approx(1, 1e-6)

    # Node-level matrix, rescaled the same way.
    R_nodal = kernel(graphs, nodal=True)
    scale_nodal = np.diag(np.diag(R_nodal)**-0.5)
    K_nodal = scale_nodal.dot(R_nodal).dot(scale_nodal)

    natoms = np.sum([len(mol) for mol in mols])
    assert R_nodal.shape == (natoms, natoms)
    for idx in range(natoms):
        assert K_nodal[idx, idx] == pytest.approx(1, 1e-6)

    kernel_nocarbon = Tang2019MolecularKernel(
        starting_probability=lambda n: 0.0 if n[1]['element'] == 6 else 1.0)

    R_nocarbon_nodal = kernel_nocarbon(graphs, nodal=True)
    # Walk all atoms in molecule order; k is the row/column of each atom.
    all_atoms = (atom for mol in mols for atom in mol)
    for k, atom in enumerate(all_atoms):
        if atom.symbol != 'C':
            continue
        assert R_nocarbon_nodal[k, :].sum() == 0
        assert R_nocarbon_nodal[:, k].sum() == 0
Пример #3
0
def test_mlgk_on_permuted_graph():
    """Benzene vs. a node-permuted copy must match the self-similarity.

    Ten random permutations are tried; a new kernel object is built for
    each of them.
    """
    benzene = Graph.from_ase(molecule('C6H6'))
    for _ in range(10):
        order = np.random.permutation(len(benzene.nodes))
        shuffled = benzene.permute(order)

        node_kernel = TensorProduct(element=KroneckerDelta(0.5))
        edge_kernel = TensorProduct(length=SquareExponential(0.1))
        kernel = MarginalizedGraphKernel(node_kernel, edge_kernel)

        cross = kernel([benzene], [shuffled]).item()
        reference = kernel([benzene]).item()
        assert cross == pytest.approx(reference)
Пример #4
0
def test_marginalized_graph_kernel_2nd_launch(benchmark, batch):
    """Benchmark graph-level kernel evaluation on one reused kernel object.

    ``make_graphs(batch, 48)`` supplies the networkx inputs. The timed
    callable is run for 3 rounds of 3 iterations with no warm-up rounds.
    """
    graphs = []
    for nx_graph in make_graphs(batch, 48):
        graphs.append(Graph.from_networkx(nx_graph, weight='weight'))

    node_kernel = TensorProduct(label=KroneckerDelta(0.5))
    edge_kernel = TensorProduct(label=KroneckerDelta(0.5))
    kernel = MarginalizedGraphKernel(node_kernel, edge_kernel)

    def evaluate():
        # The result is discarded; only the elapsed time matters here.
        kernel(graphs, nodal=False)

    benchmark.pedantic(evaluate, iterations=3, rounds=3, warmup_rounds=0)
Пример #5
0
def test_mlgk_typecheck():
    """Evaluating the kernel on differently-built graphs raises TypeError.

    The three graphs come from ``unlabeled_graph1``, ``labeled_graph1`` and
    ``weighted_graph1``; every ordered pair of distinct graphs is tried.
    """
    node_kernel = Constant(1.0)
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=0.5)

    unlabeled = Graph.from_networkx(unlabeled_graph1)
    labeled = Graph.from_networkx(labeled_graph1)
    weighted = Graph.from_networkx(weighted_graph1, weight='w')
    G = [unlabeled, labeled, weighted]

    # Same pairs, in the same order, as six explicit checks would use.
    mismatched_pairs = [(0, 1), (0, 2), (1, 2), (1, 0), (2, 0), (2, 1)]
    for first, second in mismatched_pairs:
        with pytest.raises(TypeError):
            mlgk([G[first], G[second]])
Пример #6
0
def test_octile_graph_weighted():
    """Exercise OctileGraph built from a three-node weighted Graph.

    Checks the node count, that the ``p_*`` attributes are non-zero and
    cannot be assigned, and that the node/edge record types expose the
    expected field names.
    """
    assert OctileGraph.dtype.isalignedstruct

    node_table = {
        '!i': [0, 1, 2],
        'charge': [1, -1, 2],
        'conjugate': [False, True, True],
        'hybridization': [2, 3, 1]
    }
    edge_table = {
        '!i': [0, 0],
        '!j': [1, 2],
        'length': [0.5, 1.0],
        '!w': [1.0, 2.0]
    }
    dfg = Graph(nodes=node_table, edges=edge_table, title='H2O')

    og = OctileGraph(dfg)
    assert og.n_node == len(dfg.nodes)

    pointer_names = ('p_octile', 'p_degree', 'p_node')
    for attr in pointer_names:
        assert getattr(og, attr) != 0
    # Assigning to any of the pointer attributes must be rejected.
    for attr in pointer_names:
        with pytest.raises(AttributeError):
            setattr(og, attr, np.uintp(0))

    assert og.node_t.isalignedstruct
    for name in og.node_t.names:
        assert name in dfg.nodes.columns
    for expected in ('charge', 'conjugate', 'hybridization'):
        assert expected in og.node_t.names

    assert og.edge_t.isalignedstruct
    assert len(og.edge_t.names) == 2
    for expected in ('weight', 'label'):
        assert expected in og.edge_t.names

    for name in og.edge_t['label'].names:
        assert name in dfg.edges.columns
    # Every edge column except the '!'-prefixed ones must be in the label.
    for name in dfg.edges.columns:
        if name not in ['!i', '!j', '!w']:
            assert name in og.edge_t['label'].names
Пример #7
0
def test_molecular_kernel_custom_pstart():
    """Carbon atoms get zero nodal similarity under a custom start probability.

    The starting probability is supplied as a pair: a Python callable and a
    string expression (presumably its device-side counterpart -- confirm
    against the kernel's documentation).
    """
    mols = [molecule('H2'), molecule('O2'), molecule('CH4')]
    graphs = [Graph.from_ase(mol) for mol in mols]

    kernel_nocarbon = Tang2019MolecularKernel(
        starting_probability=(lambda ns: np.where(ns.element == 6, 0.0, 1.0),
                              'n.element == 6 ? 0.f : 1.f'))

    R_nocarbon_nodal = kernel_nocarbon(graphs, nodal=True)

    # Walk all atoms in molecule order; k is the row/column of each atom.
    all_atoms = (atom for mol in mols for atom in mol)
    for k, atom in enumerate(all_atoms):
        if atom.symbol != 'C':
            continue
        assert R_nocarbon_nodal[k, :].sum() == 0
        assert R_nocarbon_nodal[:, k].sum() == 0
Пример #8
0
def test_octile_graph_weighted():
    """Exercise OctileGraph built from a Graph given as index/columns/data.

    Checks the node count, the padded size and octile count, that the
    ``p_*`` attributes are non-zero and cannot be assigned, and that the
    node/edge record types expose the expected field names.
    """
    assert OctileGraph.dtype.isalignedstruct

    node_frame = {
        'index': [0, 1, 2],
        'columns': ['charge', 'conjugate', 'hybridization'],
        'data': [[1, False, 2], [-1, True, 3], [2, True, 1]]
    }
    edge_frame = {
        'index': [0, 1],
        'columns': ['!ij', 'length', '!w'],
        'data': [[(0, 1), 0.5, 1.0], [(0, 2), 1.0, 2.0]]
    }
    dfg = Graph(nodes=node_frame, edges=edge_frame, title='H2O')

    og = OctileGraph(dfg)
    assert og.n_node == len(dfg.nodes)
    # The padded size covers all nodes and is a multiple of 8.
    assert og.padded_size >= og.n_node and og.padded_size % 8 == 0
    assert og.n_octile == (og.padded_size // 8)**2

    pointer_names = ('p_octile', 'p_degree', 'p_node')
    for attr in pointer_names:
        assert getattr(og, attr) != 0
    # Assigning to any of the pointer attributes must be rejected.
    for attr in pointer_names:
        with pytest.raises(AttributeError):
            setattr(og, attr, np.uintp(0))

    assert og.node_type.isalignedstruct
    for name in og.node_type.names:
        assert name in dfg.nodes.columns
    for name in dfg.nodes.columns:
        assert name in og.node_type.names

    assert og.edge_type.isalignedstruct
    assert len(og.edge_type.names) == 2
    for expected in ('weight', 'label'):
        assert expected in og.edge_type.names

    for name in og.edge_type['label'].names:
        assert name in dfg.edges.columns
    label_columns = dfg.edges.drop(['!ij', '!w'], axis=1).columns
    for name in label_columns:
        assert name in og.edge_type['label'].names
Пример #9
0
def test_mlgk_self_loops():
    """Compare the kernel with the ``MLGK`` reference on dense random graphs.

    Each graph comes from a random symmetric matrix passed to
    ``nx.from_numpy_array``; its diagonal is generally non-zero, which is
    presumably what produces the self-loops named in the test.
    """
    kedge = Constant(1.0)
    knode = Constant(1.0)
    q = 0.1
    mlgk = MarginalizedGraphKernel(knode, kedge, q=q)

    np.random.seed(2)
    for _ in range(10):
        size = np.random.randint(4, 20)
        noise = np.random.randn(size, size)
        symmetric = noise + noise.T

        graph = Graph.from_networkx(nx.from_numpy_array(symmetric),
                                    weight='weight')

        computed = mlgk([graph]).item()
        expected = MLGK(graph, knode, kedge, q, q, nodal=False)

        assert computed == pytest.approx(expected, 5e-4)
Пример #10
0
def test_mlgk_fixed_hyperparameters():
    """Fixing base-kernel bounds must shrink ``theta`` but not the value.

    Four kernels are built from variable (V) or fixed (F) node and edge
    base kernels. Compared with the all-variable kernel, each other kernel
    must have fewer ``theta``/``bounds``/gradient entries by the number of
    fixed parts, give the same kernel matrix, and give gradients equal to
    the all-variable gradient restricted by its ``active_theta_mask``.
    """
    g = nx.Graph()
    g.add_node(0, feature=0)
    g.add_node(1, feature=1)
    g.add_node(2, feature=0)
    g.add_edge(0, 1, attribute=1.0)
    g.add_edge(0, 2, attribute=2.0)

    G = [Graph.from_networkx(g)]
    knodeV = TensorProduct(feature=KroneckerDelta(0.5))
    knodeF = TensorProduct(feature=KroneckerDelta(0.5, h_bounds='fixed'))
    kedgeV = TensorProduct(attribute=SquareExponential(1.0))
    kedgeF = TensorProduct(
        attribute=SquareExponential(1.0, length_scale_bounds='fixed'))

    kernelVV = MarginalizedGraphKernel(knodeV, kedgeV)
    kernelVF = MarginalizedGraphKernel(knodeV, kedgeF)
    kernelFV = MarginalizedGraphKernel(knodeF, kedgeV)
    kernelFF = MarginalizedGraphKernel(knodeF, kedgeF)

    # Each partially-fixed kernel paired with its number of fixed parts.
    variants = [(kernelVF, 1), (kernelFV, 1), (kernelFF, 2)]

    for kern, n_fixed in variants:
        assert len(kernelVV.theta) == len(kern.theta) + n_fixed
    for kern, n_fixed in variants:
        assert len(kernelVV.bounds) == len(kern.bounds) + n_fixed

    Rvv, dRvv = kernelVV(G, eval_gradient=True)
    results = [kern(G, eval_gradient=True) for kern, _ in variants]

    for R, _ in results:
        assert Rvv == pytest.approx(R)
    for (_, dR), (_, n_fixed) in zip(results, variants):
        assert dRvv.shape[2] == dR.shape[2] + n_fixed
    for (_, dR), (kern, _) in zip(results, variants):
        assert dRvv[:, :, kern.active_theta_mask] == pytest.approx(dR)
Пример #11
0
def test_mlgk_large():
    """Compare the kernel with the ``MLGK`` reference on a 24-node graph.

    Edges come from the strictly lower triangle of a random 0/1 matrix.
    """
    g = nx.Graph()
    n = 24
    adjacency = np.random.randint(0, 2, (n, n))
    for i, row in enumerate(adjacency):
        g.add_node(i, type=0)
        # Only columns j < i are used, so each pair is visited once.
        g.add_edges_from((i, j, {'weight': 1})
                         for j, flag in enumerate(row[:i]) if flag)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)
    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q)

    computed = mlgk([dfg])
    reference = MLGK(dfg, node_kernel, edge_kernel, q, q)

    assert computed.shape == (1, 1)
    assert computed.item() == pytest.approx(reference)
Пример #12
0
def test_mlgk_dtype():
    """Verify that kernel results carry the ``dtype`` given at construction.

    A random 8-node graph is built from the strictly lower triangle of a
    0/1 matrix. For each requested dtype, both the full kernel matrix and
    its ``diag`` must report that dtype.
    """
    g = nx.Graph()
    n = 8
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    # ``np.float`` was merely an alias of the builtin ``float``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, where accessing it raises
    # AttributeError. The builtin is the exact equivalent on every version.
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel,
                                       edge_kernel,
                                       q=q,
                                       dtype=dtype)

        assert (mlgk([dfg]).dtype == dtype)
        assert (mlgk.diag([dfg]).dtype == dtype)
Пример #13
0
def test_molecular_kernel():
    """Check Tang2019MolecularKernel with default settings on H2, O2, CH4.

    Both the graph-level and the nodal matrices must have a unit diagonal
    after being rescaled by the inverse square root of their own diagonal.
    """
    mols = [molecule('H2'), molecule('O2'), molecule('CH4')]
    graphs = [Graph.from_ase(mol) for mol in mols]

    kernel = Tang2019MolecularKernel()

    # Graph-level matrix: one row/column per molecule.
    R = kernel(graphs)
    scale = np.diag(np.diag(R)**-0.5)
    K = scale.dot(R).dot(scale)

    assert R.shape == (3, 3)
    for idx, _ in enumerate(mols):
        assert K[idx, idx] == pytest.approx(1, 1e-6)

    # Node-level matrix: one row/column per atom over all molecules.
    R_nodal = kernel(graphs, nodal=True)
    scale_nodal = np.diag(np.diag(R_nodal)**-0.5)
    K_nodal = scale_nodal.dot(R_nodal).dot(scale_nodal)

    natoms = np.sum([len(mol) for mol in mols])
    assert R_nodal.shape == (natoms, natoms)
    for idx in range(natoms):
        assert K_nodal[idx, idx] == pytest.approx(1, 1e-6)
Пример #14
0
# NOTE(review): vario_graph1 is created before this excerpt; it is presumably
# an nx.Graph like vario_graph2 below -- confirm against the full file.
# Node attribute `rings` and edge attribute `spectrum` are tuples whose
# length varies from element to element; `w` is a per-edge float
# (presumably the edge weight -- confirm where the graphs are converted).
vario_graph1.add_node('O1', rings=(5, 6))
vario_graph1.add_node('H1', rings=(3, ))
vario_graph1.add_node('H2', rings=(2, 3, 4))
vario_graph1.add_edge('O1', 'H1', spectrum=(3, 4), w=1.0)
vario_graph1.add_edge('O1', 'H2', spectrum=(3, 5), w=2.0)

# Second graph with the same attribute names: two nodes and one edge.
vario_graph2 = nx.Graph(title='H2')
vario_graph2.add_node('H1', rings=(3, 4))
vario_graph2.add_node('H2', rings=(3, ))
vario_graph2.add_edge('H1', 'H2', spectrum=(2, 4), w=3.0)

case_dict = {
    'unlabeled': {
        'graphs':
        Graph.unify_datatype([
            Graph.from_networkx(unlabeled_graph1),
            Graph.from_networkx(unlabeled_graph2)
        ]),
        'knode':
        Constant(1.0),
        'kedge':
        Constant(1.0),
        'q': [0.01, 0.05, 0.1, 0.5]
    },
    'labeled': {
        'graphs':
        Graph.unify_datatype([
            Graph.from_networkx(labeled_graph1),
            Graph.from_networkx(labeled_graph2)
        ]),
        'knode':
        TensorProduct(hybridization=KroneckerDelta(0.3),
Пример #15
0
g2.add_node(2)
g2.add_edge(0, 1)
g2.add_edge(1, 2)

# 0 --- 1
#  \  /
#   2
# three nodes, every pair connected
g3 = nx.Graph()
g3.add_nodes_from([0, 1, 2])
g3.add_edges_from([(0, 1), (0, 2), (1, 2)])

# trivial base kernels: both are the constant 1.0
knode = Constant(1.0)
kedge = Constant(1.0)

# build the marginalized graph kernel, then evaluate it on all three graphs
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)

R = mlgk([Graph.from_networkx(graph) for graph in (g1, g2, g3)])

# rescale rows and columns by the inverse square root of the diagonal
d = np.diag(R)**-0.5
K = np.diag(d).dot(R).dot(np.diag(d))

# all entries should be approximately 1 plus round-off error
print(K)
Пример #16
0
import numpy as np
import pandas as pd
from ase.build import molecule, bulk
from graphdot import Graph
from graphdot.kernel.molecular import Tang2019MolecularKernel

# build sample molecules: three gas-phase molecules plus two rock-salt NaCl
# crystals that differ only in the `a` argument (5.64 vs 5.66)
small_title = ['H2O', 'HCl', 'NaCl']
bulk_title = ['NaCl-bulk', 'NaCl-bulk2']
# The crystals get their own name so that the `bulk` builder imported from
# ase.build is not overwritten by the list of its own results.
bulk_crystals = [
    bulk('NaCl', 'rocksalt', a=5.64),
    bulk('NaCl', 'rocksalt', a=5.66),
]
molecules = [molecule(name) for name in small_title] + bulk_crystals

# convert to molecular graphs
graphs = [Graph.from_ase(m) for m in molecules]

# use pre-defined molecular kernel
kernel = Tang2019MolecularKernel(edge_length_scale=0.1)

R = kernel(graphs)

# normalize the similarity matrix by the inverse square root of its diagonal
d = np.diag(R)**-0.5
K = np.diag(d).dot(R).dot(np.diag(d))

# note the difference between the NaCl variants
title = small_title + bulk_title
print(pd.DataFrame(K, columns=title, index=title))
Пример #17
0
# Two small graphs whose `category` (nodes) and `spectra` (edges) attributes
# are sequences of varying length.
g1 = nx.Graph()
g1.add_nodes_from([
    (0, {'category': (1, 2), 'symbol': 1}),
    (1, {'category': (2, ), 'symbol': 2}),
])
g1.add_edges_from([
    (0, 1, {'w': 1.0, 'spectra': [0.5, 0.2]}),
])

g2 = nx.Graph()
g2.add_nodes_from([
    (0, {'category': (1, 3), 'symbol': 1}),
    (1, {'category': (2, 3, 5), 'symbol': 2}),
    (2, {'category': (1, ), 'symbol': 1}),
])
g2.add_edges_from([
    (0, 1, {'w': 2.0, 'spectra': [0.1, 0.9, 1.5]}),
    (0, 2, {'w': 0.5, 'spectra': [0.4]}),
    (1, 2, {'w': 0.5, 'spectra': [0.3, 0.6]}),
])

# Node and edge base kernels follow the R-convolution framework.
# Reference: Haussler, David. Convolution kernels on discrete structures. 1999.
symbol_kernel = KroneckerDelta(0.5)
category_kernel = Convolution(KroneckerDelta(0.5))
knode = TensorProduct(symbol=symbol_kernel, category=category_kernel)

spectra_kernel = Convolution(SquareExponential(0.3))
kedge = TensorProduct(spectra=spectra_kernel)

# build the marginalized graph kernel, then evaluate it on both graphs
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)

R = mlgk([Graph.from_networkx(graph, weight='w') for graph in (g1, g2)])

# rescale rows and columns by the inverse square root of the diagonal
d = np.diag(R)**-0.5
K = np.diag(d).dot(R).dot(np.diag(d))

print(K)
Пример #18
0
from graphdot.kernel.marginalized.starting_probability import Uniform
from graphdot.microkernel import (Additive, Convolution as kConv, Constant as
                                  kC, KroneckerDelta as kDelta,
                                  SquareExponential as kSE)
from graphdot.model.gaussian_process import LowRankApproximateGPR

# SMILES strings made only of 'C', from 2 to 12 characters long, with one
# energy value per string.
smiles = [
    'CC',
    'CCC',
    'CCCC',
    'CCCCC',
    'CCCCCC',
    'CCCCCCC',
    'CCCCCCCC',
    'CCCCCCCCC',
    'CCCCCCCCCC',
    'CCCCCCCCCCC',
    'CCCCCCCCCCCC',
]
energy = [
    -719.05,
    -1014.16,
    -1309.27,
    -1604.29,
    -1899.33,
    -2194.35,
    -2489.38,
    -2784.41,
    -3079.44,
    -3374.47,
    -3669.50,
]

graphs = [Graph.from_rdkit(MolFromSmiles(smi)) for smi in smiles]

# Even positions go to the training set, odd positions to the test set.
train_X, train_y = graphs[::2], energy[::2]
test_X, test_y = graphs[1::2], energy[1::2]
# Every second training graph is kept as `core`.
core = train_X[::2]

kernel = MarginalizedGraphKernel(
    node_kernel=Additive(
        aromatic=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        atomic_number=kC(0.5, (0.1, 1.0)) * kDelta(0.8, (0.1, 0.9)),
        charge=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        chiral=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        hcount=kC(0.5, (0.1, 1.0)) * kSE(1.0),
        hybridization=kC(0.5, (0.1, 1.0)) * kDelta(0.5, (0.1, 0.9)),
        ring_list=kC(0.5, (0.01, 1.0)) * kConv(kDelta(0.5,
Пример #19
0
# Graph titled 'H2O': three nodes with `hybridization` and `charge`, and two
# edges carrying `order`, `length` and `w`.
weighted_graph1 = nx.Graph(title='H2O')
weighted_graph1.add_nodes_from([
    ('O1', {'hybridization': Hybrid.SP2, 'charge': 1}),
    ('H1', {'hybridization': Hybrid.SP3, 'charge': -1}),
    ('H2', {'hybridization': Hybrid.SP, 'charge': 2}),
])
weighted_graph1.add_edges_from([
    ('O1', 'H1', {'order': 1, 'length': 0.5, 'w': 1.0}),
    ('O1', 'H2', {'order': 2, 'length': 1.0, 'w': 2.0}),
])

# Graph titled 'H2': two nodes joined by a single edge, same attribute names.
weighted_graph2 = nx.Graph(title='H2')
weighted_graph2.add_nodes_from([
    ('H1', {'hybridization': Hybrid.SP, 'charge': 1}),
    ('H2', {'hybridization': Hybrid.SP, 'charge': 1}),
])
weighted_graph2.add_edges_from([
    ('H1', 'H2', {'order': 2, 'length': 1.0, 'w': 3.0}),
])

case_dict = {
    'unlabeled': {
        'graphs': [
            Graph.from_networkx(unlabeled_graph1),
            Graph.from_networkx(unlabeled_graph2)
        ],
        'knode':
        Constant(1.0),
        'kedge':
        Constant(1.0),
        'q': [0.01, 0.05, 0.1, 0.5]
    },
    'labeled': {
        'graphs': [
            Graph.from_networkx(labeled_graph1),
            Graph.from_networkx(labeled_graph2)
        ],
        'knode':
        TensorProduct(hybridization=KroneckerDelta(0.3, 1.0),
Пример #20
0
from graphdot.kernel.marginalized.basekernel import KroneckerDelta

# build sample molecules
smiles_list = [
    'CC',  # ethane
    'CCO',  # ethanol
    'CCN',  # ethylamine
    'C=C',  # ethene
    'CC=C',  # propene
    'CC=CC',  # 2-butene
]

# convert to molecular graphs
# nodes (atoms) have 'aromatic', 'charge', 'element', 'hcount' attributes
# edges (bonds) have the 'order' attribute
graphs = [Graph.from_smiles(smi) for smi in smiles_list]

# define node and edge kernelets: one base kernel per attribute listed above
knode = TensorProduct(aromatic=KroneckerDelta(0.8, 1.0),
                      charge=SquareExponential(1.0),
                      element=KroneckerDelta(0.5, 1.0),
                      hcount=SquareExponential(1.0))

kedge = TensorProduct(order=KroneckerDelta(0.5, 1.0))

# compose the marginalized graph kernel and compute pairwise similarity
kernel = MarginalizedGraphKernel(knode, kedge, q=0.05)

R = kernel(graphs)

# normalize the similarity matrix and then print
Пример #21
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pytest
from ase.build import molecule
from graphdot import Graph
from graphdot.metric.maximin import MaxiMin
from graphdot.microkernel import (
    KroneckerDelta,
    SquareExponential,
    TensorProduct,
)
from graphdot.kernel.marginalized.starting_probability import Uniform

# Module-level graph sets, each built from ASE molecules by formula name.
G = [Graph.from_ase(molecule(name)) for name in ('CH3SCH3', 'CH3OCH3')]
H = [Graph.from_ase(molecule(name)) for name in ('CH4', 'NH3', 'H2O')]


def test_maximin_basic():
    metric = MaxiMin(node_kernel=TensorProduct(element=KroneckerDelta(0.5)),
                     edge_kernel=TensorProduct(length=SquareExponential(0.1)),
                     q=0.01)
    distance = metric(G)
    assert distance.shape == (len(G), len(G))
    assert np.allclose(distance.diagonal(), 0, atol=1e-3)
    assert np.all(distance >= 0)
    assert np.allclose(distance, distance.T, rtol=1e-14, atol=1e-14)

    distance = metric(G, G)
    assert distance.shape == (len(G), len(G))
    assert np.allclose(distance.diagonal(), 0, atol=1e-3)