def test_mlgk_kernel_range_check():
    """Check which base-kernel combinations trigger a DeprecationWarning.

    The first two constructions are expected to go through as-is; every
    combination produced by ``offending`` is expected to emit a
    ``DeprecationWarning`` while the kernel is being set up.
    """
    # Combinations that are simply constructed, with no warning assertion.
    MarginalizedGraphKernel(
        node_kernel=KroneckerDelta(1e-7),
        edge_kernel=TensorProduct(attribute=SquareExponential(1.0)))
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(feature=KroneckerDelta(0.5)),
        edge_kernel=TensorProduct(attribute=SquareExponential(1.0)))

    # Each entry is a zero-argument factory so that the base kernels are
    # built inside the `pytest.warns` context, exactly like the call itself.
    offending = [
        lambda: dict(
            node_kernel=KroneckerDelta(0),
            edge_kernel=TensorProduct(attribute=SquareExponential(1.0))),
        lambda: dict(
            node_kernel=TensorProduct(feature=KroneckerDelta(0.5)) + 1,
            edge_kernel=SquareExponential(1.0)),
        lambda: dict(
            node_kernel=TensorProduct(feature=KroneckerDelta(0.5)),
            edge_kernel=TensorProduct(attribute=SquareExponential(1.0)) + 1),
        lambda: dict(
            node_kernel=KroneckerDelta(0.5) * 2,
            edge_kernel=TensorProduct(attribute=SquareExponential(1.0))),
        lambda: dict(
            node_kernel=TensorProduct(feature=KroneckerDelta(0.5)),
            edge_kernel=TensorProduct(attribute=SquareExponential(1.0)) * 2),
    ]
    for make_kernels in offending:
        with pytest.warns(DeprecationWarning):
            MarginalizedGraphKernel(**make_kernels())
def _makekernel(self, **kwargs):
    """Build the marginalized graph kernel and store it in ``self.kernel``.

    The node kernel compares the ``element`` feature with a Kronecker delta
    parameterized by ``self.element_prior``; the edge kernel compares the
    ``length`` feature with a square exponential parameterized by
    ``self.edge_length_scale``. Extra keyword arguments are forwarded
    unchanged to ``MarginalizedGraphKernel``.
    """
    node_kernel = TensorProduct(element=KroneckerDelta(self.element_prior))
    edge_kernel = TensorProduct(
        length=SquareExponential(self.edge_length_scale))
    self.kernel = MarginalizedGraphKernel(
        node_kernel,
        edge_kernel,
        q=self.stopping_probability,
        p=self.starting_probability,
        **kwargs)
def test_mlgk_on_permuted_graph():
    """The kernel value must not depend on the node ordering of a graph.

    A C6H6 graph is compared against ten random node permutations of
    itself; each cross similarity has to match the self similarity.
    """
    benzene = Graph.from_ase(molecule('C6H6'))
    n_trials = 10
    for _ in range(n_trials):
        new_order = np.random.permutation(len(benzene.nodes))
        shuffled = benzene.permute(new_order)
        mlgk = MarginalizedGraphKernel(
            TensorProduct(element=KroneckerDelta(0.5)),
            TensorProduct(length=SquareExponential(0.1)))
        cross_similarity = mlgk([benzene], [shuffled]).item()
        self_similarity = mlgk([benzene]).item()
        assert cross_similarity == pytest.approx(self_similarity)
def _combine_microk(rule: Literal['Tensorproduct', 'Additive', 'Additive_p'],
                    microk_dict: Dict):
    """Combine named micro-kernels into one composite kernel.

    Parameters
    ----------
    rule:
        Name of the combination rule.
    microk_dict:
        Mapping passed as keyword arguments to the selected combinator.

    Returns
    -------
    ``TensorProduct(**microk_dict)`` for ``'Tensorproduct'``, a normalized
    ``Additive(**microk_dict)`` for ``'Additive'``, or
    ``Additive_p(**microk_dict)`` for ``'Additive_p'``.

    Raises
    ------
    RuntimeError
        If ``rule`` is none of the three supported names.
    """
    if rule == 'Tensorproduct':
        return TensorProduct(**microk_dict)
    if rule == 'Additive':
        additive = Additive(**microk_dict)
        return Normalize(additive)
    if rule == 'Additive_p':
        return Additive_p(**microk_dict)
    raise RuntimeError(f'Unknown type: {rule}')
def test_maximin_basic():
    """Sanity-check the shape, sign and symmetry of MaxiMin distances.

    Uses the module-level graph collections ``G`` and ``H``.
    """
    node_kernel = TensorProduct(element=KroneckerDelta(0.5))
    edge_kernel = TensorProduct(length=SquareExponential(0.1))
    metric = MaxiMin(node_kernel=node_kernel, edge_kernel=edge_kernel,
                     q=0.01)

    # Self-distance through the one-argument and the two-argument call; the
    # symmetry tolerance differs between the two paths.
    for operands, sym_tol in (((G,), 1e-14), ((G, G), 1e-4)):
        D = metric(*operands)
        assert D.shape == (len(G), len(G))
        assert np.allclose(D.diagonal(), 0, atol=1e-3)
        assert np.all(D >= 0)
        assert np.allclose(D, D.T, rtol=sym_tol, atol=sym_tol)

    # Cross-distance between two different collections.
    D = metric(G, H)
    assert D.shape == (len(G), len(H))
    assert np.all(D >= 0)
def test_mlgk_fixed_hyperparameters():
    """Fixing a hyperparameter must drop it from theta, bounds and gradient.

    A kernel with all hyperparameters free is compared against three
    variants where the edge, the node, or both base kernels have their
    hyperparameter bounds set to ``'fixed'``.
    """
    nxg = nx.Graph()
    for node, feature in enumerate((0, 1, 0)):
        nxg.add_node(node, feature=feature)
    for neighbor, attribute in ((1, 1.0), (2, 2.0)):
        nxg.add_edge(0, neighbor, attribute=attribute)
    graphs = [Graph.from_networkx(nxg)]

    node_free = TensorProduct(feature=KroneckerDelta(0.5))
    node_fixed = TensorProduct(feature=KroneckerDelta(0.5, h_bounds='fixed'))
    edge_free = TensorProduct(attribute=SquareExponential(1.0))
    edge_fixed = TensorProduct(
        attribute=SquareExponential(1.0, length_scale_bounds='fixed'))

    reference = MarginalizedGraphKernel(node_free, edge_free)
    # (kernel, number of hyperparameters it has fixed)
    variants = [
        (MarginalizedGraphKernel(node_free, edge_fixed), 1),
        (MarginalizedGraphKernel(node_fixed, edge_free), 1),
        (MarginalizedGraphKernel(node_fixed, edge_fixed), 2),
    ]

    for variant, n_fixed in variants:
        assert len(reference.theta) == len(variant.theta) + n_fixed
    for variant, n_fixed in variants:
        assert len(reference.bounds) == len(variant.bounds) + n_fixed

    R_ref, dR_ref = reference(graphs, eval_gradient=True)
    outputs = [variant(graphs, eval_gradient=True) for variant, _ in variants]

    # Kernel values are unaffected by fixing hyperparameters.
    for R, _ in outputs:
        assert R_ref == pytest.approx(R)
    # The gradient loses one slice per fixed hyperparameter ...
    for (_, n_fixed), (_, dR) in zip(variants, outputs):
        assert dR_ref.shape[2] == dR.shape[2] + n_fixed
    # ... and the remaining slices agree with the reference gradient.
    for (variant, _), (_, dR) in zip(variants, outputs):
        assert (dR_ref[:, :, variant.active_theta_mask]
                == pytest.approx(dR))
def __init__(self, use_charge=False, adjacency='default', q=0.01,
             element_delta=0.2, bond_eps=0.02, charge_eps=0.2):
    """Store the settings and build the node and edge base kernels.

    Parameters
    ----------
    use_charge:
        When truthy, the node kernel also compares the ``charge`` feature.
    adjacency:
        ``'default'`` selects ``AtomicAdjacency(shape='tent2', zoom=0.75)``;
        any other value is stored as given.
    q:
        Stored unchanged as ``self.q``.
    element_delta:
        Argument of the Kronecker delta on the ``element`` node feature.
    bond_eps:
        Argument of the square exponential on the ``length`` edge feature.
    charge_eps:
        Argument of the square exponential on the ``charge`` node feature
        (only used when ``use_charge`` is truthy).
    """
    self.use_charge = use_charge
    self.adjacency = (
        AtomicAdjacency(shape='tent2', zoom=0.75)
        if adjacency == 'default' else adjacency
    )
    self.q = q

    node_features = {'element': KroneckerDelta(element_delta)}
    if use_charge:
        node_features['charge'] = SquareExponential(charge_eps)
    self.node_kernel = TensorProduct(**node_features)

    self.edge_kernel = TensorProduct(length=SquareExponential(bond_eps))
def test_mlgk_large():
    """Compare the kernel on a random 24-node graph with the `MLGK` reference.

    All nodes share the same ``type`` and all edges have weight 1, so only
    the random topology varies between runs.
    """
    n_nodes = 24
    stop_prob = 0.5

    # The strictly lower triangle of a random 0/1 matrix selects the edges.
    coin_flips = np.random.randint(0, 2, (n_nodes, n_nodes))
    nxg = nx.Graph()
    for i in range(n_nodes):
        nxg.add_node(i, type=0)
        for j in range(i):
            if coin_flips[i, j]:
                nxg.add_edge(i, j, weight=1)
    graph = Graph.from_networkx(nxg, weight='weight')

    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=stop_prob)
    computed = mlgk([graph])
    expected = MLGK(graph, node_kernel, edge_kernel, stop_prob, stop_prob)

    assert computed.shape == (1, 1)
    assert computed.item() == pytest.approx(expected)
def test_mlgk_dtype():
    """The kernel matrix and its diagonal must honor the requested dtype.

    A random 8-node graph with a single node type and unit edge weights is
    evaluated once per dtype; both ``mlgk(...)`` and ``mlgk.diag(...)`` are
    checked.
    """
    g = nx.Graph()
    n = 8
    # The strictly lower triangle of a random 0/1 matrix selects the edges.
    for i, row in enumerate(np.random.randint(0, 2, (n, n))):
        g.add_node(i, type=0)
        for j, pred in enumerate(row[:i]):
            if pred:
                g.add_edge(i, j, weight=1)

    dfg = Graph.from_networkx(g, weight='weight')

    q = 0.5
    node_kernel = TensorProduct(type=KroneckerDelta(1.0))
    edge_kernel = Constant(1.0)

    # `np.float` was only an alias of the builtin `float`; it was deprecated
    # in NumPy 1.20 and removed in 1.24, where accessing it raises
    # AttributeError. The builtin keeps the same case on every version.
    for dtype in [float, np.float32, np.float64]:
        mlgk = MarginalizedGraphKernel(node_kernel, edge_kernel, q=q,
                                       dtype=dtype)
        assert (mlgk([dfg]).dtype == dtype)
        assert (mlgk.diag([dfg]).dtype == dtype)
# Two small graphs whose node 'category' and edge 'spectra' attributes are
# collections of varying length.
g1 = nx.Graph()
g1.add_nodes_from([
    (0, {'category': (1, 2), 'symbol': 1}),
    (1, {'category': (2, ), 'symbol': 2}),
])
g1.add_edges_from([
    (0, 1, {'w': 1.0, 'spectra': [0.5, 0.2]}),
])

g2 = nx.Graph()
g2.add_nodes_from([
    (0, {'category': (1, 3), 'symbol': 1}),
    (1, {'category': (2, 3, 5), 'symbol': 2}),
    (2, {'category': (1, ), 'symbol': 1}),
])
g2.add_edges_from([
    (0, 1, {'w': 2.0, 'spectra': [0.1, 0.9, 1.5]}),
    (0, 2, {'w': 0.5, 'spectra': [0.4]}),
    (1, 2, {'w': 0.5, 'spectra': [0.3, 0.6]}),
])

# Node and edge base kernels built with the R-convolution framework, which
# lets a scalar base kernel act on the variable-length attributes.
# Reference: Haussler, David. Convolution kernels on discrete structures. 1999.
knode = TensorProduct(
    symbol=KroneckerDelta(0.5),
    category=Convolution(KroneckerDelta(0.5)),
)
kedge = TensorProduct(spectra=Convolution(SquareExponential(0.3)))

# Compose the marginalized graph kernel and evaluate all pairwise
# similarities; the 'w' edge attribute is used as the edge weight.
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)
inputs = [Graph.from_networkx(nxg, weight='w') for nxg in (g1, g2)]
R = mlgk(inputs)

# Normalize the similarity matrix: K = D R D with D = diag(R)^(-1/2).
d = np.diag(R)**-0.5
D = np.diag(d)
K = D @ R @ D

print(K)
g2.add_edges_from([(0, 1), (1, 2)])

# Third graph, a triangle; labels are {radius, category}:
#
#   {1.0, 1} -- {2.0, 1}
#       \         /
#        {1.0, 2}
g3 = nx.Graph()
g3.add_nodes_from([
    (0, {'radius': 1.0, 'category': 1}),
    (1, {'radius': 2.0, 'category': 1}),
    (2, {'radius': 1.0, 'category': 2}),
])
g3.add_edges_from([(0, 1), (0, 2), (1, 2)])

# Node kernel: product of a square exponential on 'radius' and a Kronecker
# delta on 'category'. Edges carry no attributes, hence a constant kernel.
knode = TensorProduct(
    radius=SquareExponential(0.5),
    category=KroneckerDelta(0.5),
)
kedge = Constant(1.0)

# Compose the marginalized graph kernel and evaluate all pairwise
# similarities.
mlgk = MarginalizedGraphKernel(knode, kedge, q=0.05)
inputs = [Graph.from_networkx(nxg) for nxg in (g1, g2, g3)]
R = mlgk(inputs)

# Normalize the similarity matrix: K = D R D with D = diag(R)^(-1/2).
d = np.diag(R)**-0.5
D = np.diag(d)
K = D @ R @ D

print(K)
def __call__(self, graphs, diags, node_kernel, edge_kernel, p, q, eps,
             ftol, gtol, jobs, starts, gramian, active, gradient, nX, nY,
             nJ, traits, timer):
    # Overview: registers every graph, renders the device source from the
    # template, looks up the 'graph_maximin_distance' function, uploads the
    # micro-kernel and starting-probability state, then launches the kernel
    # and waits for it to finish. There is no return statement; results are
    # presumably written by the device code into the buffers passed in
    # (`gramian`, `active`, `gradient`) -- TODO confirm against the template.
    #
    # The bare triple-quoted strings below are section markers. They are
    # no-op expression statements, except the first one, which is also this
    # function's docstring.
    ''' transfer graphs and starting probabilities to GPU '''
    timer.tic('transferring graphs to GPU')

    # NOTE(review): `og_last` stays None when `graphs` is empty, in which
    # case the attribute reads after the loop raise AttributeError.
    og_last = None
    graphs_d = umempty(len(graphs), dtype=OctileGraph.dtype)
    for i, g in enumerate(graphs):
        og, ogstate = self._register_graph(g)
        # Every graph is checked against its predecessor.
        if i > 0:
            self._assert_homogeneous(og_last, og)
        og_last = og
        graphs_d[i] = ogstate

    # Traits are read from the last registered graph only.
    weighted = og_last.weighted
    node_t = og_last.node_t
    edge_t = og_last.edge_t

    timer.toc('transferring graphs to GPU')

    ''' allocate global job counter '''
    timer.tic('allocate global job counter')
    # Single zero-initialized uint32, handed to the kernel below.
    i_job_global = umzeros(1, np.uint32)
    timer.toc('allocate global job counter')

    ''' code generation '''
    timer.tic('code generation')
    # For weighted graphs the caller's edge kernel becomes the 'label'
    # component of a tensor product whose 'weight' component is Product().
    if weighted:
        edge_kernel = TensorProduct(weight=Product(), label=edge_kernel)
    # Identity check: only the literal True enables the theta grid, not
    # merely truthy values.
    use_theta_grid = traits.eval_gradient is True

    node_kernel_src = self.gencode_kernel(node_kernel, 'node_kernel')
    edge_kernel_src = self.gencode_kernel(edge_kernel, 'edge_kernel')
    p_start_src = self.gencode_probability(p, 'p_start')

    with self.template.context(traits=traits) as template:
        self.source = template.render(
            node_kernel=node_kernel_src,
            edge_kernel=edge_kernel_src,
            p_start=p_start_src,
            node_t=decltype(node_t),
            edge_t=decltype(edge_t)
        )
    timer.toc('code generation')

    ''' JIT '''
    timer.tic('JIT')
    # presumably `self.module` compiles `self.source` on access, which is
    # why this lookup is timed as 'JIT' -- TODO confirm.
    kernel = self.module.get_function('graph_maximin_distance')
    timer.toc('JIT')

    ''' calculate launch configuration '''
    # This timer section also spans the parameter uploads further down; it
    # is only closed right before the kernel launch.
    timer.tic('calculating launch configuration')
    launch_block_count = (self.device.MULTIPROCESSOR_COUNT
                          * self.block_per_sm)
    # Element [1] of the get_global() result is used. Assuming a PyCUDA
    # module, that is the byte size of the device symbol, not its value --
    # TODO confirm.
    shmem_bytes_per_warp = self.module.get_global(
        'shmem_bytes_per_warp'
    )[1]
    shmem_bytes_per_block = (shmem_bytes_per_warp * self.block_size
                             // self.device.WARP_SIZE)

    # Scratch space is allocated from the block count and the node count of
    # the largest input graph.
    max_graph_size = np.max([len(g.nodes) for g in graphs])
    scratch_pcg = self.allocate_pcg_scratch(
        launch_block_count, max_graph_size
    )

    ''' copy micro kernel parameters to GPU '''
    for name, uker in [('node_kernel', node_kernel),
                       ('edge_kernel', edge_kernel)]:
        states = np.array(
            self.pack_state(uker, diff_grid=use_theta_grid, diff_eps=eps),
            dtype=uker.dtype
        )

        # The first packed state goes to the symbol named after the kernel.
        p_uker, _ = self.module.get_global(name)
        cuda.memcpy_htod(p_uker, states[:1])

        if use_theta_grid:
            # The remaining packed states go to '<name>_diff_grid'; the
            # flattened hyperparameters go to '<name>_flat_theta' as float32.
            p_diff_grid, _ = self.module.get_global(f'{name}_diff_grid')
            p_flat_theta, _ = self.module.get_global(f'{name}_flat_theta')
            cuda.memcpy_htod(p_diff_grid, states[1:])
            cuda.memcpy_htod(
                p_flat_theta,
                np.fromiter(flatten(uker.theta), dtype=np.float32)
            )

    p_p_start, _ = self.module.get_global('p_start')
    cuda.memcpy_htod(
        p_p_start,
        np.array([p.state], dtype=p.dtype)
    )

    timer.toc('calculating launch configuration')

    ''' GPU kernel execution '''
    timer.tic('GPU kernel execution')
    kernel(
        graphs_d,
        diags,
        scratch_pcg,
        jobs,
        starts,
        gramian,
        active,
        # A zero pointer-sized integer stands in when no gradient buffer is
        # supplied.
        gradient if gradient is not None else np.uintp(0),
        i_job_global,
        np.uint32(len(jobs)),
        np.uint32(nX),
        np.uint32(nY),
        np.uint32(nJ),
        np.float32(q),
        np.float32(q),  # placeholder for q0
        np.float32(eps),
        np.float32(ftol),
        np.float32(gtol),
        grid=(launch_block_count, 1, 1),
        block=(self.block_size, 1, 1),
        shared=shmem_bytes_per_block,
    )
    # Wait for completion so that the timer covers the whole execution.
    self.ctx.synchronize()
    timer.toc('GPU kernel execution')
from graphdot import Graph
from graphdot.kernel.marginalized import MarginalizedGraphKernel
from graphdot.kernel.fix import Normalization
from graphdot.microkernel import (TensorProduct, DotProduct, Constant)

# Every node carries a 4-component 'soap' vector; every edge carries a scalar
# 'w' attribute that is used as the edge weight.
g1 = nx.Graph()
g1.add_nodes_from([
    (0, {'soap': [0.5, 1.5, 2.5, 0.5]}),
    (1, {'soap': [0.5, 1.5, 2.5, 0.5]}),
])
g1.add_edges_from([
    (0, 1, {'w': 1.0}),
])

g2 = nx.Graph()
g2.add_nodes_from([
    (0, {'soap': [0.5, 1.5, 2.5, 3.5]}),
    (1, {'soap': [1.5, 1.5, 0.5, 3.5]}),
    (2, {'soap': [0.5, 2.5, 2.5, 0.5]}),
])
g2.add_edges_from([
    (0, 1, {'w': 2.0}),
    (0, 2, {'w': 0.5}),
    (1, 2, {'w': 0.5}),
])

G = [Graph.from_networkx(g, weight='w') for g in [g1, g2]]

# Marginalized graph kernel with a normalized dot product on the 'soap'
# vectors and a constant edge kernel, wrapped in `Normalization`.
base_kernel = MarginalizedGraphKernel(
    node_kernel=TensorProduct(soap=DotProduct().normalized),
    edge_kernel=Constant(1),
    q=0.05,
)
mlgk = Normalization(base_kernel)

print(f'Whole-graph similarity\n{mlgk(G)}')
print(f'Nodal similarity\n{mlgk(G, nodal=True)}')
from graphdot.kernel.fix import Normalization
from graphdot.metric.maximin import MaxiMin
from ase.build import molecule

# Wide, fixed-precision output so that printed matrices stay on one line.
np.set_printoptions(linewidth=999, precision=4, suppress=True)

# A handful of small molecules built with ASE, converted to graphs below.
molecules = [
    molecule('CH4'),
    molecule('NH3'),
    molecule('CH3OH'),
    molecule('H2O'),
]

graphs = [Graph.from_ase(m) for m in molecules]

# The metric and the normalized kernel are configured with identical base
# kernels and the same q, so their outputs refer to the same graph kernel.
metric = MaxiMin(node_kernel=TensorProduct(element=KroneckerDelta(0.5)),
                 edge_kernel=TensorProduct(length=SquareExponential(0.1)),
                 q=0.01)
kernel = Normalization(
    MarginalizedGraphKernel(
        node_kernel=TensorProduct(element=KroneckerDelta(0.5)),
        edge_kernel=TensorProduct(length=SquareExponential(0.1)),
        q=0.01))


# NOTE(review): only the first statements of this function are visible here;
# as shown, `D` and `d` are computed but not yet used or returned.
def check_hausdorff(X, Y=None):
    # GPU direct computation
    D = metric(X, Y)
    # Manual approach
    K = kernel(X, Y, nodal=True)
    # sqrt(2 - 2K), with negative arguments clamped to 0 before the square
    # root; presumably the kernel-induced distance for unit-norm features
    # -- TODO confirm that `Normalization` guarantees k(x, x) = 1.
    d = np.sqrt(np.maximum(0, 2 - 2 * K))
'CC', # ethane 'CCO', # acetic acid 'CCN', # ethylamine 'C=C', # ethene 'CC=C', # propene 'CC=CC', # 2-n-butene ] # convert to molecular graphs # nodes(atoms) has 'aromatic', 'charge', 'element', 'hcount' attributes # edges(bonds) has the 'order' attribute graphs = [Graph.from_smiles(smi) for smi in smiles_list] # define node and edge kernelets knode = TensorProduct(aromatic=KroneckerDelta(0.8), charge=SquareExponential(1.0), element=KroneckerDelta(0.5), hcount=SquareExponential(1.0)) kedge = TensorProduct(order=KroneckerDelta(0.5)) # compose the marginalized graph kernel and compute pairwise similarity kernel = MarginalizedGraphKernel(knode, kedge, q=0.05) R = kernel(graphs) # normalize the similarity matrix and then print d = np.diag(R)**-0.5 K = np.diag(d).dot(R).dot(np.diag(d)) print(pd.DataFrame(K, columns=smiles_list, index=smiles_list))
Graph.from_networkx(unlabeled_graph2) ]), 'knode': Constant(1.0), 'kedge': Constant(1.0), 'q': [0.01, 0.05, 0.1, 0.5] }, 'labeled': { 'graphs': Graph.unify_datatype([ Graph.from_networkx(labeled_graph1), Graph.from_networkx(labeled_graph2) ]), 'knode': TensorProduct(hybridization=KroneckerDelta(0.3), charge=SquareExponential(1.) + 0.01).normalized, 'kedge': Additive(order=KroneckerDelta(0.3), length=SquareExponential(0.05)).normalized, 'q': [0.01, 0.05, 0.1, 0.5] }, 'weighted': { 'graphs': Graph.unify_datatype([ Graph.from_networkx(weighted_graph1, weight='w'), Graph.from_networkx(weighted_graph2, weight='w') ]), 'knode': Additive(hybridization=KroneckerDelta(0.3), charge=SquareExponential(1.0)).normalized, 'kedge':