def _wl_spkernel_do(Gn, node_label, edge_label, height):
    """Calculate Weisfeiler-Lehman shortest path kernels between graphs.

    Parameters
    ----------
    Gn : List of NetworkX graph
        List of graphs between which the kernels are calculated.
    node_label : string
        Node attribute used as label.
    edge_label : string
        Edge attribute used as label.
    height : int
        Subtree height.

    Return
    ------
    Kmatrix : Numpy matrix
        Kernel matrix, each element of which is the Weisfeiler-Lehman
        shortest path kernel between 2 graphs.
    """
    from pygraph.utils.utils import getSPGraph

    # init.
    height = int(height)
    Kmatrix = np.zeros((len(Gn), len(Gn)))  # init kernel

    Gn = [getSPGraph(G, edge_weight=edge_label) for G in Gn]  # get shortest path graphs of Gn

    # initial for height = 0: count pairs of edges that share the same
    # non-zero shortest path cost and the same endpoints.
    for i in range(0, len(Gn)):
        for j in range(i, len(Gn)):
            for e1 in Gn[i].edges(data=True):
                for e2 in Gn[j].edges(data=True):
                    if (e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost']
                            and ((e1[0] == e2[0] and e1[1] == e2[1])
                                 or (e1[0] == e2[1] and e1[1] == e2[0]))):
                        Kmatrix[i][j] += 1
            Kmatrix[j][i] = Kmatrix[i][j]

    # iterate each height
    for h in range(1, height + 1):
        all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
        num_of_labels_occured = 0  # number of distinct multiset labels that have occurred so far in all graphs
        for G in Gn:  # for each graph
            set_multisets = {}  # node id -> multiset label of that node
            for node in G.nodes(data=True):
                # Multiset-label determination.
                multiset = [G.nodes[neighbors][node_label] for neighbors in G[node[0]]]
                # sorting each multiset
                multiset.sort()
                multiset = node[1][node_label] + ''.join(multiset)  # concatenate to a string and add the prefix
                set_multisets[node[0]] = multiset

            # label compression
            set_unique = list(set(set_multisets.values()))  # set of unique multiset labels
            # a dictionary mapping original labels to new ones.
            set_compressed = {}
            # if a label occurred before, assign its former compressed label;
            # else assign the number of labels occurred + 1 as the compressed label
            for value in set_unique:
                if value in all_set_compressed:
                    set_compressed.update({value: all_set_compressed[value]})
                else:
                    set_compressed.update({value: str(num_of_labels_occured + 1)})
                    num_of_labels_occured += 1

            all_set_compressed.update(set_compressed)

            # relabel nodes
            for node in G.nodes(data=True):
                node[1][node_label] = set_compressed[set_multisets[node[0]]]

        # calculate subtree kernel with h iterations and add it to the final kernel
        for i in range(0, len(Gn)):
            for j in range(i, len(Gn)):
                for e1 in Gn[i].edges(data=True):
                    for e2 in Gn[j].edges(data=True):
                        if (e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost']
                                and ((e1[0] == e2[0] and e1[1] == e2[1])
                                     or (e1[0] == e2[1] and e1[1] == e2[0]))):
                            Kmatrix[i][j] += 1
                Kmatrix[j][i] = Kmatrix[i][j]

    return Kmatrix

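# --- Usage sketch (illustrative only, not part of the original module). ---
# The hypothetical demo below assumes graphs whose nodes carry a string
# attribute named 'label' and whose edges carry a numeric attribute named
# 'weight'; both attribute names are placeholders. It also assumes the
# pygraph package is installed, since _wl_spkernel_do builds shortest path
# graphs (with a 'cost' edge attribute) via pygraph.utils.utils.getSPGraph.
def _demo_wl_spkernel_usage():
    import networkx as nx

    g1 = nx.Graph()
    g1.add_nodes_from([(0, {'label': 'C'}), (1, {'label': 'O'}), (2, {'label': 'C'})])
    g1.add_edges_from([(0, 1, {'weight': 1}), (1, 2, {'weight': 1})])

    g2 = nx.Graph()
    g2.add_nodes_from([(0, {'label': 'C'}), (1, {'label': 'C'}), (2, {'label': 'O'})])
    g2.add_edges_from([(0, 1, {'weight': 1}), (0, 2, {'weight': 1})])

    # 2 WL iterations on top of the height-0 shortest path comparison.
    Kmatrix = _wl_spkernel_do([g1, g2], node_label='label', edge_label='weight', height=2)
    print(Kmatrix)
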
def wrapper_getSPGraph(weight, itr_item):
    """Wrapper of getSPGraph for parallelization: takes a (graph, index) pair
    and returns the index together with the shortest path graph."""
    from pygraph.utils.utils import getSPGraph

    g = itr_item[0]
    i = itr_item[1]
    return i, getSPGraph(g, edge_weight=weight)
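
# --- Parallelization sketch (illustrative only; the surrounding library may
# drive the wrapper differently). It assumes the usual multiprocessing.Pool /
# functools.partial pattern suggested by the (graph, index) tuples the wrapper
# consumes; the 'weight' edge attribute name is a placeholder, and pygraph
# must be installed for getSPGraph to be importable in the worker processes.
if __name__ == '__main__':
    from functools import partial
    from multiprocessing import Pool

    import networkx as nx

    graphs = []
    for _ in range(4):
        g = nx.path_graph(5)
        nx.set_edge_attributes(g, 1, 'weight')
        graphs.append(g)

    sp_graphs = [None] * len(graphs)
    with Pool(processes=2) as pool:
        do_fun = partial(wrapper_getSPGraph, 'weight')
        # imap_unordered may yield results out of order; the index returned
        # by the wrapper puts each shortest path graph back in its slot.
        for i, sp_g in pool.imap_unordered(do_fun, zip(graphs, range(len(graphs)))):
            sp_graphs[i] = sp_g
    print([sp_g.number_of_edges() for sp_g in sp_graphs])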