Example #1
def test_weisfeiler_lehman():
    """Picklability test for the Weisfeiler Lehman kernel."""
    train, _ = generate_dataset(n_graphs=100,
                                r_vertices=(10, 20),
                                r_connectivity=(0.4, 0.8),
                                r_weight_edges=(1, 1),
                                n_graphs_test=40,
                                random_state=rs,
                                features=('nl', 3))

    wl_st_kernel = WeisfeilerLehman(verbose=verbose,
                                    normalize=normalize,
                                    base_graph_kernel=VertexHistogram)
    wl_st_kernel.fit(train)
    assert is_picklable(wl_st_kernel)
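
For context, `is_picklable` is not shown in the snippet; a minimal sketch of such a helper, under the assumption that it simply round-trips the object through pickle:

import pickle

def is_picklable(obj):
    """Hypothetical helper: True if obj survives a pickle round-trip."""
    try:
        pickle.loads(pickle.dumps(obj))
        return True
    except Exception:
        return False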
Example #2
    def similarity(self, g1adj, g2adj):
        # Indices with positive weight in each graph's dominant-set vector
        ds1list = [i for i, x in enumerate(dominant_set(g1adj)) if x > 0]
        ds2list = [i for i, x in enumerate(dominant_set(g2adj)) if x > 0]
        # Adjacency submatrices induced by the dominant sets
        ds1adj = [[g1adj[i][j] for j in ds1list] for i in ds1list]
        ds2adj = [[g2adj[i][j] for j in ds2list] for i in ds2list]

        # Convert each submatrix into the three-element graph format
        # that the grakel kernels expect
        a, b, c = from_adj_to_set(ds1adj)
        d, e, f = from_adj_to_set(ds2adj)

        # tmp = ShortestPath(normalize=True).fit_transform([[a, b, c], [d, e, f]])[0][1]
        # The off-diagonal entry of the 2x2 kernel matrix is the similarity
        # between the two dominant-set subgraphs
        tmp = WeisfeilerLehman(n_iter=self.n_iter,
                               normalize=True).fit_transform([[a, b, c],
                                                              [d, e, f]])[0][1]

        return tmp
Example #3
def test_weisfeiler_lehman():
    """Eigenvalue test for the Weisfeiler Lehman kernel."""
    wl_st_kernel = WeisfeilerLehman(verbose=verbose, normalize=normalize,
                                    base_graph_kernel=VertexHistogram)
    if verbose:
        print_kernel("WL/Subtree", wl_st_kernel, dataset_tr, dataset_te)
    else:
        positive_eig(wl_st_kernel, dataset)
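
The `positive_eig` helper is likewise not shown; a minimal sketch under the assumption that it verifies the kernel matrix is positive semi-definite (the smallest eigenvalue should be non-negative up to numerical tolerance):

import numpy as np

def positive_eig(kernel, dataset):
    """Hypothetical helper: assert the kernel matrix is positive semi-definite."""
    K = kernel.fit_transform(dataset)
    min_eig = np.linalg.eigvalsh(K).min()  # eigvalsh: K is symmetric
    assert min_eig > -1e-8, min_eig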
Example #4
def gk_function(algorithm, graphs, par):
    """Run the chosen kernel on the graphs for one point of the parameter
    grid. Dispatching by name is needed because the kernels take different
    numbers of parameters."""
    print("parameters", par)
    if algorithm == "SP_gkl":
        gk = ShortestPath(with_labels=True).fit_transform(graphs)
    elif algorithm == "EH_gkl":
        gk = EdgeHistogram().fit_transform(graphs)
    elif algorithm == "WL_gkl":
        gk = WeisfeilerLehman(n_iter=par).fit_transform(graphs)
    elif algorithm == "RW_gkl":
        lam, p = par
        gk = RandomWalkLabeled(lamda=lam, p=p).fit_transform(graphs)
    elif algorithm == "CSM_gkl":
        c, k = par
        gk = SubgraphMatching(
                k=k,
                # the lambda captures the current value of c for this grid point
                ke=lambda p1, p2: ke_kernel(p1, p2, c),
                kv=kv_kernel
                ).fit_transform(graphs)
    return gk
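
A hedged sketch of how this dispatcher might be driven over a parameter grid; the `graphs` list is assumed to come from the surrounding script, and the grid values below are purely illustrative:

# Illustrative parameter grids keyed by the algorithm names used above
param_grid = {
    "WL_gkl": [1, 2, 3, 4, 5],          # number of WL iterations
    "RW_gkl": [(0.01, 3), (0.001, 5)],  # (lamda, p) pairs for the random walk
}

for algorithm, grid in param_grid.items():
    for par in grid:
        K = gk_function(algorithm, graphs, par)  # precomputed kernel matrix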
Example #5
    def __init__(self,
                 kernel,
                 detector,
                 labeled=True,
                 WL_iter=5,
                 PK_bin_width=1,
                 LOF_n_neighbors=20,
                 LOF_n_leaf=30,
                 **kwargs):
        kernels = {
            'WL':
            WeisfeilerLehman(n_iter=WL_iter,
                             normalize=True,
                             base_graph_kernel=VertexHistogram),
            'PK':
            Propagation(t_max=WL_iter, w=PK_bin_width, normalize=True)
            if labeled else PropagationAttr(
                t_max=WL_iter, w=PK_bin_width, normalize=True),
        }
        detectors = {
            'OCSVM':
            OneClassSVM(kernel='precomputed', nu=0.1),
            'LOF':
            LocalOutlierFactor(n_neighbors=LOF_n_neighbors,
                               leaf_size=LOF_n_leaf,
                               metric='precomputed',
                               contamination=0.1),
            # 'IF': the current similarity-forest implementation also has problems
        }

        assert kernel in kernels
        assert detector in detectors

        self.kernel = kernels[kernel]
        self.detector = detectors[detector]
        self.kernel_name = kernel
        self.detector_name = detector
        self.labeled = labeled
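
A hedged sketch of how such a wrapper would typically be used downstream; the class name `GraphKernelDetector` and the `train_graphs` list are placeholders, not names from the snippet:

# Hypothetical usage: compute the precomputed kernel matrix, then fit the detector
model = GraphKernelDetector(kernel='WL', detector='OCSVM')
K_train = model.kernel.fit_transform(train_graphs)  # grakel kernel matrix
model.detector.fit(K_train)                         # OCSVM with kernel='precomputed'
scores = model.detector.decision_function(K_train)  # higher = more "normal"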
Example #6
def test_weisfeiler_lehman():
    """Random input test for the Weisfeiler Lehman kernel."""
    train, test = generate_dataset(n_graphs=100,
                                   r_vertices=(10, 20),
                                   r_connectivity=(0.4, 0.8),
                                   r_weight_edges=(1, 1),
                                   n_graphs_test=40,
                                   random_state=rs,
                                   features=('nl', 3))

    wl_st_kernel = WeisfeilerLehman(verbose=verbose,
                                    normalize=normalize,
                                    base_graph_kernel=VertexHistogram)

    try:
        wl_st_kernel.fit_transform(train)
        wl_st_kernel.transform(test)
        assert True
    except Exception as exception:
        assert False, exception
Example #7
            results = results.append(pd.DataFrame(data), ignore_index=True)

    return results


GRAKEL_KERNELS = {
    "GK-SPath":
    lambda: ShortestPath(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-EHist":
    lambda: EdgeHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-VHist":
    lambda: VertexHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-GSamp":
    lambda: GraphletSampling(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-1":
    lambda: WeisfeilerLehman(
        n_iter=1, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-2":
    lambda: WeisfeilerLehman(
        n_iter=2, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-3":
    lambda: WeisfeilerLehman(
        n_iter=3, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-4":
    lambda: WeisfeilerLehman(
        n_iter=4, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-5":
    lambda: WeisfeilerLehman(
        n_iter=5, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-NH":
    lambda: NeighborhoodHash(n_jobs=N_JOBS,
                             normalize=NORMALIZING_GRAPH_KERNELS),
Example #8
Script makes use of :class:`grakel.WeisfeilerLehman`, :class:`grakel.VertexHistogram`
"""
from __future__ import print_function
print(__doc__)

import numpy as np

from grakel.datasets import fetch_dataset
from grakel.utils import cross_validate_Kfold_SVM
from grakel.kernels import WeisfeilerLehman, VertexHistogram

# Loads the MUTAG dataset
MUTAG = fetch_dataset("MUTAG", verbose=False)
G, y = MUTAG.data, MUTAG.target

# Generates a list of kernel matrices using the Weisfeiler-Lehman subtree kernel
# Each kernel matrix is generated by setting the number of iterations of the
# kernel to a different value (from 1 to 6)
Ks = list()
for i in range(1, 7):
    gk = WeisfeilerLehman(n_iter=i,
                          base_graph_kernel=VertexHistogram,
                          normalize=True)
    K = gk.fit_transform(G)
    Ks.append(K)

# Performs 10-fold cross-validation over different kernels and the parameter C of
# SVM and repeats the experiment 10 times with different folds
accs = cross_validate_Kfold_SVM([Ks], y, n_iter=10)
print("Average accuracy:", str(round(np.mean(accs[0]) * 100, 2)) + "%")
print("Standard deviation:", str(round(np.std(accs[0]) * 100, 2)) + "%")
Example #9
clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)  # Train SVM
y_pred = clf.predict(K_test)  # Predict

print("Classification accuracy using ShortestPath",
      accuracy_score(y_test, y_pred))

gk = PyramidMatch(with_labels=True)

K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)  # Train SVM
y_pred = clf.predict(K_test)  # Predict

print("Classification accuracy using PyramidMatch",
      accuracy_score(y_test, y_pred))

gk = WeisfeilerLehman(base_graph_kernel=VertexHistogram)

K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)  # Train SVM
y_pred = clf.predict(K_test)  # Predict

print("Classification accuracy using WeisfeilerLehman",
      accuracy_score(y_test, y_pred))
Example #10
    print('>>> 10-fold cross-validation --- fold %d' % curr_fold)
    kf2 = StratifiedKFold(n_splits=9, shuffle=False)

    train_val_data = [dataset.data[i] for i in train_val_idxs]
    train_val_targets = [dataset.target[i] for i in train_val_idxs]

    for train_idxs, _ in kf2.split(train_val_data, train_val_targets):
        print(len(train_idxs), len(dataset.data))
        train_dataset_data = [train_val_data[i] for i in train_idxs]
        train_dataset_target = [train_val_targets[i] for i in train_idxs]
        break

    test_data = [dataset.data[i] for i in test_idxs]
    test_targets = [dataset.target[i] for i in test_idxs]

    # Uses the Weisfeiler-Lehman subtree kernel to generate the kernel matrices
    gk = WeisfeilerLehman(n_iter=4,
                          base_graph_kernel=VertexHistogram,
                          normalize=True)
    K_train = gk.fit_transform(train_dataset_data)
    K_test = gk.transform(test_data)

    # Uses the SVM classifier to perform classification
    clf = SVC(kernel="precomputed")
    clf.fit(K_train, train_dataset_target)
    y_pred = clf.predict(K_test)

    # Computes and prints the classification accuracy
    acc = accuracy_score(test_targets, y_pred)
    print("Accuracy:", str(round(acc * 100, 2)) + "%")
Example #11
def segk(nodes, edgelist, radius, dim, kernel):
    n = len(nodes)

    if kernel == 'shortest_path':
        gk = [
            ShortestPath(normalize=True, with_labels=True)
            for i in range(radius)
        ]
    elif kernel == 'weisfeiler_lehman':
        gk = [
            WeisfeilerLehman(n_iter=4,
                             normalize=True,
                             base_graph_kernel=VertexHistogram)
            for i in range(radius)
        ]
    else:
        raise ValueError('Use a valid kernel!!')

    idx = np.random.permutation(n)
    sampled_nodes = [nodes[idx[i]] for i in range(dim)]
    remaining_nodes = [nodes[idx[i]] for i in range(dim, len(nodes))]

    egonet_edges, egonet_node_labels = extract_egonets(edgelist, radius)

    E = np.zeros((n, dim))

    K = np.zeros((dim, dim))
    K_prev = np.ones((dim, dim))
    for i in range(1, radius + 1):
        Gs = list()
        for node in sampled_nodes:
            node_labels = {
                v: egonet_node_labels[node][v]
                for v in egonet_node_labels[node]
                if egonet_node_labels[node][v] <= i
            }
            edges = list()
            for edge in egonet_edges[node]:
                if edge[0] in node_labels and edge[1] in node_labels:
                    edges.append((edge[0], edge[1]))
                    edges.append((edge[1], edge[0]))
            Gs.append(Graph(edges, node_labels=node_labels))

        K_i = gk[i - 1].fit_transform(Gs)
        K_i = np.multiply(K_prev, K_i)
        K += K_i
        K_prev = K_i

    # SVD-based normalization: Norm ~ K^(-1/2), so the rows of K @ Norm.T
    # give embeddings whose inner products approximately reproduce K
    U, S, V = svd(K)
    S = np.maximum(S, 1e-12)  # guard against (near-)zero singular values
    Norm = np.dot(U * 1. / np.sqrt(S), V)
    E[idx[:dim], :] = np.dot(K, Norm.T)

    K = np.zeros((n - dim, dim))
    K_prev = np.ones((n - dim, dim))
    for i in range(1, radius + 1):
        Gs = list()
        for node in remaining_nodes:
            node_labels = {
                v: egonet_node_labels[node][v]
                for v in egonet_node_labels[node]
                if egonet_node_labels[node][v] <= i
            }
            edges = list()
            for edge in egonet_edges[node]:
                if edge[0] in node_labels and edge[1] in node_labels:
                    edges.append((edge[0], edge[1]))
                    edges.append((edge[1], edge[0]))
            Gs.append(Graph(edges, node_labels=node_labels))

        K_i = gk[i - 1].transform(Gs)
        K_i = np.multiply(K_prev, K_i)
        K += K_i
        K_prev = K_i

    E[idx[dim:], :] = np.dot(K, Norm.T)

    return E
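
A hedged sketch of a call; `nodes`, `edgelist` and the `extract_egonets` helper are assumed to come from the surrounding module, and the radius/dimension values are illustrative:

# Hypothetical call: embed every node via its 2-hop egonet into 128 dimensions
E = segk(nodes, edgelist, radius=2, dim=128, kernel='weisfeiler_lehman')
print(E.shape)  # (len(nodes), 128), one embedding row per node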
Example #12
split = 10
f = open('Accuracy_mean_origin.txt', 'a')
temp_accs = [None] * 6
for iter_number in [2]:
    f.write("origin " + str(split) + "-fold cross-validation\n")

    for key, value in test_dataset.items():
        dataset = fetch_dataset(value, verbose=False)
        G, y = dataset.data, dataset.target
        temp_accs[int(key) - 1] = []
        for i in range(split):
            G_train, G_test, y_train, y_test = K_Flod_spilt(
                split, i, np.array(G), np.array(y),
                random_state_list[int(key) - 1])
            gk = WeisfeilerLehman(n_iter=iter_number,
                                  base_graph_kernel=VertexHistogram,
                                  normalize=True)
            K_train = gk.fit_transform(G_train)
            K_test = gk.transform(G_test)

            # Uses the SVM classifier to perform classification
            # clf = RandomForestClassifier(n_estimators=35, random_state=39)
            # clf = AdaBoostClassifier(n_estimators=35, random_state=44)
            # SVC(kernel="precomputed")
            clf = SVC(kernel="poly")
            clf.fit(K_train, y_train)
            y_pred = clf.predict(K_test)

            # Computes and prints the classification accuracy
            acc = accuracy_score(y_test, y_pred)
            temp_accs[int(key) - 1].append(acc)
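
`K_Flod_spilt` (sic) is not defined in the snippet; a plausible sketch under the assumption that it returns the train/test arrays of fold `i` out of `split` stratified folds:

from sklearn.model_selection import StratifiedKFold

def K_Flod_spilt(split, i, G, y, random_state):
    """Hypothetical reconstruction: return the i-th of `split` stratified folds."""
    kf = StratifiedKFold(n_splits=split, shuffle=True, random_state=random_state)
    for fold, (train_idx, test_idx) in enumerate(kf.split(G, y)):
        if fold == i:
            return G[train_idx], G[test_idx], y[train_idx], y[test_idx]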
Example #13
    tokens_to_ids = dict()
    for token in sent:
        if token not in tokens_to_ids:
            tokens_to_ids[token] = len(tokens_to_ids)
            node_labels[tokens_to_ids[token]] = token

    edges = list()
    for i in range(len(sent) - 1):
        edges.append((tokens_to_ids[sent[i]], tokens_to_ids[sent[i + 1]]))

    word_networks.append(Graph(edges, node_labels=node_labels))

query_sent_id = 54
query_sent = [word_networks[query_sent_id]]

# Initialize Weisfeiler-Lehman subtree kernel
gk = WeisfeilerLehman(n_iter=2, normalize=True, base_graph_kernel=VertexHistogram)

print("Computing similarities\n")
t0 = time.time()
gk.fit(query_sent)
K = gk.transform(word_networks)
print("done in %0.3fs\n" % (time.time() - t0))

print("Query sentence")
print("--------------")
print(" ".join(sents[query_sent_id]))
print()
print("Most similar sentence")
print("---------------------")
print(" ".join(sents[np.argsort(K[:, 0])[-2]]))
Example #14
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

from grakel.datasets import fetch_dataset
from grakel.kernels import ShortestPath, WeisfeilerLehman
import sklearn

# Loads the PROTEINS dataset (the variable is still named MUTAG)
MUTAG = fetch_dataset("PROTEINS", verbose=True)
G, y = MUTAG.data, MUTAG.target
print(G,' ',y)

# Splits the dataset into a training and a test set
G_train, G_test, y_train, y_test = train_test_split(G, y, test_size=0.3, random_state=42)

# Uses the Weisfeiler-Lehman kernel to generate the kernel matrices
gk = WeisfeilerLehman()
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

# Uses the SVM classifier to perform classification
clf = SVC(kernel="precomputed")
clf.fit(K_train, y_train)
y_pred = clf.predict(K_test)

# Computes and prints the classification accuracy
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", str(round(acc*100, 2)) + "%")
Example #15
                for score_type, score_field in zip(scoring, score_fields)
            }
            data["method"] = method_id
            data["time"] = graphs[f"timings_{kernel_set}_{level}"].sum()
            results = results.append(pd.DataFrame(data), ignore_index=True)

    return results


GRAKEL_KERNELS = {
    "GK-SPath": lambda: ShortestPath(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-EHist": lambda: EdgeHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-VHist": lambda: VertexHistogram(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-GSamp": lambda: GraphletSampling(normalize=NORMALIZING_GRAPH_KERNELS),
    "GK-WL-1": lambda: WeisfeilerLehman(
        n_iter=1, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-2": lambda: WeisfeilerLehman(
        n_iter=2, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-3": lambda: WeisfeilerLehman(
        n_iter=3, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-4": lambda: WeisfeilerLehman(
        n_iter=4, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-WL-5": lambda: WeisfeilerLehman(
        n_iter=5, n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS
    ),
    "GK-NH": lambda: NeighborhoodHash(
        n_jobs=N_JOBS, normalize=NORMALIZING_GRAPH_KERNELS