def compute_kernel_distance_matrices(slice_subgraphs, kernel_params):
    """Build one distance matrix per requested graph kernel.

    Each entry of ``kernel_params`` is a mapping with a ``"name"`` key
    (``"wlst"``, ``"eh"`` or ``"vh"``) and a ``"params"`` mapping holding
    ``"label"`` (and additionally ``"n_iters"`` for ``"wlst"``).

    Returns:
        A dict mapping ``(kernel, label, n_iters)`` for the
        Weisfeiler-Lehman subtree kernel, or ``(kernel, label)`` for the
        edge- and vertex-histogram kernels, to the result of
        ``convert_to_distance_matrix`` applied to the kernel matrix.

    Raises:
        NotImplementedError: if a requested kernel name is not one of the
            three supported ones.
    """
    # Relabeled graphs, looked up below by (kernel name, label) pairs.
    relabeled_by_pair = get_relabeled_graphs(slice_subgraphs, kernel_params)

    distance_matrices = {}
    for request in kernel_params:
        kernel = request["name"]
        params = request["params"]

        if kernel == "wlst":
            # Weisfeiler-Lehman subtree kernel; the iteration count becomes
            # part of the result key.
            n_iters = params["n_iters"]
            pair = (kernel, params["label"])
            graphs = relabeled_by_pair[pair]
            gram = gk.CalculateWLKernel(graphs, n_iters)
            result_key = pair + (n_iters,)
        elif kernel == "eh":
            # Edge-histogram kernel
            pair = (kernel, params["label"])
            graphs = relabeled_by_pair[pair]
            gram = gk.CalculateEdgeHistKernel(graphs)
            result_key = pair
        elif kernel == "vh":
            # Vertex-histogram kernel
            pair = (kernel, params["label"])
            graphs = relabeled_by_pair[pair]
            gram = gk.CalculateVertexHistKernel(graphs)
            result_key = pair
        else:
            raise NotImplementedError(
                "Kernel: {} not supported".format(kernel))

        # Shared tail: convert the kernel matrix and file it under its key.
        distance_matrices[result_key] = convert_to_distance_matrix(gram)

    return distance_matrices
Exemplo n.º 2
0
def compute_all_kernels(graphs):
    """Evaluate a fixed battery of graph kernels on ``graphs``.

    Returns:
        A 16-tuple with the outputs of, in order: edge histogram, vertex
        histogram, vertex-edge histogram, vertex-vertex-edge histogram,
        edge histogram (Gauss), vertex histogram (Gauss), vertex-edge
        histogram (Gauss), geometric random walk, k-step random walk
        (called with ``[1.0]``), Weisfeiler-Lehman, connected graphlet
        (sizes 3, 4, 5), graphlet (sizes 3, 4) and shortest path.
    """
    # Histogram kernels
    edge_hist = gk.CalculateEdgeHistKernel(graphs)
    vertex_hist = gk.CalculateVertexHistKernel(graphs)
    vertex_edge_hist = gk.CalculateVertexEdgeHistKernel(graphs)
    vertex_vertex_edge_hist = gk.CalculateVertexVertexEdgeHistKernel(graphs)

    # Gaussian variants of the histogram kernels
    edge_hist_gauss = gk.CalculateEdgeHistGaussKernel(graphs)
    vertex_hist_gauss = gk.CalculateVertexHistGaussKernel(graphs)
    vertex_edge_hist_gauss = gk.CalculateVertexEdgeHistGaussKernel(graphs)

    # Random-walk kernels (the exponential random walk kernel was disabled
    # in the original listing and stays excluded here)
    geometric_rw = gk.CalculateGeometricRandomWalkKernel(graphs)
    k_step_rw = gk.CalculateKStepRandomWalkKernel(graphs, [1.0])

    # Weisfeiler-Lehman kernel
    wl = gk.CalculateWLKernel(graphs)

    # Graphlet kernels
    connected_graphlet_3 = gk.CalculateConnectedGraphletKernel(graphs, 3)
    connected_graphlet_4 = gk.CalculateConnectedGraphletKernel(graphs, 4)
    connected_graphlet_5 = gk.CalculateConnectedGraphletKernel(graphs, 5)
    graphlet_3 = gk.CalculateGraphletKernel(graphs, 3)
    graphlet_4 = gk.CalculateGraphletKernel(graphs, 4)

    # Shortest-path kernel
    shortest_path = gk.CalculateShortestPathKernel(graphs)

    return (
        edge_hist,
        vertex_hist,
        vertex_edge_hist,
        vertex_vertex_edge_hist,
        edge_hist_gauss,
        vertex_hist_gauss,
        vertex_edge_hist_gauss,
        geometric_rw,
        k_step_rw,
        wl,
        connected_graphlet_3,
        connected_graphlet_4,
        connected_graphlet_5,
        graphlet_3,
        graphlet_4,
        shortest_path,
    )
 def calculate_kernel_matrices(self, igraph_list):
     """Compute the enabled graph kernels for ``igraph_list``.

     Returns:
         A list of eight kernel outputs, in this order: edge histogram,
         vertex histogram, vertex-edge histogram, vertex-vertex-edge
         histogram, vertex histogram (Gauss), edge histogram (Gauss),
         vertex-edge histogram (Gauss) and Weisfeiler-Lehman.
     """
     # The third entry previously repeated the vertex-vertex-edge histogram
     # kernel, so the vertex-edge histogram kernel was never computed and
     # positions 3 and 4 held the same matrix.
     #
     # The random-walk, shortest-path and graphlet kernels were disabled in
     # the original code and remain excluded.
     return [
         gk.CalculateEdgeHistKernel(igraph_list),
         gk.CalculateVertexHistKernel(igraph_list),
         gk.CalculateVertexEdgeHistKernel(igraph_list),
         gk.CalculateVertexVertexEdgeHistKernel(igraph_list),
         gk.CalculateVertexHistGaussKernel(igraph_list),
         gk.CalculateEdgeHistGaussKernel(igraph_list),
         gk.CalculateVertexEdgeHistGaussKernel(igraph_list),
         gk.CalculateWLKernel(igraph_list),
     ]
Exemplo n.º 4
0
def evaluate_vertex_histogram_kernel(graphs,
                                     graph_labels,
                                     label_requests,
                                     n_folds=10,
                                     seed=None):
    """Cross-validate an SVM regressor on the vertex-histogram kernel.

    For every entry of ``label_requests`` the graphs are relabeled with the
    requested vertex label, one vertex-histogram kernel matrix is computed
    over all graphs, and an ``svm.SVR`` with a precomputed kernel is trained
    and evaluated in a shuffled ``n_folds``-fold cross-validation.

    Args:
        graphs: Sequence of graphs; each one is passed to
            ``relabel_for_wlst_kernel``.
        graph_labels: Regression target for each graph, in the same order
            as ``graphs``.
        label_requests: Iterable of mappings; the vertex label to use is
            read from the ``"vertex"`` key of each.
        n_folds: Number of cross-validation folds.
        seed: ``random_state`` handed to ``KFold``.

    Returns:
        A dict mapping each requested vertex label to
        ``{"true": [...], "pred": [...]}``: the targets and predictions of
        the test graphs, concatenated over all folds in fold order.

    Raises:
        AssertionError: if ``graphs`` and ``graph_labels`` differ in length.
    """

    # Print progress
    print("Evaluating Vertex-Histogram Kernel")
    print()

    # Just a few sanity checks
    assert (len(graphs) == len(graph_labels))

    # Mapping from vertex labeling to results
    vertex_labeling_to_results = {}

    # Sweep over vertex label options
    for lr in label_requests:

        # Unpack
        requested_vertex_label = lr["vertex"]

        # Print progress
        print("Vertex Label: {}".format(requested_vertex_label))
        print()

        # Convert the base graphs into representations with the requested
        # vertex label.
        relabeled_graphs = [
            relabel_for_wlst_kernel(g, label=requested_vertex_label)
            for g in graphs
        ]

        # Targets and predictions accumulated over all folds
        true_nd_vals = []
        pred_nd_vals = []

        # Compute the full kernel matrix once; every fold slices it.
        k_mat = np.asarray(gk.CalculateVertexHistKernel(relabeled_graphs),
                           dtype=float)

        # Define training and testing sets
        graph_indices = list(range(len(graph_labels)))
        kf = KFold(n_splits=n_folds, random_state=seed, shuffle=True)
        for split_idx, (train_indices,
                        test_indices) in enumerate(kf.split(graph_indices)):

            # Print progress
            print("Running split {}/{}".format(split_idx + 1, n_folds))

            # Targets of the training and testing graphs
            y_train = [graph_labels[i] for i in train_indices]
            y_test = [graph_labels[i] for i in test_indices]

            # Kernel sub-matrices: rows are the graphs being fitted or
            # predicted, columns are always the training graphs.
            k_train = k_mat[train_indices][:, train_indices]
            k_test = k_mat[test_indices][:, train_indices]

            # Train SVM regressor using the precomputed kernel matrix.
            # ``kernel`` must be passed by keyword: SVR's constructor
            # parameters are keyword-only in current scikit-learn.
            model = svm.SVR(kernel="precomputed").fit(k_train, y_train)

            # Evaluate model against the kernel rows of the test graphs
            y_pred = model.predict(k_test)

            # Print progress
            print("Done with split {}/{}".format(split_idx + 1, n_folds))
            print()

            # Aggregate results for this fold
            true_nd_vals.extend(y_test)
            pred_nd_vals.extend(y_pred)

        # Aggregate results for this vertex labeling
        vertex_labeling_to_results[requested_vertex_label] = {
            "true": true_nd_vals,
            "pred": pred_nd_vals
        }

    return vertex_labeling_to_results
def main(args, logger):
    """Run repeated 10-fold CV of an SVM on the vertex-histogram kernel.

    Reads the graphs named in ``args.FILES`` and the labels from
    ``args.labels``, computes the vertex-histogram Gram matrix once, and
    performs 10 repetitions of stratified 10-fold cross-validation with a
    precomputed-kernel ``SVC``. One row per fold plus a final summary row
    is written to ``args.result_file`` as CSV; if that file already exists
    the rows are appended without a header.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``FILES``, ``labels``, ``balanced`` and ``result_file``.
        logger: Logger used for progress and result messages.

    Raises:
        AssertionError: if the number of graphs and labels differ.
    """

    graphs = [ig.read(filename) for filename in args.FILES]
    labels = read_labels(args.labels)

    # Set the label to be uniform over all graphs in case no labels are
    # available. This essentially changes our iteration to degree-based
    # checks.
    for graph in graphs:
        if 'label' not in graph.vs.attributes():
            graph.vs['label'] = [0] * len(graph.vs)

    logger.info('Read {} graphs and {} labels'.format(len(graphs),
                                                      len(labels)))

    assert len(graphs) == len(labels)

    # Calculate graph kernel
    gram_matrix = gk.CalculateVertexHistKernel(graphs)

    y = LabelEncoder().fit_transform(labels)
    np.random.seed(42)

    mean_accuracies = []

    # Template row shared by every result record.
    params = ['balanced']
    cv_results = []
    entry = {param: vars(args)[param] for param in params}
    # NOTE(review): takes the second '/'-separated component of the first
    # file's directory as the dataset name - confirm the expected layout.
    entry['dataset'] = dirname(args.FILES[0]).split('/')[1]
    entry['baseline'] = 'vertex hist kernel'

    for i in range(10):
        # Contains accuracy scores for each cross validation step; the
        # means of this list will be used later on.
        accuracy_scores = []
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=i)
        for n, (train_index, test_index) in enumerate(cv.split(graphs, y)):

            entry_fold = copy.copy(entry)

            pipeline = Pipeline(
                [('clf',
                  SVC(class_weight='balanced' if args.balanced else None,
                      random_state=42,
                      kernel='precomputed'))], )

            grid_params = {'clf__C': [1e1]}

            # Rows are the graphs being fitted or predicted; columns are
            # always the training graphs.
            X_train = gram_matrix[train_index][:, train_index]
            X_test = gram_matrix[test_index][:, train_index]
            y_train, y_test = y[train_index], y[test_index]

            kgscv = KernelGridSearchCV(pipeline,
                                       param_grid=grid_params,
                                       cv=cv,
                                       random_state=42)
            kgscv.fit(X_train, y_train)
            best_params = kgscv._best_params
            clf = kgscv._best_estimator
            clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)

            acc = accuracy_score(y_test, y_pred)
            accuracy_scores.append(acc)

            for param, param_val in best_params.items():
                entry_fold[param] = param_val
                # Keep the column present (but blank) in the summary row.
                entry[param] = ''
            entry_fold['fold'] = n + 1
            entry_fold['it'] = i
            entry_fold['acc'] = acc * 100
            entry_fold['std'] = 0.0
            cv_results.append(entry_fold)

            logger.info('Best classifier for this fold:{}'.format(
                best_params))

        mean_accuracies.append(np.mean(accuracy_scores))
        logger.info(
            '  - Mean 10-fold accuracy: {:2.2f} [running mean over all folds: {:2.2f}]'
            .format(mean_accuracies[-1] * 100,
                    np.mean(mean_accuracies) * 100))

    # Summary row over all repetitions.
    overall_mean = np.mean(mean_accuracies) * 100
    overall_std = np.std(mean_accuracies) * 100
    entry['fold'] = 'all'
    entry['it'] = 'all'
    entry['acc'] = overall_mean
    entry['std'] = overall_std
    cv_results.append(entry)
    logger.info('Accuracy: {:2.2f} +- {:2.2f}'.format(overall_mean,
                                                      overall_std))

    # Let pandas open the file itself: it then controls newline handling,
    # which a plain text-mode handle opened without newline='' does not.
    results = pd.DataFrame(cv_results)
    if exists(args.result_file):
        results.to_csv(args.result_file, mode='a', index=False, header=False)
    else:
        results.to_csv(args.result_file, index=False)
    # NOTE(review): this fragment looks like the tail of a separate example
    # whose `def` line is missing from this file; `DataSet` and
    # `GetGraphList` are not defined in the visible code - confirm origin.
    #
    # Build one graph list per class of data.
    catagrateryGraphList = []
    for i in range(len(DataSet)):  # 10 classes of data
        pointsGraphList = []
        pointsGraphList = GetGraphList(DataSet[i])  # get the graph list for a single class
        catagrateryGraphList.append(pointsGraphList)
    # NOTE(review): Python 2 print statement; a syntax error under Python 3.
    print catagrateryGraphList

    # Flatten the per-class lists into one list of graphs.
    sum_Graphlist = []
    for i in range(len(catagrateryGraphList)):
        for j in range(len(catagrateryGraphList[i])):
            sum_Graphlist.append(catagrateryGraphList[i][j])

    mutag_list = np.array(sum_Graphlist)
    ### ALL KERNELS COMPUTE
    # The results K1..K13 are only bound to local names here; nothing in the
    # visible code uses or returns them.
    K1 = gk.CalculateEdgeHistKernel(mutag_list)
    K2 = gk.CalculateVertexHistKernel(mutag_list)
    K3 = gk.CalculateVertexEdgeHistKernel(mutag_list)
    K4 = gk.CalculateVertexVertexEdgeHistKernel(mutag_list)
    K5 = gk.CalculateEdgeHistGaussKernel(mutag_list)
    K6 = gk.CalculateVertexHistGaussKernel(mutag_list)
    K7 = gk.CalculateVertexEdgeHistGaussKernel(mutag_list)
    # K8 = gk.CalculateGeometricRandomWalkKernel(mutag_list)
    # K9 = gk.CalculateExponentialRandomWalkKernel(mutag_list)
    K10 = gk.CalculateKStepRandomWalkKernel(mutag_list)
    K11 = gk.CalculateWLKernel(mutag_list)
    K12 = gk.CalculateConnectedGraphletKernel(mutag_list, 4)
    K13 = gk.CalculateGraphletKernel(mutag_list, 4)
    # K14 = gk.CalculateShortestPathKernel(mutag_list)
    # Obtain 10 classes / 50 kinds of graphs and compute their similarity
    pass