Пример #1
0
def test_networkx():
    """Check that ``graph_from_networkx`` reproduces a hand-built DiGraph.

    A small directed graph is described three ways (weighted adjacency
    dictionary, node labels, edge labels), assembled into a
    ``networkx.DiGraph`` and converted back. The converted triple must equal
    the three dictionaries it was built from. The check is skipped silently
    when networkx is not installed.
    """
    try:
        import networkx as nx
    except ImportError:
        # networkx is an optional dependency; nothing to verify without it.
        return

    # Expected adjacency: source -> {target: weight}.
    ga = {
        0: {0: 1., 1: 1., 3: 3.},
        1: {0: 1., 3: 2.},
        2: {0: 2., 1: 3., 3: 1.},
        3: {0: 1.},
    }

    # Expected node labels.
    ganl = {0: 'l1', 1: 'l2', 2: 'l3', 3: 'l4'}

    # Expected edge labels, keyed by (source, target).
    gael = {
        (0, 0): 'el1', (0, 1): 'el2', (0, 3): 'el3',
        (1, 0): 'el4', (1, 3): 'el5', (2, 0): 'el6',
        (2, 1): 'el7', (2, 3): 'el8', (3, 0): 'el9',
    }

    digraph = nx.DiGraph()

    for node, node_label in ganl.items():
        digraph.add_node(node, nl=node_label)

    for (source, target), edge_label in gael.items():
        digraph.add_edge(
            source, target, w=ga[source][target], el=edge_label)

    converted = list(graph_from_networkx([digraph], 'nl', 'el', 'w'))[0]

    # One assertion per component so a failure names the part that differs.
    assert converted[0] == ga
    assert converted[1] == ganl
    assert converted[2] == gael
Пример #2
0
def graph_similarity(G, H, method='random_walk', **kwargs):
    """Return the graph-kernel similarity between two NetworkX graphs.

    Parameters
    ----------
    G, H : nx.Graph
        The two graphs to compare.
    method : str, optional
        Name of the kernel to use. Only ``'random_walk'`` is supported.
    **kwargs
        Forwarded unchanged to the kernel constructor (``RandomWalk``).

    Returns
    -------
    The ``[0, 1]`` entry of the kernel matrix obtained by fitting the kernel
    on ``[G, H]``, i.e. the kernel value for the pair ``(G, H)``.

    Raises
    ------
    ValueError
        If ``method`` is not a supported kernel name.
    """
    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # an unsupported method would then surface later as an unbound ``kernel``.
    if method != 'random_walk':
        raise ValueError(
            "unsupported method {!r}; expected 'random_walk'".format(method))

    kernel = RandomWalk(**kwargs)

    return kernel.fit_transform(graph_from_networkx([G, H]))[0, 1]
Пример #3
0
# Build the synthetic dataset: for each size n in 3..102 add one path graph
# (class 0) and one cycle graph (class 1), keeping Gs and y aligned by index.
# NOTE(review): Gs and y are created before this excerpt -- presumably as
# empty lists; confirm.
for n in range(3, 103):
    Gs.append(nx.path_graph(n))
    y.append(0)
    Gs.append(nx.cycle_graph(n))
    y.append(1)

############## Question 2
# Classify the synthetic graphs using graph kernels

# Split dataset into a training and a test set
# hint: use the train_test_split function of scikit-learn

# test_size=0.1 holds out 10% of the graphs for testing; no random_state is
# passed, so the split differs between runs.
G_train, G_test, y_train, y_test = train_test_split(Gs, y, test_size=0.1)

# Transform NetworkX graphs to objects that can be processed by GraKeL
# (wrapped in list() so the converted graphs are materialised once and can be
# stored under the same names)
G_train = list(graph_from_networkx(G_train))
G_test = list(graph_from_networkx(G_test))

# Use the shortest path kernel to generate the two kernel matrices ("K_train" and "K_test")
# hint: the graphs do not contain node labels. Set the with_labels argument of the shortest path kernel to False

gk = ShortestPath(with_labels=False)

# Fit the kernel on the training graphs only; the test matrix is then
# computed by the already-fitted kernel.
K_train = gk.fit_transform(G_train)
K_test = gk.transform(G_test)

# kernel='precomputed' tells the SVM that its inputs are kernel matrices
# rather than feature vectors.
clf = SVC(kernel='precomputed', C=1)  # Initialize SVM
clf.fit(K_train, y_train)  # Train SVM
y_pred = clf.predict(K_test)  # Predict

# Compute the classification accuracy
Пример #4
0
  def get_all(
    self, output_type="dense", idxs=None, train_idxs=None,
    shuffle=False, name_suffix=""):
    """Return the dataset, or a subset of it, in the requested representation.

    Args:
      output_type: Selects the representation. One of:
        * "dense": result of `ds_utils.to_dense_ds`, batched with
          `self.dense_batch_size`.
        * "grakel": a `(graphs, targets)` tuple where `graphs` is the result
          of `gk.graph_from_networkx`.
        * a callable: treated as a custom kernel and passed to
          `self._compute_gram_matrix`; yields `(gram, targets)`.
        * "wl2", "wl2c", "en", "wl1": batches from the matching
          `self._get_*_batches` method, converted with
          `ds_utils.wl_batches_to_dataset`.
      idxs: Optional indices selecting a subset. `None` selects everything.
        NOTE(review): graphs/targets/gram are indexed with `[idxs]`, so they
        presumably are numpy arrays -- confirm.
      train_idxs: Only used for a callable `output_type` together with
        `idxs`: column indices of the gram matrix. Defaults to `idxs`.
      shuffle: If true, a shuffled copy of `idxs` (or of all graph indices
        when `idxs` is `None`) is used; uses the global numpy RNG.
      name_suffix: Appended to `self.name` to form the dataset name.

    Returns:
      The representation described under `output_type`. `None` is returned
      when the gram matrix or the batches could not be obtained (the
      respective helper returned `None`). Dataset results get a `name`
      attribute set to the dataset name.

    Raises:
      ValueError: If `output_type` is neither callable nor a known name.
    """
    ds_name = self.name + name_suffix

    if shuffle:
      # Always shuffle a fresh array so a caller-supplied index sequence is
      # never reordered in place.
      idxs = np.arange(self.num_graphs) if idxs is None else np.array(idxs)
      np.random.shuffle(idxs)

    if output_type == "dense":
      graphs, targets = self.dataset

      if idxs is not None:
        graphs, targets = graphs[idxs], targets[idxs]

      ds = ds_utils.to_dense_ds(
        graphs, targets, self._dim_node_features, self._num_node_labels)
      ds = ds.batch(self.dense_batch_size)
      ds.name = ds_name
      return ds

    if output_type == "grakel":
      graphs, targets = self.dataset

      if idxs is not None:
        graphs, targets = graphs[idxs], targets[idxs]

      # Prefer real labels; fall back to the constant "label_one" attribute
      # when the dataset is configured for it, otherwise use no labels.
      if self._num_node_labels > 0:
        node_labels_tag = "label"
      elif self.node_one_labels:
        node_labels_tag = "label_one"
      else:
        node_labels_tag = None

      if self._num_edge_labels > 0:
        edge_labels_tag = "label"
      elif self.edge_one_labels:
        edge_labels_tag = "label_one"
      else:
        edge_labels_tag = None

      return gk.graph_from_networkx(
        graphs,
        node_labels_tag=node_labels_tag,
        edge_labels_tag=edge_labels_tag), targets

    # Custom kernel:
    if callable(output_type):
      _, targets = self.dataset
      gram = self._compute_gram_matrix(output_type)

      if gram is None:
        return None

      if idxs is not None:
        # Rows follow idxs; columns follow train_idxs (idxs by default).
        train_idxs = idxs if train_idxs is None else train_idxs
        gram, targets = gram[idxs, :][:, train_idxs], targets[idxs]

      return gram, targets

    # The remaining representations differ only in the batch getter and in
    # the flag handed to the batch-to-dataset conversion.
    if output_type == "wl2":
      get_batches, encode_kwargs = self._get_wl2_batches, {}
    elif output_type == "wl2c":
      get_batches, encode_kwargs = self._get_wl2c_batches, {"compact": True}
    elif output_type == "en":
      get_batches, encode_kwargs = self._get_en_batches, {"en_encode": True}
    elif output_type == "wl1":
      get_batches, encode_kwargs = self._get_wl1_batches, {"wl1_encode": True}
    else:
      # Previously this fell through to an unbound local `ds`.
      raise ValueError(
        "unknown output_type: {!r}".format(output_type))

    batches = get_batches(ds_name, idxs)

    if batches is None:
      return None

    if isinstance(batches, tf.data.Dataset):
      # Already a dataset; only the name has to be attached.
      batches.name = ds_name
      return batches

    ds = ds_utils.wl_batches_to_dataset(*batches, **encode_kwargs)
    ds.name = ds_name

    return ds
Пример #5
0
def _sample_estimated_graph(theta, alpha, n_obs):
    """Draw observations from N(0, theta) and estimate a graph from them.

    Returns a tuple ``(graph, error)`` where ``graph`` is the NetworkX graph
    built from the estimated matrix and ``error`` is the number of strictly
    upper-triangular positions whose zero/non-zero status differs between the
    estimate and ``theta``.
    """
    observations = np.random.multivariate_normal(
        mean=np.zeros(theta.shape[0]), cov=theta, size=n_obs)
    estimate = np.corrcoef(observations.T)

    if alpha != 0:
        # NOTE(review): graphical_lasso returns (covariance, precision);
        # index 0 keeps the covariance estimate, as the original code did --
        # confirm this is the intended matrix.
        estimate = graphical_lasso(estimate, alpha=alpha, max_iter=1000)[0]

    # Drop numerically negligible entries and self-loops.
    estimate[np.abs(estimate) < 1e-5] = 0
    np.fill_diagonal(estimate, 0)

    support_mismatch = np.logical_xor(
        np.abs(np.triu(estimate, 1)) > 0,
        np.abs(np.triu(theta, 1)) > 0)

    # from_numpy_array replaces from_numpy_matrix, removed in networkx 3.0.
    return nx.from_numpy_array(estimate), np.sum(support_mismatch)


def run_samples_lasso(N, B, alpha, theta1, theta2, s1, s2,
                      n_graphs=50, n_obs=100):
    """Run repeated two-sample graph tests on graphs estimated from samples.

    For each of ``N`` repetitions, ``n_graphs`` graphs are estimated for each
    of the two populations (covariances ``theta1`` and ``theta2``), a
    shortest-path kernel matrix is computed over all ``2 * n_graphs`` graphs
    and handed to ``mg.BoostrapMethods`` with the ``MMD_b`` and ``MMD_u``
    statistics.

    Parameters
    ----------
    N : int
        Number of repetitions.
    B : int
        Passed as ``B`` to ``Bootstrap`` (presumably the number of bootstrap
        resamples -- confirm against ``mg``).
    alpha : float
        Graphical-lasso regularisation. ``0`` skips the lasso and uses the
        thresholded sample correlation matrix directly.
    theta1, theta2 : numpy.ndarray
        Square matrices used as covariance of the two normal populations and
        as the reference support for the error counts.
    s1, s2
        Recorded verbatim in the output rows.
    n_graphs : int, optional
        Graphs estimated per population and repetition (default 50).
    n_obs : int, optional
        Observations drawn per estimated graph (default 100).

    Returns
    -------
    pandas.DataFrame
        One row per repetition whose kernel computation succeeded, with
        columns ``p_val`` (the ``MMD_u`` p-value), ``sample``,
        ``mean_error_1``, ``mean_error_2``, ``alpha``, ``s1`` and ``s2``.
        Empty if no repetition succeeded.
    """
    import warnings

    result_frames = []

    for sample in tqdm.tqdm(range(N)):
        Gs1, Gs2 = [], []
        error_1, error_2 = [], []

        # Alternate between the two populations so the random stream is
        # consumed in the same order as before the refactoring.
        for _ in range(n_graphs):
            graph, error = _sample_estimated_graph(theta1, alpha, n_obs)
            Gs1.append(graph)
            error_1.append(error)

            graph, error = _sample_estimated_graph(theta2, alpha, n_obs)
            Gs2.append(graph)
            error_2.append(error)

        Gs = Gs1 + Gs2

        try:
            graph_list = gk.graph_from_networkx(Gs)
            kernel = [{"name": "SP", "with_labels": 0}]
            init_kernel = gk.GraphKernel(kernel=kernel, normalize=0)
            K = init_kernel.fit_transform(graph_list)
        except Exception as exc:
            # Best effort: skip repetitions whose kernel cannot be computed,
            # but let KeyboardInterrupt/SystemExit through and leave a trace.
            warnings.warn(
                "sample {} skipped, kernel computation failed: {!r}".format(
                    sample, exc))
            continue

        MMD_functions = [mg.MMD_b, mg.MMD_u]

        kernel_hypothesis = mg.BoostrapMethods(MMD_functions)
        function_arguments = [dict(n=n_graphs, m=n_graphs),
                              dict(n=n_graphs, m=n_graphs)]
        kernel_hypothesis.Bootstrap(K, function_arguments, B=B)

        result_frames.append(
            pd.DataFrame(
                {
                    'p_val': kernel_hypothesis.p_values['MMD_u'],
                    'sample': sample,
                    'mean_error_1': np.mean(error_1),
                    'mean_error_2': np.mean(error_2),
                    'alpha': alpha,
                    's1': s1,
                    's2': s2
                },
                index=[0]))

    if not result_frames:
        # pd.concat rejects an empty sequence; match the old empty result.
        return pd.DataFrame()

    # A single concat avoids re-copying the accumulated frame every iteration.
    return pd.concat(result_frames, ignore_index=True)