Пример #1
0
    def convert_edge_to_directed(self):
        """
        Expand an undirected edge list into its directed form, in place.

        Each column (u, v) of edge_index is a directed edge from u to v and
        does not imply the reverse edge (v, u). To spare users from listing
        both directions, edge_index may be supplied in undirected form, i.e.
        only (u, v), and this method turns it into (u, v) plus (v, u).

        edge_weight and edge_graph_index are handed to the module-level
        `convert_edge_to_directed` function together with edge_index, and the
        values it returns are stored back on this object.

        :return: self, so that calls can be chained.
        """
        edge_props = [self.edge_weight, self.edge_graph_index]
        converted = convert_edge_to_directed(self.edge_index, edge_props)

        # The helper returns (edge_index, [edge_weight, edge_graph_index]).
        self.edge_index, converted_props = converted
        self.edge_weight, self.edge_graph_index = converted_props
        return self
Пример #2
0
    def process(self):
        """
        Build the "cora" graph and its train/valid/test split from raw files.

        Reads the pickled ``ind.cora.*`` parts and ``ind.cora.test.index``
        from ``self.raw_root_path``, reorders the test rows, row-normalizes
        the node features and converts the adjacency lists to a directed
        edge_index without self loops.

        :return: ``(graph, (train_index, valid_index, test_index))`` where
            graph is a ``Graph`` and the three indices are lists of ints.
        """
        dataset_str = "cora"
        part_names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']

        # encoding='latin1' is only passed on Python 3 (presumably because the
        # raw files were pickled under Python 2 -- verify against the data).
        if sys.version_info > (3, 0):
            pickle_kwargs = {"encoding": 'latin1'}
        else:
            pickle_kwargs = {}

        loaded_parts = []
        for part_name in part_names:
            part_file_name = "ind.{}.{}".format(dataset_str, part_name)
            part_path = os.path.join(self.raw_root_path, part_file_name)
            with open(part_path, 'rb') as part_file:
                loaded_parts.append(pickle.load(part_file, **pickle_kwargs))

        _, y, tx, ty, allx, ally, adjacency_lists = loaded_parts

        test_index_path = os.path.join(self.raw_root_path,
                                       "ind.{}.test.index".format(dataset_str))
        with open(test_index_path, "r", encoding="utf-8") as index_file:
            test_idx_reorder = [int(line.strip()) for line in index_file]
        test_idx_range = np.sort(test_idx_reorder)

        # Stack the training and test rows, then move each test row from its
        # sorted position to the position listed in the index file.
        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        num_labeled = len(y)
        train_index = list(range(num_labeled))
        valid_index = list(range(num_labeled, num_labeled + 500))
        test_index = test_idx_range.tolist()

        # Row-normalize the features; rows whose inverse sum is not finite
        # (e.g. all-zero rows) keep a scale factor of 1.0.
        node_features = np.array(features.todense()).astype(np.float32)
        inv_row_sum = 1.0 / np.sum(node_features, axis=-1, keepdims=True)
        inv_row_sum[~np.isfinite(inv_row_sum)] = 1.0
        node_features *= inv_row_sum

        node_labels = np.argmax(labels, axis=-1).astype(np.int32)

        edge_index = np.array(nx.from_dict_of_lists(adjacency_lists).edges).T
        edge_index, _ = remove_self_loop_edge(edge_index)
        edge_index, _ = convert_edge_to_directed(edge_index)

        graph = Graph(x=node_features, edge_index=edge_index, y=node_labels)

        return graph, (train_index, valid_index, test_index)
Пример #3
0
    def process(self):
        """
        Load a node-property-prediction dataset and wrap it in a ``Graph``.

        The dataset named ``self.dataset_name`` is opened through
        ``NodePropPredDataset`` (rooted at ``self.download_root_path``); its
        first entry supplies node features, edge_index and labels.

        :return: ``(graph, (train_index, valid_index, test_index))`` where the
            indices come from ``dataset.get_idx_split()``.
        """
        dataset = NodePropPredDataset(name=self.dataset_name, root=self.download_root_path)

        # raw_graph is a library-agnostic, dict-like graph object.
        raw_graph, raw_label = dataset[0]

        # Add the reverse direction of every edge; no edge properties are passed.
        directed_edge_index, _ = convert_edge_to_directed(raw_graph["edge_index"], None)

        graph = Graph(
            x=raw_graph["node_feat"],
            edge_index=directed_edge_index,
            y=raw_label.flatten().astype(np.int32),
        )

        split_index = dataset.get_idx_split()
        split_indices = tuple(split_index[key] for key in ("train", "valid", "test"))

        return graph, split_indices
Пример #4
0
    def process(self):
        """
        Build one list of ``Graph`` objects per split from the raw files.

        For each of "train", "valid" and "test" this reads
        ``{split}_graph_id.npy``, ``{split}_feats.npy``, ``{split}_labels.npy``
        and ``{split}_graph.json`` from ``self.raw_root_path`` and creates a
        ``Graph`` for every distinct graph id found in the split.

        :return: ``[train_graphs, valid_graphs, test_graphs]``, each a list of
            ``Graph`` objects ordered by ascending graph id.
        """
        splits = ["train", "valid", "test"]
        split_data_dict = {}

        for split in splits:
            graph_id_path = os.path.join(self.raw_root_path,
                                         "{}_graph_id.npy".format(split))
            feats_path = os.path.join(self.raw_root_path,
                                      "{}_feats.npy".format(split))
            labels_path = os.path.join(self.raw_root_path,
                                       "{}_labels.npy".format(split))
            nx_graph_path = os.path.join(self.raw_root_path,
                                         "{}_graph.json".format(split))

            split_graph_ids = np.load(graph_id_path)
            split_features = np.load(feats_path).astype(np.float32)
            split_labels = np.load(labels_path).astype(np.int32)

            with open(nx_graph_path, "r", encoding="utf-8") as graph_file:
                node_link_data = json.load(graph_file)
            nx_graph = nx.DiGraph(nx.json_graph.node_link_graph(node_link_data))

            graphs_in_split = []
            for graph_id in sorted(set(split_graph_ids)):
                # Positions of the nodes that belong to this graph.
                node_indices = np.where(split_graph_ids == graph_id)[0]

                # Shift edge endpoints by the smallest node index of the graph
                # (presumably the nodes of one graph are contiguous, so the
                # shifted ids match the rows selected below -- TODO confirm).
                index_offset = np.min(node_indices)
                sub_edges = nx_graph.subgraph(node_indices).edges
                edge_index = np.array(sub_edges).T - index_offset

                edge_index, _ = convert_edge_to_directed(edge_index,
                                                         edge_weight=None)

                graphs_in_split.append(
                    Graph(x=split_features[node_indices],
                          edge_index=edge_index,
                          y=split_labels[node_indices]))

            split_data_dict[split] = graphs_in_split

        return [split_data_dict[split] for split in splits]
Пример #5
0
# Node Features => (num_nodes, num_features)
x = np.random.randn(5, 20).astype(np.float32)  # 5 nodes, 20 features

# Edge Index => (2, num_edges)
# Each column (u, v) of edge_index represents a directed edge from u to v.
# Note that it does not cover the edge from v to u; you must also provide (v, u) to cover it.
# Listing both directions by hand is inconvenient for users.
# Thus, users may provide edge_index in undirected form and convert it later.
# That is, provide only (u, v) and expand it to (u, v) and (v, u) with `convert_edge_to_directed`.
edge_index = np.array([[0, 0, 1, 3], [1, 2, 2, 1]])

# Edge Weight => (num_edges)
edge_weight = np.array([0.9, 0.8, 0.1, 0.2]).astype(np.float32)

# Make edge_index directed so that it can be used as the input of GCN.
# Edge properties are passed, and returned, as a list.
edge_index, [edge_weight] = convert_edge_to_directed(edge_index, [edge_weight])

# Convert these numpy arrays to TensorFlow tensors and pass them to the GNN functions
outputs = tfg.nn.gcn(
    tf.Variable(x),
    tf.constant(edge_index),
    tf.constant(edge_weight),
    tf.Variable(tf.random.truncated_normal([20, 2]))  # GCN weight: 20 input features -> 2 outputs
)
print(outputs)

# Usually, a graph object is used to manage this information.
# edge_weight is optional; set it to None if you do not need it.
graph = tfg.Graph(x=x, edge_index=edge_index, edge_weight=edge_weight)

# These numpy arrays can easily be converted to tensors with the Graph object API
Пример #6
0
# Edge Index => (2, num_edges)
# Each column (u, v) of edge_index represents a directed edge from u to v.
# Note that it does not cover the edge from v to u; you must also provide (v, u) to cover it.
# Listing both directions by hand is inconvenient for users.
# Thus, users may provide edge_index in undirected form and convert it later.
# That is, provide only (u, v) and expand it to (u, v) and (v, u) with `convert_edge_to_directed`.
edge_index = np.array([
    [0, 0, 1, 3],
    [1, 2, 2, 1]
])

# Edge Weight => (num_edges)
edge_weight = np.array([0.9, 0.8, 0.1, 0.2]).astype(np.float32)

# Make edge_index directed so that it can be used as the input of GCN.
# Here edge_weight is passed by keyword and returned directly (not wrapped in a list).
edge_index, edge_weight = convert_edge_to_directed(edge_index, edge_weight=edge_weight)


# Convert these numpy arrays to TensorFlow tensors and pass them to the GNN functions.
# NOTE(review): `x` is not defined in this snippet; presumably the node-feature
# matrix is created earlier -- confirm.
outputs = tfg.nn.gcn(
    tf.Variable(x),
    tf.constant(edge_index),
    tf.constant(edge_weight),
    tf.Variable(tf.random.truncated_normal([20, 2])) # GCN weight: 20 input features -> 2 outputs
)
print(outputs)

# Usually, a graph object is used to manage this information.
# edge_weight is optional; set it to None if you do not need it.
graph = tfg.Graph(x=x, edge_index=edge_index, edge_weight=edge_weight)
Пример #7
0
    def process(self):
        """
        Build the graph and train/valid/test split for ``self.dataset_name``.

        Reads the pickled ``ind.<dataset>.*`` parts and
        ``ind.<dataset>.test.index`` from ``self.raw_root_path``, pads the
        test block for 'citeseer', reorders the test rows, row-normalizes the
        node features and converts the adjacency lists to a directed
        edge_index without self loops.

        The split depends on ``self.task``:

        * ``"semi_supervised"``: the first ``len(y)`` nodes are used for
          training and the following 500 for validation.
        * anything else: the last 500 rows of ``ally`` are used for
          validation and all rows before them for training.

        :return: ``(graph, (train_index, valid_index, test_index))`` where
            graph is a ``Graph`` and the three indices are lists of ints.
        """
        dataset_str = self.dataset_name
        names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']

        # encoding='latin1' is only passed on Python 3 (presumably because the
        # raw files were pickled under Python 2 -- verify against the data).
        pickle_kwargs = {"encoding": 'latin1'} if sys.version_info > (3, 0) else {}

        # NOTE(security): pickle.load can execute arbitrary code. Only use
        # raw files obtained from a trusted source.
        objects = []
        for name in names:
            data_name = "ind.{}.{}".format(dataset_str, name)
            data_path = os.path.join(self.raw_root_path, data_name)
            with open(data_path, 'rb') as f:
                objects.append(pickle.load(f, **pickle_kwargs))

        x, y, tx, ty, allx, ally, graph = objects

        test_index_path = os.path.join(self.raw_root_path,
                                       "ind.{}.test.index".format(dataset_str))
        with open(test_index_path, "r", encoding="utf-8") as f:
            test_idx_reorder = [int(line.strip()) for line in f]
        test_idx_range = np.sort(test_idx_reorder)

        if self.dataset_name == 'citeseer':
            # Fix citeseer dataset (there are some isolated nodes in the graph):
            # find isolated nodes and add them as zero-vecs in the right position.
            min_test_idx = min(test_idx_reorder)
            num_test_rows = max(test_idx_reorder) - min_test_idx + 1
            row_positions = test_idx_range - min_test_idx

            tx_extended = sp.lil_matrix((num_test_rows, x.shape[1]))
            tx_extended[row_positions, :] = tx
            tx = tx_extended

            ty_extended = np.zeros((num_test_rows, y.shape[1]))
            ty_extended[row_positions, :] = ty
            ty = ty_extended

        # Stack the training and test rows, then move each test row from its
        # sorted position to the position listed in the index file.
        features = sp.vstack((allx, tx)).tolil()
        features[test_idx_reorder, :] = features[test_idx_range, :]

        labels = np.vstack((ally, ty))
        labels[test_idx_reorder, :] = labels[test_idx_range, :]

        test_index = test_idx_range.tolist()
        if self.task == "semi_supervised":
            num_labeled = len(y)
            train_index = list(range(num_labeled))
            valid_index = list(range(num_labeled, num_labeled + 500))
        else:
            # Materialize as lists so both branches return the same type.
            num_train = len(ally) - 500
            train_index = list(range(num_train))
            valid_index = list(range(num_train, len(ally)))

        # Row-normalize the features. All-zero rows (e.g. the zero-vecs added
        # for citeseer above) divide by zero; silence that expected warning and
        # give those rows a scale factor of 1.0 so they stay all-zero.
        x = np.array(features.todense()).astype(np.float32)
        with np.errstate(divide="ignore"):
            inv_sum_x = 1.0 / np.sum(x, axis=-1, keepdims=True)
        inv_sum_x[np.isnan(inv_sum_x)] = 1.0
        inv_sum_x[np.isinf(inv_sum_x)] = 1.0
        x *= inv_sum_x

        edge_index = np.array(nx.from_dict_of_lists(graph).edges).T
        edge_index, _ = remove_self_loop_edge(edge_index)
        edge_index, _ = convert_edge_to_directed(edge_index)
        y = np.argmax(labels, axis=-1).astype(np.int32)

        graph = Graph(x=x, edge_index=edge_index, y=y)

        return graph, (train_index, valid_index, test_index)
Пример #8
0
 def convert_edge_to_directed(self):
     """
     Replace edge_index and edge_weight with their directed counterparts.

     Both attributes are passed to the module-level
     `convert_edge_to_directed` function and overwritten with its result.

     :return: self, so that calls can be chained.
     """
     converted = convert_edge_to_directed(self.edge_index, self.edge_weight)
     self.edge_index, self.edge_weight = converted
     return self