Пример #1
0
def _read_graphs(dataset_name):
    """Read a graph dataset stored as a family of ``<name>_*.txt`` files.

    Files are looked up under ``DATA_PATH/<dataset_name>/`` and share the
    ``<dataset_name>_`` prefix. ``_graph_indicator.txt`` and ``_A.txt`` are
    always read; node attributes, node labels, graph attributes and graph
    labels are read only when the corresponding file exists.

    :param dataset_name: name of the dataset folder and of the file prefix.
    :return: a tuple ``(A_list, X_list, y)`` where

        - ``A_list`` is a list of ``scipy.sparse.coo_matrix`` adjacency
          matrices, one per graph (all zeros for a graph without edges);
        - ``X_list`` is a list with the node features of each graph (node
          attributes followed by the node labels passed through
          ``_normalize(..., 'ohe')``), or a list of empty lists when the
          dataset has neither file;
        - ``y`` is the content of the graph attributes file if present,
          otherwise the graph labels passed through
          ``_normalize(..., 'ohe')``, otherwise ``None``.
    """
    file_prefix = osp.join(DATA_PATH, dataset_name, dataset_name)
    # Suffixes of the files that exist for this dataset, e.g. 'node_labels'
    # for '<dataset_name>_node_labels.txt'.
    available = [
        f.split(os.sep)[-1][len(dataset_name) + 1:-4]
        for f in glob.glob('{}_*.txt'.format(file_prefix))
    ]

    # Graph id of every node, shifted by -1 so it can be used as an index.
    I = io.load_txt(file_prefix + '_graph_indicator.txt').astype(int) - 1
    graph_sizes = np.bincount(I)
    # Derive the graph count from the bincount (not from the unique ids) so
    # that sizes, offsets and the per-graph lists always have the same length,
    # even if some graph id has no nodes.
    num_graphs = len(graph_sizes)
    # Index of the first node of each graph in the global node numbering.
    offsets = np.concatenate(([0], np.cumsum(graph_sizes)[:-1]))

    edges = io.load_txt(file_prefix + '_A.txt', delimiter=',').astype(int) - 1
    # Keep one (source, target) pair per row even for a single-edge file.
    edges = edges.reshape(-1, 2)

    # Group the edges by the graph of their first endpoint and re-index the
    # endpoints relative to that graph.
    edge_lists = [[] for _ in range(num_graphs)]
    for e in edges:
        graph_id = I[e[0]]
        edge_lists[graph_id].append(e - offsets[graph_id])

    A_list = []
    for size, edge_list in zip(graph_sizes, edge_lists):
        # The reshape keeps the array 2-D when a graph has no edges, which
        # would otherwise make the column indexing below fail.
        local_edges = np.array(edge_list, dtype=int).reshape(-1, 2)
        rows, cols = local_edges[:, 0], local_edges[:, 1]
        A_list.append(
            sp.coo_matrix(
                (np.ones_like(rows), (rows, cols)),
                shape=(size, size)
            )
        )

    X = []
    if 'node_attributes' in available:
        X_na = io.load_txt(file_prefix + '_node_attributes.txt', delimiter=',')
        if X_na.ndim == 1:
            X_na = X_na[:, None]
        X.append(X_na)
    if 'node_labels' in available:
        X_nl = io.load_txt(file_prefix + '_node_labels.txt')
        X_nl = _normalize(X_nl.reshape(-1, 1), 'ohe')
        X.append(X_nl)

    if X:
        # Stack all feature blocks column-wise, then cut them at the first
        # node of every graph but the first.
        X_list = np.split(np.concatenate(X, -1), offsets[1:])
    else:
        # No node features on disk: one empty placeholder per graph.
        X_list = [[] for _ in range(num_graphs)]

    y = None
    if 'graph_attributes' in available:
        y = io.load_txt(file_prefix + '_graph_attributes.txt')
    elif 'graph_labels' in available:
        y = io.load_txt(file_prefix + '_graph_labels.txt')
        y = _normalize(y[:, None], 'ohe')

    return A_list, X_list, y
Пример #2
0
    def read(self):
        """Load the dataset files found in ``self.path`` into ``Graph`` objects.

        Files are named ``<self.name>_<suffix>.txt``. The graph indicator and
        the edge list (``A``) are always read; node/edge attributes and labels
        are read only when the corresponding file exists. Duplicate edges and
        self-loops are removed before the adjacency matrices are built, and
        the same selection is applied to the edge attributes/labels.

        :return: a list of ``Graph(x=..., a=..., e=..., y=...)`` built by
            zipping the per-graph node features, adjacency matrices, edge
            features and labels. ``x`` and/or ``e`` are ``None`` when the
            dataset has no node/edge features.
        :raises ValueError: if neither a graph attributes nor a graph labels
            file is available.
        """
        fname_template = osp.join(self.path, "{}_{{}}.txt".format(self.name))
        available = [
            f.split(os.sep)[-1][len(self.name) + 1:-4]  # Remove leading name
            for f in glob.glob(fname_template.format("*"))
        ]

        # Batch index
        node_batch_index = (
            io.load_txt(fname_template.format("graph_indicator")).astype(int) -
            1)
        n_nodes = np.bincount(node_batch_index)
        n_nodes_cum = np.concatenate(([0], np.cumsum(n_nodes)[:-1]))

        # Adjacency matrix
        edges = io.load_txt(fname_template.format("A"),
                            delimiter=",").astype(int) - 1
        # Remove duplicates and self-loops from edges.
        # np.unique returns the first-occurrence indices in the order of the
        # sorted unique rows, so `edges[mask]` is also sorted by (source,
        # target).
        # NOTE(review): the split below relies on that order grouping edges by
        # graph, i.e. on node ids being contiguous per graph - confirm.
        _, mask = np.unique(edges, axis=0, return_index=True)
        mask = mask[edges[mask, 0] != edges[mask, 1]]
        edges = edges[mask]
        # Split edges into separate edge lists
        edge_batch_idx = node_batch_index[edges[:, 0]]
        # minlength guarantees one count per graph even when the last graphs
        # have no edges; otherwise those graphs would be dropped by the final
        # zip because a_list/e_list would be shorter than x_list.
        n_edges = np.bincount(edge_batch_idx, minlength=len(n_nodes))
        n_edges_cum = np.cumsum(n_edges[:-1])
        edge_lists = np.split(edges - n_nodes_cum[edge_batch_idx, None],
                              n_edges_cum)
        # Create sparse adjacency matrices
        a_list = [
            sp.csr_matrix(
                (np.ones_like(el[:, 0]), (el[:, 0], el[:, 1])),
                shape=(n_nodes[i], n_nodes[i]),
            ) for i, el in enumerate(edge_lists)
        ]

        # Node features
        x_list = []
        if "node_attributes" in available:
            x_attr = io.load_txt(fname_template.format("node_attributes"),
                                 delimiter=",")
            if x_attr.ndim == 1:
                x_attr = x_attr[:, None]
            x_list.append(x_attr)
        if "node_labels" in available:
            x_labs = io.load_txt(fname_template.format("node_labels"))
            if x_labs.ndim == 1:
                x_labs = x_labs[:, None]
            # Encode every label column separately, then stack the encodings
            x_labs = np.concatenate(
                [_normalize(xl_[:, None], "ohe") for xl_ in x_labs.T], -1)
            x_list.append(x_labs)
        if len(x_list) > 0:
            x_list = np.concatenate(x_list, -1)
            x_list = np.split(x_list, n_nodes_cum[1:])
        else:
            print("WARNING: this dataset doesn't have node attributes. "
                  "Consider creating manual features before using it with a "
                  "Loader.")
            x_list = [None] * len(n_nodes)

        # Edge features
        e_list = []
        if "edge_attributes" in available:
            e_attr = io.load_txt(fname_template.format("edge_attributes"))
            if e_attr.ndim == 1:
                e_attr = e_attr[:, None]
            # Keep (and reorder) only the rows of the edges that were kept
            e_attr = e_attr[mask]
            e_list.append(e_attr)
        if "edge_labels" in available:
            e_labs = io.load_txt(fname_template.format("edge_labels"))
            if e_labs.ndim == 1:
                e_labs = e_labs[:, None]
            e_labs = e_labs[mask]
            e_labs = np.concatenate(
                [_normalize(el_[:, None], "ohe") for el_ in e_labs.T], -1)
            e_list.append(e_labs)
        if len(e_list) > 0:
            e_list = np.concatenate(e_list, -1)
            e_list = np.split(e_list, n_edges_cum)
        else:
            e_list = [None] * len(n_nodes)

        # Labels
        if "graph_attributes" in available:
            labels = io.load_txt(fname_template.format("graph_attributes"))
        elif "graph_labels" in available:
            labels = io.load_txt(fname_template.format("graph_labels"))
            labels = _normalize(labels[:, None], "ohe")
        else:
            raise ValueError("No labels available for dataset {}".format(
                self.name))

        # Convert to Graph
        print("Successfully loaded {}.".format(self.name))
        return [
            Graph(x=x, a=a, e=e, y=y)
            for x, a, e, y in zip(x_list, a_list, e_list, labels)
        ]