Example #1
    def process(self):
        with open(os.path.join(self.raw_dir, 'ACM3025.pkl'), 'rb') as f:
            data = pickle.load(f)
        features = torch.from_numpy(
            data['feature'].todense()).float()  # (3025, 1870)
        labels = torch.from_numpy(
            data['label'].todense()).long().nonzero(as_tuple=True)[1]  # (3025)

        # Adjacency matrices for meta-path based neighbors
        # (Mufei): I verified both of them are binary adjacency matrices with self loops
        author_g = dgl.from_scipy(data['PAP'])
        subject_g = dgl.from_scipy(data['PLP'])
        self.gs = [author_g, subject_g]

        num_nodes = data['label'].shape[0]
        train_mask = generate_mask_tensor(
            idx2mask(data['train_idx'][0], num_nodes))
        val_mask = generate_mask_tensor(idx2mask(data['val_idx'][0],
                                                 num_nodes))
        test_mask = generate_mask_tensor(
            idx2mask(data['test_idx'][0], num_nodes))
        for g in self.gs:
            g.ndata['feat'] = features
            g.ndata['label'] = labels
            g.ndata['train_mask'] = train_mask
            g.ndata['val_mask'] = val_mask
            g.ndata['test_mask'] = test_mask
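
idx2mask and generate_mask_tensor are helpers from dgl.data.utils; a likely-equivalent sketch, in case you want this example self-contained:

import numpy as np
import torch

def idx2mask(idx, num_nodes):
    """Boolean NumPy mask that is True at the given indices."""
    mask = np.zeros(num_nodes, dtype=bool)
    mask[idx] = True
    return mask

def generate_mask_tensor(mask):
    """Convert a boolean NumPy mask to a torch bool tensor."""
    return torch.from_numpy(mask).bool()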
Example #2
def dgl_graph_from_vec(vec, graph_params):
    """
    Create graph from flatten vector as a thresholed weighted matrix with properties
    as type torch 
    """

    if graph_params.flatten:
        W = vec_to_sym(vec)
    else:
        W = vec
    # compute node features (the signal on nodes) from the dense weight matrix
    u = getattr(feature_generation, graph_params.node_feat)(W)
    if graph_params.thr_type == 'pos':
        W[W < graph_params.threshold] = 0
    else:
        W[np.abs(W) < graph_params.threshold] = 0

    # convert to sparse COO; edge weights and node features become torch tensors
    W = sparse.csr_matrix(W).tocoo()
    edge_weight = torch.tensor(W.data).float()
    u = torch.from_numpy(u.astype(np.float32))

    g = dgl.from_scipy(W)
    g.ndata['feat'] = u
    g.edata['weight'] = edge_weight

    if graph_params.add_self_loop:
        g = dgl.add_self_loop(g)
        # add_self_loop appends the self-loop edges last, so the final
        # n_nodes entries of edata are the self-loops
        g.edata['weight'][-graph_params.n_nodes:] = 1
    return g
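
A minimal usage sketch. graph_params is whatever config object the project passes around; the fields below (flatten, node_feat, thr_type, threshold, add_self_loop, n_nodes) are simply the ones the function reads, and 'degree' is a hypothetical function name assumed to exist in the project's feature_generation module:

from types import SimpleNamespace
import numpy as np

n = 10
W = np.random.rand(n, n)
W = (W + W.T) / 2  # symmetric weighted adjacency
params = SimpleNamespace(flatten=False, node_feat='degree', thr_type='pos',
                         threshold=0.5, add_self_loop=True, n_nodes=n)
g = dgl_graph_from_vec(W, params)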
Example #3
def generate_rand_graph(n, is_hetero):
    arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    if is_hetero:
        return dgl.from_scipy(arr)
    else:
        return DGLGraph(arr, readonly=True)
Example #4
def load_ppi_data(root):
    DataType = namedtuple('Dataset', ['num_classes', 'g'])
    adj_full = sp.load_npz(os.path.join(root, 'ppi', 'adj_full.npz'))
    G = dgl.from_scipy(adj_full)
    nodes_num = G.num_nodes()
    role = json.load(open(os.path.join(root, 'ppi', 'role.json'), 'r'))
    tr = list(role['tr'])
    te = list(role['te'])
    va = list(role['va'])
    mask = np.zeros((nodes_num, ), dtype=bool)
    train_mask = mask.copy()
    train_mask[tr] = True
    val_mask = mask.copy()
    val_mask[va] = True
    test_mask = mask.copy()
    test_mask[te] = True

    G.ndata['train_mask'] = torch.tensor(train_mask, dtype=torch.bool)
    G.ndata['val_mask'] = torch.tensor(val_mask, dtype=torch.bool)
    G.ndata['test_mask'] = torch.tensor(test_mask, dtype=torch.bool)

    feats = np.load(os.path.join(root, 'ppi', 'feats.npy'))
    G.ndata['feat'] = torch.tensor(feats, dtype=torch.float)

    class_map = json.load(
        open(os.path.join(root, 'ppi', 'class_map.json'), 'r'))
    labels = np.array([class_map[str(i)] for i in range(nodes_num)])
    G.ndata['label'] = torch.tensor(labels, dtype=torch.float)
    data = DataType(g=G, num_classes=labels.shape[1])
    return data
Example #5
    def _prepare(self):
        t0 = time.time()
        print("[I] Preparing Circular Skip Link Graphs v4 ...")
        for sample in self.adj_list:
            _g = dgl.from_scipy(sample)
            g = dgl.transform.remove_self_loop(_g)
            g.ndata['feat'] = torch.zeros(g.number_of_nodes()).long()
            #g.ndata['feat'] = torch.arange(0, g.number_of_nodes()).long() # v1
            #g.ndata['feat'] = torch.randperm(g.number_of_nodes()).long() # v3

            # adding edge features as generic requirement
            g.edata['feat'] = torch.zeros(g.number_of_edges()).long()
            #g.edata['feat'] = torch.arange(0, g.number_of_edges()).long() # v1
            #g.edata['feat'] = torch.ones(g.number_of_edges()).long() # v2

            # NOTE: come back here to define edge features as the distance
            # between the endpoint indices of each edge
            ###################################################################
            # srcs, dsts = g.edges()
            # edge_feat = []
            # for edge in range(len(srcs)):
            #     a = srcs[edge].item()
            #     b = dsts[edge].item()
            #     edge_feat.append(abs(a - b))
            # g.edata['feat'] = torch.tensor(edge_feat, dtype=torch.int).long()
            ###################################################################

            self.graph_lists.append(g)
        self.num_node_type = self.graph_lists[0].ndata['feat'].size(0)
        self.num_edge_type = self.graph_lists[0].edata['feat'].size(0)
        print("[I] Finished preparation after {:.4f}s".format(time.time() -
                                                              t0))
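
The edge-distance feature sketched in the NOTE above can also be computed without a Python loop; a minimal vectorized version:

srcs, dsts = g.edges()
g.edata['feat'] = (srcs.long() - dsts.long()).abs()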
Example #6
def test_topological_nodes(idtype, n=100):
    a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
    b = sp.tril(a, -1).tocoo()
    g = dgl.from_scipy(b).astype(idtype)

    layers_dgl = dgl.topological_nodes_generator(g)

    adjmat = g.adjacency_matrix(transpose=True)

    def tensor_topo_traverse():
        n = g.number_of_nodes()
        mask = F.copy_to(F.ones((n, 1)), F.cpu())
        degree = F.spmm(adjmat, mask)
        while F.reduce_sum(mask) != 0.:
            v = F.astype((degree == 0.), F.float32)
            v = v * mask
            mask = mask - v
            frontier = F.copy_to(F.nonzero_1d(F.squeeze(v, 1)), F.cpu())
            yield frontier
            degree -= F.spmm(adjmat, v)

    layers_spmv = list(tensor_topo_traverse())

    assert len(layers_dgl) == len(layers_spmv)
    assert all(toset(x) == toset(y) for x, y in zip(layers_dgl, layers_spmv))
Example #7
def predict(adj, features):

    # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
    dev = th.device('cpu')
    '''
    fg = open(sys.argv[1], 'rb')
    adj = pickle.load(fg)
    features = np.load(sys.argv[2])
    '''
    #graph = dgl.DGLGraph()
    adj = adj_preprocess(adj)
    #graph.from_scipy_sparse_matrix(adj)
    graph = dgl.from_scipy(adj)
    features = th.FloatTensor(features).to(dev)
    features[th.where(features < -1.0)[0]] = 0
    features[th.where(features > 1.0)[0]] = 0
    features = 2 * th.atan(features) / th.Tensor([np.pi]).to(dev)
    graph.ndata['features'] = features

    model = TAGCN(100, 128, 20, 3, activation=F.leaky_relu, dropout=0.0)
    model_states = th.load('speit/model.pkl', map_location=dev)
    model.load_state_dict(model_states)
    model = model.to(dev)
    model.eval()

    logits = model(graph, features)
    pred = logits.argmax(1)

    return pred.cpu().numpy()
Example #8
def load_acm(remove_self_loop):
    url = 'dataset/ACM3025.pkl'
    data_path = get_download_dir() + '/ACM3025.pkl'
    # download(_get_dgl_url(url), path=data_path)

    with open(data_path, 'rb') as f:
        # data is a dict: dict_keys(['label', 'feature', 'PAP', 'PLP', 'train_idx', 'val_idx', 'test_idx'])
        data = pickle.load(f)

    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    num_classes = labels.shape[1]
    labels = labels.nonzero()[:, 1]  # convert one-hot labels to class indices

    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta-path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.from_scipy(data['PAP'])  # graph for the P-A-P meta-path, in DGL format
    subject_g = dgl.from_scipy(data['PLP'])  # graph for the P-S-P meta-path
    gs = [author_g, subject_g]  # collect the two meta-path graphs

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()  # number of nodes
    train_mask = get_binary_mask(num_nodes, train_idx)  # 1 at the given indices, 0 elsewhere
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    pprint({
        'dataset': 'ACM',
        'train': train_mask.sum().item() / num_nodes,
        'val': val_mask.sum().item() / num_nodes,
        'test': test_mask.sum().item() / num_nodes
    })
    # Returns:
    # gs - graphs for the PAP/PLP meta-paths; features - node features;
    # labels - class labels; num_classes - number of classes
    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask
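
get_binary_mask is a helper from the surrounding HAN example code; a likely-equivalent sketch:

import torch

def get_binary_mask(total_size, indices):
    mask = torch.zeros(total_size, dtype=torch.bool)
    mask[indices] = True
    return mask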
Example #9
def load_acm(remove_self_loop):
    filename = 'ACM3025.pkl'
    url = 'dataset/' + filename
    data_path = get_download_dir() + '/' + filename
    if osp.exists(data_path):
        print(f'Using existing file {filename}', file=sys.stderr)
    else:
        download(_get_dgl_url(url), path=data_path)

    with open(data_path, 'rb') as f:
        data = pickle.load(f)

    labels, features = torch.from_numpy(data['label'].todense()).long(), \
                       torch.from_numpy(data['feature'].todense()).float()
    num_classes = labels.shape[1]
    labels = labels.nonzero()[:, 1]

    if remove_self_loop:
        num_nodes = data['label'].shape[0]
        data['PAP'] = sparse.csr_matrix(data['PAP'] - np.eye(num_nodes))
        data['PLP'] = sparse.csr_matrix(data['PLP'] - np.eye(num_nodes))

    # Adjacency matrices for meta path based neighbors
    # (Mufei): I verified both of them are binary adjacency matrices with self loops
    author_g = dgl.from_scipy(data['PAP'])
    subject_g = dgl.from_scipy(data['PLP'])
    gs = [author_g, subject_g]

    train_idx = torch.from_numpy(data['train_idx']).long().squeeze(0)
    val_idx = torch.from_numpy(data['val_idx']).long().squeeze(0)
    test_idx = torch.from_numpy(data['test_idx']).long().squeeze(0)

    num_nodes = author_g.number_of_nodes()
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    print('dataset loaded')
    pprint({
        'dataset': 'ACM',
        'train': train_mask.sum().item() / num_nodes,
        'val': val_mask.sum().item() / num_nodes,
        'test': test_mask.sum().item() / num_nodes
    })

    return gs, features, labels, num_classes, train_idx, val_idx, test_idx, \
           train_mask, val_mask, test_mask
Example #10
def track_time(size, scipy_format):
    matrix_dict = {
        "small":
        dgl.data.CiteseerGraphDataset(verbose=False)[0].adjacency_matrix(
            scipy_fmt=scipy_format),
        "large":
        utils.get_livejournal().adjacency_matrix(scipy_fmt=scipy_format)
    }

    # dry run
    dgl.from_scipy(matrix_dict[size])

    # timing
    with utils.Timer() as t:
        for i in range(3):
            dgl.from_scipy(matrix_dict[size])

    return t.elapsed_secs / 3
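
utils.Timer here is the benchmark suite's own helper; a minimal stand-in, assuming all the code above needs is the elapsed_secs attribute:

import time

class Timer:
    def __enter__(self):
        self._t0 = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.elapsed_secs = time.perf_counter() - self._t0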
Example #11
 def diffuse(progress_g, weighted_adj, degree):
     device = progress_g.device
     progress_adj = progress_g.adj(scipy_fmt='coo')
     progress_adj.data = progress_g.edata['weight'].cpu().numpy()
     ret_adj = sparse.coo_matrix(
         progress_adj @ (weighted_adj / degree.cpu().numpy()))
     ret_graph = dgl.from_scipy(ret_adj, eweight_name='weight').to(device)
     ret_graph.edata['weight'] = ret_graph.edata['weight'].float().to(
         device)
     return ret_graph
Example #12
def mat2graph(adjacent_matrix, weighted=False, init_feat=None):
    # use CSR so DGL's edge order matches the row-major flatten below
    g = from_scipy(sparse.csr_matrix(adjacent_matrix))
    g.ndata['in_degrees'] = sum(tensor(adjacent_matrix), 0)   # column sums
    g.ndata['out_degrees'] = sum(tensor(adjacent_matrix), 1)  # row sums
    if init_feat is not None:
        g.ndata['init_h'] = tensor(init_feat).float()
    if weighted:
        weight = adjacent_matrix.flatten()
        g.edata['w'] = tensor(weight[weight != 0]).float()
    return g
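
A quick sanity check on a tiny adjacency matrix, assuming the snippet's bare from_scipy, tensor, and sum names come from dgl and torch imports, as the code implies:

import numpy as np

A = np.array([[0, 1, 1],
              [0, 0, 1],
              [0, 0, 0]], dtype=np.float32)
g = mat2graph(A, weighted=True)
print(g.ndata['out_degrees'])  # tensor([2., 1., 0.])
print(g.edata['w'])            # tensor([1., 1., 1.])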
Example #13
File: utils.py Project: yifeim/dgl
def load_data(args, multilabel):
    if not os.path.exists('graphsaintdata') and not os.path.exists('data'):
        raise ValueError("Neither the 'graphsaintdata' nor the 'data' directory exists!")
    elif os.path.exists('graphsaintdata') and not os.path.exists('data'):
        os.rename('graphsaintdata', 'data')
    prefix = "data/{}".format(args.dataset)
    DataType = namedtuple('Dataset', ['num_classes', 'train_nid', 'g'])

    adj_full = scipy.sparse.load_npz(
        './{}/adj_full.npz'.format(prefix)).astype(bool)
    g = dgl.from_scipy(adj_full)
    num_nodes = g.num_nodes()

    adj_train = scipy.sparse.load_npz(
        './{}/adj_train.npz'.format(prefix)).astype(bool)
    train_nid = np.array(list(set(adj_train.nonzero()[0])))

    role = json.load(open('./{}/role.json'.format(prefix)))
    mask = np.zeros((num_nodes, ), dtype=bool)
    train_mask = mask.copy()
    train_mask[role['tr']] = True
    val_mask = mask.copy()
    val_mask[role['va']] = True
    test_mask = mask.copy()
    test_mask[role['te']] = True

    feats = np.load('./{}/feats.npy'.format(prefix))
    scaler = StandardScaler()
    scaler.fit(feats[train_nid])
    feats = scaler.transform(feats)

    class_map = json.load(open('./{}/class_map.json'.format(prefix)))
    class_map = {int(k): v for k, v in class_map.items()}
    if multilabel:
        # Multi-label binary classification
        num_classes = len(list(class_map.values())[0])
        class_arr = np.zeros((num_nodes, num_classes))
        for k, v in class_map.items():
            class_arr[k] = v
    else:
        num_classes = max(class_map.values()) - min(class_map.values()) + 1
        class_arr = np.zeros((num_nodes, ))
        for k, v in class_map.items():
            class_arr[k] = v

    g.ndata['feat'] = torch.tensor(feats, dtype=torch.float)
    g.ndata['label'] = torch.tensor(
        class_arr, dtype=torch.float if multilabel else torch.long)
    g.ndata['train_mask'] = torch.tensor(train_mask, dtype=torch.bool)
    g.ndata['val_mask'] = torch.tensor(val_mask, dtype=torch.bool)
    g.ndata['test_mask'] = torch.tensor(test_mask, dtype=torch.bool)

    data = DataType(g=g, num_classes=num_classes, train_nid=train_nid)
    return data
Example #14
 def attach_graph(g, k):
     device = g.device
     out_graph_list = []
     in_graph_list = []
     wadj, ind, outd = DiffConv.get_weight_matrix(g)
     adj = sparse.coo_matrix(wadj / outd.cpu().numpy())
     outg = dgl.from_scipy(adj, eweight_name='weight').to(device)
     outg.edata['weight'] = outg.edata['weight'].float().to(device)
     out_graph_list.append(outg)
     for i in range(k - 1):
         out_graph_list.append(
             DiffConv.diffuse(out_graph_list[-1], wadj, outd))
     adj = sparse.coo_matrix(wadj.T / ind.cpu().numpy())
     ing = dgl.from_scipy(adj, eweight_name='weight').to(device)
     ing.edata['weight'] = ing.edata['weight'].float().to(device)
     in_graph_list.append(ing)
     for i in range(k - 1):
         in_graph_list.append(
             DiffConv.diffuse(in_graph_list[-1], wadj.T, ind))
     return out_graph_list, in_graph_list
Example #15
    def generate_g(self, estimated_adj):
        args = self.args
        if args.symmetric:
            adj = (estimated_adj + estimated_adj.t()) / 2
        else:
            adj = estimated_adj
        a = (adj.cpu() + torch.eye(adj.shape[0])).detach().cpu().numpy()
        b = sp.coo_matrix(a)
        g = dgl.from_scipy(b, 'weight').to(device)

        del a, b

        return g
Example #16
def test_rgcn(O):
    ctx = F.ctx()
    etype = []
    g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1)).to(F.ctx())
    # 5 etypes
    R = 5
    for i in range(g.number_of_edges()):
        etype.append(i % 5)
    B = 2
    I = 10

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randn(100, I, ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r)
    assert list(h_new.shape) == [100, O]

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
        rgc_bdd.initialize(ctx=ctx)
        h = nd.random.randn(100, I, ctx=ctx)
        r = nd.array(etype, ctx=ctx)
        h_new = rgc_bdd(g, h, r)
        assert list(h_new.shape) == [100, O]

    # with norm
    norm = nd.zeros((g.number_of_edges(), 1), ctx=ctx)

    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randn(100, I, ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r, norm)
    assert list(h_new.shape) == [100, O]

    if O % B == 0:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B)
        rgc_bdd.initialize(ctx=ctx)
        h = nd.random.randn(100, I, ctx=ctx)
        r = nd.array(etype, ctx=ctx)
        h_new = rgc_bdd(g, h, r, norm)
        assert list(h_new.shape) == [100, O]

    # id input
    rgc_basis = nn.RelGraphConv(I, O, R, "basis", B)
    rgc_basis.initialize(ctx=ctx)
    h = nd.random.randint(0, I, (100,), ctx=ctx)
    r = nd.array(etype, ctx=ctx)
    h_new = rgc_basis(g, h, r)
    assert list(h_new.shape) == [100, O]
Example #17
    def create_graph(self, edges_src, edges_dst, num_nodes):
        """graph = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
        graph = dgl.remove_self_loop(graph)

        graph = dgl.add_reverse_edges(graph)
        graph = dgl.add_self_loop(graph)"""
        # one data entry per edge, so the length must match edges_src
        adj = sp.coo_matrix((np.ones(len(edges_src)), (edges_src, edges_dst)),
                            shape=(num_nodes, num_nodes),
                            dtype=np.float32)
        # build symmetric adjacency matrix
        adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
        adj = normalize(adj + sp.eye(adj.shape[0]))
        graph = dgl.from_scipy(adj, eweight_name='w')
        return graph
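
The normalize helper is not shown in this snippet; the usual row-normalization (D^-1 A) from the GCN reference implementations, which this presumably mirrors:

import numpy as np
import scipy.sparse as sp

def normalize(mx):
    """Row-normalize a sparse matrix."""
    rowsum = np.array(mx.sum(1)).flatten()
    r_inv = np.divide(1.0, rowsum, out=np.zeros_like(rowsum), where=rowsum != 0)
    return sp.diags(r_inv).dot(mx)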
Example #18
def astensor(x, *, dtype=None, device=None, escape=None):

    try:
        if x is None or (escape is not None and isinstance(x, escape)):
            return x
    except TypeError:
        raise TypeError(f"argument 'escape' must be a type or tuple of types.")

    if dtype is None:
        dtype = gf.infer_type(x)

    if isinstance(dtype, (np.dtype, str)):
        dtype = data_type_dict().get(str(dtype), dtype)
    elif not isinstance(dtype, torch.dtype):
        raise TypeError(
            f"argument 'dtype' must be torch.dtype, np.dtype or str, but got {type(dtype)}."
        )

    if is_tensor(x):
        tensor = x.to(dtype)
    elif gf.is_tensor(x, backend='tensorflow'):
        return astensor(gf.tensoras(x),
                        dtype=dtype,
                        device=device,
                        escape=escape)
    elif sp.isspmatrix(x):
        if gg.backend() == "dgl_torch":
            import dgl
            tensor = dgl.from_scipy(x, idtype=getattr(torch, gg.intx()))
        elif gg.backend() == "pyg":
            edge_index, edge_weight = gf.sparse_adj_to_edge(x)
            return (astensor(edge_index,
                             dtype=gg.intx(),
                             device=device,
                             escape=escape),
                    astensor(edge_weight,
                             dtype=gg.floatx(),
                             device=device,
                             escape=escape))
        else:
            tensor = sparse_adj_to_sparse_tensor(x, dtype=dtype)
    elif any((isinstance(x, (np.ndarray, np.matrix)), gg.is_listlike(x),
              gg.is_scalar(x))):
        tensor = torch.tensor(x, dtype=dtype, device=device)
    else:
        raise TypeError(
            f"Invalid type of inputs. Allowed data types (Tensor, SparseTensor, Numpy array, Scipy sparse matrix, None), but got {type(x)}."
        )
    return tensor.to(device)
Example #19
def update_graph(model, optimizer, features, adj, rew_states, loss, args,
                 envs):
    if adj.shape[0] > 1:
        labels = torch.zeros((len(features)))
        idx_train = torch.LongTensor([0])
        for r_s in rew_states:
            if len(envs.observation_space.shape) == 1:  #MuJoCo experiments
                labels[r_s[0]] = torch.sigmoid(2 * r_s[1])
            else:
                labels[r_s[0]] = torch.tensor(
                    [1.]) if r_s[1] > 0. else torch.tensor([0.])
            idx_train = torch.cat((idx_train, torch.LongTensor([r_s[0]])), 0)
        labels = labels.type(torch.LongTensor)
    else:
        labels = torch.zeros((len(features))).type(torch.LongTensor)
        idx_train = torch.LongTensor([0])

    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    deg = np.diag(adj.toarray().sum(axis=1))
    laplacian = torch.from_numpy((deg - adj.toarray()).astype(np.float32))
    adj = sp.csr_matrix(adj) + sp.eye(adj.shape[0])
    g = dgl.from_scipy(adj)

    if args.cuda and torch.cuda.is_available():
        model.cuda()
        features = features.cuda()
        laplacian = laplacian.cuda()
        labels = labels.cuda()
        idx_train = idx_train.cuda()
        g = g.to('cuda')

    t_total = time.time()
    for epoch in range(args.gcn_epochs):
        t = time.time()
        model.train()
        optimizer.zero_grad()
        output = model(features, g)
        loss_train = F.nll_loss(output[idx_train], labels[idx_train])
        soft_out = torch.unsqueeze(
            torch.nn.functional.softmax(output, dim=1)[:, 1], 1)
        loss_reg = torch.mm(torch.mm(soft_out.T, laplacian), soft_out)
        loss_train += args.gcn_lambda * loss_reg.squeeze()
        loss_train.backward()
        optimizer.step()
Example #20
def test_dense_cheb_conv():
    for k in range(1, 4):
        ctx = F.ctx()
        g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
        adj = g.adjacency_matrix(ctx=ctx).tostype('default')
        cheb = nn.ChebConv(5, 2, k)
        dense_cheb = nn.DenseChebConv(5, 2, k)
        cheb.initialize(ctx=ctx)
        dense_cheb.initialize(ctx=ctx)

        for i in range(len(cheb.fc)):
            dense_cheb.fc[i].weight.set_data(cheb.fc[i].weight.data())
            if cheb.bias is not None:
                dense_cheb.bias.set_data(cheb.bias.data())

        feat = F.randn((100, 5))
        out_cheb = cheb(g, feat, [2.0])
        out_dense_cheb = dense_cheb(adj, feat, 2.0)
        assert F.allclose(out_cheb, out_dense_cheb)
Example #21
def astensor(x, *, dtype=None, device=None, escape=None):

    try:
        if x is None or (escape is not None and isinstance(x, escape)):
            return x
    except TypeError:
        raise TypeError(f"argument 'escape' must be a type or tuple of types.")
    if dtype is None:
        dtype = gf.infer_type(x)
    elif isinstance(dtype, tf.dtypes.DType):
        dtype = dtype.name
    elif isinstance(dtype, (np.dtype, str)):
        dtype = str(dtype)
    else:
        raise TypeError(
            f"argument 'dtype' must be tf.dtypes.DType, np.dtype or str, but got {type(dtype)}."
        )

    with tf.device(device):
        if is_tensor(x):
            if x.dtype != dtype:
                return tf.cast(x, dtype=dtype)
            return tf.identity(x)
        elif gf.is_tensor(x, backend='torch'):
            return astensor(gf.tensoras(x),
                            dtype=dtype,
                            device=device,
                            escape=escape)
        elif sp.isspmatrix(x):
            if gg.backend() == "dgl_tf":
                import dgl
                return dgl.from_scipy(x, idtype=getattr(tf,
                                                        gg.intx())).to(device)
            else:
                return sparse_adj_to_sparse_tensor(x, dtype=dtype)
        elif any((isinstance(x, (np.ndarray, np.matrix)), gg.is_listlike(x),
                  gg.is_scalar(x))):
            return tf.convert_to_tensor(x, dtype=dtype)
        else:
            raise TypeError(
                f"Invalid type of inputs. Allowed data types (Tensor, SparseTensor, Numpy array, Scipy sparse matrix, None), but got {type(x)}."
            )
Example #22
def test_bfs(idtype, n=100):
    def _bfs_nx(g_nx, src):
        edges = nx.bfs_edges(g_nx, src)
        layers_nx = [set([src])]
        edges_nx = []
        frontier = set()
        edge_frontier = set()
        for u, v in edges:
            if u in layers_nx[-1]:
                frontier.add(v)
                edge_frontier.add(g.edge_ids(int(u), int(v)))
            else:
                layers_nx.append(frontier)
                edges_nx.append(edge_frontier)
                frontier = set([v])
                edge_frontier = set([g.edge_ids(u, v)])
        # avoids empty successors
        if len(frontier) > 0 and len(edge_frontier) > 0:
            layers_nx.append(frontier)
            edges_nx.append(edge_frontier)
        return layers_nx, edges_nx

    a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
    g = dgl.from_scipy(a).astype(idtype)

    g_nx = g.to_networkx()
    src = random.choice(range(n))
    layers_nx, _ = _bfs_nx(g_nx, src)
    layers_dgl = dgl.bfs_nodes_generator(g, src)
    assert len(layers_dgl) == len(layers_nx)
    assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))

    g_nx = nx.random_tree(n, seed=42)
    g = dgl.from_networkx(g_nx).astype(idtype)
    src = 0
    _, edges_nx = _bfs_nx(g_nx, src)
    edges_dgl = dgl.bfs_edges_generator(g, src)
    assert len(edges_dgl) == len(edges_nx)
    assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
Example #23
File: train.py Project: yuk12/dgl
def web_main():
    adj, features = load_data(args.dataset)

    features = sparse_to_tuple(features.tocoo())

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # # Create model
    # graph = dgl.from_scipy(adj)
    # graph.add_self_loop()

    # Some preprocessing
    adj_normalization, adj_norm = preprocess_graph(adj)

    # Create model
    graph = dgl.from_scipy(adj_normalization)
    graph = dgl.add_self_loop(graph)  # add_self_loop returns a new graph; keep the result

    # Loss weighting terms
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T),
                                        torch.FloatTensor(adj_norm[1]),
                                        torch.Size(adj_norm[2]))
    adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T),
                                         torch.FloatTensor(adj_label[1]),
                                         torch.Size(adj_label[2]))
    features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T),
                                        torch.FloatTensor(features[1]),
                                        torch.Size(features[2]))

    weight_mask = adj_label.to_dense().view(-1) == 1
    weight_tensor = torch.ones(weight_mask.size(0))
    weight_tensor[weight_mask] = pos_weight

    features = features.to_dense()
    in_dim = features.shape[-1]

    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)
    # create training component
    optimizer = torch.optim.Adam(vgae_model.parameters(),
                                 lr=args.learning_rate)
    print('Total Parameters:',
          sum([p.nelement() for p in vgae_model.parameters()]))

    def get_scores(edges_pos, edges_neg, adj_rec):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        # Predict on test set of edges
        preds = []
        pos = []
        for e in edges_pos:
            # print(e)
            # print(adj_rec[e[0], e[1]])
            preds.append(sigmoid(adj_rec[e[0], e[1]].item()))
            pos.append(adj_orig[e[0], e[1]])

        preds_neg = []
        neg = []
        for e in edges_neg:
            preds_neg.append(sigmoid(adj_rec[e[0], e[1]].data))
            neg.append(adj_orig[e[0], e[1]])

        preds_all = np.hstack([preds, preds_neg])
        labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)

        return roc_score, ap_score

    def get_acc(adj_rec, adj_label):
        labels_all = adj_label.to_dense().view(-1).long()
        preds_all = (adj_rec > 0.5).view(-1).long()
        accuracy = (preds_all == labels_all).sum().float() / labels_all.size(0)
        return accuracy

    # create training epoch
    for epoch in range(args.epochs):
        t = time.time()

        # Training and validation using a full graph
        vgae_model.train()

        logits = vgae_model.forward(graph, features)

        # compute loss
        loss = norm * F.binary_cross_entropy(logits.view(-1),
                                             adj_label.to_dense().view(-1),
                                             weight=weight_tensor)
        kl_divergence = 0.5 / logits.size(0) * (
            1 + 2 * vgae_model.log_std - vgae_model.mean**2 -
            torch.exp(vgae_model.log_std)**2).sum(1).mean()
        loss -= kl_divergence

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = get_acc(logits, adj_label)

        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)

        # Print out performance
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(loss.item()), "train_acc=",
              "{:.5f}".format(train_acc), "val_roc=", "{:.5f}".format(val_roc),
              "val_ap=", "{:.5f}".format(val_ap), "time=",
              "{:.5f}".format(time.time() - t))

    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
          "test_ap=", "{:.5f}".format(test_ap))
Example #24
print(pa_g.number_of_edges('written-by'))
print(pa_g.successors(
    1, etype='written-by'))  # get the authors that write paper #1

# Type name argument could be omitted whenever the behavior is unambiguous.
print(pa_g.number_of_edges())  # Only one edge type, so the etype argument can be omitted

###############################################################################
# A homogeneous graph is just a special case of a heterograph with only one type
# of node and edge.

# Paper-citing-paper graph is a homogeneous graph
pp_g = dgl.heterograph({('paper', 'citing', 'paper'): data['PvsP'].nonzero()})
# equivalent (shorter) API for creating homogeneous graph
pp_g = dgl.from_scipy(data['PvsP'])

# All the ntype and etype arguments could be omitted because the behavior is unambiguous.
print(pp_g.number_of_nodes())
print(pp_g.number_of_edges())
print(pp_g.successors(3))

###############################################################################
# Create a subset of the ACM graph using the paper-author, paper-paper,
# and paper-subject relationships.  Meanwhile, also add the reverse
# relationship to prepare for the later sections.

G = dgl.heterograph({
    ('paper', 'written-by', 'author'):
    data['PvsA'].nonzero(),
    ('author', 'writing', 'paper'):
Example #25
def create_random_graph(n):
    arr = (spsp.random(n, n, density=0.001, format='coo', random_state=100) !=
           0).astype(np.int64)
    return dgl.from_scipy(arr)
Example #26
attention_mask = th.cat([
    attention_mask[:-nb_test],
    th.zeros((nb_word, max_length), dtype=th.long), attention_mask[-nb_test:]
])

# transform one-hot label to class ID for pytorch computation
y = y_train + y_test + y_val
y_train = y_train.argmax(axis=1)
y = y.argmax(axis=1)

# document mask used for updating features
doc_mask = train_mask + val_mask + test_mask

# build DGL Graph
adj_norm = normalize_adj(adj + sp.eye(adj.shape[0]))
g = dgl.from_scipy(adj_norm.astype('float32'), eweight_name='edge_weight')
g.ndata['input_ids'], g.ndata['attention_mask'] = input_ids, attention_mask
g.ndata['label'], g.ndata['train'], g.ndata['val'], g.ndata['test'] = \
    th.LongTensor(y), th.FloatTensor(train_mask), th.FloatTensor(val_mask), th.FloatTensor(test_mask)
g.ndata['label_train'] = th.LongTensor(y_train)
g.ndata['cls_feats'] = th.zeros((nb_node, model.feat_dim))

logger.info('graph information:')
logger.info(str(g))

# create index loader
train_idx = Data.TensorDataset(th.arange(0, nb_train, dtype=th.long))
val_idx = Data.TensorDataset(
    th.arange(nb_train, nb_train + nb_val, dtype=th.long))
test_idx = Data.TensorDataset(
    th.arange(nb_node - nb_test, nb_node, dtype=th.long))
Example #27
def get_graph_dgl(device=None):
    adj = get_scipy_adj()
    G = dgl.from_scipy(adj, device=device)
    return G
Example #28
def create_dgl_graphs(dir):
    edge_data_path = dir + "scipy_graphs/"

    node_data_by_month = [
        pd.read_csv(dir + "surge_2019-0" + str(i) + ".csv")
        for i in range(1, 7)
    ]
    for df in node_data_by_month:
        df["interval_datetime"] = pd.to_datetime(df["interval_datetime"],
                                                 format='%Y-%m-%d %H:%M:%S',
                                                 errors='ignore')

    dgl_graphs = []
    i = 0
    for graph_file in os.listdir(edge_data_path):
        sparse_adj = sparse.load_npz(edge_data_path + graph_file)
        weights = th.tensor(list(sparse_adj.data), dtype=th.int32)

        month_num, interval_num = graph_file.split("-")
        month_num = int(month_num.split("_")[-1])
        interval_num = int(interval_num.split(".")[0]) - 1

        cur_interval_start = datetime.datetime(2019, month_num, 1, 0, 00,
                                               0) + datetime.timedelta(
                                                   0, 10 * 60 * interval_num)
        node_DF = node_data_by_month[month_num - 1][node_data_by_month[
            month_num - 1]["interval_datetime"] == cur_interval_start]

        label_DF = node_data_by_month[month_num - 1][
            node_data_by_month[month_num -
                               1]["interval_datetime"] == cur_interval_start +
            datetime.timedelta(0, 10 * 60)]
        node_labels = th.from_numpy(
            label_DF[label_DF.columns[9:]].values.astype(int).T)

        node_base_features = node_DF[[
            'is_holiday', "PU_time_2AM", "PU_time_6AM", "PU_time_10AM",
            "PU_time_2PM", "PU_time_6PM", "PU_time_10PM"
        ]].values.astype(int)[0]
        node_surge_features = node_DF[node_DF.columns[9:]].values.astype(int)

        node_base_features = np.array(
            [node_base_features for i in range(node_surge_features.size)])
        node_features = th.from_numpy(
            np.vstack([node_surge_features, node_base_features.T]).T)

        g = dgl.from_scipy(sparse_adj)
        g.edata['feature'] = weights
        g.ndata['feature'] = node_features
        g.ndata['label'] = node_labels

        # random ~90% train split; the test/val masks below are left all-False
        train_mask = np.random.randint(0, 10, size=len(node_labels))
        test_mask = np.where(train_mask == -1, 1, 0).astype(bool)  # always False
        val_mask = np.where(train_mask == -1, 1, 0).astype(bool)   # always False
        train_mask = np.where(train_mask > 0, 1, 0).astype(bool)

        g.ndata['train_mask'] = th.from_numpy(train_mask)
        g.ndata['test_mask'] = th.from_numpy(test_mask)
        g.ndata['val_mask'] = th.from_numpy(val_mask)
        g.add_edges(g.nodes(), g.nodes())

        dgl_graphs.append(g)

        i += 1
        if i > 100:
            break

    np.random.shuffle(dgl_graphs)
    return dgl_graphs
Example #29
parser.add_argument('--savemodelpath', type=str, default='stgcnwavemodel.pt', help='save model path')
parser.add_argument('--pred_len', type=int, default=5, help='how many steps away we want to predict')
parser.add_argument('--control_str', type=str, default='TNTSTNTST', help='model structure controller, T: Temporal Layer, S: Spatial Layer, N: Norm Layer')
parser.add_argument('--channels', type=int, nargs='+', default=[1, 16, 32, 64, 32, 128], help='channel sizes of the model layers')
args = parser.parse_args()

device = torch.device("cuda") if torch.cuda.is_available() and not args.disablecuda else torch.device("cpu")

with open(args.sensorsfilepath) as f:
    sensor_ids = f.read().strip().split(',')

distance_df = pd.read_csv(args.disfilepath, dtype={'from': 'str', 'to': 'str'})

adj_mx = get_adjacency_matrix(distance_df, sensor_ids)
sp_mx = sp.coo_matrix(adj_mx)
G = dgl.from_scipy(sp_mx)


df = pd.read_hdf(args.tsfilepath)
num_samples, num_nodes = df.shape

tsdata = df.to_numpy()


n_his = args.window

save_path = args.savemodelpath



n_pred = args.pred_len
Example #30
import dgl
from mxnet import nd
import scipy.sparse as sp

spmat = sp.rand(4, 4, format='csr',
                density=0.5)  # 50% nonzero entries, i.e., half of all possible edges exist
# dgl.from_scipy requires a square matrix, while scipy can also generate rectangular ones
print(dgl.from_scipy(spmat), '\n matrix of spmat: \n', spmat)

from scipy.sparse import rand
matrix = rand(3, 4, density=0.25, format="csr", random_state=42)  # a rectangular matrix
print(matrix.todense())  # todense gives the dense representation of the sparse matrix

import networkx as nx
nx_g = nx.path_graph(5)  # a path graph 0-1-2-3-4
print(dgl.from_networkx(nx_g))  # eight edges, since NetworkX builds an undirected graph (each edge becomes two directed edges)

nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3),
                  (0, 0)])  # networkx's DiGraph avoids the edge-doubling issue above
print(dgl.from_networkx(nxg))