Exemplo n.º 1
0
    def get_train_data(self):
        """Build the DGL graphs, feature tensors and rough alignment candidates.

        Both NetworkX graphs are converted to DGL graphs and both node feature
        arrays to float tensors. For the unsupervised alignment scenario the
        randomly initialised ``self.model.GCNLayer`` is applied to each graph
        and the result is used to find alignment seeds: the cosine similarity
        of the L2-normalised embeddings is multiplied element-wise with the
        cosine similarity of the raw node features, and for every row the
        ``self.config.top_candidates`` best-scoring columns are kept.

        Returns:
            ``(train_indices, g1, g2, feat1, feat2)`` where ``train_indices``
            is a ``defaultdict(list)`` mapping each row index of the
            similarity matrix to a list of candidate column indices (the
            candidates of one row are not sorted by score).
        """
        g1 = dgl.from_networkx(self.g1)
        g2 = dgl.from_networkx(self.g2)

        feat1 = torch.from_numpy(self.node_feat1).float()
        feat2 = torch.from_numpy(self.node_feat2).float()

        # Unsupervised alignment: use the randomly initialised GCN layer to
        # find the alignment seeds.
        h1 = self.model.GCNLayer(g1, feat1).cpu().detach().numpy()
        h2 = self.model.GCNLayer(g2, feat2).cpu().detach().numpy()

        h1 = preprocessing.normalize(h1, norm='l2')
        h2 = preprocessing.normalize(h2, norm='l2')

        rough_similarity = cosine_similarity(h1, h2) * cosine_similarity(
            self.node_feat1, self.node_feat2)

        # Pick the top-K entries per row. argpartition requires
        # kth < number of columns, so clamp it: asking for at least as many
        # candidates as there are columns then returns every column instead
        # of raising ValueError.
        top_k = self.config.top_candidates
        kth = min(top_k, rough_similarity.shape[1] - 1)
        candidates = np.argpartition(-rough_similarity, kth=kth, axis=1)
        candidates = candidates[:, :top_k]

        # train_indices: row index -> list of candidate column indices
        train_indices = defaultdict(list)
        for row, picked in enumerate(candidates.tolist()):
            train_indices[row] = picked
        return train_indices, g1, g2, feat1, feat2
Exemplo n.º 2
0
def test_sequential():
    """Exercise ``nn.Sequential`` on a single graph and on a list of graphs."""
    ctx = F.ctx()

    def three_layer_net(layer_cls):
        # Stack three fresh instances of the layer and initialise them on ctx.
        stacked = nn.Sequential()
        for _ in range(3):
            stacked.add(layer_cls())
        stacked.initialize(ctx=ctx)
        return stacked

    # Single graph: every layer maps (n_feat, e_feat) -> (n_feat, e_feat).
    class ExampleLayer(gluon.nn.Block):
        def forward(self, graph, n_feat, e_feat):
            local = graph.local_var()
            local.ndata['h'] = n_feat
            local.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            n_feat += local.ndata['h']
            local.apply_edges(fn.u_add_v('h', 'h', 'e'))
            e_feat += local.edata['e']
            return n_feat, e_feat

    g = dgl.graph(([], [])).to(ctx)
    g.add_nodes(3)
    # All 9 (src, dst) pairs over the three nodes, self loops included.
    sources = [0, 1, 2] * 3
    destinations = [node for node in range(3) for _ in range(3)]
    g.add_edges(sources, destinations)
    net = three_layer_net(ExampleLayer)
    n_feat = F.randn((3, 4))
    e_feat = F.randn((9, 4))
    n_feat, e_feat = net(g, n_feat, e_feat)
    assert n_feat.shape == (3, 4)
    assert e_feat.shape == (9, 4)

    # Multiple graphs: each layer halves the number of rows by summing
    # consecutive pairs, so 32 -> 16 -> 8 -> 4 rows across the three graphs.
    class ExampleLayer(gluon.nn.Block):
        def forward(self, graph, n_feat):
            local = graph.local_var()
            local.ndata['h'] = n_feat
            local.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
            n_feat += local.ndata['h']
            return n_feat.reshape(local.number_of_nodes() // 2, 2, -1).sum(1)

    g1 = dgl.from_networkx(nx.erdos_renyi_graph(32, 0.05)).to(ctx)
    g2 = dgl.from_networkx(nx.erdos_renyi_graph(16, 0.2)).to(ctx)
    g3 = dgl.from_networkx(nx.erdos_renyi_graph(8, 0.8)).to(ctx)

    net = three_layer_net(ExampleLayer)
    n_feat = F.randn((32, 4))
    n_feat = net([g1, g2, g3], n_feat)
    assert n_feat.shape == (4, 4)
Exemplo n.º 3
0
def batch_graphs(data_root, data_list, windowing=False):
    """Load the graphs named in a list file and batch them into one DGL graph.

    Args:
        data_root: Directory containing ``data_list`` and the gpickle files.
        data_list: Name of a text file (relative to ``data_root``) holding
            one gpickle file name per line.
        windowing: If true, every loaded graph is first split with
            ``sliding_window.perform_windowing`` and each resulting window is
            added as a graph of its own.

    Returns:
        ``(batched_graph, conc_labels, conc_features)``: the batched DGL
        graph plus the per-graph label and feature tensors concatenated in
        the same order as the graphs.
    """
    # Use a context manager so the list file is closed deterministically.
    with open(os.path.join(data_root, data_list)) as list_file:
        data_files = [line.rstrip() for line in list_file]

    all_graphs = []
    all_labels = []
    all_features = []

    for file_name in data_files:
        nxg = nx.read_gpickle(os.path.join(data_root, file_name))

        # With windowing each file contributes several graphs, otherwise one.
        if windowing:
            pieces = sliding_window.perform_windowing(nxg)
        else:
            pieces = [nxg]

        for piece in pieces:
            # Annotated labels and features of this graph.
            labels = get_labels(piece)
            features = chris_get_features(piece)

            dgl_g = dgl.from_networkx(piece)

            # Collect the information for batching.
            all_graphs.append(dgl_g)
            all_labels.append(labels)
            all_features.append(features)

    # Batch the graphs.
    batched_graph = dgl.batch(all_graphs)

    # all_labels is a list of tensors, so concatenate into one tensor.
    conc_labels = torch.LongTensor(batched_graph.number_of_nodes(), 1)
    torch.cat(all_labels, out=conc_labels)

    # all_features is a list of tensors, so concatenate into one tensor.
    conc_features = torch.Tensor(batched_graph.number_of_nodes(), 1)
    torch.cat(all_features, out=conc_features)

    return batched_graph, conc_labels, conc_features
Exemplo n.º 4
0
def test_simple_pool():
    """Check sum/avg/max/sort pooling on a single graph and on a batch."""
    g = dgl.from_networkx(nx.path_graph(15)).to(F.ctx())

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10)  # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # Each simple pooling layer paired with the reduction it must match.
    reductions = ((sum_pool, F.sum), (avg_pool, F.mean), (max_pool, F.max))

    # test#1: basic
    h0 = F.randn((g.number_of_nodes(), 5))
    for pool, reduce_fn in reductions:
        pooled = pool(g, h0)
        check_close(F.squeeze(pooled, 0), reduce_fn(h0, 0))
    h1 = sort_pool(g, h0)
    assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.ndim == 2

    # test#2: batched graph
    g_ = dgl.from_networkx(nx.path_graph(5)).to(F.ctx())
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.number_of_nodes(), 5))
    # Row ranges of the five member graphs (15, 5, 15, 5, 15 nodes).
    segments = ((0, 15), (15, 20), (20, 35), (35, 40), (40, 55))
    for pool, reduce_fn in reductions:
        pooled = pool(bg, h0)
        per_graph = [reduce_fn(h0[lo:hi], 0) for lo, hi in segments]
        truth = mx.nd.stack(*per_graph, axis=0)
        check_close(pooled, truth)

    h1 = sort_pool(bg, h0)
    assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2
def preprocessing(data, emb_file, seed, trans):
    """Encode every graph of a dataset against a shared node embedding.

    All graphs in ``data[0]`` are batched into one DGL graph, the embedding
    is loaded from ``emb_file`` and, for every member graph, the sub-graph
    made of that graph's node range in the batch is encoded and projected
    with the embedding.

    Args:
        data: Sequence whose first element is the list of graph samples;
            each sample exposes its NetworkX graph as ``.g``.
        emb_file: Path of a text file readable by ``np.loadtxt``.
        seed: Seed forwarded to ``separate_data``.
        trans: If true, the embedding is multiplied by ``DCT(num_nodes).T``.

    Returns:
        ``(Sub, idx_list, data[0])`` where ``Sub`` maps each sample to
        ``np.dot(emb, encode(G, subgraph))``.
    """
    samples = data[0]
    nx_graphs = [sample.g for sample in samples]
    dgl_graphs = [dgl.from_networkx(graph) for graph in nx_graphs]
    batch_graphs = dgl.batch(dgl_graphs)
    num_nodes = len(batch_graphs.nodes())
    graph_size = [len(g.nodes()) for g in nx_graphs]

    emb = np.loadtxt(emb_file)
    if trans:
        emb = np.dot(emb, DCT(num_nodes).T)
    G = batch_graphs.to_networkx()
    Sub = {}
    # Graph i owns the node ids [offset_i, offset_i + size_i) of the batch,
    # where offset_i is the sum of the sizes of all earlier graphs. The
    # previous prefix-sum indexing was shifted by one graph (graph 0 received
    # an empty range and graph i the nodes of graph i-1).
    node_start = 0
    for sample, size in zip(samples, graph_size):
        node_end = node_start + size
        nbunch = list(range(node_start, node_end))
        subgraph = nx.subgraph(G, nbunch)
        Sub[sample] = np.dot(emb, encode(G, subgraph))
        node_start = node_end

    idx_list = separate_data(samples, seed=seed)
    return Sub, idx_list, samples
Exemplo n.º 6
0
def edge_list_to_graph(args):
    """Read one edge-list file, convert it to a DGL graph and record its class.

    ``args`` is an indexable of ``(index, path, graph_list, label_list)``.
    The class label is the 1-based position of the first entry of the
    module-level ``acc_cnts`` that is greater than ``index``; if there is no
    such entry the label is ``len(acc_cnts) + 1``. The label is appended to
    ``label_list`` and the graph to ``graph_list``.
    """
    index, path, graph_sink, label_sink = args[0], str(args[1]), args[2], args[3]
    print(f'Preprocessing file: {path}')
    with open(path, "rb") as handle:
        nx_graph = nx.read_edgelist(handle, create_using=nx.Graph, nodetype=int)

        # Label the malware class from the cumulative counts.
        for position, bound in enumerate(acc_cnts, start=1):
            if index < bound:
                class_ = position
                break
        else:
            class_ = len(acc_cnts) + 1

        dgl_graph = dgl.from_networkx(nx_graph)

        label_sink.append(class_)
        graph_sink.append(dgl_graph)
Exemplo n.º 7
0
def group_labels_features(data_root, data_list, windowing=False):
    """Load every listed graph together with its labels and features.

    Args:
        data_root: Directory containing ``data_list`` and the gpickle files.
        data_list: Name of a text file (relative to ``data_root``) holding
            one gpickle file name per line.
        windowing: Not used by this function.

    Returns:
        A list with one ``[dgl_graph, labels, features]`` entry per listed
        file, in file order.
    """
    # Use a context manager so the list file is closed deterministically.
    with open(os.path.join(data_root, data_list)) as list_file:
        data_files = [line.rstrip() for line in list_file]

    dataset = []

    print("loading {} files".format(len(data_files)))
    for file_name in data_files:
        nxg = nx.read_gpickle(os.path.join(data_root, file_name))

        # Annotated labels and features of this graph.
        labels = get_labels(nxg)
        features = chris_get_features(nxg)

        dgl_g = dgl.from_networkx(nxg)

        dataset.append([dgl_g, labels, features])

    return dataset
Exemplo n.º 8
0
def convert_nx_to_dgl(G: Graph) -> DGLHeteroGraph:
    """
    Convert a NetworkX graph into a DGL graph.

    The node attribute ``nfeat`` and the edge attributes ``efeat`` and
    ``label`` are carried over to the DGL graph.
    """
    node_keys = ["nfeat"]
    edge_keys = ["efeat", "label"]
    return from_networkx(nx_graph=G, node_attrs=node_keys, edge_attrs=edge_keys)
Exemplo n.º 9
0
def test_tagconv(out_dim):
    """Check ``nn.TAGConv`` against the reference ``_S2AXWb`` and its output width."""
    graph = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
    ctx = F.ctx()
    adj = graph.adjacency_matrix(transpose=True, ctx=ctx)
    # Per-node normaliser: in-degree ** -0.5.
    norm = mx.nd.power(graph.in_degrees().astype('float32'), -0.5)

    layer = nn.TAGConv(5, out_dim, bias=True)
    layer.initialize(ctx=ctx)
    print(layer)

    # test#1: basic
    feats = F.ones((3, 5))
    out = layer(graph, feats)
    # The layer must not leave any feature behind on the graph.
    assert len(graph.ndata) == 0
    assert len(graph.edata) == 0
    # Append singleton axes so the normaliser broadcasts over feature dims.
    broadcast_shape = norm.shape + (1,) * (feats.ndim - 1)
    norm = norm.reshape(broadcast_shape).as_in_context(feats.context)

    expected = _S2AXWb(adj, norm, feats, layer.lin.data(ctx), layer.h_bias.data(ctx))
    assert F.allclose(out, expected)

    layer = nn.TAGConv(5, out_dim)
    layer.initialize(ctx=ctx)

    # test#2: basic
    feats = F.ones((3, 5))
    out = layer(graph, feats)
    assert out.shape[-1] == out_dim
Exemplo n.º 10
0
    def graph_update(self, number):
        """
        Build the DGL graph for one flattened (batch, frame) index and cache it.

        ``number`` is split into a batch index (``number // self.frames``)
        and a frame index (``number % self.frames``). The frame index selects
        which admittance matrix of ``self.Y`` is used: matrix 0 for frame 0,
        matrix 1 for frames 1-11 and matrix 2 for any later frame.

        Attributes read (shapes as stated by the original documentation):
            self.Y: [batch_size, frames, num_nodes, num_nodes] the processed
                node admittance matrix
            self.infos: [batch_size, in_channels, frames, num_nodes] the
                features of each node

        :param number: flattened sample index
        :return: the graph, placed on ``cuda:0`` with the node features stored
            under ``ndata['feats']``; it is also stored in
            ``self.graph_list[number]``
        """
        # TODO: the frame check should be based on frame_0
        batches = number // self.frames
        frames = number % self.frames
        if frames > 11:
            Y_number = 2
        elif frames >= 1:
            Y_number = 1
        else:
            Y_number = 0
        Y_need = self.Y[batches, Y_number, :, :].cpu().numpy()
        # from_numpy_array is the replacement for from_numpy_matrix, which
        # was removed in NetworkX 3.0.
        nx_graph = nx.from_numpy_array(Y_need)
        graph = dgl.from_networkx(nx_graph).to(torch.device('cuda:0'))
        # add features to all the nodes
        graph.ndata['feats'] = (self.infos[batches, :, frames, :]).T
        # graph.edata['weights'] = (self.Y[batches, Y_number, :, :] *
        #                                       self.weights).permute((1, 2, 0))\
        #     .reshape((self.num_nodes * self.num_nodes, self.c_in))

        self.graph_list[number] = graph
        return graph
Exemplo n.º 11
0
    def __getitem__(self, idx):
        """Load graph ``idx`` and return it as a DGL graph.

        Edges whose ``'LW'`` label contains a ``'.'`` or whose upper-cased
        label is missing from ``self.edge_map`` are removed. The remaining
        edges get a ``one_hot`` attribute taken from ``self.edge_map`` and
        the nodes an ``interface`` attribute computed by ``get_labels``.

        Returns:
            ``(g_dgl, [idx])``.

        Raises:
            Exception: whatever ``read_graph`` raised, after the failing path
                has been printed.
        """
        g_path = os.path.join(self.path, self.all_graphs[idx])
        try:
            graph = read_graph(g_path)
        except Exception as e:
            print(e)
            print("ERROR could not read graph file:\n", g_path)
            # Re-raise: carrying on without a graph would only fail a few
            # lines later with an unrelated UnboundLocalError.
            raise

        one_hot = {}
        # get_edge_attributes returns a fresh dict, so removing edges from
        # the graph while looping over it is safe.
        for edge, label in (nx.get_edge_attributes(graph, 'LW')).items():
            if '.' in label:
                graph.remove_edge(edge[0], edge[1])
                continue
            try:
                encoded = self.edge_map[label.upper()]
            except KeyError:
                # Unrecognised edge label: drop the edge.
                graph.remove_edge(edge[0], edge[1])
                continue
            one_hot[edge] = torch.tensor(encoded)

        interface = get_labels(graph, interaction=self.interaction,
                mode=self.use_mode)
        nx.set_node_attributes(graph, name='interface', values=interface)
        nx.set_edge_attributes(graph, name='one_hot', values=one_hot)

        g_dgl = dgl.from_networkx(nx_graph=graph, edge_attrs=['one_hot'], node_attrs=['interface'])

        return g_dgl, [idx]
Exemplo n.º 12
0
def parsed_tree_to_dgl_tree(parsed_tree, vocab):
    """Convert a parsed tree into a DGL graph whose edges point child -> parent.

    Nodes with exactly one child are skipped (their child takes their place),
    nodes with two children become internal nodes with ``x = PAD_WORD`` and
    leaves get ``x = vocab.get(value, PAD_WORD)``. Every node created during
    the traversal has ``y = 0``. The node attributes ``x`` and ``y`` are
    copied to the DGL graph.
    """
    PAD_WORD = -1
    tree = nx.DiGraph()

    def _rec_build(node):
        children = node.child
        fan_out = len(children)

        # Unary chain: skip this node and continue with its only child.
        if fan_out == 1:
            return _rec_build(children[0])

        # Leaf: look the word up in the vocabulary.
        if fan_out == 0:
            leaf_id = tree.number_of_nodes()
            tree.add_node(leaf_id, x=vocab.get(node.value, PAD_WORD), y=0)
            return leaf_id

        # Internal node: only binary branching is supported.
        assert fan_out == 2
        parent_id = tree.number_of_nodes()
        tree.add_node(parent_id, x=PAD_WORD, y=0)
        left_id = _rec_build(children[0])
        right_id = _rec_build(children[1])
        tree.add_edge(left_id, parent_id)
        tree.add_edge(right_id, parent_id)
        return parent_id

    root = _rec_build(parsed_tree)
    # NOTE(review): this sets the root's x to PAD_WORD again; for a tree that
    # collapses to a single leaf it overwrites the word id - confirm intended.
    tree.add_node(root, x=PAD_WORD)
    return dgl.from_networkx(tree, node_attrs=['x', 'y'])
Exemplo n.º 13
0
 def networkx_to_torch(self, networkx_graph):
     """Convert a NetworkX graph to a DGL graph with self-loops on ``self.device``."""
     import dgl

     converted = dgl.from_networkx(networkx_graph)
     # Strip existing self-loops before adding them so none is duplicated.
     with_loops = dgl.add_self_loop(dgl.remove_self_loop(converted))
     return with_loops.to(self.device)
Exemplo n.º 14
0
def make_full_graph(g):
    """
        Build a fully connected graph over the nodes of ``g``.

        The node features ``g.ndata['feat']`` are reused; the edge feature
        ``'feat'`` of the new graph is a zero tensor with one entry per edge.
    """
    node_count = g.number_of_nodes()
    complete = dgl.from_networkx(nx.complete_graph(node_count))
    complete.ndata['feat'] = g.ndata['feat']
    edge_count = complete.number_of_edges()
    complete.edata['feat'] = torch.zeros(edge_count)
    return complete
Exemplo n.º 15
0
def load_dgl():
    """Load the graph via ``load_graph`` and store it as a DGL graph in the global ``g``.

    The node attributes ``vector`` and ``node_order`` are copied; no edge
    attributes are.
    """
    global g
    source_graph = load_graph()
    kept_node_attrs = ['vector', 'node_order']
    g = dgl.from_networkx(source_graph, node_attrs=kept_node_attrs, edge_attrs=None)
    print("Meta-feature graph from datasets loaded")
Exemplo n.º 16
0
def convert_to_dgl_graph(graph):
    """Build a directed, weighted DGL graph from a ``{"src,dst": weight}`` mapping.

    The first two comma-separated fields of each key are parsed as integer
    node ids and the value as a float stored in the ``weight`` edge attribute.
    """
    def parsed_edges():
        # Yield (src, dst, attribute-dict) triples in mapping order.
        for key, raw_weight in graph.items():
            fields = key.split(",")
            yield int(fields[0]), int(fields[1]), {'weight': float(raw_weight)}

    digraph = nx.DiGraph()
    digraph.add_edges_from(parsed_edges())
    return dgl.from_networkx(digraph, edge_attrs=['weight'])
Exemplo n.º 17
0
def test_graph_conv(idtype, out_dim):
    """Check ``nn.GraphConv`` numerically and that it leaves graph features untouched."""
    g = dgl.from_networkx(nx.path_graph(3))
    g = g.astype(idtype).to(F.ctx())
    ctx = F.ctx()
    adj = g.adjacency_matrix(transpose=True, ctx=ctx)

    def run_clean(layer, feat):
        # Apply the layer and verify it stored nothing on the graph.
        out = layer(g, feat)
        assert len(g.ndata) == 0
        assert len(g.edata) == 0
        return out

    # Plain and higher-dimensional inputs, used by every scenario below.
    input_shapes = ((3, 5), (3, 5, 5))

    # Un-normalised convolution with bias: compare with the reference.
    conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
    conv.initialize(ctx=ctx)
    for shape in input_shapes:
        h0 = F.ones(shape)
        h1 = run_clean(conv, h0)
        check_close(h1, _AXWb(adj, h0, conv.weight, conv.bias))

    # Default settings, inference mode.
    conv = nn.GraphConv(5, out_dim)
    conv.initialize(ctx=ctx)
    for shape in input_shapes:
        h0 = F.ones(shape)
        h1 = run_clean(conv, h0)

    # Default settings, training mode.
    conv = nn.GraphConv(5, out_dim)
    conv.initialize(ctx=ctx)
    with autograd.train_mode():
        for shape in input_shapes:
            h0 = F.ones(shape)
            h1 = run_clean(conv, h0)

    # Pre-existing node features must not be overridden.
    g.ndata["h"] = 2 * F.ones((3, 1))
    h1 = conv(g, h0)
    assert len(g.ndata) == 1
    assert len(g.edata) == 0
    assert "h" in g.ndata
    check_close(g.ndata['h'], 2 * F.ones((3, 1)))
Exemplo n.º 18
0
def rGIN(g):
    """Embed a NetworkX graph with one randomly initialised GIN layer.

    Each node gets one standard-normal feature; a ``GINConv`` wrapping a
    fresh ``Linear(1, 1)`` with ``'sum'`` aggregation is applied and the
    result is sum-pooled. Returns the first pooled row as a NumPy array.
    """
    graph = dgl.from_networkx(g)
    noise = np.random.standard_normal(size=(graph.number_of_nodes(), 1))
    x = torch.tensor(noise, dtype=torch.float)
    graph.ndata['x'] = x
    gin_layer = GINConv(torch.nn.Linear(1, 1), 'sum')
    node_repr = gin_layer(graph, x)
    pooled = SumPooling()(graph, node_repr)
    return pooled[0].detach().numpy()
Exemplo n.º 19
0
def load_graph():
    """Read the reference CSV and return it as a directed DGL graph.

    Each row of ``data/knowledge_aquisition_reference.csv`` contributes one
    edge from its ``paper_id`` to its ``reference_id``.
    """
    # CSV -> pandas DataFrame
    reference_edges = pd.read_csv('data/knowledge_aquisition_reference.csv')
    # DataFrame -> NetworkX directed graph
    citation_graph: nx.DiGraph = nx.from_pandas_edgelist(
        reference_edges,
        source='paper_id',
        target='reference_id',
        create_using=nx.DiGraph(),
    )
    # NetworkX -> DGL
    # NOTE (original author): nodes in the DGL graph are ordered by paper id.
    return dgl.from_networkx(citation_graph)
Exemplo n.º 20
0
def make_full_graph(g):
    """
        Build a fully connected graph over the nodes of ``g``.

        Only the connectivity is made complete: the node features
        ``g.ndata['feat']`` are reused, while the original edge features are
        dropped and replaced by an all-zero integer (long) tensor with one
        entry per edge.
    """
    node_count = g.number_of_nodes()
    complete = dgl.from_networkx(nx.complete_graph(node_count))
    complete.ndata['feat'] = g.ndata['feat']
    edge_count = complete.number_of_edges()
    complete.edata['feat'] = torch.zeros(edge_count).long()
    return complete
Exemplo n.º 21
0
def test_appnp_conv():
    """Check that ``nn.APPNPConv`` preserves the feature shape."""
    graph = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()

    layer = nn.APPNPConv(3, 0.1, 0)
    layer.initialize(ctx=ctx)
    print(layer)

    # test#1: basic
    feats = F.randn((20, 10))
    out = layer(graph, feats)
    expected_shape = (20, 10)
    assert out.shape == expected_shape
Exemplo n.º 22
0
def test_cheb_conv(out_dim):
    """Check that ``nn.ChebConv`` maps 10 input features to ``out_dim`` outputs."""
    graph = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()

    layer = nn.ChebConv(10, out_dim, 3)  # k = 3
    layer.initialize(ctx=ctx)
    print(layer)

    # test#1: basic
    feats = F.randn((20, 10))
    out = layer(graph, feats)
    expected_shape = (20, out_dim)
    assert out.shape == expected_shape
Exemplo n.º 23
0
def main(args):
    """Generate a synthetic dataset, save it, and train and save a dummy GNN.

    ``args`` provides ``dataset`` (one of ``syn1`` .. ``syn5``), ``feat_dim``,
    ``hidden_dim``, ``lr``, ``wd`` and ``epochs``. The graph is written to
    ``./<dataset>.bin`` and the trained weights to
    ``./dummy_model_<dataset>.pth``.

    Raises:
        NotImplementedError: if ``args.dataset`` is not a known dataset name.
    """
    # Load dataset.
    generators = {
        'syn1': gen_syn1,
        'syn2': gen_syn2,
        'syn3': gen_syn3,
        'syn4': gen_syn4,
        'syn5': gen_syn5,
    }
    if args.dataset not in generators:
        raise NotImplementedError
    nx_graph, labels, _ = generators[args.dataset]()

    # Transform to a DGL graph with labels and random node features.
    graph = dgl.from_networkx(nx_graph)
    labels = th.tensor(labels, dtype=th.long)
    graph.ndata['label'] = labels
    graph.ndata['feat'] = th.randn(graph.number_of_nodes(), args.feat_dim)

    # Save the graph for later use; the hidden size travels with it.
    label_dict = {'hid_dim': th.tensor(args.hidden_dim, dtype=th.long)}
    save_graphs(filename='./' + args.dataset + '.bin',
                g_list=[graph],
                labels=label_dict)

    num_classes = graph.ndata['label'].max().item() + 1
    n_feats = graph.ndata['feat']

    # Create model.
    dummy_model = dummy_gnn_model(args.feat_dim, args.hidden_dim, num_classes)
    loss_fn = nn.CrossEntropyLoss()
    optim = th.optim.Adam(dummy_model.parameters(),
                          lr=args.lr,
                          weight_decay=args.wd)

    # Train and report.
    for epoch in range(args.epochs):
        dummy_model.train()

        logits = dummy_model(graph, n_feats)
        loss = loss_fn(logits, labels)
        predictions = logits.argmax(dim=1)
        acc = th.sum(predictions == labels).item() / len(labels)

        optim.zero_grad()
        loss.backward()
        optim.step()

        print('In Epoch: {:03d}; Acc: {:.4f}; Loss: {:.6f}'.format(epoch, acc, loss.item()))

    # Save model.
    model_path = os.path.join('./', 'dummy_model_{}.pth'.format(args.dataset))
    th.save(dummy_model.state_dict(), model_path)
Exemplo n.º 24
0
def test_sg_conv():
    """Check that ``nn.SGConv`` maps 5 input features to 2 outputs per node."""
    graph = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()

    layer = nn.SGConv(5, 2, 2)
    layer.initialize(ctx=ctx)
    print(layer)

    # test #1: basic
    feats = F.randn((graph.number_of_nodes(), 5))
    out = layer(graph, feats)
    expected_shape = (graph.number_of_nodes(), 2)
    assert out.shape == expected_shape
Exemplo n.º 25
0
def load_batch_graph(dataset):
    """Load a dataset and batch all of its graphs.

    Returns:
        ``(batch_graphs, batch_features, graph_size)``: the batched DGL
        graph, the node features of all graphs concatenated along dim 0, and
        the node count of every graph.
    """
    samples = load_data(dataset, True)[0]
    nx_graphs = [sample.g for sample in samples]
    batch_graphs = dgl.batch([dgl.from_networkx(nx_g) for nx_g in nx_graphs])

    batch_features = torch.cat([sample.node_features for sample in samples], 0)
    graph_size = [len(nx_g.nodes()) for nx_g in nx_graphs]

    return batch_graphs, batch_features, graph_size
Exemplo n.º 26
0
def build_graph(smiles):
    """
    Build a DGL graph from the SMILES representation of a molecule from the train/test data.

    The SMILES string is first parsed by ``pysmiles.read_smiles`` into a
    NetworkX graph. According to the original notes, that intermediate graph
    describes a molecule: nodes have an 'element', 'aromatic' and a 'charge',
    and if `explicit_hydrogen` is False a 'hcount'; depending on the input
    they also have 'isotope' and 'class' information; edges have an 'order'.

    :param smiles: a string object of SMILES format
    :return: the DGL graph created from the parsed molecule, with
        ``ndata['node_feats']`` (the output of ``pt_lookup`` on the element
        column, whose last two columns are overwritten with the hydrogen
        count and the aromatic flag) and ``edata['edge_feats']`` (float32
        bond orders, every bond order listed twice in a row).
    """
    '''
    can access node data and edge data when the graph is in networkx format
    dgl.from_networkx(g) converts networkx to dgl graph but the node data and edge data doesnt seem to be transferred
    Goal: save the node feats and edge feats of networkx as tensor and set them to dgl graph ndata and edata
    Question: Do we save ndata as ('C', 'C', 'C', 'O', 'C') or do we create one hot vectors like in the hw
    '''
    # read the smile graphs in using pysmiles & build network
    g = pysmiles.read_smiles(smiles)

    # get the features from the graph and convert to tensor
    # Each of these views yields (node, value) pairs for one attribute.
    elems = g.nodes(data='element')
    h_count = g.nodes(data='hcount')
    aros = g.nodes(data='aromatic')
    raw_node_feats = []
    for elem, data, aro in zip(elems, h_count, aros):
        # Row layout: [node, element, hcount, aromatic].
        node = list(elem)
        node.append(data[1])
        # "* 1" turns the aromatic flag into a number (presumably bool -> int).
        node.append(aro[1] * 1)
        raw_node_feats.append(node)
    # NOTE(review): rows mix node ids, strings and numbers, so NumPy presumably
    # coerces the whole array to one common (string) dtype - confirm.
    na = np.array(list(raw_node_feats))
    # Column 1 is the element of every node.
    byte_node_feats = tf.convert_to_tensor(na[:, 1])

    # turn the byte string node feats into one_hot node feats
    node_feats = pt_lookup(byte_node_feats).numpy()
    # The last two feature columns are overwritten with hcount and aromatic.
    node_feats[:, -2] = na[:, 2]
    node_feats[:, -1] = na[:, 3]
    node_feats = tf.convert_to_tensor(node_feats)

    # get edge data and extract bonds, double them, then convert to tensor
    edata = g.edges(data='order')
    bonds = list(edata)
    na = np.array(bonds)
    # Pair every bond order with itself and flatten: [o1, o1, o2, o2, ...].
    # NOTE(review): presumably meant to cover both directed edges DGL creates
    # per undirected bond - confirm this ordering matches DGL's edge ids.
    tup = zip(na[:, 2], na[:, 2])
    bond_data = tf.convert_to_tensor(list(itertools.chain(*tup)))
    bond_data = tf.cast(bond_data, tf.float32)
    # build dgl graph
    dgl_graph = dgl.from_networkx(g)

    dgl_graph.ndata['node_feats'] = node_feats
    dgl_graph.edata['edge_feats'] = bond_data

    return dgl_graph
Exemplo n.º 27
0
def prepare_minibatch(targets, node_mptype_mpinstances, type_mask, node_orders,
                      nlayer, sampling, device):
    """Sample metapath instances layer by layer and build the per-layer graphs.

    Starting from ``targets``, each layer groups the current targets by node
    type, samples metapath instances per type with
    ``sample_mpinstances_perntype`` and builds, for each metapath type, a DGL
    graph whose edges go from the first to the last node of every instance.
    All nodes appearing in the sampled instances become the targets of the
    next layer. Results of layer ``i`` are stored at position ``-i - 1`` of
    the returned lists.

    Returns:
        ``(layer_ntype_mptype_g, layer_ntype_mptype_mpinstances,
        layer_ntype_mptype_iftargets, batch_ntype_orders)``; the last maps
        each node type to a ``{node: order}`` dict of the final targets,
        sorted by ``node_orders``.
    """
    layer_ntype_mptype_g = [defaultdict(dict) for _ in range(nlayer)]
    layer_ntype_mptype_mpinstances = [defaultdict(dict) for _ in range(nlayer)]
    layer_ntype_mptype_iftargets = [defaultdict(dict) for _ in range(nlayer)]

    frontier = targets
    for layer_index in range(nlayer):
        # The per-layer lists are filled back to front.
        slot = -(layer_index + 1)

        # Group the current targets by node type.
        ntype_targets = defaultdict(set)
        for node in frontier:
            ntype_targets[type_mask[node]].add(node)

        # Sample metapath instances for each node type.
        next_frontier = set()
        for ntype, curr_targets in ntype_targets.items():
            mptype_mpinstances = sample_mpinstances_perntype(
                curr_targets, node_mptype_mpinstances, sampling)

            for mptype, mpinstances in mptype_mpinstances.items():
                sources = mpinstances[:, 0]
                destinations = mpinstances[:, -1]

                ng = nx.MultiDiGraph()
                ng.add_nodes_from(curr_targets)
                ng.add_edges_from(np.vstack([sources, destinations]).T)
                g = dgl.from_networkx(ng).to(device)

                # Flag each node: True for targets, False for pure sources.
                iftargets = {src: False for src in sources}
                iftargets.update(dict.fromkeys(curr_targets, True))
                flags = np.array(sorted(iftargets.items(),
                                        key=lambda item: item[0]))

                layer_ntype_mptype_g[slot][ntype][mptype] = g
                layer_ntype_mptype_mpinstances[slot][ntype][mptype] = mpinstances
                layer_ntype_mptype_iftargets[slot][ntype][mptype] = flags

                next_frontier.update(np.unique(mpinstances))
        frontier = next_frontier

    batch_ntype_orders = defaultdict(dict)
    for node in frontier:
        batch_ntype_orders[type_mask[node]][node] = node_orders[node]

    # Re-insert the entries of every type in ascending order value.
    for ntype in batch_ntype_orders:
        ranked = sorted(batch_ntype_orders[ntype].items(),
                        key=lambda item: item[1])
        batch_ntype_orders[ntype] = dict(ranked)

    return layer_ntype_mptype_g, layer_ntype_mptype_mpinstances, layer_ntype_mptype_iftargets, batch_ntype_orders
Exemplo n.º 28
0
def load_data(path, backend='dgl', format='tuple'):
    """Load a graph with its ``X``, ``y`` and ``t`` arrays from ``path``.

    With ``backend='dgl'`` the graph is read from ``g.bin`` (falling back to
    ``adjlist.txt`` on ``DGLError``) and ``(g, X, y, t)`` is returned. With
    ``backend='geometric'`` the adjacency list is read, the arrays are
    attached as node attributes and the result of
    ``tg.utils.from_networkx`` is returned either as
    ``(edge_index, X, y, t)`` (``format='tuple'``) or as the data object with
    ``x`` set to ``X``.

    Raises:
        ValueError: if ``backend`` is neither ``'dgl'`` nor ``'geometric'``.
    """
    if backend not in ('dgl', 'geometric'):
        raise ValueError("Unknown backend: " + backend)

    def _load_arrays():
        # X, y and t live next to the graph as .npy files.
        return tuple(
            np.load(osp.join(path, file_name))
            for file_name in ('X.npy', 'y.npy', 't.npy'))

    if backend == 'dgl':
        try:
            print("Trying to load dgl graph directly")
            glist, __ = load_graphs(osp.join(path, 'g.bin'))
            g = glist[0]
            print("Success")
        except DGLError as e:
            print("File not found", e)
            print("Loading nx graph")
            nx_graph = nx.read_adjlist(osp.join(path, 'adjlist.txt'),
                                       nodetype=int)
            print("Type:", type(nx_graph))
            g = dgl.from_networkx(nx_graph)
        N = g.number_of_nodes()
        X, y, t = _load_arrays()
        # Every array must hold exactly one entry per node.
        assert X.shape[0] == N
        assert y.size == N
        assert t.size == N
        return g, X, y, t

    # backend == 'geometric'
    nx_graph = nx.read_adjlist(osp.join(path, 'adjlist.txt'), nodetype=int)
    X, y, t = _load_arrays()
    print("Type:", type(nx_graph))
    attr_dict = {
        node: {'X': X[node], 'y': y[node], 't': t[node]}
        for node in range(X.shape[0])
    }
    print("attr_dict loaded!")
    nx.set_node_attributes(nx_graph, attr_dict)
    print("attributes set!")
    # Free the intermediate structures before/after the conversion.
    del attr_dict
    gc.collect()
    g = tg.utils.from_networkx(nx_graph)
    del nx_graph
    if format == 'tuple':
        return g.edge_index, g.X, g.y, g.t
    g.x = g.X
    return g
Exemplo n.º 29
0
def test_gg_conv():
    """Check that ``nn.GatedGraphConv`` maps 10 input features to 20 outputs."""
    graph = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
    ctx = F.ctx()

    layer = nn.GatedGraphConv(10, 20, 3, 4)  # n_step = 3, n_etypes = 4
    layer.initialize(ctx=ctx)
    print(layer)

    # test#1: basic
    feats = F.randn((20, 10))
    # One random edge type in [0, 4) per edge.
    edge_types = nd.random.randint(0, 4, graph.number_of_edges()).as_in_context(ctx)
    out = layer(graph, feats, edge_types)
    expected_shape = (20, 20)
    assert out.shape == expected_shape
Exemplo n.º 30
0
def generate_data(args):
    """Load a dataset and prepare tensors, masks and the graph for training.

    The graph gets exactly one self loop per node and is rebuilt through a
    dense adjacency matrix so that every edge carries a ``weight`` attribute.
    All tensors and the graph are placed on GPU ``args.gpu`` when it is
    non-negative and stay on the CPU otherwise.

    Args:
        args: Namespace providing ``gpu``, ``num_heads``, ``num_layers`` and
            ``num_out_heads`` (plus whatever ``load_data`` needs).

    Returns:
        ``(g, num_feats, n_classes, heads, cuda, features, labels,
        train_mask, val_mask, test_mask)``.
    """
    data = load_data(args)
    labels = torch.LongTensor(data.labels)
    features = torch.FloatTensor(data.features)
    # BoolTensor is missing from old torch versions; fall back to ByteTensor.
    if hasattr(torch, 'BoolTensor'):
        mask_type = torch.BoolTensor
    else:
        mask_type = torch.ByteTensor
    train_mask = mask_type(data.train_mask)
    val_mask = mask_type(data.val_mask)
    test_mask = mask_type(data.test_mask)

    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d 
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, train_mask.int().sum().item(),
           val_mask.int().sum().item(), test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
        device = torch.device('cpu')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        device = torch.device('cuda', args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    g = data.graph
    # Replace any existing self loops by exactly one self loop per node.
    g.remove_edges_from(nx.selfloop_edges(g))
    # Keep the intermediate graph on the CPU: its adjacency matrix is
    # converted to NumPy below, and only the final graph must live on the
    # device that also holds the features.
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # from_numpy_array is the replacement for from_numpy_matrix, which was
    # removed in NetworkX 3.0.
    netg = nx.from_numpy_array(g.adjacency_matrix().to_dense().numpy(),
                               create_using=nx.DiGraph)
    print(netg)
    g = dgl.from_networkx(netg, edge_attrs=['weight']).to(device)
    # Attention heads per layer, with a separate count for the output layer.
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    print("train_mask-shape", train_mask)
    return g, num_feats, n_classes, heads, cuda, features, labels, train_mask, val_mask, test_mask