Пример #1
0
def start_server(args):
    """Start a kvstore server that serves the 'entity_embed' tensor.

    Reads the server namebook from ``args.ip_config``, creates a
    KVServer, registers the data and partition metadata (master servers
    supply the actual tensors; backup servers register by name only),
    prints its state and starts serving.

    NOTE(review): ``num_entries`` is not defined in this function —
    presumably a module-level global set elsewhere; confirm before
    reuse.
    """
    server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

    my_server = KVServer(server_id=args.server_id,
                         server_namebook=server_namebook,
                         num_client=args.num_client)

    # Local embedding table plus a global->local id mapping; this
    # machine owns rows [start, end) of the global id space.
    data = F.zeros((num_entries, args.dim_size), F.float32, F.cpu())
    g2l = F.zeros(num_entries * args.num_servers, F.int64, F.cpu())
    start = num_entries * my_server.get_machine_id()
    end = num_entries * (my_server.get_machine_id() + 1)
    g2l[start:end] = F.arange(0, num_entries)

    # Partition book maps each global id to its server: num_entries
    # consecutive ids per server.
    partition = np.arange(args.num_servers)
    partition = F.tensor(np.repeat(partition, num_entries))
    if my_server.get_id() % my_server.get_group_count() == 0:  # master server
        my_server.set_global2local(name='entity_embed', global2local=g2l)
        my_server.init_data(name='entity_embed', data_tensor=data)
        my_server.set_partition_book(name='entity_embed',
                                     partition_book=partition)
    else:
        # Backup servers reference the master's registered data by name.
        my_server.set_global2local(name='entity_embed')
        my_server.init_data(name='entity_embed')
        my_server.set_partition_book(name='entity_embed')

    my_server.print()

    my_server.start()
Пример #2
0
def generate_rand_graph(n, func_name):
    """Build a random readonly DGLGraph plus entity/relation embeddings
    and the extra score-function arguments for *func_name*.
    """
    adj = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    graph = dgl.DGLGraph(adj, readonly=True)
    num_rels = 10
    n_nodes = graph.number_of_nodes()
    entity_emb = F.uniform((n_nodes, 10), F.float32, F.cpu(), 0, 1)
    if func_name == 'RotatE':
        # RotatE stores real/imaginary parts, hence double width.
        entity_emb = F.uniform((n_nodes, 20), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        # RESCAL relations are full 10x10 matrices, flattened.
        rel_emb = F.uniform((num_rels, 10 * 10), F.float32, F.cpu(), 0, 1)
    graph.ndata['id'] = F.arange(0, n_nodes)
    edge_types = np.random.randint(0,
                                   num_rels,
                                   graph.number_of_edges(),
                                   dtype=np.int64)
    graph.edata['id'] = F.tensor(edge_types, F.int64)
    # TransR additionally needs a per-relation projection embedding.
    if func_name == 'TransR':
        proj_args = dotdict({'gpu': -1, 'lr': 0.1})
        projection_emb = ExternalEmbedding(proj_args, 10, 10 * 10, F.cpu())
        return graph, entity_emb, rel_emb, (12.0, projection_emb, 10, 10)
    extra = {
        'TransE': (12.0),
        'RESCAL': (10, 10),
        'RotatE': (12.0, 1.0),
    }.get(func_name, None)
    return graph, entity_emb, rel_emb, extra
Пример #3
0
def generate_rand_emb(func_name, bcast):
    """Create random head/tail/relation embeddings for score-function
    tests; *bcast* collapses one side ('rel'/'head'/'tail') to size 1.
    """
    dim = 16

    sizes = {'head': 16, 'rel': 4, 'tail': 32}
    if bcast in sizes:
        sizes[bcast] = 1

    head_emb = F.uniform((sizes['head'], dim), F.float32, F.cpu(), 0, 1)
    tail_emb = F.uniform((sizes['tail'], dim), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((sizes['rel'], dim), F.float32, F.cpu(), -1, 1)

    if func_name == 'RotatE':
        # RotatE relations are complex phases: half the entity width.
        rel_emb = F.uniform((sizes['rel'], dim // 2), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        # RESCAL relations are full dim x dim matrices, flattened.
        rel_emb = F.uniform((sizes['rel'], dim * dim), F.float32, F.cpu(), -1, 1)

    extra_args = {
        'TransE': (12.0),
        'TransE_l1': (12.0, 'l1'),
        'TransE_l2': (12.0, 'l2'),
        'RESCAL': (dim, dim),
        'RotatE': (12.0, 1.0),
    }.get(func_name, None)
    return head_emb, rel_emb, tail_emb, extra_args
Пример #4
0
    def _init_data(self, name, shape, init_type, low, high):
        """Allocate a CPU float32 tensor for *name* in the local store.

        Parameters
        ----------
        name : str
            Key under which the tensor is stored in ``self._data_store``.
        shape : list of int
            The tensor shape.
        init_type : str
            Initialization method: 'zero' or 'uniform'.
        low : float
            Lower bound for 'uniform' initialization.
        high : float
            Upper bound for 'uniform' initialization.

        Raises
        ------
        RuntimeError
            If *init_type* is not one of the supported methods.
        """
        if init_type == 'zero':
            tensor = F.zeros(shape=shape, dtype=F.float32, ctx=F.cpu())
        elif init_type == 'uniform':
            tensor = F.uniform(shape=shape,
                               dtype=F.float32,
                               ctx=F.cpu(),
                               low=low,
                               high=high)
        else:
            raise RuntimeError('Unknown initial method')
        self._data_store[name] = tensor
Пример #5
0
def generate_rand_emb(func_name, num_entity, num_rels, dim, bcast):
    """Create random entity/relation embeddings for *func_name* tests.

    Parameters
    ----------
    func_name : str
        Score function name ('TransE*', 'RESCAL', 'RotatE', ...); only
        'RotatE' and 'RESCAL' change the relation embedding width.
    num_entity : int
        Number of entities to embed.
    num_rels : int
        Number of relations (collapsed to 1 when ``bcast == 'rel'``).
    dim : int
        Base embedding dimension.
    bcast : str
        Broadcast mode; only 'rel' affects the output here.

    Returns
    -------
    (entity_emb, rel_emb)
        Random CPU float32 tensors in [-1, 1).
    """
    if bcast == 'rel':
        num_rels = 1
    # NOTE(review): the original also set local num_head/num_tail for
    # bcast == 'head'/'tail', but those names were never read — the
    # dead assignments have been removed (no behavior change).

    entity_emb = F.uniform((num_entity, dim), F.float32, F.cpu(), -1, 1)
    rel_emb = F.uniform((num_rels, dim), F.float32, F.cpu(), -1, 1)

    if func_name == 'RotatE':
        # RotatE relations are complex phases: half the entity width.
        rel_emb = F.uniform((num_rels, dim // 2), F.float32, F.cpu(), -1, 1)
    if func_name == 'RESCAL':
        # RESCAL relations are full dim x dim matrices, flattened.
        rel_emb = F.uniform((num_rels, dim * dim), F.float32, F.cpu(), -1, 1)

    # Every score function receives the same pair; the original
    # per-name if/elif chain returned identical values in all branches.
    return entity_emb, rel_emb
Пример #6
0
    def __init__(self, args, model_name, n_entities, n_relations, hidden_dim, gamma,
                 double_entity_emb=False, double_relation_emb=False):
        """Build a knowledge-graph embedding model.

        Parameters
        ----------
        args : namespace
            Run configuration; ``mix_cpu_gpu``, ``strict_rel_part`` and
            ``soft_rel_part`` are read here.
        model_name : str
            One of 'TransE', 'TransE_l1', 'TransE_l2', 'TransR',
            'DistMult', 'ComplEx', 'RESCAL', 'RotatE'.
        n_entities : int
            Number of entities in the graph.
        n_relations : int
            Number of relation types.
        hidden_dim : int
            Base embedding dimension.
        gamma : float
            Margin passed to translational score functions; also feeds
            the embedding-initialization range.
        double_entity_emb : bool
            Use 2 * hidden_dim for entity embeddings.
        double_relation_emb : bool
            Use 2 * hidden_dim for relation embeddings.
        """
        super(KEModel, self).__init__()
        self.args = args
        self.n_entities = n_entities
        self.n_relations = n_relations
        self.model_name = model_name
        self.hidden_dim = hidden_dim
        self.eps = 2.0
        # Initialization range scales inversely with the dimension.
        self.emb_init = (gamma + self.eps) / hidden_dim

        entity_dim = 2 * hidden_dim if double_entity_emb else hidden_dim
        relation_dim = 2 * hidden_dim if double_relation_emb else hidden_dim

        device = get_device(args)
        self.entity_emb = ExternalEmbedding(args, n_entities, entity_dim,
                                            F.cpu() if args.mix_cpu_gpu else device)
        # For RESCAL, relation_emb = relation_dim * entity_dim
        if model_name == 'RESCAL':
            rel_dim = relation_dim * entity_dim
        else:
            rel_dim = relation_dim

        self.rel_dim = rel_dim
        self.entity_dim = entity_dim
        self.strict_rel_part = args.strict_rel_part
        self.soft_rel_part = args.soft_rel_part
        # Without relation partitioning, one shared relation embedding;
        # with it, only a global CPU copy is created here.
        if not self.strict_rel_part and not self.soft_rel_part:
            self.relation_emb = ExternalEmbedding(args, n_relations, rel_dim,
                                                  F.cpu() if args.mix_cpu_gpu else device)
        else:
            self.global_relation_emb = ExternalEmbedding(args, n_relations, rel_dim, F.cpu())

        if model_name == 'TransE' or model_name == 'TransE_l2':
            self.score_func = TransEScore(gamma, 'l2')
        elif model_name == 'TransE_l1':
            self.score_func = TransEScore(gamma, 'l1')
        elif model_name == 'TransR':
            projection_emb = ExternalEmbedding(args,
                                               n_relations,
                                               entity_dim * relation_dim,
                                               F.cpu() if args.mix_cpu_gpu else device)

            self.score_func = TransRScore(gamma, projection_emb, relation_dim, entity_dim)
        elif model_name == 'DistMult':
            self.score_func = DistMultScore()
        elif model_name == 'ComplEx':
            self.score_func = ComplExScore()
        elif model_name == 'RESCAL':
            self.score_func = RESCALScore(relation_dim, entity_dim)
        elif model_name == 'RotatE':
            self.score_func = RotatEScore(gamma, self.emb_init)

        self.model_name = model_name
        # Negative-sampling score/prepare closures for both directions.
        self.head_neg_score = self.score_func.create_neg(True)
        self.tail_neg_score = self.score_func.create_neg(False)
        self.head_neg_prepare = self.score_func.create_neg_prepare(True)
        self.tail_neg_prepare = self.score_func.create_neg_prepare(False)

        self.reset_parameters()
    def __init__(
        self,
        args,
        model_name,
        n_entities,
        n_relations,
        hidden_dim,
        gamma,
        double_entity_emb=False,
        double_relation_emb=False,
    ):
        """Build a knowledge-graph embedding model (simplified variant).

        Differences from the sibling constructor are visible in the
        code: relation embeddings always live on ``device`` (no
        strict/soft relation partitioning), and ``TransEScore`` is
        constructed without a norm argument.

        NOTE(review): ``n_relations`` is accepted but never stored on
        ``self`` (the sibling sets ``self.n_relations``) — confirm no
        caller reads that attribute.
        """
        super(KEModel, self).__init__()
        self.args = args
        self.n_entities = n_entities
        self.model_name = model_name
        self.hidden_dim = hidden_dim
        self.eps = 2.0
        # Initialization range scales inversely with the dimension.
        self.emb_init = (gamma + self.eps) / hidden_dim

        entity_dim = 2 * hidden_dim if double_entity_emb else hidden_dim
        relation_dim = 2 * hidden_dim if double_relation_emb else hidden_dim

        device = get_device(args)
        self.entity_emb = ExternalEmbedding(
            args, n_entities, entity_dim, F.cpu() if args.mix_cpu_gpu else device
        )
        # For RESCAL, relation_emb = relation_dim * entity_dim
        if model_name == "RESCAL":
            rel_dim = relation_dim * entity_dim
        else:
            rel_dim = relation_dim
        self.relation_emb = ExternalEmbedding(args, n_relations, rel_dim, device)

        if model_name == "TransE":
            self.score_func = TransEScore(gamma)
        elif model_name == "TransR":
            projection_emb = ExternalEmbedding(
                args,
                n_relations,
                entity_dim * relation_dim,
                F.cpu() if args.mix_cpu_gpu else device,
            )
            self.score_func = TransRScore(gamma, projection_emb, relation_dim, entity_dim)
        elif model_name == "DistMult":
            self.score_func = DistMultScore()
        elif model_name == "ComplEx":
            self.score_func = ComplExScore()
        elif model_name == "RESCAL":
            self.score_func = RESCALScore(relation_dim, entity_dim)
        elif model_name == "RotatE":
            self.score_func = RotatEScore(gamma, self.emb_init)

        # Negative-sampling score/prepare closures for both directions.
        self.head_neg_score = self.score_func.create_neg(True)
        self.tail_neg_score = self.score_func.create_neg(False)
        self.head_neg_prepare = self.score_func.create_neg_prepare(True)
        self.tail_neg_prepare = self.score_func.create_neg_prepare(False)

        self.reset_parameters()
Пример #8
0
def generate_rand_graph(n):
    """Random readonly graph with node/edge id features plus random
    entity and relation embeddings (10 relation types, dimension 10).
    """
    adj = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(
        np.int64)
    graph = dgl.DGLGraph(adj, readonly=True)
    num_rels = 10
    n_nodes = graph.number_of_nodes()
    entity_emb = F.uniform((n_nodes, 10), F.float32, F.cpu(), 0, 1)
    rel_emb = F.uniform((num_rels, 10), F.float32, F.cpu(), 0, 1)
    graph.ndata['id'] = F.arange(0, n_nodes)
    edge_types = np.random.randint(0,
                                   num_rels,
                                   graph.number_of_edges(),
                                   dtype=np.int64)
    graph.edata['id'] = F.tensor(edge_types, F.int64)
    return graph, entity_emb, rel_emb
Пример #9
0
def knn_graphE(x, k, istrain=False):
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix. The predecessors of each point are its k-nearest
    neighbors.

    If a 3D tensor is given instead, then each row would be transformed into
    a separate graph.  The graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.

        If 2D, each row of ``x`` corresponds to a node.

        If 3D, a k-NN graph would be constructed for each row.  Then
        the graphs are unioned.
    k : int
        The number of neighbors
    istrain : bool
        If True, with probability 0.5 the neighbors are a random
        k-subset of the round(1.5*k) nearest candidates (candidate 0,
        the nearest one, is always kept) — a train-time augmentation.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    # Treat a 2D input as a batch of one sample.
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    if istrain and np.random.rand() > 0.5:
        # Take the 1.5k nearest, then keep a random k-1 of indices
        # 1..1.5k-1 and always re-add index 0.
        k_indices = F.argtopk(dist, round(1.5 * k), 2, descending=False)
        rand_k = np.random.permutation(round(1.5 * k) -
                                       1)[0:k - 1] + 1  # 0 + random k-1
        rand_k = np.append(rand_k, 0)
        k_indices = k_indices[:, :, rand_k]  # add 0
    else:
        k_indices = F.argtopk(dist, k, 2, descending=False)

    dst = F.copy_to(k_indices, F.cpu())

    # Each edge's source is the point itself, broadcast over its k picks.
    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    # Shift node ids so each sample in the batch gets its own id range.
    per_sample_offset = F.reshape(
        F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1, ))
    src = F.reshape(src, (-1, ))
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g
Пример #10
0
    def writeback_relation(self, rank=0, rel_parts=None):
        """Write this process's relation embeddings back into the global
        relation embedding. Used in multi-process multi-gpu training.

        Parameters
        ----------
        rank : int
            Process id.
        rel_parts : list of tensor
            Tensors storing the edge types of each partition; indexed
            by *rank* to select the rows this process owns.
        """
        idx = rel_parts[rank]
        # Copy the local embedding to CPU and write only the owned rows
        # into the global table.
        self.global_relation_emb.emb[idx] = F.copy_to(self.relation_emb.emb,
                                                      F.cpu())[idx]
        if self.model_name == 'TransR':
            # TransR also keeps per-relation projection embeddings.
            self.score_func.writeback_local_emb(idx)
Пример #11
0
def dist_tensor_test_sanity(data_shape, rank, name=None):
    """Sanity-check DistTensor writes and reads across clients.

    The writer side fills a distinct constant stripe per rank; the
    reader side waits on a barrier and verifies the stripe written by
    its machine-local peer.

    NOTE(review): ``part_id`` and ``num_client_per_machine`` are not
    defined in this function — presumably module-level globals set by
    the test harness; confirm before reuse.
    """
    dist_ten = dgl.distributed.DistTensor(data_shape,
                                          F.int32,
                                          init_func=zeros_init,
                                          name=name)
    # arbitrary value
    stride = 3
    if part_id == 0:
        # Writer: rows [rank*stride, (rank+1)*stride) get value rank+1.
        dist_ten[rank*stride:(rank+1)*stride] = F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (rank+1)
        dgl.distributed.client_barrier()
    else:
        dgl.distributed.client_barrier()
        original_rank = rank % num_client_per_machine
        assert F.allclose(dist_ten[original_rank*stride:(original_rank+1)*stride],
                          F.ones((stride, 2), dtype=F.int32, ctx=F.cpu()) * (original_rank+1))
Пример #12
0
def _rand_ids(args, remote_machine_id):
    """Random ids to pull/push; shifted into the second half of the key
    space when this client's machine matches *remote_machine_id*."""
    ids = np.random.randint(args.graph_size, size=args.data_size)
    if args.machine_id == remote_machine_id:
        ids = ids + args.graph_size
    return F.tensor(ids)


def _report_throughput(label, args, elapsed):
    """Print throughput in MB for 100 requests of data_size rows."""
    total_bytes = (args.data_size*(args.dim+2)*4)*100*args.num_client/2
    print("%s Throughput (MB): %f" % (label, total_bytes / elapsed / 1024.0 / 1024.0))


def _bench_pull(kvclient, args, id_tensor, label):
    """Time 100 pulls of *id_tensor* and report throughput."""
    start = time.time()
    for _ in range(100):
        kvclient.pull(name='data', id_tensor=id_tensor)
    _report_throughput(label, args, time.time() - start)


def _bench_push(kvclient, args, id_tensor, data_tensor, label):
    """Time 100 pushes of *data_tensor*, fenced by barriers, and report."""
    kvclient.barrier()
    start = time.time()
    for _ in range(100):
        kvclient.push(name='data', id_tensor=id_tensor, data_tensor=data_tensor)
    kvclient.barrier()
    _report_throughput(label, args, time.time() - start)


def start_client(args):
    """Benchmark KVStore throughput: fast-pull, UDF pull, and push,
    each against local and remote machines.

    The original body repeated six near-identical benchmark sections;
    they are factored into the helpers above with identical call order,
    RNG draws, barrier placement, and printed output.
    """
    if args.range == -1:
        policy, gpb = create_partition_policy(args)
    else:
        policy, gpb = create_range_partition_policy(args)
    print("create data...")
    data = create_data(args)
    print("Create data done.")
    dgl.distributed.connect_to_server(ip_config=args.ip_config)
    kvclient = dgl.distributed.KVClient(ip_config=args.ip_config)
    kvclient.barrier()
    kvclient.map_shared_data(partition_book=gpb)

    # Fast-pull (built-in handler): local then remote. "Local" targets
    # machine 1's offset range, "remote" targets machine 0's.
    _bench_pull(kvclient, args, _rand_ids(args, 1), "Local fast-pull")
    _bench_pull(kvclient, args, _rand_ids(args, 0), "Remote fast-pull")

    # Switch to a user-defined pull handler, then repeat both pulls.
    kvclient.register_pull_handler('data', udf_pull)
    kvclient.barrier()
    _bench_pull(kvclient, args, _rand_ids(args, 1), "Local pull")
    _bench_pull(kvclient, args, _rand_ids(args, 0), "Remote pull")

    # Push benchmarks share one zero-filled payload.
    data_tensor = F.zeros((args.data_size, args.dim), F.float32, F.cpu())
    _bench_push(kvclient, args, _rand_ids(args, 1), data_tensor, "Local push")
    _bench_push(kvclient, args, _rand_ids(args, 0), data_tensor, "Remote push")

    dgl.distributed.shutdown_servers()
    dgl.distributed.finalize_client()
Пример #13
0
def zeros_init(shape, dtype):
    """Return an all-zero CPU tensor of the given shape and dtype."""
    ctx = F.cpu()
    return F.zeros(shape, dtype=dtype, ctx=ctx)
Пример #14
0
    def score(self, head, rel, tail, triplet_wise=False):
        """Compute knowledge-graph scores for the given id tensors.

        Parameters
        ----------
        head : Tensor
            Head entity ids, looked up in ``self.entity_emb``.
        rel : Tensor
            Relation ids, looked up in ``self.relation_emb``.
        tail : Tensor
            Tail entity ids, looked up in ``self.entity_emb``.
        triplet_wise : bool
            If True, score (head[i], rel[i], tail[i]) triplets and
            return a 1-D tensor.  If False, score every head against
            every tail (the whole ``rel_emb`` is handed to
            ``score_func.infer``) and return a flattened
            (num_head * num_rel * num_tail,) tensor.

        Returns
        -------
        Tensor
            Scores on CPU.
        """
        head_emb = self.entity_emb(head)
        rel_emb = self.relation_emb(rel)
        tail_emb = self.entity_emb(tail)

        num_head = F.shape(head)[0]
        num_rel = F.shape(rel)[0]
        num_tail = F.shape(tail)[0]

        # Work in batch_size chunks to bound peak memory.
        batch_size = self.batch_size
        score = []
        if triplet_wise:

            # Mimics a DGL edge batch so score_func.edge_func can be
            # reused outside of message passing.
            class FakeEdge(object):
                def __init__(self, head_emb, rel_emb, tail_emb):
                    self._hobj = {}
                    self._robj = {}
                    self._tobj = {}
                    self._hobj['emb'] = head_emb
                    self._robj['emb'] = rel_emb
                    self._tobj['emb'] = tail_emb

                @property
                def src(self):
                    return self._hobj

                @property
                def dst(self):
                    return self._tobj

                @property
                def data(self):
                    return self._robj

            # NOTE(review): the rel/tail slices below are clamped with
            # num_head — triplet mode assumes all three are equal-length.
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                sr_emb = rel_emb[i * batch_size : (i + 1) * batch_size \
                                                  if (i + 1) * batch_size < num_head \
                                                  else num_head]
                st_emb = tail_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                edata = FakeEdge(sh_emb, sr_emb, st_emb)
                score.append(
                    F.copy_to(
                        self.score_func.edge_func(edata)['score'], F.cpu()))
            score = F.cat(score, dim=0)
            return score
        else:
            # All-pairs: chunk heads in the outer loop, tails inner.
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                s_score = []
                for j in range((num_tail + batch_size - 1) // batch_size):
                    st_emb = tail_emb[j * batch_size : (j + 1) * batch_size \
                                                       if (j + 1) * batch_size < num_tail \
                                                       else num_tail]

                    s_score.append(
                        F.copy_to(
                            self.score_func.infer(sh_emb, rel_emb, st_emb),
                            F.cpu()))
                score.append(F.cat(s_score, dim=2))
            score = F.cat(score, dim=0)
            return F.reshape(score, (num_head * num_rel * num_tail, ))
Пример #15
0
                    accum_time += timer.time
            avg_time = accum_time / (n_times - n_cold_start)
            print('hidden size: {}, avg time: {}'.format(n_hid, avg_time))
        except:
            print('hidden size: {}, OOM'.format(n_hid))


if __name__ == '__main__':
    # CLI entry point: benchmark SPMM/SDDMM kernels on three datasets.
    parser = argparse.ArgumentParser("Benchmark DGL kernels")
    parser.add_argument('--spmm-binary', type=str, default='copy_lhs')
    parser.add_argument('--spmm-reduce', type=str, default='sum')
    parser.add_argument('--sddmm-binary', type=str, default='add')
    parser.add_argument('--gpu', '-g', type=str, default='-1')
    args = parser.parse_args()
    # '-1' selects CPU; any other value selects the default GPU context.
    if args.gpu == '-1':
        ctx = F.cpu()
    else:
        ctx = F.gpu()
    ctx_str = 'cpu' if args.gpu == '-1' else 'gpu'

    for dataset in ['reddit', 'arxiv', 'proteins']:
        g = get_graph(dataset)
        g = g.int().to(ctx)
        print(g)
        # SPMM
        bench_spmm(g, ctx, args.spmm_binary, args.spmm_reduce)
        # SDDMM
        if ctx_str == 'cpu':
            continue  # sddmm out of mem on cpu will result in termination of the program.
        bench_sddmm(g, ctx, args.sddmm_binary)
        del g  # release the graph before loading the next dataset
Пример #16
0
    def topK(self, head=None, tail=None, bcast=False, pair_ws=False, k=10):
        """Return the top-*k* most similar (head, tail) entity pairs.

        Parameters
        ----------
        head, tail : iterable of int or None
            Candidate entity ids; None means all entities in
            ``self.emb``.
        bcast : bool
            If True, compute a separate top-k for each head entity.
        pair_ws : bool
            If True, score head[i] against tail[i] pair-wise instead of
            all pairs.
        k : int
            Number of results (per head when *bcast* is True).

        Returns
        -------
        list of (head_ids, tail_ids, scores)
            Numpy-array triples; a single entry overall, or one entry
            per head when *bcast* is True.
        """
        if head is None:
            head = F.arange(0, self.emb.shape[0])
        else:
            head = F.tensor(head)
        if tail is None:
            tail = F.arange(0, self.emb.shape[0])
        else:
            tail = F.tensor(tail)

        head_emb = self.emb[head]
        tail_emb = self.emb[tail]
        if pair_ws is True:
            result = []
            batch_size = self.batch_size
            # Pair-wise scores, chunked to bound device memory.
            score = []
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            # NOTE(review): the tail slice below is clamped with
            # num_head — pair-wise mode assumes len(head) == len(tail).
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                st_emb = tail_emb[i * batch_size : (i + 1) * batch_size \
                                                   if (i + 1) * batch_size < num_head \
                                                   else num_head]
                st_emb = F.copy_to(st_emb, self.device)
                score.append(F.copy_to(self.sim_func(sh_emb, st_emb, pw=True), F.cpu()))
            score = F.cat(score, dim=0)

            sidx = F.argsort(score, dim=0, descending=True)
            sidx = sidx[:k]
            score = score[sidx]
            result.append((F.asnumpy(head[sidx]),
                           F.asnumpy(tail[sidx]),
                           F.asnumpy(score)))
        else:
            num_head = head.shape[0]
            num_tail = tail.shape[0]
            batch_size = self.batch_size

            # All-pairs scores, chunked over both head and tail.
            score = []
            for i in range((num_head + batch_size - 1) // batch_size):
                sh_emb = head_emb[i * batch_size : (i + 1) * batch_size \
                                            if (i + 1) * batch_size < num_head \
                                            else num_head]
                sh_emb = F.copy_to(sh_emb, self.device)
                s_score = []
                for j in range((num_tail + batch_size - 1) // batch_size):
                    st_emb = tail_emb[j * batch_size : (j + 1) * batch_size \
                                                    if (j + 1) * batch_size < num_tail \
                                                    else num_tail]
                    st_emb = F.copy_to(st_emb, self.device)
                    s_score.append(F.copy_to(self.sim_func(sh_emb, st_emb), F.cpu()))
                score.append(F.cat(s_score, dim=1))
            score = F.cat(score, dim=0)

            if bcast is False:
                # Global top-k over the flattened head x tail grid;
                # recover (head, tail) indices from the flat position.
                result = []
                idx = F.arange(0, num_head * num_tail)
                score = F.reshape(score, (num_head * num_tail, ))

                sidx = F.argsort(score, dim=0, descending=True)
                sidx = sidx[:k]
                score = score[sidx]
                # (a no-op `sidx = sidx` statement was removed here)
                idx = idx[sidx]
                tail_idx = idx % num_tail
                idx = floor_divide(idx, num_tail)
                head_idx = idx % num_head

                result.append((F.asnumpy(head[head_idx]),
                           F.asnumpy(tail[tail_idx]),
                           F.asnumpy(score)))

            else: # bcast at head
                # Separate top-k over each head's row of scores.
                result = []
                for i in range(num_head):
                    i_score = score[i]

                    sidx = F.argsort(i_score, dim=0, descending=True)
                    idx = F.arange(0, num_tail)
                    i_idx = sidx[:k]
                    i_score = i_score[i_idx]
                    idx = idx[i_idx]

                    result.append((np.full((k,), F.asnumpy(head[i])),
                                  F.asnumpy(tail[idx]),
                                  F.asnumpy(i_score)))

        return result
Пример #17
0
    def __init__(
        self,
        args,
        model_name,
        n_entities,
        n_relations,
        hidden_dim,
        gamma,
        double_entity_emb=False,
        double_relation_emb=False,
        ent_feat_dim=-1,
        rel_feat_dim=-1,
    ):
        """Build a KEModel variant supporting text-feature encoders.

        ``args.encoder_model_name`` selects the encoder: 'shallow'
        (embeddings only), 'roberta' (text features + MLP transform),
        or 'concat' (embeddings concatenated with text features).

        ent_feat_dim / rel_feat_dim : int
            Text-feature widths; must be != -1 for 'roberta'/'concat'.

        NOTE(review): the ``assert`` below forbids strict/soft relation
        partitioning, which makes the ``else`` branch creating
        ``global_relation_emb`` unreachable — confirm this is intended.
        """
        super(KEModel, self).__init__()
        self.args = args
        self.has_edge_importance = args.has_edge_importance
        self.n_entities = n_entities
        self.n_relations = n_relations
        self.model_name = model_name
        self.hidden_dim = hidden_dim
        self.eps = EMB_INIT_EPS
        # Initialization range scales inversely with the dimension.
        self.emb_init = (gamma + self.eps) / hidden_dim
        entity_dim = 2 * hidden_dim if double_entity_emb else hidden_dim
        relation_dim = 2 * hidden_dim if double_relation_emb else hidden_dim
        self.encoder_model_name = args.encoder_model_name

        device = get_device(args)

        self.loss_gen = LossGenerator(
            args,
            args.loss_genre,
            args.neg_adversarial_sampling,
            args.adversarial_temperature,
            args.pairwise,
        )

        # Trainable shallow entity embeddings ('shallow'/'concat').
        if self.encoder_model_name in ["shallow", "concat"]:
            self.entity_emb = ExternalEmbedding(
                args, n_entities, entity_dim, F.cpu() if args.mix_cpu_gpu else device
            )
        # Frozen text features ('roberta'/'concat').
        if self.encoder_model_name in ["roberta", "concat"]:
            assert ent_feat_dim != -1 and rel_feat_dim != -1
            self.entity_feat = ExternalEmbedding(
                args,
                n_entities,
                ent_feat_dim,
                F.cpu() if args.mix_cpu_gpu else device,
                is_feat=True,
            )
        # For RESCAL, relation_emb = relation_dim * entity_dim
        if model_name == "RESCAL":
            rel_dim = relation_dim * entity_dim
        else:
            rel_dim = relation_dim

        # MLP that maps (features [+ embeddings]) to model dimensions.
        self.use_mlp = self.encoder_model_name in ["concat", "roberta"]
        if self.encoder_model_name == "concat":
            self.transform_net = MLP(
                entity_dim + ent_feat_dim,
                entity_dim,
                relation_dim + rel_feat_dim,
                relation_dim,
            )
            # self.transform_e_net = torch.nn.Linear(entity_dim, entity_dim)
            # self.transform_r_net = torch.nn.Linear(relation_dim, relation_dim)
        elif self.encoder_model_name == "roberta":
            self.transform_net = MLP(
                ent_feat_dim, entity_dim, rel_feat_dim, relation_dim
            )

        self.rel_dim = rel_dim
        self.entity_dim = entity_dim
        self.strict_rel_part = args.strict_rel_part
        self.soft_rel_part = args.soft_rel_part
        print(self.strict_rel_part, self.soft_rel_part)
        assert not self.strict_rel_part and not self.soft_rel_part
        if not self.strict_rel_part and not self.soft_rel_part:
            if self.encoder_model_name in ["shallow", "concat"]:
                self.relation_emb = ExternalEmbedding(
                    args, n_relations, rel_dim, F.cpu() if args.mix_cpu_gpu else device
                )
            if self.encoder_model_name in ["roberta", "concat"]:
                self.relation_feat = ExternalEmbedding(
                    args,
                    n_relations,
                    rel_feat_dim,
                    F.cpu() if args.mix_cpu_gpu else device,
                    is_feat=True,
                )
        else:
            self.global_relation_emb = ExternalEmbedding(
                args, n_relations, rel_dim, F.cpu()
            )

        if model_name == "TransE" or model_name == "TransE_l2":
            self.score_func = TransEScore(gamma, "l2")
        elif model_name == "TransE_l1":
            self.score_func = TransEScore(gamma, "l1")
        elif model_name == "TransR":
            projection_emb = ExternalEmbedding(
                args,
                n_relations,
                entity_dim * relation_dim,
                F.cpu() if args.mix_cpu_gpu else device,
            )

            self.score_func = TransRScore(
                gamma, projection_emb, relation_dim, entity_dim
            )
        elif model_name == "DistMult":
            self.score_func = DistMultScore()
        elif model_name == "ComplEx":
            self.score_func = ComplExScore()
        elif model_name == "RESCAL":
            self.score_func = RESCALScore(relation_dim, entity_dim)
        elif model_name == "RotatE":
            self.score_func = RotatEScore(gamma, self.emb_init)
        elif model_name == "SimplE":
            self.score_func = SimplEScore()

        self.model_name = model_name
        # Negative-sampling score/prepare closures for both directions.
        self.head_neg_score = self.score_func.create_neg(True)
        self.tail_neg_score = self.score_func.create_neg(False)
        self.head_neg_prepare = self.score_func.create_neg_prepare(True)
        self.tail_neg_prepare = self.score_func.create_neg_prepare(False)

        self.reset_parameters()
Пример #18
0
def create_data(args):
    """Allocate the float32 CPU tensor held by server nodes.

    The tensor has shape ``(args.graph_size, args.dim)`` and is
    zero-initialized.
    """
    shape = (args.graph_size, args.dim)
    return F.zeros(shape, F.float32, F.cpu())
Пример #19
0
def run_topk_emb(sfunc, sim_func, create_emb_sim=create_kge_emb_sim):
    """Check ``topK`` of an embedding-similarity inferencer against a
    brute-force NumPy reference.

    Three query modes are exercised:
      * ``pair_ws=True`` -- score only the aligned pairs (head[i], tail[i]);
      * default          -- score every (head, tail) combination, one
                            global top-k list;
      * ``bcast=True``   -- one top-k result list per head id; calling
                            ``topK`` with no id arguments must default to
                            all embeddings.

    Parameters
    ----------
    sfunc : similarity-function name handed to the inferencer factory.
    sim_func : callable
        Reference scorer for a single (head_emb, tail_emb) pair.
    create_emb_sim : callable
        Factory building the inferencer under test from ``(emb, sfunc)``.
    """
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80

    emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)

    def brute_topk(pairs, k=10):
        # Brute-force reference: score each (head_id, tail_id) pair with
        # `sim_func`, then return (head_ids, tail_ids, scores) of the k
        # best pairs sorted by descending score.
        scores = []
        head_ids = []
        tail_ids = []
        for hid, tid in pairs:
            hemb = F.take(emb, hid, 0)
            temb = F.take(emb, tid, 0)
            score = sim_func(hemb, temb)
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(hid))
            tail_ids.append(F.asnumpy(tid))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])  # force 1-D
        head_ids = np.asarray(head_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)[::-1][:k]  # indices of the k largest
        return head_ids[idx], tail_ids[idx], scores[idx]

    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head + num_tail)
    sim_infer = create_emb_sim(emb, sfunc)

    # Mode 1: pair-wise -- only aligned pairs (head[i], tail[i]) are scored.
    result1 = sim_infer.topK(head, tail, pair_ws=True)
    head_ids, tail_ids, score_topk = brute_topk(
        [(head[i], tail[i]) for i in range(head.shape[0])])
    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    print('pass pair wise')

    # Mode 2: exhaustive -- every (head, tail) combination, single result.
    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head + num_tail)
    result1 = sim_infer.topK(head, tail)
    assert len(result1) == 1
    head_ids, tail_ids, score_topk = brute_topk(
        [(head[i], tail[j])
         for i in range(head.shape[0])
         for j in range(tail.shape[0])])
    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    # Mode 3: broadcast -- one top-k list per head id; result2 checks the
    # no-argument default (all embeddings on both sides).
    emb_ids = F.arange(0, num_emb)
    result1 = sim_infer.topK(emb_ids, emb_ids, bcast=True)
    result2 = sim_infer.topK(bcast=True)
    assert len(result1) == emb_ids.shape[0]
    assert len(result2) == emb_ids.shape[0]

    for i in range(emb_ids.shape[0]):
        head_ids, tail_ids, score_topk = brute_topk(
            [(emb_ids[i], emb_ids[j]) for j in range(emb_ids.shape[0])])
        r1_head, r1_tail, r1_score = result1[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        r2_head, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
    print('pass all')
Пример #20
0
def run_topk_emb2(sfunc, sim_func, emb_model):
    """Check ``embed_sim`` of an embedding model against a brute-force
    NumPy reference.

    The same random tensor is saved as both the entity and relation
    embedding files and loaded into ``emb_model``, so expected scores can
    be computed locally with ``sim_func``. Three query modes are
    exercised: pair-wise (``pair_ws=True``), exhaustive (default), and
    broadcast (``bcast=True``, including the no-id default call).

    Parameters
    ----------
    sfunc : similarity-function name passed to ``embed_sim``.
    sim_func : callable
        Reference scorer for a single (head_emb, tail_emb) pair.
    emb_model : model under test exposing ``load`` and ``embed_sim``.
    """
    hidden_dim = 32
    num_head = 40
    num_tail = 40
    num_emb = 80

    with tempfile.TemporaryDirectory() as tmpdirname:
        emb = F.uniform((num_emb, hidden_dim), F.float32, F.cpu(), -1, 1)
        # Persist the same tensor under both names so the model scores
        # against known values.
        create_emb_file(Path(tmpdirname), 'entity.npy', emb.numpy())
        create_emb_file(Path(tmpdirname), 'relation.npy', emb.numpy())

        emb_model.load(Path(tmpdirname))

    def brute_topk(pairs, k=10):
        # Brute-force reference: score each (head_id, tail_id) pair with
        # `sim_func`, then return (head_ids, tail_ids, scores) of the k
        # best pairs sorted by descending score.
        scores = []
        head_ids = []
        tail_ids = []
        for hid, tid in pairs:
            hemb = F.take(emb, hid, 0)
            temb = F.take(emb, tid, 0)
            score = sim_func(hemb, temb)
            scores.append(F.asnumpy(score))
            head_ids.append(F.asnumpy(hid))
            tail_ids.append(F.asnumpy(tid))
        scores = np.asarray(scores)
        scores = scores.reshape(scores.shape[0])  # force 1-D
        head_ids = np.asarray(head_ids)
        tail_ids = np.asarray(tail_ids)
        idx = np.argsort(scores)[::-1][:k]  # indices of the k largest
        return head_ids[idx], tail_ids[idx], scores[idx]

    # Mode 1: pair-wise -- only aligned pairs (head[i], tail[i]) are scored.
    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head + num_tail)
    result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc, pair_ws=True)
    head_ids, tail_ids, score_topk = brute_topk(
        [(head[i], tail[i]) for i in range(head.shape[0])])
    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)
    print('pass pair wise')

    # Mode 2: exhaustive -- every (head, tail) combination, single result.
    head = F.arange(0, num_head)
    tail = F.arange(num_head, num_head + num_tail)
    result1 = emb_model.embed_sim(head, tail, 'entity', sfunc=sfunc)
    assert len(result1) == 1
    head_ids, tail_ids, score_topk = brute_topk(
        [(head[i], tail[j])
         for i in range(head.shape[0])
         for j in range(tail.shape[0])])
    r1_head, r1_tail, r1_score = result1[0]
    np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(r1_head, head_ids)
    np.testing.assert_allclose(r1_tail, tail_ids)

    # Mode 3: broadcast -- one top-k list per head id; result2 checks the
    # no-id default (all entity embeddings on both sides).
    emb_ids = F.arange(0, num_emb)
    result1 = emb_model.embed_sim(emb_ids, emb_ids, 'entity', sfunc=sfunc, bcast=True)
    result2 = emb_model.embed_sim(embed_type='entity', sfunc=sfunc, bcast=True)
    assert len(result1) == emb_ids.shape[0]
    assert len(result2) == emb_ids.shape[0]

    for i in range(emb_ids.shape[0]):
        head_ids, tail_ids, score_topk = brute_topk(
            [(emb_ids[i], emb_ids[j]) for j in range(emb_ids.shape[0])])
        r1_head, r1_tail, r1_score = result1[i]
        np.testing.assert_allclose(r1_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r1_head, head_ids)
        np.testing.assert_allclose(r1_tail, tail_ids)
        r2_head, r2_tail, r2_score = result2[i]
        np.testing.assert_allclose(r2_score, score_topk, rtol=1e-5, atol=1e-5)
        np.testing.assert_allclose(r2_head, head_ids)
        np.testing.assert_allclose(r2_tail, tail_ids)
    print('pass all')
Пример #21
0
def start_client(args):
    """Benchmark a KVStore client.

    Three timed phases: pull embedding rows from the local server, slice
    the same id batches from an in-memory tensor as a no-RPC baseline,
    then pull rows owned by remote machines.

    NOTE(review): relies on a module-level ``num_entries`` (rows per
    machine) defined elsewhere in this file.

    Fields read from ``args``: ``ip_config`` (server address file),
    ``batch_size`` (ids per pull), ``num_servers`` (machine count).
    """
    server_namebook = dgl.contrib.read_ip_config(filename=args.ip_config)

    my_client = KVClient(server_namebook=server_namebook)

    my_client.connect()

    my_client.print()

    # Sync all clients before any timing begins.
    my_client.barrier()

    # This machine owns the contiguous id slice [local_start, local_end).
    local_start = num_entries * my_client.get_machine_id()
    local_end = num_entries * (my_client.get_machine_id() + 1)
    local_range = np.arange(local_start, local_end)
    id_list = []
    # Pre-generate the random batches so sampling cost is outside the timer.
    for i in range(10000):
        ids = np.random.choice(local_range, args.batch_size)
        id_list.append(F.tensor(ids))

    print("Pull from local...")
    num_bytes = 0
    start = time.time()
    for ids in id_list:
        tmp = my_client.pull(name='entity_embed', id_tensor=ids)
        ndim = tmp.shape[1]  # embedding width; reused for the baseline below
        num_bytes += np.prod(tmp.shape) * 4  # float32 -> 4 bytes per element
    print("Total time: %.3f, #bytes: %.3f GB" %
          (time.time() - start, num_bytes / 1000 / 1000 / 1000))

    my_client.barrier()

    # Baseline: plain tensor slicing with the same id batches, no RPC.
    arr = F.zeros((num_entries, ndim), F.float32, F.cpu())
    print('Slice from a tensor...')
    num_bytes = 0
    start = time.time()
    for ids in id_list:
        tmp = arr[ids]
        num_bytes += np.prod(tmp.shape) * 4
    print("Total time: %.3f, #bytes: %.3f GB" %
          (time.time() - start, num_bytes / 1000 / 1000 / 1000))

    print("Pull from remote...")
    # Remote ids = the full global range minus this machine's own slice;
    # the three branches avoid concatenating an empty range at either end.
    if local_start == 0:
        remote_range = np.arange(local_end, num_entries * args.num_servers)
    elif local_end == num_entries * args.num_servers:
        remote_range = np.arange(0, local_start)
    else:
        range1 = np.arange(0, local_start)
        range2 = np.arange(local_end, num_entries * args.num_servers)
        remote_range = np.concatenate((range1, range2))
    id_list = []
    # Fewer batches than the local phase: remote pulls are slower.
    for i in range(1000):
        ids = np.random.choice(remote_range, args.batch_size)
        id_list.append(F.tensor(ids))

    num_bytes = 0
    start = time.time()
    for ids in id_list:
        tmp = my_client.pull(name='entity_embed', id_tensor=ids)
        num_bytes += np.prod(tmp.shape) * 4
    print("Total time: %.3f, #bytes: %.3f GB" %
          (time.time() - start, num_bytes / 1000 / 1000 / 1000))

    my_client.barrier()

    # Exactly one client issues the global shutdown.
    if my_client.get_id() == 0:
        my_client.shut_down()