def test_2hop_using_gsl_with_undirected_edge_homo(self):
        """ Using gsl api and sample 2-hop neighbors on undirected edges
        whose source node and dst node have the same type.

        Seeds are ``self._seed_node2_ids``. Both hops walk
        ``self._edge3_type`` with the "random" strategy, using the fan-outs
        listed in ``expand_factor``.
        """
        gl.set_eager_mode(True)
        expand_factor = [3, 2]
        ids = self._seed_node2_ids

        def repeat_fn(q, params):
            # params is one (edge_type, fan_out) entry of params_list.
            return q.outE(params[0]).sample(params[1]).by("random").inV()

        nbrs = self.g.V(self._node2_type, feed=ids) \
          .repeat(repeat_fn, 2,
                  params_list=[(self._edge3_type, expand_factor[0]),
                               (self._edge3_type, expand_factor[1])]) \
          .emit()

        # Indices 2 and 4 of the emitted result hold the hop-1 and hop-2
        # nodes (indices 1 and 3, presumably the edges, are not checked).
        # Flatten once instead of on every loop iteration.
        hop1_ids = nbrs[2].ids.flatten()
        hop2_ids = nbrs[4].ids.flatten()

        for i in range(ids.size * expand_factor[0]):
            src_id = hop1_ids[i]
            out_id = utils.fixed_dst_ids(src_id, self._node2_range)
            # NOTE(review): the window [i, i + fan_out) is kept unchanged.
            # If hop-2 ids are laid out as fan_out consecutive entries per
            # hop-1 node, the matching window would be
            # [i * fan_out, (i + 1) * fan_out) -- confirm the intended layout.
            for dst_id in hop2_ids[i:i + expand_factor[1]]:
                in_id = utils.fixed_dst_ids(dst_id, self._node2_range)
                utils.check_ids(src_id, out_id + in_id)
Пример #2
0
    def setUp(self):
        """ Build the fixture state used by the tests.

        Stores type names, decoders, id ranges, seed ids and default values
        on ``self``, then calls ``self.initialize()`` when
        ``self.needs_initial`` is truthy.

        Data conventions:
          num_int_attrs = num_float_attrs = num_string_attrs = 2.
          dst_ids = utils.fixed_dst_ids(src_ids, dst_range).
          With fixed_dst_ids, a src_id with src_id % 5 == 0 has no edge.
        """
        # Node / edge type names of the main test graph.
        self._node1_type, self._node2_type = "node1", "node2"
        self._edge1_type, self._edge2_type, self._edge3_type = \
            "edge1", "edge2", "edge3"

        # for subgraph sampler
        self._node3_type, self._edge4_type = "entity", "relation"

        # for conditional negative sampler
        self._cond_node_type, self._cond_edge_type = "cond_item", "cond_sim"

        # Decoders, one per node / edge type.
        attr_types = utils.ATTR_TYPES
        self._node1_decoder = gl.Decoder(attr_types=attr_types)
        self._node2_decoder = gl.Decoder(weighted=True, labeled=True)
        self._edge1_decoder = gl.Decoder(attr_types=attr_types, labeled=True)
        self._edge2_decoder = gl.Decoder(attr_types=attr_types, weighted=True)
        self._edge3_decoder = gl.Decoder(weighted=True)
        self._node3_decoder = gl.Decoder(
            attr_types=utils.ENTITY_ATTR_TYPES, labeled=True)
        self._edge4_decoder = gl.Decoder(weighted=True)
        # NOTE(review): the attribute name is spelled "deocder"; it is kept
        # as-is because code outside this method may reference it.
        self._cond_node_deocder = gl.Decoder(
            attr_types=utils.COND_ATTR_TYPES, weighted=True)

        # Id ranges of the two node types.
        self._node1_range = (0, 100)
        self._node2_range = (100, 200)

        # test for mask.
        self._train_node_range = (0, 50)
        self._test_node_range = (50, 70)
        self._val_node_range = (70, 100)

        self._node1_ids = range(*self._node1_range)
        self._node2_ids = utils.fixed_dst_ids(
            self._node1_ids, self._node2_range)

        # Seed ids fed to the samplers.
        self._seed_node1_ids = np.array([2, 7, 8])
        self._seed_node2_ids = np.array([102, 107, 108])

        # there is no edge whose src_id = 5 | 105
        self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
        self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

        # Default values.
        self._default_dst_id = -1
        self._default_int_attr = 1000
        self._default_float_attr = 999.9
        self._default_string_attr = 'hehe'

        if self.needs_initial:
            self.initialize()
        # Pause for one second when no graph object is available.
        if not self.g:
            time.sleep(1)
Пример #3
0
 def test_sampling_with_mask_eager_mode(self):
     """ Iterate VAL-masked edges in eager mode and check every batch.

     The query is drained twice. In each pass, batches are pulled until
     ``gl.OutOfRangeError`` is raised, the dst ids of each batch are
     checked against their weights and labels, and the collected src ids
     are compared with the ids expected from ``self._val_node_range``.
     """
     gl.set_eager_mode(True)
     bs = 8
     q = self.g.E(self._edge1_type, mask=gl.Mask.VAL).batch(bs).alias('val') \
               .each(
                 lambda e:
                   (e.outV().alias('src'),
                    e.inV().alias('dst')
                     .outV(self._edge2_type).sample(3).by('topk').alias('nbr'))
               ).values(lambda x:
                  (x['src'].ids,
                   x['val'].labels,
                   x['dst'].ids, x['dst'].weights, x['dst'].labels,
                   x['nbr'].ids, x['nbr'].int_attrs))

     # Each id in the validation range is expected once per fixed dst id.
     # This list does not depend on the pass, so it is built once up front.
     expected = []
     for elem in range(self._val_node_range[0], self._val_node_range[1]):
         expected += [elem] * len(
             utils.fixed_dst_ids(elem, self._node2_range))

     for _ in range(2):
         res = []
         while True:
             # Only next() signals exhaustion, so only it is guarded.
             try:
                 sid, _elb, did, dwei, dlb, _nid, _ni = q.next()
             except gl.OutOfRangeError:
                 break
             utils.check_id_weights(did, dwei)
             utils.check_equal(dlb, did)
             res.extend(sid)
         utils.check_sorted_equal(res, expected)
    def test_1hop(self):
        """ Sample full neighbors through the neighbor_sampler api.

        For every entry of the first node layer, its ids are compared
        (via utils.check_sorted_equal) with the fixed dst ids of the seed
        at the same position.
        """
        seeds = self._seed_node1_ids
        sampler = self.g.neighbor_sampler(self._edge1_type, 1, strategy="full")
        layers = sampler.get(seeds)
        edges = layers.layer_edges(1)
        nodes = layers.layer_nodes(1)

        for position, node in enumerate(nodes):
            utils.check_sorted_equal(
                utils.fixed_dst_ids(seeds[position], self._node2_range),
                node.ids)
    def test_1hop_circular_padding(self):
        """ Sample one hop of neighbors with CIRCULAR padding.

        Samples ``expand_factor`` neighbors per seed with the
        "random_without_replacement" strategy and checks, per seed, that
        the sampled ids and the fixed dst ids are equal as sets.
        """
        # NOTE(review): the padding mode is not restored afterwards; if it
        # is a process-wide setting, later tests see CIRCULAR -- confirm.
        gl.set_padding_mode(gl.CIRCULAR)
        expand_factor = 6
        ids = self._seed_node1_ids
        nbr_s = self.g.neighbor_sampler(self._edge1_type,
                                        expand_factor=expand_factor,
                                        strategy="random_without_replacement")
        nbrs = nbr_s.get(ids)
        # The edge layer is still fetched, but nothing is asserted on it.
        nbrs.layer_edges(1)
        nodes = nbrs.layer_nodes(1)

        # A separate loop name avoids rebinding `nbrs`, and the dst range
        # comes from the fixture rather than a repeated literal.
        for iid, sampled in zip(ids, nodes.ids):
            full_nbrs = utils.fixed_dst_ids(iid, self._node2_range)
            utils.check_set_equal(sampled, full_nbrs)
Пример #6
0
 def test_full_sample(self):
   """ Sample neighbors with the "full" strategy, capped at max_nbrs.

   Batches are pulled from a gl.Dataset until gl.OutOfRangeError. For
   every src id, its offset must equal min(number of fixed dst ids,
   max_nbrs), and the matching slice of neighbor ids is checked with
   utils.check_subset against the fixed dst ids.
   """
   # Single source for the cap used by both the query and the assertion.
   max_nbrs = 3
   q = self.g.V(self._node2_type).batch(4).alias('a') \
             .outV(self._edge2_type).sample(max_nbrs).by("full").alias('b') \
             .values(lambda x: (x['a'].ids, x['b'].ids, x['b'].offsets))
   dataset = gl.Dataset(q)
   while True:
     # Only next() signals exhaustion, so only it is guarded.
     try:
       src, nbrs, offsets = dataset.next()
     except gl.OutOfRangeError:
       break
     # Offsets are consumed as per-src counts into the flat nbrs array.
     start = 0
     for idx, offset in enumerate(offsets):
       expected_nbrs = utils.fixed_dst_ids(src[idx], self._node1_range)
       assert offset == min(len(expected_nbrs), max_nbrs)
       utils.check_subset(nbrs[start: start + offset], expected_nbrs)
       start += offset
    def test_neg(self):
        """ Using primitive api: negative sampling with strategy "random".

        For each seed, the sampled ids are checked against every id of
        ``self._node2_ids`` that is not a fixed dst id of that seed.
        """
        expand_factor = 6
        ids = self._seed_node1_ids
        nbr_s = self.g.negative_sampler(self._edge1_type,
                                        expand_factor=expand_factor,
                                        strategy="random")
        nodes = nbr_s.get(ids)

        for i, e in enumerate(ids):
            # Look up the seed's dst ids once, not once per candidate id.
            positives = utils.fixed_dst_ids(e, self._node2_range)
            expected_ids = [iid for iid in self._node2_ids
                            if iid not in positives]
            utils.check_ids(nodes.ids[i], expected_ids)

        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_shape(nodes, ids.size * expand_factor)
Пример #8
0
    def test_neg(self):
        """ Sample negative neighbors with in-degree of the target nodes.

        For each seed, the sampled ids are checked against every id of
        ``self._node2_ids`` that is not a fixed dst id of that seed.
        """
        expand_factor = 6
        ids = self._seed_node1_ids
        nbr_s = self.g.negative_sampler(self._edge1_type,
                                        expand_factor=expand_factor,
                                        strategy="in_degree")
        nodes = nbr_s.get(ids)

        for i, e in enumerate(ids):
            # Look up the seed's dst ids once, not once per candidate id.
            positives = utils.fixed_dst_ids(e, self._node2_range)
            expected_ids = [iid for iid in self._node2_ids
                            if iid not in positives]
            utils.check_ids(nodes.ids[i], expected_ids)

        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_shape(nodes, ids.size * expand_factor)
    def test_1hop_using_gremlin(self):
        """ Using gremlin-like api with REPLICATE padding.

        Samples ``expand_factor`` out-edges per seed with the
        "random_without_replacement" strategy and checks, per seed, that
        the sampled ids equal, as a set, the fixed dst ids plus -1.
        """
        gl.set_padding_mode(gl.REPLICATE)
        expand_factor = 6
        ids = self._seed_node1_ids
        nbrs = self.g.V(self._node1_type, feed=ids) \
          .outE(self._edge1_type).sample(expand_factor).by("random_without_replacement") \
          .inV().emit()

        # Index 2 of the emitted result holds the sampled nodes.
        nodes = nbrs[2]

        # A separate loop name avoids rebinding `nbrs`, and the dst range
        # comes from the fixture rather than a repeated literal.
        for iid, sampled in zip(ids, nodes.ids):
            full_nbrs = utils.fixed_dst_ids(iid, self._node2_range)
            # -1 is also accepted as a sampled id (presumably the default /
            # padding dst id -- confirm).
            full_nbrs.append(-1)
            utils.check_set_equal(sampled, full_nbrs)
Пример #10
0
    def setUp(self):
        """ Build the fixture state used by the tests.

        Stores type names, id ranges, decoders, seed ids and default values
        on ``self``, then calls ``self.initialize()`` when
        ``self.needs_initial`` is truthy.

        Data conventions:
          num_int_attrs = num_float_attrs = num_string_attrs = 2.
          dst_ids = utils.fixed_dst_ids(src_ids, dst_range).
          With fixed_dst_ids, a src_id with src_id % 5 == 0 has no edge.
        """
        # Node / edge type names.
        self._node1_type, self._node2_type = "node1", "node2"

        self._edge1_type, self._edge2_type, self._edge3_type = \
            "edge1", "edge2", "edge3"

        # Id ranges of the two node types.
        self._node1_range = (0, 100)
        self._node2_range = (100, 200)

        self._node1_ids = range(*self._node1_range)
        self._node2_ids = utils.fixed_dst_ids(
            self._node1_ids, self._node2_range)

        # Decoders, one per node / edge type.
        attr_types = utils.ATTR_TYPES
        self._node1_decoder = gl.Decoder(attr_types=attr_types)
        self._node2_decoder = gl.Decoder(weighted=True, labeled=True)
        self._edge1_decoder = gl.Decoder(attr_types=attr_types, labeled=True)
        self._edge2_decoder = gl.Decoder(attr_types=attr_types, weighted=True)
        self._edge3_decoder = gl.Decoder(weighted=True)

        # Seed ids fed to the samplers.
        self._seed_node1_ids = np.array([2, 7, 8])
        self._seed_node2_ids = np.array([102, 107, 108])

        # there is no edge whose src_id = 5 | 105
        self._seed_node1_ids_with_nbr_missing = np.array([5, 10, 110])
        self._seed_node2_ids_with_nbr_missing = np.array([102, 105, 108])

        # Default values.
        self._default_dst_id = -1
        self._default_int_attr = 1000
        self._default_float_attr = 999.9
        self._default_string_attr = 'hehe'

        if self.needs_initial:
            self.initialize()
        # Pause for one second when no graph object is available.
        if not self.g:
            time.sleep(1)
Пример #11
0
    def test_neg_using_gsl(self):
        """ Using gsl api: negative sampling with strategy "in_degree".

        Runs in eager mode. For each seed, the sampled ids are checked
        against every id of ``self._node2_ids`` that is not a fixed dst id
        of that seed.
        """
        gl.set_eager_mode(True)
        expand_factor = 6
        ids = self._seed_node1_ids
        nbrs = self.g.V(self._node1_type, feed=ids) \
          .outNeg(self._edge1_type).sample(expand_factor).by("in_degree") \
          .emit()

        # Index 1 of the emitted result holds the negative samples.
        nodes = nbrs[1]

        for i, e in enumerate(ids):
            # Look up the seed's dst ids once, not once per candidate id.
            positives = utils.fixed_dst_ids(e, self._node2_range)
            expected_ids = [iid for iid in self._node2_ids
                            if iid not in positives]
            utils.check_ids(nodes.ids[i], expected_ids)

        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_shape(nodes, ids.size * expand_factor)
Пример #12
0
    def test_neg_using_gremlin(self):
        """ Using gremlin-like api: negative sampling with strategy "random".

        For each seed, its ``expand_factor``-wide slice of the sampled ids
        is checked against every id of ``self._node2_ids`` that is not a
        fixed dst id of that seed.
        """
        expand_factor = 6
        ids = self._seed_node1_ids
        nbrs = self.g.V(self._node1_type, feed=ids) \
          .outNeg(self._edge1_type).sample(expand_factor).by("random") \
          .emit()

        # Index 1 of the emitted result holds the negative samples.
        nodes = nbrs[1]

        for i, e in enumerate(ids):
            # Look up the seed's dst ids once, not once per candidate id.
            positives = utils.fixed_dst_ids(e, self._node2_range)
            expected_ids = [iid for iid in self._node2_ids
                            if iid not in positives]
            utils.check_ids(
                nodes.ids[i * expand_factor:(i + 1) * expand_factor],
                expected_ids)

        utils.check_node_type(nodes, node_type=self._node2_type)
        utils.check_node_shape(nodes, ids.size * expand_factor)
  def test_1hop_using_gremlin(self):
    """ Full neighbor sample with gremlin-like api.

    For every entry of the emitted nodes, its ids are compared (via
    utils.check_sorted_equal) with the fixed dst ids of the seed at the
    same position. The node ids, type, weights and labels are then checked
    as a whole.
    """
    ids = self._seed_node1_ids
    nbrs = self.g.V(self._node1_type, feed=ids) \
      .outE(self._edge1_type).sample().by("full") \
      .inV().emit()

    # Index 2 of the emitted result holds the sampled nodes.
    nodes = nbrs[2]

    for index, node in enumerate(nodes):
      utils.check_sorted_equal(
          utils.fixed_dst_ids(ids[index], self._node2_range), node.ids)
    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)