def test_node_iterate_using_gsl(self):
        """Iterate 'user' nodes through GSL queries with eager mode enabled.

        First drains an ordered ``batch`` query until ``gl.OutOfRangeError``
        is raised and compares the collected ids with the expected id range
        via ``utils.check_sorted_equal``. Then runs a shuffled query ten
        times and checks that every batch's ids are a subset of that range.
        """
        gl.set_eager_mode(True)
        file_path = self.gen_test_data([utils.ATTRIBUTED])
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph() \
          .node(source=file_path, node_type=self.node_type_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        # Close the graph even when a check below raises, so a failing
        # assertion does not leave the initialized graph open.
        try:
            batch_size = 4
            query = g.V('user').batch(batch_size).values()
            res_ids = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    nodes = g.run(query)
                    utils.check_node_attrs(nodes)
                    res_ids.extend(nodes.ids)
                except gl.OutOfRangeError:
                    # The ordered traversal is exhausted.
                    break
            ids = range(self.value_range_[0], self.value_range_[1])
            utils.check_sorted_equal(res_ids, ids)

            query = g.V('user').batch(batch_size).shuffle().values()
            max_iter = 10
            for _ in range(max_iter):
                nodes = g.run(query)
                utils.check_node_attrs(nodes)
                utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
    def test_edge_shuffle(self):
        """Traverse 'first' edges with a shuffled, traversing GSL query.

        Pulls batches with ``sampler.next()`` until ``gl.OutOfRangeError``
        is raised (at most 100 batches), checks each batch's weights, and
        compares the collected source and destination ids with the expected
        ranges via ``utils.check_sorted_equal``.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        # Close the graph even when a check below raises, so a failing
        # assertion does not leave the initialized graph open.
        try:
            batch_size = 4
            sampler = g.E('first').batch(batch_size).shuffle(
                traverse=True).values()
            res_src = []
            res_dst = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    edges = sampler.next()
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    # The traversal is exhausted.
                    break
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)
        finally:
            g.close()
示例#3
0
    def test_edge_iterate_using_gremlin(self):
        """Iterate 'first' edges through gremlin-style queries.

        Drains an ordered ``batch`` query until ``gl.OutOfRangeError`` is
        raised and compares the collected source/destination ids with the
        expected ranges via ``utils.check_sorted_equal``. Then runs a
        shuffled query ten times, checking each batch's weights and that its
        source/destination ids are subsets of those ranges.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

        # Always close the graph, on success and on failure alike, so the
        # initialized graph is not left open after this test.
        try:
            batch_size = 4
            query = g.E('first').batch(batch_size).values()
            res_src = []
            res_dst = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    edges = g.run(query)
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    # The ordered traversal is exhausted.
                    break
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)

            # The same expected ranges are reused for the shuffled query.
            query = g.E('first').batch(batch_size).shuffle().values()
            max_iter = 10
            for _ in range(max_iter):
                edges = g.run(query)
                utils.check_edge_weights(edges)
                utils.check_subset(edges.src_ids, src_ids)
                utils.check_subset(edges.dst_ids, dst_ids)
        finally:
            g.close()
示例#4
0
 def test_sampling_with_mask_eager_mode(self):
     """Traverse VAL-masked edges with neighbor sampling in eager mode.

     Builds a query over ``self._edge1_type`` edges masked with
     ``gl.Mask.VAL``; for each edge batch it takes the source vertex and the
     destination vertex, and from the destination samples 3 neighbors over
     ``self._edge2_type`` with the 'topk' strategy. The query is drained
     twice; after each pass the collected source ids are compared with every
     id of the validation range repeated
     ``len(utils.fixed_dst_ids(id, self._node2_range))`` times.
     """
     gl.set_eager_mode(True)
     batch_size = 8

     def expand(edge):
         # Source vertex first, then destination vertex with its neighbors.
         src = edge.outV().alias('src')
         nbr = edge.inV().alias('dst') \
             .outV(self._edge2_type).sample(3).by('topk').alias('nbr')
         return (src, nbr)

     def select(out):
         return (out['src'].ids,
                 out['val'].labels,
                 out['dst'].ids, out['dst'].weights, out['dst'].labels,
                 out['nbr'].ids, out['nbr'].int_attrs)

     val_edges = self.g.E(self._edge1_type, mask=gl.Mask.VAL)
     query = val_edges.batch(batch_size).alias('val') \
         .each(expand).values(select)

     batches_seen = 0  # counted per batch; not asserted on
     for _epoch in range(2):
         seen_src = []
         while True:
             try:
                 (src_ids, val_labels, dst_ids, dst_weights, dst_labels,
                  nbr_ids, nbr_ints) = query.next()
                 utils.check_id_weights(dst_ids, dst_weights)
                 utils.check_equal(dst_labels, dst_ids)
                 batches_seen += 1
                 seen_src.extend(src_ids)
             except gl.OutOfRangeError:
                 # This pass over the masked edges is finished.
                 break
         expected = []
         for node_id in range(self._val_node_range[0],
                              self._val_node_range[1]):
             fanout = len(utils.fixed_dst_ids(node_id, self._node2_range))
             expected.extend([node_id] * fanout)
         utils.check_sorted_equal(seen_src, expected)
示例#5
0
    def test_node_iterate(self):
        """Iterate 'user' nodes with ``g.node_sampler``.

        Drains a ``by_order`` sampler until ``gl.OutOfRangeError`` is raised
        (at most 100 batches) and compares the collected ids with the
        expected id range via ``utils.check_sorted_equal``. Then draws ten
        batches from a ``random`` sampler and checks that each batch's ids
        are a subset of that range.
        """
        file_path = self.gen_test_data([utils.ATTRIBUTED])
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph() \
          .node(source=file_path, node_type=self.node_type_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        # Close the graph even when a check below raises, so a failing
        # assertion does not leave the initialized graph open.
        try:
            batch_size = 4
            sampler = g.node_sampler('user',
                                     batch_size=batch_size,
                                     strategy="by_order")
            res_ids = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    nodes = sampler.get()
                    utils.check_node_attrs(nodes)
                    res_ids.extend(nodes.ids)
                except gl.OutOfRangeError:
                    # The ordered sampler is exhausted.
                    break
            ids = range(self.value_range_[0][0], self.value_range_[0][1])
            utils.check_sorted_equal(res_ids, ids)

            sampler = g.node_sampler('user',
                                     batch_size=batch_size,
                                     strategy="random")
            max_iter = 10
            for _ in range(max_iter):
                nodes = sampler.get()
                utils.check_node_attrs(nodes)
                utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
    def test_node_iterate_from_graph(self):
        """Iterate the endpoint nodes of 'first' edges with ``node_sampler``.

        For the source endpoint (``gl.EDGE_SRC``, expected node type "user",
        ids from ``self.src_range_``) and then the destination endpoint
        (``gl.EDGE_DST``, expected node type "item", ids from
        ``self.dst_range_``):

        1. drain a ``by_order`` sampler until ``gl.OutOfRangeError`` (at
           most 100 batches), checking the node type of every batch, and
           compare the collected ids with the expected range via
           ``utils.check_sorted_equal``;
        2. draw ten batches from a ``random`` sampler and check that each
           batch's ids are a subset of that range.
        """
        file_path = self.gen_test_data([utils.ATTRIBUTED], False)
        decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        batch_size = 4
        max_ordered_iter = 100
        max_random_iter = 10

        # Close the graph even when a check below raises, so a failing
        # assertion does not leave the initialized graph open.
        try:
            # (edge endpoint, expected node type, expected id range)
            endpoints = (
                (gl.EDGE_SRC, "user", self.src_range_),
                (gl.EDGE_DST, "item", self.dst_range_),
            )
            for node_from, node_type, id_range in endpoints:
                sampler = g.node_sampler('first',
                                         batch_size=batch_size,
                                         strategy="by_order",
                                         node_from=node_from)
                res_ids = []
                for _ in range(max_ordered_iter):
                    try:
                        nodes = sampler.get()
                        utils.check_node_type(nodes, node_type)
                        res_ids.extend(nodes.ids)
                    except gl.OutOfRangeError:
                        # The ordered sampler is exhausted.
                        break
                ids = range(id_range[0], id_range[1])
                utils.check_sorted_equal(res_ids, ids)

                sampler = g.node_sampler('first',
                                         batch_size=batch_size,
                                         strategy="random",
                                         node_from=node_from)
                for _ in range(max_random_iter):
                    nodes = sampler.get()
                    utils.check_subset(nodes.ids, ids)
        finally:
            g.close()
    def test_1hop(self):
        """Sample full 1-hop neighbors with ``neighbor_sampler``.

        For every seed id, the ids of the matching entry of layer-1 nodes
        are compared with ``utils.fixed_dst_ids(seed, self._node2_range)``
        via ``utils.check_sorted_equal``.
        """
        seeds = self._seed_node1_ids
        full_sampler = self.g.neighbor_sampler(
            self._edge1_type, 1, strategy="full")
        layers = full_sampler.get(seeds)
        # Layer-1 edges are fetched as well, but no check below reads them.
        hop_edges = layers.layer_edges(1)
        hop_nodes = layers.layer_nodes(1)

        for pos, node in enumerate(hop_nodes):
            expected = utils.fixed_dst_ids(seeds[pos], self._node2_range)
            utils.check_sorted_equal(expected, node.ids)
  def test_1hop_using_gremlin(self):
    """Sample full 1-hop neighbors through the gremlin-like query API.

    Feeds the seed ids into a ``V(...).outE(...).sample().by("full").inV()``
    query, takes element 2 of the emitted result as the neighbor nodes, and
    checks their per-seed ids, overall ids, node type, weights and labels.
    """
    expand_factor = 2  # not read anywhere in this test
    seeds = self._seed_node1_ids
    seed_query = self.g.V(self._node1_type, feed=seeds)
    full_out_edges = seed_query.outE(self._edge1_type).sample().by("full")
    emitted = full_out_edges.inV().emit()

    nbr_nodes = emitted[2]

    for pos, node in enumerate(nbr_nodes):
      expected = utils.fixed_dst_ids(seeds[pos], self._node2_range)
      utils.check_sorted_equal(expected, node.ids)
    utils.check_node_ids(nbr_nodes, self._node2_ids)
    utils.check_node_type(nbr_nodes, node_type=self._node2_type)
    utils.check_node_weights(nbr_nodes)
    utils.check_node_labels(nbr_nodes)
示例#9
0
 def test_traverse_with_mask(self):
   """Traverse TEST-masked nodes through a ``gl.Dataset``.

   Builds a batched query over ``self._node1_type`` nodes masked with
   ``gl.Mask.TEST`` and wraps it in ``gl.Dataset``. The dataset is drained
   twice; every batch's int/float/string attributes are checked against its
   ids, and after each pass the collected ids are compared with the test
   node range via ``utils.check_sorted_equal``.
   """
   batch_size = 8

   def select(out):
     return (out['test'].ids, out['test'].int_attrs,
             out['test'].float_attrs, out['test'].string_attrs)

   test_nodes = self.g.V(self._node1_type, mask=gl.Mask.TEST)
   query = test_nodes.batch(batch_size).alias('test').values(select)
   dataset = gl.Dataset(query)

   batches_seen = 0  # counted per batch; not asserted on
   for _epoch in range(2):
     seen_ids = []
     while True:
       try:
         ids, int_attrs, float_attrs, string_attrs = dataset.next()
         utils.check_i_attrs(int_attrs, ids)
         utils.check_f_attrs(float_attrs, ids)
         utils.check_s_attrs(string_attrs, ids)
         batches_seen += 1
         seen_ids.extend(ids)
       except gl.OutOfRangeError:
         # This pass over the masked nodes is finished.
         break
     expected = range(self._test_node_range[0], self._test_node_range[1])
     utils.check_sorted_equal(seen_ids, expected)
示例#10
0
  def test_node_iterate(self):
    """Traverse 'user' nodes with a shuffled, traversing GSL query.

    Pulls batches with ``sampler.next()`` until ``gl.OutOfRangeError`` is
    raised (at most 100 batches), checks each batch's attributes, and
    compares the collected ids with the expected id range via
    ``utils.check_sorted_equal``.
    """
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
      .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(server_id=0, server_count=1, tracker=utils.TRACKER_PATH)

    # Always close the graph, on success and on failure alike, so the
    # initialized graph is not left open after this test.
    try:
      batch_size = 4
      sampler = g.V('user').batch(batch_size).shuffle(traverse=True).values()
      res_ids = []
      max_iter = 100
      for _ in range(max_iter):
        try:
          nodes = sampler.next()
          utils.check_node_attrs(nodes)
          res_ids.extend(nodes.ids)
        except gl.OutOfRangeError:
          # The traversal is exhausted.
          break
      ids = range(self.value_range_[0][0], self.value_range_[0][1])
      utils.check_sorted_equal(res_ids, ids)
    finally:
      g.close()
示例#11
0
 def test_traverse_with_mask_eager_mode(self):
     """Traverse TRAIN-masked nodes once with eager mode enabled.

     Builds a batched query over ``self._node1_type`` nodes masked with
     ``gl.Mask.TRAIN`` and pulls batches with ``next()`` until
     ``gl.OutOfRangeError`` is raised. Every batch's int/float/string
     attributes are checked against its ids, and the collected ids are
     compared with the train node range via ``utils.check_sorted_equal``.
     """
     gl.set_eager_mode(True)
     batch_size = 8

     def select(out):
         return (out['train'].ids, out['train'].int_attrs,
                 out['train'].float_attrs, out['train'].string_attrs)

     train_nodes = self.g.V(self._node1_type, mask=gl.Mask.TRAIN)
     query = train_nodes.batch(batch_size).alias('train').values(select)

     batches_seen = 0  # counted per batch; not asserted on
     seen_ids = []
     while True:
         try:
             ids, int_attrs, float_attrs, string_attrs = query.next()
             utils.check_i_attrs(int_attrs, ids)
             utils.check_f_attrs(float_attrs, ids)
             utils.check_s_attrs(string_attrs, ids)
             batches_seen += 1
             seen_ids.extend(ids)
         except gl.OutOfRangeError:
             # The traversal over the masked nodes is finished.
             break
     expected = range(self._train_node_range[0], self._train_node_range[1])
     utils.check_sorted_equal(seen_ids, expected)
示例#12
0
    def test_edge_iterate(self):
        """Iterate 'first' edges with ``g.edge_sampler``.

        Drains a ``by_order`` sampler until ``gl.OutOfRangeError`` is raised
        (at most 100 batches), checking each batch's weights, and compares
        the collected source/destination ids with the expected ranges via
        ``utils.check_sorted_equal``. Then draws ten batches from a
        ``random`` sampler and checks each batch's weights and that its
        source/destination ids are subsets of those ranges.
        """
        file_path = self.gen_test_data([utils.WEIGHTED], False)
        decoder = gl.Decoder(weighted=True)
        g = gl.Graph() \
          .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
        g.init(tracker=utils.TRACKER_PATH)

        # Close the graph even when a check below raises, so a failing
        # assertion does not leave the initialized graph open.
        try:
            batch_size = 4
            sampler = g.edge_sampler('first',
                                     batch_size=batch_size,
                                     strategy="by_order")
            res_src = []
            res_dst = []
            max_iter = 100
            for _ in range(max_iter):
                try:
                    edges = sampler.get()
                    utils.check_edge_weights(edges)
                    res_src.extend(edges.src_ids)
                    res_dst.extend(edges.dst_ids)
                except gl.OutOfRangeError:
                    # The ordered sampler is exhausted.
                    break
            src_ids = range(self.src_range_[0], self.src_range_[1])
            dst_ids = range(self.dst_range_[0], self.dst_range_[1])
            utils.check_sorted_equal(res_src, src_ids)
            utils.check_sorted_equal(res_dst, dst_ids)

            # The same expected ranges are reused for the random sampler.
            sampler = g.edge_sampler('first',
                                     batch_size=batch_size,
                                     strategy="random")
            max_iter = 10
            for _ in range(max_iter):
                edges = sampler.get()
                utils.check_edge_weights(edges)
                utils.check_subset(edges.src_ids, src_ids)
                utils.check_subset(edges.dst_ids, dst_ids)
        finally:
            g.close()