def test_sampling_with_mask_eager_mode(self):
    """Iterate VAL-masked edges in eager mode while sampling top-k neighbors.

    Builds a query over ``self._edge1_type`` edges restricted by
    ``gl.Mask.VAL``.  For each edge batch it fetches the source vertex and,
    starting from the destination vertex, samples 3 ``self._edge2_type``
    neighbors with the "topk" strategy.  The query is drained twice; after
    each pass the collected source ids are compared (sorted) against every
    id of ``self._val_node_range`` repeated once per entry returned by
    ``utils.fixed_dst_ids``.
    """
    gl.set_eager_mode(True)
    batch_size = 8

    def expand(edge):
        # Same evaluation order as a tuple literal: source first, then the
        # destination-side neighbor sampling.
        src = edge.outV().alias('src')
        nbr = edge.inV().alias('dst') \
            .outV(self._edge2_type).sample(3).by('topk').alias('nbr')
        return (src, nbr)

    def select(x):
        return (x['src'].ids, x['val'].labels,
                x['dst'].ids, x['dst'].weights, x['dst'].labels,
                x['nbr'].ids, x['nbr'].int_attrs)

    query = self.g.E(self._edge1_type, mask=gl.Mask.VAL) \
        .batch(batch_size).alias('val') \
        .each(expand) \
        .values(select)

    # The expectation does not depend on the pass, so build it once.
    first_id = self._val_node_range[0]
    last_id = self._val_node_range[1]
    expected = []
    for node_id in range(first_id, last_id):
        fan_out = len(utils.fixed_dst_ids(node_id, self._node2_range))
        expected += [node_id] * fan_out

    # Two full passes over the masked edge set.
    for _ in range(2):
        seen_src_ids = []
        while True:
            try:
                # The tuple must still unpack into seven fields even though
                # only some of them are inspected here.
                sid, _elb, did, dwei, dlb, _nid, _ni = query.next()
                utils.check_id_weights(did, dwei)
                utils.check_equal(dlb, did)
                seen_src_ids += list(sid)
            except gl.OutOfRangeError:
                # Raised once the traversal is exhausted for this pass.
                break
        utils.check_sorted_equal(seen_src_ids, expected)
def test_1hop_using_gsl(self):
    """Sample one hop of neighbors by "edge_weight" through the GSL API.

    Runs on a heterogeneous graph with edge attributes and without edge
    weights (per the original test description).  Seeds are
    ``self._seed_node1_ids``; for each seed, ``fanout`` edges of
    ``self._edge1_type`` are sampled and their destination vertices emitted.
    Edges and nodes are then validated with the ``utils`` checkers.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanout).by("edge_weight")
        .inV()
        .emit()
    )
    # Index 0 holds the seeds; 1 and 2 are the sampled edges and nodes.
    edges, nodes = result[1], result[2]

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        edges, dst_range=self._node2_range, expected_src_ids=seeds)
    utils.check_edge_type(
        edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type)
    utils.check_edge_shape(edges, seeds.size * fanout)
    utils.check_edge_attrs(edges)
    utils.check_edge_labels(edges)

    # Node-side checks.
    utils.check_equal(nodes.ids, edges.dst_ids)
    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, seeds.size * fanout)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)
def test_1hop_using_gsl(self):
    """Sample one hop of neighbors by "in_degree" through the GSL API.

    For each id in ``self._seed_node1_ids``, ``fanout`` edges of
    ``self._edge1_type`` are sampled and their destination vertices emitted.
    Edges and nodes are then validated with the ``utils`` checkers.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanout).by("in_degree")
        .inV()
        .emit()
    )
    # Index 0 holds the seeds; 1 and 2 are the sampled edges and nodes.
    edges, nodes = result[1], result[2]

    # Edge-side checks.
    utils.check_fixed_edge_dst_ids(
        edges, dst_range=self._node2_range, expected_src_ids=seeds)
    utils.check_edge_type(
        edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type)
    utils.check_edge_shape(edges, seeds.size * fanout)
    utils.check_edge_attrs(edges)
    utils.check_edge_labels(edges)

    # Node-side checks.
    utils.check_equal(nodes.ids, edges.dst_ids)
    utils.check_node_ids(nodes, self._node2_ids)
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, seeds.size * fanout)
    utils.check_node_weights(nodes)
    utils.check_node_labels(nodes)
def test_2hop_using_gsl_with_undirected_edge_homo(self):
    """Sample two hops over an undirected edge whose endpoints share a type.

    Uses the GSL ``repeat`` step to apply the same "random" out-edge
    sampling once per entry of ``expand_factor`` along ``self._edge3_type``,
    starting from ``self._seed_node2_ids``.  Each first-hop id is then
    checked with ``utils.check_ids`` against the concatenation of
    ``utils.fixed_dst_ids`` for itself and for the paired second-hop id.
    """
    gl.set_eager_mode(True)
    expand_factor = [3, 2]
    ids = self._seed_node2_ids

    def repeat_fn(q, params):
        # ``params`` is one (edge_type, neighbor_count) entry of params_list.
        return q.outE(params[0]).sample(params[1]).by("random").inV()

    # Hop count and per-hop fan-out both come from ``expand_factor`` so the
    # query and the verification loop below cannot drift apart.
    nbrs = self.g.V(self._node2_type, feed=ids) \
        .repeat(repeat_fn, len(expand_factor),
                params_list=[(self._edge3_type, count)
                             for count in expand_factor]) \
        .emit()

    # Indices 2 and 4 hold the first- and second-hop vertices.
    hop1_ids = nbrs[2].ids.flatten()
    hop2_ids = nbrs[4].ids.flatten()

    # NOTE(review): the window hop2_ids[i:i + k] is offset by ``i`` rather
    # than ``i * k``.  If second-hop ids are laid out k-per-first-hop-node,
    # this does not pair hop1_ids[i] with its own samples -- confirm the
    # intended pairing before tightening this check.
    for i in range(ids.size * expand_factor[0]):
        src_id = hop1_ids[i]
        out_id = utils.fixed_dst_ids(src_id, self._node2_range)
        for dst_id in hop2_ids[i:i + expand_factor[1]]:
            in_id = utils.fixed_dst_ids(dst_id, self._node2_range)
            utils.check_ids(src_id, out_id + in_id)
def test_2hop_using_gsl_with_undirected_edge(self):
    """Sample two hops over an undirected edge joining different node types.

    Hop one follows ``self._edge1_type`` out-edges from the seeds; hop two
    follows the in-edges of the same edge type from the hop-one vertices.
    Both hops use the "random" strategy.  Edge and node types of both hops
    are checked; the second hop is expected to report the edge type
    suffixed with "_reverse".
    """
    gl.set_eager_mode(True)
    first_fanout, second_fanout = 3, 2
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(first_fanout).by("random")
        .inV()
        .inE(self._edge1_type).sample(second_fanout).by("random")
        .inV()
        .emit()
    )
    hop1_edges, hop1_nodes = result[1], result[2]
    hop2_edges, hop2_nodes = result[3], result[4]

    utils.check_fixed_edge_dst_ids(
        hop1_edges, dst_range=self._node2_range, expected_src_ids=seeds)

    # Forward edge on hop one, reversed edge on hop two.
    utils.check_edge_type(
        hop1_edges, self._node1_type, self._node2_type, self._edge1_type)
    reversed_type = self._edge1_type + "_reverse"
    utils.check_edge_type(
        hop2_edges, self._node2_type, self._node1_type, reversed_type)

    utils.check_node_type(hop1_nodes, self._node2_type)
    utils.check_node_type(hop2_nodes, self._node1_type)
    utils.check_node_ids(hop2_nodes, self._node1_ids)
def test_node_iterate_using_gsl(self):
    """Traverse 'user' nodes in batches, in order and then shuffled.

    Builds a graph from generated attributed node data, then:

    1. runs an ordered batched traversal until ``gl.OutOfRangeError`` (at
       most 100 batches) and checks that the collected ids, once sorted,
       equal ``range(self.value_range_[0], self.value_range_[1])``;
    2. runs 10 batches of a shuffled traversal and checks that every batch
       is a subset of that id range.

    The graph is closed in a ``finally`` clause so that a failing check does
    not leave it open.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.ATTRIBUTED])
    decoder = gl.Decoder(attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .node(source=file_path, node_type=self.node_type_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
        batch_size = 4
        ids = range(self.value_range_[0], self.value_range_[1])

        # Ordered traversal: drain the query and compare with the full range.
        query = g.V('user').batch(batch_size).values()
        res_ids = []
        for _ in range(100):
            try:
                nodes = g.run(query)
                utils.check_node_attrs(nodes)
                res_ids.extend(list(nodes.ids))
            except gl.OutOfRangeError:
                break
        utils.check_sorted_equal(res_ids, ids)

        # Shuffled traversal: only membership in the range can be asserted.
        query = g.V('user').batch(batch_size).shuffle().values()
        for _ in range(10):
            nodes = g.run(query)
            utils.check_node_attrs(nodes)
            utils.check_subset(nodes.ids, ids)
    finally:
        g.close()
def test_2hop(self):
    """Sample two hops of neighbors with the neighbor-sampler API.

    A sampler over ``[self._edge1_type, self._edge2_type]`` with the
    "in_degree" strategy is queried with ``self._seed_node1_ids``.  Layer 1
    and layer 2 edges and nodes are each validated with the ``utils``
    checkers; the flattened layer-1 node ids act as the expected source ids
    of layer 2.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seeds = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        [self._edge1_type, self._edge2_type],
        expand_factor=fanouts,
        strategy="in_degree")
    layers = sampler.get(seeds)

    # ---- first hop: node1 -> node2 over edge1 ----
    hop_edges = layers.layer_edges(1)
    hop_nodes = layers.layer_nodes(1)

    utils.check_fixed_edge_dst_ids(
        hop_edges, dst_range=self._node2_range, expected_src_ids=seeds)
    utils.check_edge_type(
        hop_edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type)
    utils.check_edge_shape(hop_edges, seeds.size * fanouts[0])
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_labels(hop_edges)

    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node2_ids)
    utils.check_node_type(hop_nodes, node_type=self._node2_type)
    utils.check_node_shape(hop_nodes, seeds.size * fanouts[0])
    utils.check_node_weights(hop_nodes)
    utils.check_node_labels(hop_nodes)

    # ---- second hop: node2 -> node1 over edge2 ----
    # The first-hop nodes, flattened, are the sources of the second hop.
    seeds = hop_nodes.ids.reshape(-1)
    hop_edges = layers.layer_edges(2)
    hop_nodes = layers.layer_nodes(2)

    utils.check_fixed_edge_dst_ids(
        hop_edges, dst_range=self._node1_range, expected_src_ids=seeds)
    utils.check_edge_type(
        hop_edges,
        src_type=self._node2_type,
        dst_type=self._node1_type,
        edge_type=self._edge2_type)
    utils.check_edge_shape(hop_edges, seeds.size * fanouts[1])
    utils.check_edge_attrs(hop_edges)
    utils.check_edge_weights(hop_edges)

    utils.check_equal(hop_nodes.ids, hop_edges.dst_ids)
    utils.check_node_ids(hop_nodes, self._node1_ids)
    utils.check_node_type(hop_nodes, node_type=self._node1_type)
    utils.check_node_shape(hop_nodes, seeds.size * fanouts[1])
def test_basic(self):
    """Load plain edges and check the ids of one emitted batch.

    Builds a graph from generated edge data using a default ``gl.Decoder``,
    emits a single batch of 4 "first" edges and checks its source and
    destination ids with ``utils.check_ids`` against ``self.src_range_`` and
    ``self.dst_range_``.  The graph is closed in a ``finally`` clause so a
    failing check does not leave it open.
    """
    # Eager mode only needs to be enabled once, before the query is built.
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([], False)
    decoder = gl.Decoder()
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
        edges = g.E("first").batch(4).emit()
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
    finally:
        g.close()
def test_neg_using_gsl(self):
    """Draw negative samples by "node_weight" through the GSL API.

    Feeds ``self._seed_node1_ids`` as ``self._node2_type`` vertices, samples
    ``fanout`` negatives per seed, and checks that the sampled ids come from
    the ids 100..199 that are not among the seeds, along with node type and
    shape.
    """
    import graphlearn as gl
    gl.set_eager_mode(True)
    fanout = 6
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node2_type, feed=seeds)
        .Neg(self._node2_type).sample(fanout).by("node_weight")
        .emit()
    )
    negatives = result[1]

    # Every id in [100, 200) that was not used as a seed is a valid negative.
    allowed_ids = []
    for candidate in range(100, 200):
        if candidate not in seeds:
            allowed_ids.append(candidate)

    utils.check_ids(negatives.ids, allowed_ids)
    utils.check_node_type(negatives, node_type=self._node2_type)
    utils.check_node_shape(negatives, seeds.size * fanout)
def test_weighted(self):
    """Load weighted edges and check ids and weights of one emitted batch.

    Builds a graph from generated weighted edge data, emits one batch of
    ``self.batch_size_`` "first" edges and checks source ids, destination
    ids and edge weights with the ``utils`` checkers.  The graph is closed
    in a ``finally`` clause so a failing check does not leave it open.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
        edges = g.E("first").batch(self.batch_size_).emit()
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
        utils.check_edge_weights(edges)
    finally:
        g.close()
def test_1hop_circular_padding(self):
    """Sample one hop without replacement under CIRCULAR padding.

    With ``gl.CIRCULAR`` padding and the "random_without_replacement"
    strategy, the set of neighbors sampled for each seed is expected to
    equal ``utils.fixed_dst_ids(seed, (100, 200))``.
    """
    gl.set_eager_mode(True)
    gl.set_padding_mode(gl.CIRCULAR)
    fanout = 6
    seeds = self._seed_node1_ids

    sampler = self.g.neighbor_sampler(
        self._edge1_type,
        expand_factor=fanout,
        strategy="random_without_replacement")
    layers = sampler.get(seeds)
    edges = layers.layer_edges(1)
    nodes = layers.layer_nodes(1)

    # One row of sampled neighbor ids per seed.
    for seed, sampled in zip(seeds, nodes.ids):
        expected = utils.fixed_dst_ids(seed, (100, 200))
        utils.check_set_equal(sampled, expected)
def test_1hop_using_gsl(self):
    """Sample the top-2 out-edges per seed through the GSL API.

    Runs with ``gl.REPLICATE`` padding from ``self._seed_node2_ids`` along
    ``self._edge2_type`` and validates the sampled edges with
    ``utils.check_topk_edge_ids`` and
    ``utils.check_half_exist_edge_weights``.
    """
    gl.set_eager_mode(True)
    gl.set_padding_mode(gl.REPLICATE)
    seeds = self._seed_node2_ids

    result = (
        self.g.V(self._node2_type, feed=seeds)
        .outE(self._edge2_type).sample(2).by("topk")
        .inV()
        .emit()
    )
    sampled_edges = result[1]

    utils.check_topk_edge_ids(
        sampled_edges,
        seeds,
        (0, 100),
        expand_factor=2,
        default_dst_id=self._default_dst_id)
    utils.check_half_exist_edge_weights(
        sampled_edges,
        default_dst_id=self._default_dst_id)
def test_1hop_using_gsl(self):
    """Sample one hop without replacement under REPLICATE padding (GSL API).

    For each seed, the set of sampled neighbor ids is expected to equal
    ``utils.fixed_dst_ids(seed, (100, 200))`` plus the id ``-1``.
    """
    gl.set_eager_mode(True)
    gl.set_padding_mode(gl.REPLICATE)
    fanout = 6
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanout)
        .by("random_without_replacement")
        .inV()
        .emit()
    )
    edges = result[1]
    nodes = result[2]

    # One row of sampled neighbor ids per seed.
    for seed, sampled in zip(seeds, nodes.ids):
        expected = utils.fixed_dst_ids(seed, (100, 200))
        # -1 is accepted alongside the real neighbors.
        expected.append(-1)
        utils.check_set_equal(sampled, expected)
def test_labeled_attributed(self):
    """Load labeled, attributed edges and check one emitted batch.

    Builds a graph from generated edge data carrying labels and attributes,
    emits one batch of ``self.batch_size_`` "first" edges and checks source
    ids, destination ids, labels and attributes with the ``utils``
    checkers.  The graph is closed in a ``finally`` clause so a failing
    check does not leave it open.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.LABELED, utils.ATTRIBUTED], False)
    decoder = gl.Decoder(labeled=True, attr_types=utils.ATTR_TYPES)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
        edges = g.E("first").batch(self.batch_size_).emit()
        utils.check_ids(edges.src_ids,
                        range(self.src_range_[0], self.src_range_[1]))
        utils.check_ids(edges.dst_ids,
                        range(self.dst_range_[0], self.dst_range_[1]))
        utils.check_edge_labels(edges)
        utils.check_edge_attrs(edges)
    finally:
        g.close()
def test_neg_using_gsl(self):
    """Draw negative out-neighbors by "in_degree" through the GSL API.

    For each id in ``self._seed_node1_ids``, ``expand_factor`` negatives are
    sampled along ``self._edge1_type``.  Each seed's samples are checked
    against the ids of ``self._node2_ids`` that are not returned by
    ``utils.fixed_dst_ids`` for that seed; node type and shape are checked
    afterwards.
    """
    gl.set_eager_mode(True)
    expand_factor = 6
    ids = self._seed_node1_ids
    nbrs = self.g.V(self._node1_type, feed=ids) \
        .outNeg(self._edge1_type).sample(expand_factor).by("in_degree") \
        .emit()
    nodes = nbrs[1]

    for i, seed in enumerate(ids):
        # Look the seed's neighbors up once instead of once per candidate.
        positives = utils.fixed_dst_ids(seed, self._node2_range)
        expected_ids = [iid for iid in self._node2_ids
                        if iid not in positives]
        utils.check_ids(nodes.ids[i], expected_ids)

    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, ids.size * expand_factor)
def test_traverse_with_mask_eager_mode(self):
    """Traverse TRAIN-masked nodes in eager mode and verify ids and attrs.

    Iterates ``self._node1_type`` vertices restricted by ``gl.Mask.TRAIN``
    in batches of 8 until ``gl.OutOfRangeError``.  Int, float and string
    attributes of every batch are checked against the batch ids, and the
    collected ids (sorted) must equal
    ``range(self._train_node_range[0], self._train_node_range[1])``.
    """
    gl.set_eager_mode(True)
    bs = 8
    q = self.g.V(self._node1_type, mask=gl.Mask.TRAIN) \
        .batch(bs).alias('train') \
        .values(lambda x: (x['train'].ids,
                           x['train'].int_attrs,
                           x['train'].float_attrs,
                           x['train'].string_attrs))

    res = []
    while True:
        try:
            ids, i, f, s = q.next()
            utils.check_i_attrs(i, ids)
            utils.check_f_attrs(f, ids)
            utils.check_s_attrs(s, ids)
            res += list(ids)
        except gl.OutOfRangeError:
            # Raised once the traversal is exhausted.
            break

    utils.check_sorted_equal(
        res, range(self._train_node_range[0], self._train_node_range[1]))
def test_1hop_using_gsl(self):
    """Fetch all neighbors with the "full" strategy through the GSL API.

    For each seed in ``self._seed_node1_ids``, the neighbor ids emitted for
    ``self._edge1_type`` must match, after sorting,
    ``utils.fixed_dst_ids(seed, self._node2_range)``.  Node ids, type,
    weights and labels are checked on the whole result afterwards.
    """
    gl.set_eager_mode(True)
    fanout = 0
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanout).by("full")
        .inV()
        .emit()
    )
    neighbors = result[2]

    # Iterating the result yields one entry per seed, in seed order.
    for position, per_seed in enumerate(neighbors):
        expected = utils.fixed_dst_ids(seeds[position], self._node2_range)
        utils.check_sorted_equal(expected, per_seed.ids)

    utils.check_node_ids(neighbors, self._node2_ids)
    utils.check_node_type(neighbors, node_type=self._node2_type)
    utils.check_node_weights(neighbors)
    utils.check_node_labels(neighbors)
def test_2hop_using_gsl(self):
    """Sample two hops of neighbors by "edge_weight" through the GSL API.

    Hop one follows ``self._edge1_type`` from ``self._seed_node1_ids``; hop
    two follows ``self._edge2_type`` from the hop-one vertices.  Destination
    ids of each hop are validated with ``utils.check_fixed_edge_dst_ids``.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanouts[0]).by("edge_weight")
        .inV()
        .outE(self._edge2_type).sample(fanouts[1]).by("edge_weight")
        .inV()
        .emit()
    )

    # First hop: seeds -> node2 range.
    hop1_edges, hop1_nodes = result[1], result[2]
    utils.check_fixed_edge_dst_ids(
        hop1_edges, dst_range=self._node2_range, expected_src_ids=seeds)

    # Second hop: flattened first-hop nodes -> node1 range.
    hop2_sources = hop1_nodes.ids.reshape(-1)
    hop2_edges, hop2_nodes = result[3], result[4]
    utils.check_fixed_edge_dst_ids(
        hop2_edges,
        dst_range=self._node1_range,
        expected_src_ids=hop2_sources)
def test_1hop_with_neighbor_missing(self):
    """Sample neighbors for seeds that have no out-neighbors.

    Uses the neighbor sampler with the "random" strategy on
    ``self._seed_node1_ids_with_nbr_missing`` and checks that the results
    are filled with defaults: ``self._default_dst_id`` for ids, the default
    int/float/string attributes for edges, and the "not exist" label and
    weight checks from ``utils``.
    """
    gl.set_eager_mode(True)
    fanout = 6
    seeds = self._seed_node1_ids_with_nbr_missing

    sampler = self.g.neighbor_sampler(
        self._edge1_type, expand_factor=fanout, strategy="random")
    layers = sampler.get(seeds)
    edges = layers.layer_edges(1)
    nodes = layers.layer_nodes(1)

    # Edge-side checks: everything should carry default values.
    utils.check_fixed_edge_dst_ids(
        edges,
        dst_range=self._node2_range,
        expected_src_ids=seeds,
        default_dst_id=self._default_dst_id)
    utils.check_edge_type(
        edges,
        src_type=self._node1_type,
        dst_type=self._node2_type,
        edge_type=self._edge1_type)
    utils.check_edge_shape(edges, seeds.size * fanout)
    utils.check_not_exist_edge_attrs(
        edges,
        default_int_attr=self._default_int_attr,
        default_float_attr=self._default_float_attr,
        default_string_attr=self._default_string_attr,
    )
    utils.check_not_exist_edge_labels(edges)

    # Node-side checks: only the default destination id may appear.
    utils.check_equal(nodes.ids, edges.dst_ids)
    utils.check_node_ids(nodes, [self._default_dst_id])
    utils.check_node_type(nodes, node_type=self._node2_type)
    utils.check_node_shape(nodes, seeds.size * fanout)
    utils.check_not_exist_node_weights(nodes)
    utils.check_not_exist_node_labels(nodes)
def test_edge_iterate_using_gsl(self):
    """Traverse 'first' edges in batches, in order and then shuffled.

    Builds a graph from generated weighted edge data, then:

    1. runs an ordered batched traversal until ``gl.OutOfRangeError`` (at
       most 100 batches) and checks that the collected source and
       destination ids, once sorted, equal the ranges given by
       ``self.src_range_`` and ``self.dst_range_``;
    2. runs 10 batches of a shuffled traversal and checks that every batch
       is a subset of those ranges.

    Edge weights are checked on every batch.  The graph is closed in a
    ``finally`` clause so a failing check does not leave it open.
    """
    gl.set_eager_mode(True)
    file_path = self.gen_test_data([utils.WEIGHTED], False)
    decoder = gl.Decoder(weighted=True)
    g = gl.Graph() \
        .edge(source=file_path, edge_type=self.edge_tuple_, decoder=decoder)
    g.init(tracker=utils.TRACKER_PATH)

    try:
        batch_size = 4
        # Both phases compare against the same id ranges.
        src_ids = range(self.src_range_[0], self.src_range_[1])
        dst_ids = range(self.dst_range_[0], self.dst_range_[1])

        # Ordered traversal: drain the query and compare with full ranges.
        query = g.E('first').batch(batch_size).values()
        res_src = []
        res_dst = []
        for _ in range(100):
            try:
                edges = g.run(query)
                utils.check_edge_weights(edges)
                res_src.extend(list(edges.src_ids))
                res_dst.extend(list(edges.dst_ids))
            except gl.OutOfRangeError:
                break
        utils.check_sorted_equal(res_src, src_ids)
        utils.check_sorted_equal(res_dst, dst_ids)

        # Shuffled traversal: only membership in the ranges can be asserted.
        query = g.E('first').batch(batch_size).shuffle().values()
        for _ in range(10):
            edges = g.run(query)
            utils.check_edge_weights(edges)
            utils.check_subset(edges.src_ids, src_ids)
            utils.check_subset(edges.dst_ids, dst_ids)
    finally:
        g.close()
def test_2hop_using_gsl(self):
    """Sample two hops of neighbors by "in_degree" through the GSL API.

    Hop one follows ``self._edge1_type`` from ``self._seed_node1_ids``; hop
    two follows ``self._edge2_type`` from the hop-one vertices.  Destination
    ids of each hop are validated with ``utils.check_fixed_edge_dst_ids``.
    """
    gl.set_eager_mode(True)
    fanouts = [3, 2]
    seeds = self._seed_node1_ids

    result = (
        self.g.V(self._node1_type, feed=seeds)
        .outE(self._edge1_type).sample(fanouts[0]).by("in_degree")
        .inV()
        .outE(self._edge2_type).sample(fanouts[1]).by("in_degree")
        .inV()
        .emit()
    )

    # First hop: seeds -> node2 range.
    hop1_edges, hop1_nodes = result[1], result[2]
    utils.check_fixed_edge_dst_ids(
        hop1_edges, dst_range=self._node2_range, expected_src_ids=seeds)

    # Second hop: flattened first-hop nodes -> node1 range.
    hop2_sources = hop1_nodes.ids.reshape(-1)
    hop2_edges, hop2_nodes = result[3], result[4]
    utils.check_fixed_edge_dst_ids(
        hop2_edges,
        dst_range=self._node1_range,
        expected_src_ids=hop2_sources)
def test_query_not_exist_gsl(self):
    """Look up ``self.not_exist_ids_`` through the GSL API.

    The emitted nodes are passed to ``self.check_not_exist_attrs``.
    """
    gl.set_eager_mode(True)
    lookup = self.g.V(self.node_type_, feed=self.not_exist_ids_)
    missing_nodes = lookup.emit()
    self.check_not_exist_attrs(missing_nodes)
def test_1hop_with_agg(self):
    """Print the "sum" embedding aggregation of a full 1-hop neighborhood.

    Fetches the "full" ``self._edge2_type`` out-neighbors of
    ``self._seed_node2_ids`` and prints ``embedding_agg(func="sum")`` of the
    neighbor result.  Nothing is asserted; the test only exercises the call.
    """
    gl.set_eager_mode(True)
    seeds = self._seed_node2_ids

    query = (
        self.g.V(self._node2_type, feed=seeds)
        .outV(self._edge2_type).sample().by("full")
    )
    emitted = query.emit()

    neighbors = emitted[1]
    print(neighbors.embedding_agg(func="sum"))