def gremlin(self, graph):
    """Return an interactive engine handler for executing gremlin queries.

    A handler that was already created for the graph is reused. Otherwise a
    new interactive engine is requested through the gRPC client, cached on
    the session and attached to the graph.

    Args:
        graph (:class:`Graph`): The graph to query.

    Raises:
        InvalidArgumentError: :code:`graph` is not a property graph or unloaded.

    Returns:
        :class:`InteractiveQuery`
    """
    vineyard_id = graph.vineyard_id
    cached = self._interactive_instance_dict.get(vineyard_id)
    if cached is not None:
        return cached

    if not graph.loaded():
        raise InvalidArgumentError("The graph has already been unloaded")
    if graph.graph_type != types_pb2.ARROW_PROPERTY:
        raise InvalidArgumentError("The graph should be a property graph.")

    # Imported lazily, exactly as the original block did.
    from graphscope.interactive.query import InteractiveQuery

    response = self._grpc_client.create_interactive_engine(
        vineyard_id, graph.schema_path
    )
    handler = InteractiveQuery(
        graphscope_session=self,
        object_id=vineyard_id,
        front_ip=response.frontend_host,
        front_port=response.frontend_port,
    )
    self._interactive_instance_dict[vineyard_id] = handler
    graph.attach_interactive_instance(handler)
    return handler
def _not_compatible_for(not_compatible_for_func, *args, **kwargs):
    """Call the wrapped function unless the graph has a rejected type.

    The first positional argument is treated as the graph. ``graph_types`` is
    resolved from the enclosing scope and lists the graph type names the
    wrapped function must not be used with.

    Raises:
        InvalidArgumentError: the graph has no ``graph_type`` attribute, an
            unknown type name was listed, or the graph's type is rejected.
    """
    target = args[0]
    if not hasattr(target, "graph_type"):
        raise InvalidArgumentError("Missing graph_type attribute in graph object.")

    current = target.graph_type
    is_type = {
        "arrow_property": current == types_pb2.ARROW_PROPERTY,
        "dynamic_property": current == types_pb2.DYNAMIC_PROPERTY,
        "arrow_projected": current == types_pb2.ARROW_PROJECTED,
        "dynamic_projected": current == types_pb2.DYNAMIC_PROJECTED,
    }
    try:
        # Stops at the first hit, so names after a hit are never looked up.
        rejected = any(is_type[name] for name in graph_types)
    except KeyError:
        raise InvalidArgumentError(
            "Use one or more of arrow_property,dynamic_property,arrow_projected,dynamic_projected",
        )

    if not rejected:
        return not_compatible_for_func(*args, **kwargs)
    raise InvalidArgumentError("Not compatible for %s type" % " ".join(graph_types))
def load_app(gar=None, algo=None, context=None, **kwargs):
    """Load an app from a gar resource given as bytes or as a file path.

    Args:
        gar: bytes or BytesIO or str
            A str is the path of the resource. For java (giraph) apps, gar
            must be None, meaning the app is looked up in previously added
            libs.
        algo: str
            Algo name inside the resource. None will extract the name from
            the gar resource if there is only one app in it.
        context: Context type handed to :class:`AppAssets` unchanged.

    Returns:
        Instance of <graphscope.framework.app.AppAssets>

    Raises:
        FileNotFoundError: File not exist.
        PermissionError: Permission denied of path.
        InvalidArgumentError: File is not a zip file, a gar was given for a
            giraph app, or ``gar`` has an unsupported type.

    Examples:
        >>> sssp = load_app(gar='./resource.gar', algo='sssp')
        >>> sssp(src=4)

        which will have following `.gs_conf.yaml` in resource.gar:
          app:
            - algo: sssp
              type: cpp_pie
              class_name: grape:SSSP
              context_type: vertex_data
              src: sssp/sssp.h
              compatible_graph:
                - gs::ArrowProjectedFragment
    """
    if isinstance(gar, (BytesIO, bytes)):
        return AppAssets(algo, context, gar, **kwargs)

    if isinstance(gar, str):
        with open(gar, "rb") as resource:
            payload = resource.read()
        if zipfile.is_zipfile(gar):
            return AppAssets(algo, context, payload, **kwargs)
        raise InvalidArgumentError("{} is not a zip file.".format(gar))

    if isinstance(algo, str) and algo.startswith("giraph:"):
        if gar is None:
            return AppAssets(algo, "vertex_data", None, **kwargs)
        raise InvalidArgumentError("Running giraph app expect no gar resource")

    raise InvalidArgumentError("Wrong type with {}".format(gar))
def gremlin(self, graph, engine_params=None):
    """Return an interactive engine handler for executing gremlin queries.

    A handler already cached for the graph is returned as-is. Otherwise a
    new interactive engine is created, cached and attached to the graph.

    Args:
        graph (:class:`Graph`): Use the graph to create interactive instance.
        engine_params (dict, optional): Configure startup parameters of
            interactive engine. Keys and values are converted to ``str``
            before being sent. See a list of configurable keys in
            `interactive_engine/deploy/docker/dockerfile/executor.vineyard.properties`

    Raises:
        InvalidArgumentError: :code:`graph` is not a property graph or unloaded.

    Returns:
        :class:`InteractiveQuery`
    """
    vineyard_id = graph.vineyard_id
    cached = self._interactive_instance_dict.get(vineyard_id)
    if cached is not None:
        return cached

    if not graph.loaded():
        raise InvalidArgumentError("The graph has already been unloaded")
    if graph.graph_type != types_pb2.ARROW_PROPERTY:
        raise InvalidArgumentError("The graph should be a property graph.")

    if engine_params is None:
        engine_params = {}
    else:
        engine_params = {str(k): str(v) for k, v in engine_params.items()}

    # Imported lazily, exactly as the original block did.
    from graphscope.interactive.query import InteractiveQuery

    response = self._grpc_client.create_interactive_engine(
        object_id=vineyard_id,
        schema_path=graph.schema_path,
        gremlin_server_cpu=gs_config.k8s_gie_gremlin_server_cpu,
        gremlin_server_mem=gs_config.k8s_gie_gremlin_server_mem,
        engine_params=engine_params,
    )
    handler = InteractiveQuery(
        graphscope_session=self,
        object_id=vineyard_id,
        front_ip=response.frontend_host,
        front_port=response.frontend_port,
    )
    self._interactive_instance_dict[vineyard_id] = handler
    graph.attach_interactive_instance(handler)
    return handler
def _check_selector(self, selector): """ Raises: InvalidArgumentError: - Selector in vertex data context is None SyntaxError: - The syntax of selector is incorrect NotImplementedError: - Selector of e not supported """ if selector is None: raise InvalidArgumentError( "Selector in vertex data context cannot be None") segments = selector.split(".") err_msg = f"Invalid selector: `{selector}`. " err_msg += ( "Please inspect the result with `ret.schema` and choose a valid selector." ) if segments[0] == "v": if selector not in ("v.id", "v.data"): raise SyntaxError(err_msg) elif segments[0] == "e": raise NotImplementedError("Selector of e is not supported yet") if selector not in ("e.src", "e.dst", "e.data"): raise SyntaxError(err_msg) elif segments[0] == "r": if selector != "r": raise SyntaxError(err_msg) else: raise SyntaxError(err_msg) return True
def _check_selector(self, selector): """ Raises: InvalidArgumentError: - Selector in labeled vertex data context is None SyntaxError: - The syntax of selector is incorrect NotImplementedError: - Selector of e not supported """ if selector is None: raise InvalidArgumentError( "Selector in labeled vertex data context cannot be None") segments = selector.split(":") err_msg = f"Invalid selector: `{selector}`. " err_msg += ( "Please inspect the result with `ret.schema` and choose a valid selector." ) if len(segments) != 2: raise SyntaxError(err_msg) stype, segments = segments[0], segments[1] segments = segments.split(".") if stype == "v": if len(segments) != 2: raise SyntaxError(err_msg) elif stype == "e": raise NotImplementedError("Selector of e not supported yet") elif stype == "r": if len(segments) != 1: raise SyntaxError(err_msg) else: raise SyntaxError(err_msg) return True
def _check_selector(self, selector): """ Raises: InvalidArgumentError: - Selector in labeled vertex data context is None SyntaxError: - The syntax of selector is incorrect NotImplementedError: - Selector of e not supported """ if selector is None: raise InvalidArgumentError( "Selector in vertex property context cannot be None") segments = selector.split(".") err_msg = f"Invalid selector: `{selector}`. " err_msg += ( "Please inspect the result with `ret.schema` and choose a valid selector." ) if len(segments) != 2: raise SyntaxError(err_msg) if segments[0] == "v": if selector not in ("v.id", "v.data", "v.label_id"): raise SyntaxError(err_msg) elif segments[0] == "e": raise NotImplementedError("Selector of e not supported yet") elif segments[0] == "r": # The second part of selector or r is user defined name. # So we will allow any str pass else: raise SyntaxError(err_msg) return True
def __init__(self, algo, context=None, gar=None):
    """Set up the assets of an algorithm and create its op.

    Args:
        algo (str): Represent specific algo inside resource.
        context (str): Type of context that hold the calculation results.
            It will get from gar if param is None. Defaults to None.
        gar (bytes or BytesIO, optional): The bytes that encodes the
            application's source code. Defaults to None.

    Raises:
        InvalidArgumentError: the resulting context type is not listed in
            ``self._support_context_type``.
    """
    self._algo = algo
    self._context_type = context

    # Algo names containing "giraph:" are java apps; everything else is
    # treated as a cpp app.
    looks_like_giraph = isinstance(self._algo, str) and "giraph:" in self._algo
    self._type = "java_pie" if looks_like_giraph else "cpp_pie"

    # Populated from the gar resource, when one is supplied.
    self._meta = {}

    if gar and isinstance(gar, (BytesIO, bytes)):
        self._gar = gar if isinstance(gar, bytes) else gar.getvalue()
        self._extract_meta_info()
    else:
        # Built-in apps carry no gar resource.
        self._gar = None

    if self._context_type not in self._support_context_type:
        raise InvalidArgumentError(
            "Unsupport context type: {0}".format(self._context_type)
        )

    self._op = create_app(self)
def resolve_src_dst_value(value: Union[int, str, Tuple[Union[int, str], str]]):
    """Resolve the edge's source or destination into a ``(vid, label)`` pair.

    Args:
        value (Union[int, str, Tuple[Union[int, str], str]]):
            1. an int, representing the vid id, or a str, representing the
               vid column name; the label of the result is then ``""``.
            2. a two-element sequence ``([int/str], str)``: the former is the
               vid, the latter is the label.

    Returns:
        tuple: ``(vid, label)``.

    Raises:
        InvalidArgumentError: ``value`` is neither an int, a str nor a
            sequence. Failed ``check_argument`` checks raise whatever that
            helper raises (defined outside this block).
    """

    def _acceptable_vid(vid):
        # Ints are ids; strings are column names and must not look like numbers.
        return isinstance(vid, int) or (isinstance(vid, str) and not vid.isdecimal())

    if isinstance(value, (int, str)):
        check_argument(_acceptable_vid(value), "Column name cannot be decimal")
        return value, ""

    if not isinstance(value, Sequence):
        raise InvalidArgumentError(
            "Source / destination format incorrect. Expect vid or [vid, source_label]"
        )

    check_argument(len(value) == 2)
    check_argument(_acceptable_vid(value[0]), "Column name cannot be decimal")
    check_argument(isinstance(value[1], str), "Label must be str")
    return value[0], value[1]
def learning(self, graph, nodes=None, edges=None, gen_labels=None):
    """Start a graph learning engine.

    A learning graph already cached for ``graph`` is returned directly.

    Args:
        graph (:class:`Graph`): The property graph to learn on.
        nodes (list): The node types that will be used for gnn training.
        edges (list): The edge types that will be used for gnn training.
        gen_labels (list): Extra node and edge labels on original graph for
            gnn training.

    Returns:
        `graphscope.learning.Graph`: An instance of `graphscope.learning.Graph`
            that could be feed to the learning engine.

    Raises:
        RuntimeError: the platform is not Linux.
        InvalidArgumentError: the graph is unloaded or is not a property graph.
    """
    vineyard_id = graph.vineyard_id
    cached = self._learning_instance_dict.get(vineyard_id)
    if cached is not None:
        return cached

    if sys.platform not in ("linux", "linux2"):
        raise RuntimeError(
            "The learning engine currently supports Linux only, doesn't support %s"
            % sys.platform
        )

    if not graph.loaded():
        raise InvalidArgumentError("The graph has already been unloaded")
    if graph.graph_type != types_pb2.ARROW_PROPERTY:
        raise InvalidArgumentError("The graph should be a property graph.")

    # Imported lazily, exactly as the original block did.
    from graphscope.learning.graph import Graph as LearningGraph

    encoded_handle = self._get_gl_handle(graph)
    raw_config = LearningGraph.preprocess_args(encoded_handle, nodes, edges, gen_labels)
    config = base64.b64encode(json.dumps(raw_config).encode("utf-8")).decode("utf-8")
    endpoints = self._grpc_client.create_learning_engine(
        vineyard_id, encoded_handle, config
    )

    # Decode the handle again so the server endpoints can be filled in.
    handle = json.loads(base64.b64decode(encoded_handle.encode("utf-8")).decode("utf-8"))
    handle["server"] = endpoints
    handle["client_count"] = 1

    learning_graph = LearningGraph(handle, config, vineyard_id, self)
    self._learning_instance_dict[vineyard_id] = learning_graph
    graph.attach_learning_instance(learning_graph)
    return learning_graph
def load_app(algo, gar=None, **kwargs):
    """Load an app from a gar resource given as bytes or as a file path.

    Args:
        algo: str
            Algo name inside resource.
        gar: bytes or BytesIO or str
            A str is the path of the resource.

    Returns:
        Instance of <graphscope.AppAssets>

    Raises:
        FileNotFoundError: File not exist.
        PermissionError: Permission denied of path.
        InvalidArgumentError: File is not a zip file, or ``gar`` has an
            unsupported type.

    Examples:
        >>> sssp = load_app('sssp', gar='./resource.gar')
        >>> sssp(src=4)

        which will have following `.gs_conf.yaml` in resource.gar:
          app:
            - algo: sssp
              type: cpp_pie
              class_name: grape:SSSP
              src: sssp/sssp.h
              compatible_graph:
                - gs::ArrowProjectedFragment
    """
    if isinstance(gar, (BytesIO, bytes)):
        return AppAssets(str(algo), gar, **kwargs)

    if not isinstance(gar, str):
        raise InvalidArgumentError("Wrong type with {}".format(gar))

    with open(gar, "rb") as resource:
        payload = resource.read()
    if zipfile.is_zipfile(gar):
        return AppAssets(str(algo), payload, **kwargs)
    raise InvalidArgumentError("{} is not a zip file.".format(gar))
def is_compatible(self, graph):
    """Determine if this algorithm can run on this type of graph.

    For apps loaded from a gar resource, the matching entry in the resource
    config also sets ``self._type``.

    Args:
        graph (:class:`Graph`): A graph instance.

    Returns:
        None for builtin apps (no gar resource); True when the app found in
        the gar resource lists the graph's type as compatible.

    Raises:
        InvalidArgumentError:
            - Wrong type of graph.
            - App is not compatible with graph or
            - Algo not found in gar resource.
        KeyError: the config file does not exist in the gar resource
            (raised by :meth:`zipfile.ZipFile.read`).
        ScannerError:
            - Yaml file format is incorrect.
    """
    supported_graph_classes = (
        graphscope.framework.graph.Graph,
        graphscope.experimental.nx.classes.graph.Graph,
        graphscope.experimental.nx.classes.digraph.DiGraph,
    )
    if not isinstance(graph, supported_graph_classes):
        raise InvalidArgumentError("Wrong type of graph.")

    # builtin app
    if self._gar is None:
        self._type = "cpp_pie"
        return None

    # Read the config inside a context manager so the archive is closed even
    # if reading or parsing fails.
    with zipfile.ZipFile(BytesIO(self._gar), "r") as archive:
        config = yaml.safe_load(archive.read(DEFAULT_GS_CONFIG_FILE))

    # check the compatibility with graph
    for application in config["app"]:
        if self._algo != application["algo"]:
            continue
        self._type = application["type"]
        graph_type = graph_type_to_cpp_class(graph.graph_type)
        if graph_type not in application["compatible_graph"]:
            raise InvalidArgumentError(
                "App is uncompatible with graph {}".format(graph_type)
            )
        return True

    raise InvalidArgumentError("App not found in gar: {}".format(self._algo))
def __call__(self, graph: Graph, *args, **kwargs):
    """Run this java app on ``graph``.

    ``app_class`` is injected into the keyword arguments. When the java app
    type contains "simple" and the graph is an arrow property graph, the
    graph is projected to a simple graph before the app is bound to it.

    Raises:
        InvalidArgumentError: ``graph`` has no ``graph_type`` attribute.
    """
    kwargs_extend = dict(app_class=self.java_app_class, **kwargs)

    if not hasattr(graph, "graph_type"):
        raise InvalidArgumentError("Missing graph_type attribute in graph object.")

    wants_simple_graph = self.java_app_type.find("simple") != -1
    if wants_simple_graph and graph.graph_type == graph_def_pb2.ARROW_PROPERTY:
        graph = graph._project_to_simple()

    dag_node = JavaAppDagNode(graph, self)
    bound_app = graph.session._wrapper(dag_node)
    return bound_app(*args, **kwargs_extend)
def create_context(context_type, session_id, context_key, graph):
    """A context factory, create concrete context class by context_type.

    Args:
        context_type (str): One of "tensor", "vertex_data",
            "labeled_vertex_data", "vertex_property" or
            "labeled_vertex_property".
        session_id: Passed through to the context constructor.
        context_key: Passed through to the context constructor.
        graph: Passed through to the context constructor.

    Returns:
        The context instance matching ``context_type``.

    Raises:
        InvalidArgumentError: ``context_type`` is not one of the supported
            names (including non-string values such as None).
    """
    if context_type == "tensor":
        return TensorContext(session_id, context_key, graph)
    if context_type == "vertex_data":
        return VertexDataContext(session_id, context_key, graph)
    if context_type == "labeled_vertex_data":
        return LabeledVertexDataContext(session_id, context_key, graph)
    if context_type == "vertex_property":
        return VertexPropertyContext(session_id, context_key, graph)
    if context_type == "labeled_vertex_property":
        return LabelVertexPropertyContext(session_id, context_key, graph)
    # format() instead of "+": concatenating a non-str context_type (e.g.
    # None) raised TypeError and hid the intended error.
    raise InvalidArgumentError(
        "Not supported context type: {}".format(context_type)
    )
def wrapper(*args, **kwargs):
    """Project an arrow property graph to a simple graph, then call ``func``.

    The first positional argument is the graph. If the caller passed a
    ``weight`` keyword it selects the edge property; otherwise an
    ``attribute`` keyword selects the vertex property; otherwise the graph is
    projected without properties. Graphs of other types are passed through.

    Raises:
        InvalidArgumentError: the graph has no ``graph_type`` attribute.
    """
    graph = args[0]
    if not hasattr(graph, "graph_type"):
        raise InvalidArgumentError("Missing graph_type attribute in graph object.")

    if graph.graph_type == graph_def_pb2.ARROW_PROPERTY:
        if "weight" in kwargs:
            # func has 'weight' argument
            projection = {"e_prop": kwargs["weight"]}
        elif "attribute" in kwargs:
            # func has 'attribute' argument
            projection = {"v_prop": kwargs["attribute"]}
        else:
            projection = {}
        graph = graph._project_to_simple(**projection)

    return func(graph, *args[1:], **kwargs)
def louvain(graph, min_progress=1000, progress_tries=1):
    """Compute best partition on the `graph` by louvain.

    Args:
        graph (:class:`graphscope.Graph`): A simple undirected graph.
        min_progress: The minimum delta X required to be considered progress,
            where X is the number of nodes that have changed their community
            on a particular pass. Delta X is then the difference in number of
            nodes that changed communities on the current pass compared to
            the previous pass.
        progress_tries: number of times the min_progress setting is not met
            before exiting form the current level and compressing the graph.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with id of community it
            belongs to, evaluated in eager mode.

    Raises:
        InvalidArgumentError: the input graph is directed.

    References:
        [1] Blondel, V.D. et al. Fast unfolding of communities in large
        networks. J. Stat. Mech 10008, 1-12(2008).

        [2] https://github.com/Sotera/distributed-graph-analytics

        [3] https://sotera.github.io/distributed-graph-analytics/louvain/

    Notes:
        louvain now only support undirected graph. If input graph is directed
        graph, louvain would raise an InvalidArgumentError.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess, directed=False)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.louvain(pg, min_progress=1000, progress_tries=1)
        >>> sess.close()
    """
    if not graph.is_directed():
        app = AppAssets(algo="louvain", context="vertex_data")
        return app(graph, min_progress, progress_tries)
    raise InvalidArgumentError("Louvain not support directed graph.")
def wrapper(*args, **kwargs):
    """Project a property graph to a simple graph, then call ``func``.

    The first positional argument is the graph. For dynamic or arrow property
    graphs, the ``attribute`` / ``weight`` keyword values are used as vertex /
    edge property, but only when ``func`` declares a parameter of that name.
    Graphs of other types are passed through untouched.

    Raises:
        InvalidArgumentError: the graph has no ``graph_type`` attribute.
    """
    graph = args[0]
    if not hasattr(graph, "graph_type"):
        raise InvalidArgumentError("Unsupported graph to project to simple.")

    if graph.graph_type in (
        graph_def_pb2.DYNAMIC_PROPERTY,
        graph_def_pb2.ARROW_PROPERTY,
    ):
        declared = inspect.getfullargspec(func)[0]
        attribute = kwargs.get("attribute", None) if "attribute" in declared else None
        # func has 'weight' argument
        weight = kwargs.get("weight", None) if "weight" in declared else None
        graph = graph._project_to_simple(v_prop=attribute, e_prop=weight)

    return func(graph, *args[1:], **kwargs)
def _parse_value(self, op, response: message_pb2.RunStepResponse):
    """Extract the value of a finished op from its run-step response.

    Args:
        op: The op that was run; ``response.metrics`` is attached to it as
            its output, marking the op as already run.
        response (message_pb2.RunStepResponse): The response of the step.

    Returns:
        ``response.graph_def`` for graph outputs, the utf-8 decoded
        ``response.result`` for app / results / vineyard outputs, and the raw
        ``response.result`` for tensor / dataframe outputs.

    Raises:
        InvalidArgumentError: the op's output type is not recognized.
    """
    # attach an output to op, indicating the op is already run.
    op.set_output(response.metrics)

    output_type = op.output_types
    if output_type == types_pb2.GRAPH:
        return response.graph_def
    if output_type in (
        types_pb2.APP,
        types_pb2.RESULTS,
        types_pb2.VINEYARD_TENSOR,
        types_pb2.VINEYARD_DATAFRAME,
    ):
        return response.result.decode("utf-8")
    if output_type in (types_pb2.TENSOR, types_pb2.DATAFRAME):
        return response.result
    # The original passed logging-style ("%s", value) arguments, so the
    # message was never interpolated. Format it explicitly.
    raise InvalidArgumentError("Not recognized output type: %s" % (output_type,))
def is_compatible(self, graph):
    """Determine if this algorithm can run on this type of graph.

    Args:
        graph (:class:`GraphDAGNode`): A graph instance.

    Returns:
        None for builtin apps (no gar resource); True when the graph's type
        is listed under ``compatible_graph`` in the app meta.

    Raises:
        InvalidArgumentError:
            - App is not compatible with graph
    """
    # builtin app
    if self._gar is None:
        return None

    # Compare against the graph classes declared in the app meta.
    cpp_class = graph_type_to_cpp_class(graph.graph_type)
    if cpp_class in self._meta["compatible_graph"]:
        return True
    raise InvalidArgumentError(
        "App is uncompatible with graph {}".format(cpp_class)
    )
def _extract_meta_info(self):
    """Extract app meta info from gar resource.

    Reads the config file from the gar archive held in ``self._gar`` and
    looks up the entry whose ``algo`` equals ``self._algo``. On a match,
    ``self._type`` and ``self._meta`` are set from the entry, and
    ``self._context_type`` too when the entry declares ``context_type``.

    Raises:
        InvalidArgumentError:
            - App not found in gar resource.
        KeyError: the config file does not exist in the gar resource
            (raised by :meth:`zipfile.ZipFile.read`).
    """
    # Use a context manager so the archive is closed even if reading or
    # parsing the config fails.
    with zipfile.ZipFile(BytesIO(self._gar), "r") as archive:
        config = yaml.safe_load(archive.read(DEFAULT_GS_CONFIG_FILE))

    apps = config["app"]

    # default app will used if there is only one app in it
    if self._algo is None and len(apps) == 1:
        self._algo = apps[0]["algo"]
        logger.info("Default app %s will be used.", self._algo)

    for meta in apps:
        if self._algo != meta["algo"]:
            continue
        if "context_type" in meta:
            self._context_type = meta["context_type"]
        self._type = meta["type"]
        self._meta = meta
        return

    raise InvalidArgumentError("App not found in gar: {}".format(self._algo))
def louvain(graph, min_progress=1000, progress_tries=1):
    """Compute best partition on the `graph` by louvain.

    Args:
        graph (:class:`Graph`): A projected simple graph.
        min_progress: The minimum delta X required to be considered progress,
            where X is the number of nodes that have changed their community
            on a particular pass. Delta X is then the difference in number of
            nodes that changed communities on the current pass compared to
            the previous pass.
        progress_tries: number of times the min_progress setting is not met
            before exiting form the current level and compressing the graph.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            id of community it belongs to.

    Raises:
        InvalidArgumentError: the input graph is directed.

    References:
        .. [1] Blondel, V.D. et al. Fast unfolding of communities in large
           networks. J. Stat. Mech 10008, 1-12(2008).
        .. [2] https://github.com/Sotera/distributed-graph-analytics
        .. [3] https://sotera.github.io/distributed-graph-analytics/louvain/

    Examples:

    .. code:: python

        import graphscope as gs
        s = gs.session()
        g = s.load_from('The parameters for loading a graph...')
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel', v_prop=None, e_prop='weight')
        r = gs.louvain(pg)
        s.close()
    """
    if not graph.is_directed():
        app = AppAssets(algo="louvain")
        return app(graph, min_progress, progress_tries)
    raise InvalidArgumentError("Louvain not support directed graph.")
def wrapper(*args, **kwargs):
    """Project a property graph to a simple graph, then call ``func``.

    The first positional argument is the graph. For dynamic or arrow property
    graphs: if ``func`` declares ``weight``, the ``weight`` keyword value is
    used as edge property, falling back to None when looking it up on the
    first edge label raises KeyError; else if ``func`` declares
    ``attribute``, that keyword value is used as vertex property; otherwise
    the projection carries no properties.

    Raises:
        InvalidArgumentError: the graph has no ``graph_type`` attribute.
    """
    graph = args[0]
    if not hasattr(graph, "graph_type"):
        raise InvalidArgumentError("Missing graph_type attribute in graph object.")

    if graph.graph_type in (
        graph_def_pb2.DYNAMIC_PROPERTY,
        graph_def_pb2.ARROW_PROPERTY,
    ):
        declared = inspect.getfullargspec(func)[0]
        if "weight" in declared:
            # func has 'weight' argument
            weight = kwargs.get("weight", None)
            try:
                first_edge_label = graph.schema.edge_labels[0]
                graph.schema.get_edge_property_id(first_edge_label, weight)
            except KeyError:
                # Unknown property: project without an edge property.
                weight = None
            graph = graph._project_to_simple(e_prop=weight)
        elif "attribute" in declared:
            graph = graph._project_to_simple(v_prop=kwargs.get("attribute", None))
        else:
            graph = graph._project_to_simple()

    return func(graph, *args[1:], **kwargs)
def _get_gl_handle(self, graph):
    """Dump a handler for GraphLearn for interaction.

    Each entry of :code:`node_schema` / :code:`edge_schema` is a
    colon-separated string whose fields are:

    + the name of node type or edge type (edge entries are
      ``src:label:dst``)
    + whether the graph is weighted graph (a property named "weight" exists)
    + whether the graph is labeled graph (a property named "label" exists)
    + the number of int attributes
    + the number of float attributes
    + the number of string attributes

    An example of the graph handle built here:

    .. code:: python

        {
            "hosts": ...,  # taken from self.info["engine_hosts"]
            "client_count": 1,
            "vineyard_socket": "/var/run/vineyard.sock",
            "vineyard_id": 13278328736,
            "node_schema": [
                "user:false:false:10:0:0",
                "item:true:false:0:0:5"
            ],
            "edge_schema": [
                "user:click:item:true:false:0:0:0",
                "user:buy:item:true:true:0:0:0",
                "item:similar:item:false:false:10:0:0"
            ],
            "node_attribute_types": {
                "person": {
                    "age": "i",
                    "name": "s",
                },
            },
            "edge_attribute_types": {
                "knows": {
                    "weight": "f",
                },
            },
        }

    The handle can be decoded using:

    .. code:: python

       base64.b64decode(handle.encode('ascii')).decode('ascii')

    NOTE(review): the previous docstring showed a "server" key and said ports
    are selected from :code:`(8000, 9000)`; this function neither emits
    "server" nor selects ports - presumably that happens elsewhere; confirm.

    Args:
        graph (:class:`Graph`): A Property Graph.

    Returns:
        str: Base64 encoded handle

    Raises:
        InvalidArgumentError: If the graph is not loaded, or graph_type isn't
            `ARROW_PROPERTY`.
    """
    if not graph.loaded():
        raise InvalidArgumentError("The graph has already been unloaded")
    if not graph.graph_type == types_pb2.ARROW_PROPERTY:
        raise InvalidArgumentError("The graph should be a property graph.")

    def group_property_types(props):
        # Classify every property as string ("s"), float ("f") or, for any
        # other type, int ("i"), counting each class along the way.
        weighted, labeled, i, f, s, attr_types = "false", "false", 0, 0, 0, {}
        for field_name, field_type in props.items():
            if field_type in [types_pb2.STRING]:
                s += 1
                attr_types[field_name] = "s"
            elif field_type in (types_pb2.FLOAT, types_pb2.DOUBLE):
                f += 1
                attr_types[field_name] = "f"
            else:
                i += 1
                attr_types[field_name] = "i"
            # The flags are keyed purely on the property name.
            if field_name == "weight":
                weighted = "true"
            elif field_name == "label":
                labeled = "true"
        return weighted, labeled, i, f, s, attr_types

    # One schema string and one attribute-type mapping per vertex label.
    node_schema, node_attribute_types = [], dict()
    for index, label in enumerate(graph.schema.vertex_labels):
        weighted, labeled, i, f, s, attr_types = group_property_types(
            graph.schema.vertex_properties[index])
        node_schema.append("{}:{}:{}:{}:{}:{}".format(
            label, weighted, labeled, i, f, s))
        node_attribute_types[label] = attr_types

    # One schema string per (src, dst) relationship of every edge label.
    edge_schema, edge_attribute_types = [], dict()
    for index, label in enumerate(graph.schema.edge_labels):
        weighted, labeled, i, f, s, attr_types = group_property_types(
            graph.schema.edge_properties[index])
        for rel in graph.schema.edge_relationships[index]:
            edge_schema.append("{}:{}:{}:{}:{}:{}:{}:{}".format(
                rel[0], label, rel[1], weighted, labeled, i, f, s))
        edge_attribute_types[label] = attr_types

    handle = {
        "hosts": self.info["engine_hosts"],
        "client_count": 1,
        "vineyard_id": graph.vineyard_id,
        "vineyard_socket": self._engine_config["vineyard_socket"],
        "node_schema": node_schema,
        "edge_schema": edge_schema,
        "node_attribute_types": node_attribute_types,
        "edge_attribute_types": edge_attribute_types,
    }
    # Serialize to JSON, then base64-encode into a plain str.
    handle_json_string = json.dumps(handle)
    return base64.b64encode(
        handle_json_string.encode("utf-8")).decode("utf-8")
def project_to_simple(self, v_label="_", e_label="_", v_prop=None, e_prop=None):
    """Project a property graph to a simple graph, useful for analytical engine.

    Labels and properties given by name are translated to their index, which
    is what the projection op receives. Integer labels and properties are
    range-checked instead.

    Args:
        v_label (str, optional): vertex label to project. Defaults to "_".
        e_label (str, optional): edge label to project. Defaults to "_".
        v_prop (str, optional): vertex property of the v_label. Defaults to None.
        e_prop (str, optional): edge property of the e_label. Defaults to None.

    Returns:
        :class:`Graph`: A `Graph` instance, which graph_type is `ARROW_PROJECTED`

    Raises:
        RuntimeError: the graph is not loaded.
        KeyError: an integer label or property id is out of range.
        ValueError: the vertex label or a property name cannot be resolved.
        InvalidArgumentError: the edge label cannot be resolved.
    """
    if not self.loaded():
        raise RuntimeError(
            "The graph is not registered in remote, and can't project to simple"
        )
    self.check_unmodified()
    check_argument(self.graph_type == types_pb2.ARROW_PROPERTY)
    check_argument(isinstance(v_label, (int, str)))
    check_argument(isinstance(e_label, (int, str)))

    schema = self._schema

    def check_out_of_range(index, length):
        if index < length and index > -1:
            return index
        raise KeyError("id {} is out of range.".format(index))

    try:
        if isinstance(v_label, int):
            v_label_id = check_out_of_range(v_label, schema.vertex_label_num)
        else:
            v_label_id = schema.vertex_label_index(v_label)
    except ValueError as e:
        raise ValueError(
            "graph not contains the vertex label {}.".format(v_label)
        ) from e

    try:
        if isinstance(e_label, int):
            e_label_id = check_out_of_range(e_label, schema.edge_label_num)
        else:
            e_label_id = schema.edge_label_index(e_label)
    except ValueError as e:
        raise InvalidArgumentError(
            "graph not contains the edge label {}.".format(e_label)
        ) from e

    # NB: -1 means vertex property is None
    v_prop_id, v_properties = -1, None
    if v_prop is not None:
        check_argument(isinstance(v_prop, (int, str)))
        v_properties = schema.vertex_properties[v_label_id]
        try:
            if isinstance(v_prop, int):
                v_prop_id = check_out_of_range(v_prop, len(v_properties))
            else:
                v_prop_id = schema.vertex_property_index(v_label_id, v_prop)
        except ValueError as e:
            raise ValueError(
                "vertex label {} not contains the property {}".format(v_label, v_prop)
            ) from e

    # NB: -1 means edge property is None
    e_prop_id, e_properties = -1, None
    if e_prop is not None:
        check_argument(isinstance(e_prop, (int, str)))
        e_properties = schema.edge_properties[e_label_id]
        try:
            if isinstance(e_prop, int):
                e_prop_id = check_out_of_range(e_prop, len(e_properties))
            else:
                e_prop_id = schema.edge_property_index(e_label_id, e_prop)
        except ValueError as e:
            raise ValueError(
                "edge label {} not contains the property {}".format(e_label, e_prop)
            ) from e

    oid_type = schema.oid_type
    vid_type = schema.vid_type
    # Data types stay None when no (or an empty) property mapping was selected.
    vdata_type = list(v_properties.values())[v_prop_id] if v_properties else None
    edata_type = list(e_properties.values())[e_prop_id] if e_properties else None

    op = project_arrow_property_graph(
        self,
        v_label_id,
        v_prop_id,
        e_label_id,
        e_prop_id,
        vdata_type,
        edata_type,
        oid_type,
        vid_type,
    )
    return Graph(self.session_id, op.eval())
def preprocess_args(handle, nodes, edges, gen_labels):  # noqa: C901
    """Normalize the node / edge / label selection for the learning engine.

    Args:
        handle (str): Base64 encoded JSON graph handle. The keys
            "node_schema", "node_attribute_types" and "edge_attribute_types"
            are read from it as needed.
        nodes (list, optional): Each item is a node type name, or a tuple
            ``(name, attributes)``. When None, all node types listed in the
            handle's "node_schema" are used.
        edges (list, optional): Each item is one of

            - an edge label (only with a single node type),
            - a tuple starting with two strings, stored as-is,
            - ``(label, [attributes])`` (only with a single node type),
            - ``((src, label, dst), [attributes])``.
        gen_labels (list, optional): Items of length 3 or 4, grouped by
            their second element. Groups of 3-element items are expanded to
            ``(item[0], item[1], total, (start, end))`` using running sums of
            the third elements.

    Returns:
        dict: with keys "nodes", "edges" (None when empty),
        "node_attributes", "edge_attributes" and "gen_labels".

    Raises:
        InvalidArgumentError: duplicate node types, malformed node / edge /
            gen_labels items, or an edge type that cannot be inferred.
    """
    handle = json.loads(base64.b64decode(handle).decode("utf-8", errors="ignore"))
    node_names = []
    node_attributes = {}
    edge_names = []
    edge_attributes = {}

    def selected_property_schema(attr_types, attributes):
        # Count the selected attributes per type code ("i", "f", "s").
        prop_counts = collections.defaultdict(lambda: 0)
        for attr in attributes:
            prop_counts[attr_types[attr]] += 1
        return [prop_counts["i"], prop_counts["f"], prop_counts["s"]]

    # NOTE: offending values are wrapped in a 1-tuple before %-formatting.
    # A bare tuple on the right of "%" is unpacked as the argument list and
    # raised TypeError instead of the intended InvalidArgumentError.
    if nodes is not None:
        for node in nodes:
            if isinstance(node, str):
                if node in node_names:
                    raise InvalidArgumentError("Duplicate node type: %s" % (node,))
                node_names.append(node)
            elif isinstance(node, tuple):
                if node[0] in node_names:
                    raise InvalidArgumentError(
                        "Duplicate node type: %s" % (node[0],)
                    )
                node_names.append(node[0])
                attr_types = handle["node_attribute_types"][node[0]]
                attr_schema = selected_property_schema(attr_types, node[1])
                node_attributes[node[0]] = (node[1], attr_schema)
            else:
                raise InvalidArgumentError(
                    "The node parameter is in bad format: %s" % (node,)
                )
    else:
        node_names.extend(node.split(":")[0] for node in handle["node_schema"])

    if edges is not None:
        for edge in edges:
            if isinstance(edge, str):
                if len(node_names) > 1:
                    raise InvalidArgumentError(
                        "Cannot inference edge type when multiple kinds of nodes exists"
                    )
                edge_names.append((node_names[0], edge, node_names[0]))
            elif (
                isinstance(edge, tuple)
                and isinstance(edge[0], str)
                and isinstance(edge[1], str)
            ):
                edge_names.append(edge)
            elif (
                isinstance(edge, tuple)
                and isinstance(edge[0], str)
                and isinstance(edge[1], list)
            ):
                if len(node_names) > 1:
                    raise InvalidArgumentError(
                        "Cannot inference edge type when multiple kinds of nodes exists"
                    )
                edge_names.append((node_names[0], edge[0], node_names[0]))
                attr_types = handle["edge_attribute_types"][edge[0]]
                attr_schema = selected_property_schema(attr_types, edge[1])
                edge_attributes[edge[0]] = (edge[1], attr_schema)
            elif (
                isinstance(edge, tuple)
                and isinstance(edge[0], (list, tuple))
                and isinstance(edge[1], list)
            ):
                edge_names.append(edge[0])
                attr_types = handle["edge_attribute_types"][edge[0][1]]
                attr_schema = selected_property_schema(attr_types, edge[1])
                edge_attributes[edge[0][1]] = (edge[1], attr_schema)
            else:
                raise InvalidArgumentError(
                    "The edge parameter is in bad format: %s" % (edge,)
                )

    split_groups = collections.defaultdict(list)
    if gen_labels is not None:
        for label in gen_labels:
            if len(label) == 3 or len(label) == 4:
                split_groups[label[1]].append(label)
            else:
                raise InvalidArgumentError(
                    "Bad gen_labels arguments: %s" % (gen_labels,)
                )

    split_labels = []
    for label, group in split_groups.items():
        # Every split of a group must have the same arity.
        lengths = [len(split) for split in group]
        check_argument(
            lengths[:-1] == lengths[1:], "Invalid gen labels: %s" % (group,)
        )
        if len(group[0]) == 3:
            # Turn the per-split sizes into (start, end) ranges over their sum.
            length_sum = sum(split[2] for split in group)
            s, ss = 0, []
            for split in group:
                ss.append((s, s + split[2]))
                s += split[2]
            group = [
                (split[0], split[1], length_sum, s) for split, s in zip(group, ss)
            ]
        split_labels.extend(group)

    return {
        "nodes": node_names if node_names else None,
        "edges": edge_names if edge_names else None,
        "node_attributes": node_attributes,
        "edge_attributes": edge_attributes,
        "gen_labels": split_labels,
    }
def gremlin(self, graph, engine_params=None):
    """Get a interactive engine handler to execute gremlin queries.

    Note that this method will be executed implicitly when a property graph
    created and cache a instance of InteractiveQuery in session if
    `initializing_interactive_engine` is True. If you want to create a new
    instance under the same graph by different params, you should close the
    instance first.

    .. code:: python

        >>> # close and recreate InteractiveQuery.
        >>> interactive_query = sess.gremlin(g)
        >>> interactive_query.close()
        >>> interactive_query = sess.gremlin(g, engine_params={"xxx":"xxx"})

    Args:
        graph (:class:`Graph`): Use the graph to create interactive instance.
        engine_params (dict, optional): Configure startup parameters of
            interactive engine. You can also configure this param by
            `graphscope.set_option(engine_params={})`. See a list of
            configurable keys in
            `interactive_engine/deploy/docker/dockerfile/executor.vineyard.properties`

    Raises:
        InvalidArgumentError: :code:`graph` is not a property graph or unloaded.
        InteractiveEngineInternalError: the cached instance has failed, or
            creating the interactive engine raised.

    Returns:
        :class:`InteractiveQuery`
    """
    # self._interactive_instance_dict[graph.vineyard_id] will be None if
    # InteractiveQuery closed
    existing = self._interactive_instance_dict.get(graph.vineyard_id)
    if existing is not None:
        # Poll once per second while the instance is still initializing. Per
        # the original author's note, the status eventually becomes running
        # or failed (after timeout), so the loop terminates.
        while True:
            if existing.status == InteractiveQueryStatus.Running:
                return existing
            if existing.status == InteractiveQueryStatus.Failed:
                raise InteractiveEngineInternalError(existing.error_msg)
            time.sleep(1)

    if not graph.loaded():
        raise InvalidArgumentError("The graph has already been unloaded")
    if graph.graph_type != types_pb2.ARROW_PROPERTY:
        raise InvalidArgumentError("The graph should be a property graph.")

    # Register the instance before the engine is created, so other callers
    # find it in the cache while it is still starting.
    interactive_query = InteractiveQuery(session=self, object_id=graph.vineyard_id)
    self._interactive_instance_dict[graph.vineyard_id] = interactive_query

    if engine_params is None:
        engine_params = {}
    else:
        engine_params = {str(k): str(v) for k, v in engine_params.items()}

    try:
        response = self._grpc_client.create_interactive_engine(
            object_id=graph.vineyard_id,
            schema_path=graph.schema_path,
            gremlin_server_cpu=gs_config.k8s_gie_gremlin_server_cpu,
            gremlin_server_mem=gs_config.k8s_gie_gremlin_server_mem,
            engine_params=engine_params,
        )
    except Exception as e:
        interactive_query.status = InteractiveQueryStatus.Failed
        interactive_query.error_msg = str(e)
        raise InteractiveEngineInternalError(str(e)) from e

    interactive_query.set_frontend(
        front_ip=response.frontend_host, front_port=response.frontend_port
    )
    interactive_query.status = InteractiveQueryStatus.Running
    graph.attach_interactive_instance(interactive_query)
    return interactive_query