def __init__(self, incoming_graph_data=None, default_label=None, **attr):
    """Set up the graph containers and optionally load initial data.

    Args:
        incoming_graph_data: Optional data used to populate the graph. When
            `self._is_gs_graph` accepts it, it is loaded through
            `_init_with_arrow_property_graph`; any other non-None value is
            handed to `to_networkx_graph` with `create_using=self`.
        default_label: Stored as `self._default_label`.
        **attr: Graph attributes merged into `self.graph`. Two hidden keys
            are popped first and are not stored as attributes:
            `create_empty_in_engine` (default True) and `dist`
            (default False).
    """
    # Rebind the factories on the instance (same effect as `self.x = self.x`).
    for factory_name in (
        "graph_attr_dict_factory",
        "node_dict_factory",
        "adjlist_dict_factory",
    ):
        setattr(self, factory_name, getattr(self, factory_name))

    self.graph = self.graph_attr_dict_factory()
    self.cache = self.graph_cache_factory(self)

    # Node and adjacency containers must be created after the cache.
    self._node = self.node_dict_factory(self)
    self._adj = self.adjlist_dict_factory(self)
    self._pred = self.adjlist_dict_factory(self, pred=True)
    self._succ = self._adj

    self._key = None
    self._op = None
    self._session_id = None
    self._graph_type = self._graph_type
    self._schema = GraphSchema()
    self._schema.init_nx_schema()

    # Pending node/edge additions and removals.
    self._add_node_cache = []
    self._add_edge_cache = []
    self._remove_node_cache = []
    self._remove_edge_cache = []

    # Hidden parameters: strip them before `attr` is used as graph attributes.
    should_create_empty = attr.pop("create_empty_in_engine", True)
    self._distributed = attr.pop("dist", False)

    if incoming_graph_data is not None and self._is_gs_graph(
        incoming_graph_data
    ):
        # Converting from a gs graph always uses distributed mode.
        self._distributed = True
        if self._session is None:
            self._session = get_session_by_id(incoming_graph_data.session_id)

    self._default_label = default_label

    if self._session is None:
        self._try_to_get_default_session()

    if not self._is_gs_graph(incoming_graph_data) and should_create_empty:
        self._key = empty_graph_in_engine(
            self, self.is_directed(), self._distributed
        ).key

    # Attempt to load the graph with the supplied data.
    if incoming_graph_data is not None:
        if self._is_gs_graph(incoming_graph_data):
            self._init_with_arrow_property_graph(incoming_graph_data)
            self.cache.warmup()
        else:
            converted = to_networkx_graph(incoming_graph_data, create_using=self)
            check_argument(isinstance(converted, Graph))

    # Graph attributes must be loaded after `to_networkx_graph`.
    self.graph.update(attr)
    self._saved_signature = self.signature
def output(self, fd, selector, vertex_range=None, **kwargs):
    """Dump results to `fd`.

    Results can be written to local files (local with respect to the pod),
    hdfs or oss. They are first written to a vineyard dataframe, and
    vineyard then performs the actual dump.

    `fd` must follow one of these formats, with auth information if needed:
        - local `file:///tmp/result_path`
        - oss `oss:///bucket/object`
        - hdfs `hdfs:///tmp/result_path`

    Args:
        fd (str): Output location.
        selector (dict): Similar to `to_dataframe`.
        vertex_range (dict, optional): Similar to `to_dataframe`.
            Defaults to None.
        kwargs (dict, optional): Storage options with respect to the output
            storage type, for example:
            key, secret, endpoint for oss,
            key, secret, client_kwargs for s3,
            host, port for hdfs,
            None for local.
    """
    import vineyard
    import vineyard.io

    df = self.to_vineyard_dataframe(selector, vertex_range)

    info = get_session_by_id(self.session_id).info
    on_k8s = info["type"] == "k8s"
    engine_conf = info["engine_config"]

    hosts = info["engine_hosts"].split(",")
    if on_k8s:
        # On k8s every host is prefixed with the session namespace.
        hosts = ["{}:{}".format(info["namespace"], host) for host in hosts]

    # Connection settings shared by the reader and the writer stream.
    conn = {
        "vineyard_ipc_socket": engine_conf["vineyard_socket"],
        "vineyard_endpoint": engine_conf["vineyard_rpc_endpoint"],
        "deployment": "kubernetes" if on_k8s else "ssh",
        "hosts": hosts,
    }

    # Expose the vineyard dataframe as a readable stream ...
    reader = vineyard.io.open("vineyard://" + str(df), mode="r", **conn)
    # ... and pipe that stream into the requested destination.
    vineyard.io.open(fd, reader, mode="w", storage_options=kwargs, **conn)
def serialize(self, path, **kwargs):
    """Serialize the graph to a location.

    Both the meta and the data of the graph are dumped to the given
    location and can be restored with `Graph.deserialize` in other sessions.

    Each worker writes a `path_{worker_id}.meta` file and a
    `path_{worker_id}` file to storage.

    Args:
        path (str): Destination; supported storages are local, hdfs, oss, s3.
        kwargs: Forwarded to vineyard as `storage_options`.
    """
    import vineyard
    import vineyard.io

    info = get_session_by_id(self.session_id).info
    on_k8s = info["type"] == "k8s"
    engine_conf = info["engine_config"]

    hosts = info["engine_hosts"].split(",")
    if on_k8s:
        # On k8s every host is prefixed with the session namespace.
        hosts = ["{}:{}".format(info["namespace"], host) for host in hosts]

    vineyard.io.serialize(
        path,
        vineyard.ObjectID(self._vineyard_id),
        type="global",
        vineyard_ipc_socket=engine_conf["vineyard_socket"],
        vineyard_endpoint=engine_conf["vineyard_rpc_endpoint"],
        storage_options=kwargs,
        deployment="kubernetes" if on_k8s else "ssh",
        hosts=hosts,
    )
def _launch_interactive_instance_impl(self):
    """Launch the interactive (gremlin) instance for this graph, best-effort.

    Looks up the session with `get_session_by_id(self.session_id)` and calls
    `sess.gremlin(self)`. Ordinary failures are intentionally not propagated
    to the caller; they are only logged at debug level.

    Returns:
        None.
    """
    import logging

    try:
        sess = get_session_by_id(self.session_id)
        sess.gremlin(self)
    except Exception:
        # Per the original note, the error message is recorded in
        # `InteractiveQuery` when launching fails, so the failure is
        # suppressed here. Only `Exception` is caught so KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        logging.getLogger(__name__).debug(
            "Failed to launch interactive instance", exc_info=True
        )
def eval(self):
    """Run this object in its session and return the result of `sess.run`."""
    # NB: imported here to avoid a cyclic import
    # pylint: disable=import-outside-toplevel, cyclic-import
    from graphscope.client.session import get_session_by_id

    return get_session_by_id(self._session_id).run(self)
def eval(self, leaf=True):
    """Evaluate by :code:`sess.run`.

    Args:
        leaf (bool, optional): Whether this is a leaf operation, i.e. one
            with no successor. The value is stored in `self._leaf`; a
            non-leaf operation is added to the session's DAG before it runs.

    Returns:
        The result of `sess.run(self)`.
    """
    # NB: imported here to avoid a cyclic import
    # pylint: disable=import-outside-toplevel, cyclic-import
    from graphscope.client.session import get_session_by_id

    self._leaf = leaf
    session = get_session_by_id(self._session_id)
    if not self._leaf:
        session.dag.add_op(self)
    return session.run(self)
def empty_graph_in_engine(graph, directed):
    """Create an empty graph in grape_engine from the graph's metadata.

    Parameters:
    -----------
    graph: the graph instance in python; its `_session_id` selects the
        session and its `_graph_type` is passed on as the graph type.
    directed: forwarded as `directed` to `dag_utils.create_graph`.

    Returns:
    --------
    The value returned by running the create-graph op in the session.
    """
    session = get_session_by_id(graph._session_id)
    # Empty edge and vertex file arguments: no data is loaded.
    create_op = dag_utils.create_graph(
        session.session_id,
        graph_type=graph._graph_type,
        directed=directed,
        efile="",
        vfile="",
    )
    return session.run(create_op)
def draw(self, vertices, hop=1):
    """Visualize the graph data in the result cell when draw is invoked.

    Args:
        vertices (list): selected vertices.
        hop (int): draw induced subgraph with hop extension. Defaults to 1.

    Returns:
        A GraphModel.
    """
    from ipygraphin import GraphModel

    query = get_session_by_id(self.session_id).gremlin(self)

    model = GraphModel()
    model.queryGraphData(vertices, hop, query)
    # Listen for the 1~2 hop neighbor operations on a node.
    model.on_msg(model.queryNeighbor)
    return model