def create_learning_instance(graph, nodes=None, edges=None, gen_labels=None):
    """Build the operation that creates a graph-learning engine.

    Args:
        graph (:class:`graphscope.framework.graph.GraphDAGNode`):
            Source property graph.
        nodes (list): Node types used for GNN training.
        edges (list): Edge types used for GNN training.
        gen_labels (list): Extra node and edge labels on the original graph
            used for GNN training.

    Returns:
        An op that creates a learning engine based on the graph.
    """
    # Every argument is pickled unconditionally; pickling ``None`` is expected.
    pickled_args = (
        (types_pb2.NODES, nodes),
        (types_pb2.EDGES, edges),
        (types_pb2.GLE_GEN_LABELS, gen_labels),
    )
    config = {
        config_key: utils.bytes_to_attr(pickle.dumps(value))
        for config_key, value in pickled_args
    }
    return Operation(
        graph.session_id,
        types_pb2.CREATE_LEARNING_INSTANCE,
        config=config,
        inputs=[graph.op],
        output_types=types_pb2.LEARNING_GRAPH,
    )
def get_attr(self):
    """Serialize this loader into an ``AttrValue`` whose func is named "loader".

    ``self.finish()`` is invoked first when ``self.finished`` is falsy.

    Returns:
        attr_value_pb2.AttrValue: Carries the protocol plus either the source
        encoded as UTF-8 bytes (under ``VALUES``) or, for the "numpy" and
        "pandas" protocols, the row/column counts and one raw-bytes entry
        per item of ``self.property_bytes``.
    """
    if not self.finished:
        self.finish()

    loader_attr = attr_value_pb2.AttrValue()
    loader_attr.func.name = "loader"
    entries = loader_attr.func.attr
    protocol = self.protocol
    entries[types_pb2.PROTOCOL].CopyFrom(utils.s_to_attr(protocol))

    if protocol != "file" and protocol in ("numpy", "pandas"):
        entries[types_pb2.ROW_NUM].CopyFrom(utils.i_to_attr(self.row_num))
        entries[types_pb2.COLUMN_NUM].CopyFrom(utils.i_to_attr(self.column_num))
        # Raw column bytes are stored under keys 10000 + col_index.
        for col_index, raw in enumerate(self.property_bytes):
            entries[10000 + col_index].CopyFrom(utils.bytes_to_attr(raw))
        return loader_attr

    if protocol == "file":
        # Let graphscope handle local files: it is implemented in C++ and
        # does not add an additional stream layer.
        # Maybe handled by vineyard in the near future.
        location = "{}#{}".format(self.source, self.options)
    else:
        # Let vineyard handle every other data source.
        location = self.source
    entries[types_pb2.VALUES].CopyFrom(
        utils.bytes_to_attr(location.encode("utf-8"))
    )
    return loader_attr
def get_attr(self):
    """Serialize this loader into an ``AttrValue`` whose func is named "loader".

    Returns:
        attr_value_pb2.AttrValue: Carries the protocol plus either
        ``"<source>#<options>"`` encoded as UTF-8 bytes (protocols "file",
        "oss", "vineyard", "mars") or the row/column counts and one
        raw-bytes entry per item of ``self.property_bytes`` (protocols
        "numpy", "pandas").

    Raises:
        TypeError: If ``self.protocol`` is none of the protocols above.
    """
    loader_attr = attr_value_pb2.AttrValue()
    loader_attr.func.name = "loader"
    entries = loader_attr.func.attr
    protocol = self.protocol
    entries[types_pb2.PROTOCOL].CopyFrom(utils.s_to_attr(protocol))

    if protocol in ("file", "oss", "vineyard", "mars"):
        location = "{}#{}".format(self.source, self.options)
        entries[types_pb2.VALUES].CopyFrom(
            utils.bytes_to_attr(location.encode("utf-8"))
        )
        return loader_attr

    if protocol not in ("numpy", "pandas"):
        raise TypeError("Protocol not recognized " + protocol)

    entries[types_pb2.ROW_NUM].CopyFrom(utils.i_to_attr(self.row_num))
    entries[types_pb2.COLUMN_NUM].CopyFrom(utils.i_to_attr(self.column_num))
    # Raw column bytes are stored under keys 10000 + col_index.
    for col_index, raw in enumerate(self.property_bytes):
        entries[10000 + col_index].CopyFrom(utils.bytes_to_attr(raw))
    return loader_attr
def create_app(app_assets):
    """Build a `CREATE_APP` operation from the given app assets.

    The op does nothing by itself; it only provides the information
    required by `BOUND_APP`.

    Args:
        app_assets: Object exposing ``algo`` and ``gar``; ``gar`` is only
            attached to the config when it is not ``None``.

    Returns:
        An :class:`Operation` of type `CREATE_APP`, created without a
        session id.
    """
    config = {types_pb2.APP_ALGO: utils.s_to_attr(app_assets.algo)}
    gar = app_assets.gar
    if gar is not None:
        config[types_pb2.GAR] = utils.bytes_to_attr(gar)
    return Operation(
        None,
        types_pb2.CREATE_APP,
        config=config,
        output_types=types_pb2.APP,
    )
def create_app(graph, app):
    """Build a `CREATE_APP` operation describing an app bound to a graph.

    The application is compiled and loaded once the op is evaluated.

    Args:
        graph (:class:`Graph`): A :class:`Graph` instance.
        app (:class:`App`): A :class:`App` instance.

    Returns:
        An :class:`Operation` whose configuration instructs the analytical
        engine how to build the app.
    """
    config = {}
    config[types_pb2.APP_ALGO] = utils.s_to_attr(app.algo)
    config[types_pb2.GRAPH_TYPE] = utils.graph_type_to_attr(graph.graph_type)

    # Type information taken from the graph schema; data types are first
    # converted with utils.data_type_to_cpp.
    schema = graph.schema
    config[types_pb2.OID_TYPE] = utils.s_to_attr(schema.oid_type)
    config[types_pb2.VID_TYPE] = utils.s_to_attr(schema.vid_type)
    vdata_cpp = utils.data_type_to_cpp(schema.vdata_type)
    config[types_pb2.V_DATA_TYPE] = utils.s_to_attr(vdata_cpp)
    edata_cpp = utils.data_type_to_cpp(schema.edata_type)
    config[types_pb2.E_DATA_TYPE] = utils.s_to_attr(edata_cpp)

    config[types_pb2.APP_SIGNATURE] = utils.s_to_attr(app.signature)
    # NOTE(review): the attribute is spelled "template_sigature" here;
    # confirm it matches the name defined on the graph class.
    config[types_pb2.GRAPH_SIGNATURE] = utils.s_to_attr(graph.template_sigature)

    gar = app.gar
    if gar is not None:
        config[types_pb2.GAR] = utils.bytes_to_attr(gar)

    return Operation(
        graph.session_id,
        types_pb2.CREATE_APP,
        config=config,
        output_types=types_pb2.APP,
    )
def report_graph(
    graph,
    report_type,
    node=None,
    edge=None,
    fid=None,
    lid=None,
    key=None,
    label_id=None,
    gid=None,
):
    """Create a report operation for an nx graph.

    The operation simulates networkx graph reporting methods; the report
    type selects the query and the optional arguments parameterize it.

    Args:
        graph (`nx.Graph`): A nx graph.
        report_type: Report type, can be types_pb2.(NODE_NUM, EDGE_NUM,
            HAS_NODE, HAS_EDGE, NODE_DATA, EDGE_DATA, NEIGHBORS_BY_NODE,
            SUCCS_BY_NODE, PREDS_BY_NODE, NEIGHBORS_BY_LOC, SUCCS_BY_LOC,
            PREDS_BY_LOC, DEG_BY_NODE, IN_DEG_BY_NODE, OUT_DEG_BY_NODE,
            DEG_BY_LOC, IN_DEG_BY_LOC, OUT_DEG_BY_LOC, NODES_BY_LOC).
        node: Node id, used with 'NODE' report types; it is passed to
            ``utils.bytes_to_attr``. (optional)
        edge: An edge, used with 'EDGE' report types; it is passed to
            ``utils.bytes_to_attr``. (optional)
        fid (int): Fragment id, used with 'LOC' report types. (optional)
        lid (int): Local id of the node in grape_engine, used with 'LOC'
            report types. (optional)
        key: Edge key for MultiGraph or MultiDiGraph, used with 'EDGE'
            report types. It is stringified with ``str``; ``None`` is sent
            as an empty string. (optional)
        label_id (int): Stored under ``V_LABEL_ID`` when given; presumably
            a vertex label id -- confirm against callers. (optional)
        gid: Stored under ``GID`` via ``utils.u_to_attr`` when given;
            presumably a global vertex id -- confirm against callers.
            (optional)

    Returns:
        An op to do the reporting job.
    """
    config = {
        types_pb2.GRAPH_NAME: utils.s_to_attr(graph.key),
        types_pb2.REPORT_TYPE: utils.report_type_to_attr(report_type),
    }
    if graph.graph_type == graph_def_pb2.ARROW_PROPERTY:
        config[types_pb2.DEFAULT_LABEL_ID] = utils.i_to_attr(
            graph._default_label_id
        )

    # (config key, value, converter) -- an entry is emitted only for values
    # that were actually supplied.
    optional_entries = (
        (types_pb2.NODE, node, utils.bytes_to_attr),
        (types_pb2.EDGE, edge, utils.bytes_to_attr),
        (types_pb2.FID, fid, utils.i_to_attr),
        (types_pb2.LID, lid, utils.i_to_attr),
        (types_pb2.V_LABEL_ID, label_id, utils.i_to_attr),
        (types_pb2.GID, gid, utils.u_to_attr),
    )
    for config_key, value, to_attr in optional_entries:
        if value is not None:
            config[config_key] = to_attr(value)

    # The edge key is always present; a missing key becomes "".
    edge_key = "" if key is None else str(key)
    config[types_pb2.EDGE_KEY] = utils.s_to_attr(edge_key)

    return Operation(
        graph.session_id,
        types_pb2.REPORT_GRAPH,
        config=config,
        output_types=types_pb2.RESULTS,
    )