def split(self, dag_def):
    """Traverse ``large_attr`` of every op and split it into chunks.

    Note: this mutates the ``large_attr`` attribute of each op in
    ``dag_def``, replacing the heavy chunk payloads with chunk metadata.

    Returns:
        list: each element is a tuple ``(pieces, op_key)`` where
        ``pieces`` is a list of ``bytes`` slices of one chunk's buffer.
    """
    size = self.CHUNK_SIZE
    result = []
    for op in dag_def.op:
        meta_holder = attr_value_pb2.LargeAttrValue()
        for chunk in op.large_attr.chunk_list.items:
            # Record the metadata describing this chunk.
            meta_holder.chunk_meta_list.items.append(
                self._generate_chunk_meta(chunk)
            )
            # Slice the raw buffer into fixed-size pieces.
            buf = chunk.buffer
            pieces = [buf[pos:pos + size] for pos in range(0, len(buf), size)]
            result.append((pieces, op.key))
        # Swap the chunk payloads for the lightweight metadata list.
        op.large_attr.CopyFrom(meta_holder)
    return result
def create_loader(vertex_or_edge_label_list):
    """Create a loader operation.

    Args:
        vertex_or_edge_label_list: a single label or a list of
            (:class:`graphscope.framework.graph_utils.VertexLabel`) or
            (:class:`graphscope.framework.graph_utils.EdgeLabel`)

    Returns:
        An op to take various data sources as a loader.

    Raises:
        ValueError: if an empty list is passed.
    """
    if not isinstance(vertex_or_edge_label_list, list):
        vertex_or_edge_label_list = [vertex_or_edge_label_list]
    if not vertex_or_edge_label_list:
        # Fail fast with a clear message instead of the cryptic IndexError
        # that `vertex_or_edge_label_list[0]` below would raise.
        raise ValueError("vertex_or_edge_label_list cannot be empty.")
    large_attr = attr_value_pb2.LargeAttrValue()
    for label in vertex_or_edge_label_list:
        # Each label contributes its serialized chunks to the op payload.
        large_attr.chunk_list.items.extend(label.attr())
    op = Operation(
        vertex_or_edge_label_list[0]._session_id,
        types_pb2.DATA_SOURCE,
        config={},
        large_attr=large_attr,
        output_types=types_pb2.NULL_OUTPUT,
    )
    return op
def bytes_to_large_attr(s: bytes) -> attr_value_pb2.LargeAttrValue:
    """Wrap raw bytes *s* into a single-chunk ``LargeAttrValue``."""
    check_argument(isinstance(s, bytes))
    result = attr_value_pb2.LargeAttrValue()
    result.chunk_list.items.append(attr_value_pb2.Chunk(buffer=s))
    return result