def update_root_id_lineage(cg, new_root_ids, former_root_ids, operation_id,
                           time_stamp):
    """Creates lineage rows linking former and new root ids in both
    directions."""
    # Either a merge (many former -> one new) or a split (one former -> many new)
    assert len(former_root_ids) < 2 or len(new_root_ids) < 2

    rows = []
    for new_root_id in new_root_ids:
        val_dict = {
            column_keys.Hierarchy.FormerParent: np.array(former_root_ids),
            column_keys.OperationLogs.OperationID: operation_id
        }
        rows.append(
            cg.mutate_row(serializers.serialize_uint64(new_root_id),
                          val_dict, time_stamp=time_stamp))

    for former_root_id in former_root_ids:
        val_dict = {
            column_keys.Hierarchy.NewParent: np.array(new_root_ids),
            column_keys.OperationLogs.OperationID: operation_id
        }
        rows.append(
            cg.mutate_row(serializers.serialize_uint64(former_root_id),
                          val_dict, time_stamp=time_stamp))

    return rows
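# A minimal, self-contained sketch (not part of the production code path) of
# the bidirectional bookkeeping update_root_id_lineage performs: every new
# root points back at the roots it replaced, and every former root points
# forward at its successors, so the lineage can be walked in either direction.
# Plain dicts stand in for the BigTable columns here.
def _example_root_lineage_links():
    import numpy as np

    former_parent = {}  # new root id -> former root ids
    new_parent = {}     # former root id -> new root ids

    # A merge: roots 7 and 9 are replaced by the single new root 11.
    new_root_ids = [np.uint64(11)]
    former_root_ids = [np.uint64(7), np.uint64(9)]

    for new_root_id in new_root_ids:
        former_parent[new_root_id] = np.array(former_root_ids)
    for former_root_id in former_root_ids:
        new_parent[former_root_id] = np.array(new_root_ids)

    assert list(new_parent[np.uint64(7)]) == [11]
    assert list(former_parent[np.uint64(11)]) == [7, 9]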
def _write_atomic_split_edges(cg, atomic_edges, time_stamp):
    """Writes the updated connectivity entries for all atomic nodes touched
    by the removed edges."""
    rows = []

    u_atomic_ids = np.unique(atomic_edges)
    for u_atomic_id in u_atomic_ids:
        atomic_node_info = cg.get_atomic_node_info(u_atomic_id)

        # Partners of this node across all removed edges (both orientations)
        partners = np.concatenate([
            atomic_edges[atomic_edges[:, 0] == u_atomic_id][:, 1],
            atomic_edges[atomic_edges[:, 1] == u_atomic_id][:, 0]
        ])

        partner_idx = np.where(
            np.in1d(atomic_node_info[column_keys.Connectivity.Partner],
                    partners))[0]
        partner_idx = np.array(
            partner_idx, dtype=column_keys.Connectivity.Connected.basetype)

        val_dict = {column_keys.Connectivity.Connected: partner_idx}

        rows.append(
            cg.mutate_row(serializers.serialize_uint64(u_atomic_id),
                          val_dict, time_stamp=time_stamp))

    return rows
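# A minimal sketch (made-up IDs, plain NumPy) of the index computation in
# _write_atomic_split_edges: np.in1d maps the partners hit by the removed
# edges back to their slots in the node's stored partner list, which is what
# the Connected column entries refer to.
def _example_split_partner_idx():
    import numpy as np

    stored_partners = np.array([3, 5, 8, 12], dtype=np.uint64)
    removed_partners = np.array([5, 12], dtype=np.uint64)

    partner_idx = np.where(np.in1d(stored_partners, removed_partners))[0]
    assert list(partner_idx) == [1, 3]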
def _write_atomic_merge_edges(cg, atomic_edges, affinities, areas,
                              time_stamp):
    """Writes the updated connectivity entries for all atomic nodes touched
    by the added edges."""
    rows = []

    if areas is None:
        areas = np.zeros(len(atomic_edges),
                         dtype=column_keys.Connectivity.Area.basetype)
    if affinities is None:
        affinities = np.ones(len(atomic_edges)) * np.inf

    affinities = affinities.astype(
        column_keys.Connectivity.Affinity.basetype)

    u_atomic_ids = np.unique(atomic_edges)
    for u_atomic_id in u_atomic_ids:
        val_dict = {}
        atomic_node_info = cg.get_atomic_node_info(u_atomic_id)

        edge_m0 = atomic_edges[:, 0] == u_atomic_id
        edge_m1 = atomic_edges[:, 1] == u_atomic_id

        edge_partners = np.concatenate(
            [atomic_edges[edge_m1][:, 0], atomic_edges[edge_m0][:, 1]])
        edge_affs = np.concatenate([affinities[edge_m1], affinities[edge_m0]])
        edge_areas = np.concatenate([areas[edge_m1], areas[edge_m0]])

        # Split the edge partners into partners that are already stored on
        # this node and partners that have to be appended.
        ex_partner_m = np.in1d(
            edge_partners,
            atomic_node_info[column_keys.Connectivity.Partner])

        partner_idx = np.where(
            np.in1d(atomic_node_info[column_keys.Connectivity.Partner],
                    edge_partners[ex_partner_m]))[0]

        n_ex_partners = len(
            atomic_node_info[column_keys.Connectivity.Partner])

        new_partner_idx = np.arange(n_ex_partners,
                                    n_ex_partners + np.sum(~ex_partner_m))
        partner_idx = np.concatenate([partner_idx, new_partner_idx])
        partner_idx = np.array(
            partner_idx, dtype=column_keys.Connectivity.Connected.basetype)

        val_dict[column_keys.Connectivity.Connected] = partner_idx

        if np.sum(~ex_partner_m) > 0:
            edge_affs = edge_affs[~ex_partner_m]
            edge_areas = edge_areas[~ex_partner_m]

            new_edge_partners = np.array(
                edge_partners[~ex_partner_m],
                dtype=column_keys.Connectivity.Partner.basetype)

            val_dict[column_keys.Connectivity.Affinity] = edge_affs
            val_dict[column_keys.Connectivity.Area] = edge_areas
            val_dict[column_keys.Connectivity.Partner] = new_edge_partners

        rows.append(
            cg.mutate_row(serializers.serialize_uint64(u_atomic_id),
                          val_dict, time_stamp=time_stamp))

    return rows
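# A minimal sketch (made-up IDs, plain NumPy) of how _write_atomic_merge_edges
# splits the partners of the new edges into ones that already have a slot in
# the node's stored partner list and ones that get appended at the end.
def _example_merge_partner_idx():
    import numpy as np

    stored_partners = np.array([3, 5], dtype=np.uint64)  # already stored
    edge_partners = np.array([5, 9], dtype=np.uint64)    # partners of new edges

    ex_partner_m = np.in1d(edge_partners, stored_partners)  # [True, False]

    # Existing partners keep their old slots ...
    partner_idx = np.where(
        np.in1d(stored_partners, edge_partners[ex_partner_m]))[0]

    # ... new partners get fresh slots after the current list.
    n_ex = len(stored_partners)
    new_partner_idx = np.arange(n_ex, n_ex + np.sum(~ex_partner_m))

    partner_idx = np.concatenate([partner_idx, new_partner_idx])
    assert list(partner_idx) == [1, 2]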
def _create_log_record(
    self, *, operation_id: np.uint64, timestamp: datetime,
    new_root_ids: Sequence[np.uint64]
) -> "bigtable.row.Row":
    val_dict = {
        column_keys.OperationLogs.UserID: self.user_id,
        column_keys.OperationLogs.UndoOperationID:
            self.superseded_operation_id,
        column_keys.OperationLogs.RootID: new_root_ids,
    }
    return self.cg.mutate_row(serializers.serialize_uint64(operation_id),
                              val_dict, timestamp)
def create_parent_children_rows(cg, parent_id, children_ids,
                                parent_cross_chunk_edge_dict, time_stamp):
    """ Generates BigTable rows

    :param cg: ChunkedGraph instance
    :param parent_id: np.uint64
    :param children_ids: list of np.uint64s
    :param parent_cross_chunk_edge_dict: dict
    :param time_stamp: datetime.datetime
    :return: list of rows
    """
    rows = []

    val_dict = {}
    for l, layer_edges in parent_cross_chunk_edge_dict.items():
        val_dict[column_keys.Connectivity.CrossChunkEdge[l]] = layer_edges

    # The parent has to live on a higher layer than all of its children
    assert np.max(
        cg.get_chunk_layers(children_ids)) < cg.get_chunk_layer(parent_id)

    val_dict[column_keys.Hierarchy.Child] = children_ids

    rows.append(
        cg.mutate_row(serializers.serialize_uint64(parent_id),
                      val_dict, time_stamp=time_stamp))

    for child_id in children_ids:
        val_dict = {column_keys.Hierarchy.Parent: parent_id}
        rows.append(
            cg.mutate_row(serializers.serialize_uint64(child_id),
                          val_dict, time_stamp=time_stamp))

    return rows
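# A minimal sketch (plain dicts standing in for BigTable rows) of the two-way
# hierarchy links create_parent_children_rows writes: the parent row lists all
# of its children, and every child row points back at the parent.
def _example_parent_children_links():
    import numpy as np

    parent_id = np.uint64(100)
    children_ids = np.array([10, 11, 12], dtype=np.uint64)

    rows = {parent_id: {"Child": children_ids}}
    for child_id in children_ids:
        rows[child_id] = {"Parent": parent_id}

    assert rows[np.uint64(10)]["Parent"] == parent_id
    assert list(rows[parent_id]["Child"]) == [10, 11, 12]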
def _create_log_record(
    self, *, operation_id: np.uint64, timestamp: datetime,
    new_root_ids: Sequence[np.uint64]
) -> "bigtable.row.Row":
    val_dict = {
        column_keys.OperationLogs.UserID: self.user_id,
        column_keys.OperationLogs.RootID: new_root_ids,
        column_keys.OperationLogs.SourceCoordinate: self.source_coords,
        column_keys.OperationLogs.SinkCoordinate: self.sink_coords,
        column_keys.OperationLogs.SourceID: self.source_ids,
        column_keys.OperationLogs.SinkID: self.sink_ids,
        column_keys.OperationLogs.BoundingBoxOffset: self.bbox_offset,
        column_keys.OperationLogs.RemovedEdge: self.removed_edges,
    }
    return self.cg.mutate_row(serializers.serialize_uint64(operation_id),
                              val_dict, timestamp)
def _create_log_record(self, *, operation_id, timestamp,
                       new_root_ids) -> "bigtable.row.Row":
    val_dict = {
        column_keys.OperationLogs.UserID: self.user_id,
        column_keys.OperationLogs.RootID: new_root_ids,
        column_keys.OperationLogs.AddedEdge: self.added_edges,
    }
    if self.source_coords is not None:
        val_dict[column_keys.OperationLogs.SourceCoordinate] = \
            self.source_coords
    if self.sink_coords is not None:
        val_dict[column_keys.OperationLogs.SinkCoordinate] = self.sink_coords
    if self.affinities is not None:
        val_dict[column_keys.OperationLogs.Affinity] = self.affinities
    return self.cg.mutate_row(serializers.serialize_uint64(operation_id),
                              val_dict, timestamp)
def remove_edges(cg, operation_id: np.uint64,
                 atomic_edges: Sequence[Sequence[np.uint64]],
                 time_stamp: datetime.datetime):
    # This view of the to be removed edges helps us to compute the mask
    # of the retained edges in each chunk
    double_atomic_edges = np.concatenate([atomic_edges,
                                          atomic_edges[:, ::-1]],
                                         axis=0)
    double_atomic_edges_view = double_atomic_edges.view(dtype='u8,u8')
    n_edges = double_atomic_edges.shape[0]
    double_atomic_edges_view = double_atomic_edges_view.reshape(n_edges)

    rows = []  # list of rows to be written to BigTable

    lvl2_dict = {}
    lvl2_cross_chunk_edge_dict = {}

    # Analyze atomic_edges --> translate them to lvl2 edges and extract cross
    # chunk edges to be removed
    lvl2_edges, old_cross_edge_dict = analyze_atomic_edges(cg, atomic_edges)
    lvl2_node_ids = np.unique(lvl2_edges)

    for lvl2_node_id in lvl2_node_ids:
        chunk_id = cg.get_chunk_id(lvl2_node_id)
        chunk_edges, _, _ = cg.get_subgraph_chunk(lvl2_node_id,
                                                  make_unique=False)

        child_chunk_ids = cg.get_child_chunk_ids(chunk_id)

        assert len(child_chunk_ids) == 1
        child_chunk_id = child_chunk_ids[0]

        children_ids = np.unique(chunk_edges)
        children_chunk_ids = cg.get_chunk_ids_from_node_ids(children_ids)
        children_ids = children_ids[children_chunk_ids == child_chunk_id]

        # These edges still contain the removed edges.
        # For consistency reasons we can only write to BigTable one time.
        # Hence, we have to evict the to be removed "atomic_edges" from the
        # queried edges.
        retained_edges_mask = ~np.in1d(
            chunk_edges.view(dtype='u8,u8').reshape(chunk_edges.shape[0]),
            double_atomic_edges_view)

        chunk_edges = chunk_edges[retained_edges_mask]

        edge_layers = cg.get_cross_chunk_edges_layer(chunk_edges)
        cross_edge_mask = edge_layers != 1

        cross_edges = chunk_edges[cross_edge_mask]
        cross_edge_layers = edge_layers[cross_edge_mask]
        chunk_edges = chunk_edges[~cross_edge_mask]

        # Add self-edges so isolated nodes are still represented in the graph
        isolated_child_ids = children_ids[~np.in1d(children_ids,
                                                   chunk_edges)]
        isolated_edges = np.vstack([isolated_child_ids,
                                    isolated_child_ids]).T

        graph, _, _, unique_graph_ids = flatgraph_utils.build_gt_graph(
            np.concatenate([chunk_edges, isolated_edges]),
            make_directed=True)

        ccs = flatgraph_utils.connected_components(graph)

        # One new lvl2 parent per connected component
        new_parent_ids = cg.get_unique_node_id_range(chunk_id, len(ccs))

        for i_cc, cc in enumerate(ccs):
            new_parent_id = new_parent_ids[i_cc]
            cc_node_ids = unique_graph_ids[cc]

            lvl2_dict[new_parent_id] = [lvl2_node_id]

            # Write changes to atomic nodes and new lvl2 parent row
            val_dict = {column_keys.Hierarchy.Child: cc_node_ids}
            rows.append(
                cg.mutate_row(serializers.serialize_uint64(new_parent_id),
                              val_dict, time_stamp=time_stamp))

            for cc_node_id in cc_node_ids:
                val_dict = {column_keys.Hierarchy.Parent: new_parent_id}
                rows.append(
                    cg.mutate_row(serializers.serialize_uint64(cc_node_id),
                                  val_dict, time_stamp=time_stamp))

            # Cross edges ---
            cross_edge_m = np.in1d(cross_edges[:, 0], cc_node_ids)
            cc_cross_edges = cross_edges[cross_edge_m]
            cc_cross_edge_layers = cross_edge_layers[cross_edge_m]
            u_cc_cross_edge_layers = np.unique(cc_cross_edge_layers)

            lvl2_cross_chunk_edge_dict[new_parent_id] = {}

            for l in range(2, cg.n_layers):
                empty_edges = \
                    column_keys.Connectivity.CrossChunkEdge.deserialize(b'')
                lvl2_cross_chunk_edge_dict[new_parent_id][l] = empty_edges

            val_dict = {}

            for cc_layer in u_cc_cross_edge_layers:
                edge_m = cc_cross_edge_layers == cc_layer
                layer_cross_edges = cc_cross_edges[edge_m]

                if len(layer_cross_edges) > 0:
                    val_dict[column_keys.Connectivity.CrossChunkEdge[
                        cc_layer]] = layer_cross_edges
                    lvl2_cross_chunk_edge_dict[new_parent_id][cc_layer] = \
                        layer_cross_edges

            if len(val_dict) > 0:
                rows.append(
                    cg.mutate_row(serializers.serialize_uint64(new_parent_id),
                                  val_dict, time_stamp=time_stamp))

        if cg.n_layers == 2:
            rows.extend(
                update_root_id_lineage(cg, new_parent_ids, [lvl2_node_id],
                                       operation_id=operation_id,
                                       time_stamp=time_stamp))

    # Write atomic nodes
    rows.extend(
        _write_atomic_split_edges(cg, atomic_edges, time_stamp=time_stamp))

    # Propagate changes up the tree
    if cg.n_layers > 2:
        new_root_ids, new_rows = propagate_edits_to_root(
            cg, lvl2_dict.copy(), lvl2_cross_chunk_edge_dict,
            operation_id=operation_id, time_stamp=time_stamp)
        rows.extend(new_rows)
    else:
        new_root_ids = np.array(list(lvl2_dict.keys()))

    return new_root_ids, list(lvl2_dict.keys()), rows
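# A minimal sketch (plain NumPy, made-up IDs) of the structured-view trick
# remove_edges uses to build retained_edges_mask: viewing an (n, 2) uint64
# edge array as a 1d array of (u8, u8) records makes each row a single
# comparable element, so np.in1d can match whole edges at once. Both
# orientations of the removed edges are included, mirroring
# double_atomic_edges above.
def _example_retained_edges_mask():
    import numpy as np

    chunk_edges = np.array([[1, 2], [2, 3], [3, 4]], dtype=np.uint64)
    removed = np.array([[3, 2]], dtype=np.uint64)

    double_removed = np.concatenate([removed, removed[:, ::-1]], axis=0)
    double_removed_view = double_removed.view(dtype='u8,u8').reshape(
        double_removed.shape[0])

    retained_mask = ~np.in1d(
        chunk_edges.view(dtype='u8,u8').reshape(chunk_edges.shape[0]),
        double_removed_view)

    assert chunk_edges[retained_mask].tolist() == [[1, 2], [3, 4]]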