Example #1
    def __init__(
        self,
        cg: "ChunkedGraph",
        *,
        user_id: str,
        source_ids: Sequence[np.uint64],
        sink_ids: Sequence[np.uint64],
        source_coords: Sequence[Sequence[int]],
        sink_coords: Sequence[Sequence[int]],
        bbox_offset: Sequence[int],
    ) -> None:
        super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
        self.removed_edges = None  # Calculated from coordinates and IDs
        self.source_ids = np.atleast_1d(source_ids).astype(basetypes.NODE_ID)
        self.sink_ids = np.atleast_1d(sink_ids).astype(basetypes.NODE_ID)
        self.bbox_offset = np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES)

        if np.any(np.in1d(self.sink_ids, self.source_ids)):
            raise cg_exceptions.PreconditionError(
                f"One or more supervoxel exists as both, sink and source."
            )

        for supervoxel_id in itertools.chain(self.source_ids, self.sink_ids):
            layer = self.cg.get_chunk_layer(supervoxel_id)
            if layer != 1:
                raise cg_exceptions.PreconditionError(
                    f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
                )
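The overlap check above relies on np.in1d, which returns an element-wise membership mask. A minimal, self-contained sketch with made-up supervoxel IDs (not taken from the original code) of how the sink/source overlap test behaves:

import numpy as np

# Hypothetical supervoxel IDs, chosen only for illustration.
source_ids = np.array([101, 102, 103], dtype=np.uint64)
sink_ids = np.array([103, 204], dtype=np.uint64)

# True wherever a sink ID also appears among the sources.
overlap_mask = np.in1d(sink_ids, source_ids)
print(overlap_mask)          # [ True False]
print(np.any(overlap_mask))  # True -> the constructor above would raise PreconditionError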
Example #2
    def __init__(
        self,
        cg: "ChunkedGraph",
        *,
        user_id: str,
        added_edges: Sequence[Sequence[np.uint64]],
        source_coords: Optional[Sequence[Sequence[int]]] = None,
        sink_coords: Optional[Sequence[Sequence[int]]] = None,
        affinities: Optional[Sequence[np.float32]] = None,
    ) -> None:
        super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
        self.added_edges = np.atleast_2d(added_edges).astype(basetypes.NODE_ID)
        self.affinities = None

        if affinities is not None:
            self.affinities = np.atleast_1d(affinities).astype(basetypes.EDGE_AFFINITY)
            if self.affinities.size == 0:
                self.affinities = None

        if np.any(np.equal(self.added_edges[:, 0], self.added_edges[:, 1])):
            raise cg_exceptions.PreconditionError(
                f"Requested merge operation contains at least one self-loop."
            )

        for supervoxel_id in self.added_edges.ravel():
            layer = self.cg.get_chunk_layer(supervoxel_id)
            if layer != 1:
                raise cg_exceptions.PreconditionError(
                    f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
                )
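A small sketch (hypothetical edge values) of how np.atleast_2d normalizes a single flat edge into a 2D array and how the column comparison above flags self-loops:

import numpy as np

# A single edge passed as a flat pair is promoted to shape (1, 2).
added_edges = np.atleast_2d([7, 7]).astype(np.uint64)
print(added_edges.shape)  # (1, 2)

# Comparing the two columns element-wise exposes self-loops (source == sink).
is_self_loop = np.equal(added_edges[:, 0], added_edges[:, 1])
print(np.any(is_self_loop))  # True -> the merge constructor above would raise PreconditionError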
Example #3
 def _update_root_ids(self) -> np.ndarray:
     root_ids = np.unique(self.cg.get_roots(self.removed_edges.ravel()))
     if len(root_ids) > 1:
         raise cg_exceptions.PreconditionError(
             f"All supervoxel must belong to the same object. Already split?"
         )
     return root_ids
Example #4
 def _update_root_ids(self) -> np.ndarray:
     sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids))
     root_ids = np.unique(self.cg.get_roots(sink_and_source_ids))
     if len(root_ids) > 1:
         raise cg_exceptions.PreconditionError(
             f"All supervoxel must belong to the same object. Already split?"
         )
     return root_ids
Example #5
    def __init__(
        self,
        cg: "ChunkedGraph",
        *,
        user_id: str,
        removed_edges: Sequence[Sequence[np.uint64]],
        source_coords: Optional[Sequence[Sequence[int]]] = None,
        sink_coords: Optional[Sequence[Sequence[int]]] = None,
    ) -> None:
        super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
        self.removed_edges = np.atleast_2d(removed_edges).astype(basetypes.NODE_ID)

        if np.any(np.equal(self.removed_edges[:, 0], self.removed_edges[:, 1])):
            raise cg_exceptions.PreconditionError(
                f"Requested split operation contains at least one self-loop."
            )

        for supervoxel_id in self.removed_edges.ravel():
            layer = self.cg.get_chunk_layer(supervoxel_id)
            if layer != 1:
                raise cg_exceptions.PreconditionError(
                    f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
                )
Example #6
    def _filter_graph_connected_components(self):
        """
        Filter out connected components in the graph
        that are not involved in the local mincut
        """
        ccs = flatgraph_utils.connected_components(self.weighted_graph)

        removed = self.weighted_graph.new_vertex_property("bool")
        removed.a = False
        if len(ccs) > 1:
            for cc in ccs:
                # If connected component contains no sources or no sinks,
                # remove its nodes from the mincut computation
                if not (np.any(np.in1d(self.source_graph_ids, cc))
                        and np.any(np.in1d(self.sink_graph_ids, cc))):
                    for node_id in cc:
                        removed[node_id] = True

        self.weighted_graph.set_vertex_filter(removed, inverted=True)
        pruned_graph = graph_tool.Graph(self.weighted_graph, prune=True)
        # Test that there is only one connected component left
        ccs = flatgraph_utils.connected_components(pruned_graph)

        if len(ccs) > 1:
            if self.logger is not None:
                self.logger.warning(
                    "Not all sinks and sources are within the same (local)"
                    "connected component")
            raise cg_exceptions.PreconditionError(
                "Not all sinks and sources are within the same (local)"
                "connected component")
        elif len(ccs) == 0:
            raise cg_exceptions.PreconditionError(
                "Sinks and sources are not connected through the local graph. "
                "Please try a different set of vertices to perform the mincut."
            )
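The filtering above is built on graph-tool's boolean vertex property maps. A minimal sketch, assuming graph-tool is installed, of how set_vertex_filter(..., inverted=True) hides flagged vertices and how the prune=True copy constructor materializes the filtered view; the toy graph below is illustrative, not chunked-graph data:

import graph_tool

# Two disconnected components: {0, 1} and {2, 3}.
g = graph_tool.Graph(directed=False)
g.add_vertex(4)
g.add_edge(g.vertex(0), g.vertex(1))
g.add_edge(g.vertex(2), g.vertex(3))

# Flag the second component for removal.
removed = g.new_vertex_property("bool")
removed.a = False
removed[g.vertex(2)] = True
removed[g.vertex(3)] = True

# inverted=True keeps only the vertices where the property is False.
g.set_vertex_filter(removed, inverted=True)
pruned = graph_tool.Graph(g, prune=True)
print(pruned.num_vertices())  # 2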
Example #7
    def _sink_and_source_connectivity_sanity_check(self, cut_edge_set):
        """
        Similar to _gt_mincut_sanity_check, except we do the check again *after*
        removing the fake infinite affinity edges.
        """
        time_start = time.time()
        for cut_edge in cut_edge_set:
            # May be more than one edge from vertex cut_edge[0] to vertex cut_edge[1], remove them all
            parallel_edges = self.weighted_graph.edge(cut_edge[0],
                                                      cut_edge[1],
                                                      all_edges=True)
            for edge_to_remove in parallel_edges:
                self.edges_to_remove[edge_to_remove] = True

        self.weighted_graph.set_edge_filter(self.edges_to_remove, inverted=True)
        ccs_test_post_cut = flatgraph_utils.connected_components(
            self.weighted_graph)

        # Make sure sinks and sources are among each other and not in different sets
        # after removing the cut edges and the fake infinity edges
        illegal_split = False
        try:
            for cc in ccs_test_post_cut:
                if np.any(np.in1d(self.source_graph_ids, cc)):
                    assert np.all(np.in1d(self.source_graph_ids, cc))
                    assert not np.any(np.in1d(self.sink_graph_ids, cc))

                if np.any(np.in1d(self.sink_graph_ids, cc)):
                    assert np.all(np.in1d(self.sink_graph_ids, cc))
                    assert not np.any(np.in1d(self.source_graph_ids, cc))
        except AssertionError:
            if self.split_preview:
                # If we are performing a split preview, we allow these illegal splits,
                # but return a flag to return a message to the user
                illegal_split = True
            else:
                raise cg_exceptions.PreconditionError(
                    "Failed to find a cut that separated the sources from the sinks. "
                    "Please try another cut that partitions the sets cleanly if possible. "
                    "If there is a clear path between all the supervoxels in each set, "
                    "that helps the mincut algorithm.")

        dt = time.time() - time_start
        if self.logger is not None:
            self.logger.debug("Verifying local graph: %.2fms" % (dt * 1000))

        return ccs_test_post_cut, illegal_split
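The assertion logic above reduces to: after the cut, no connected component may contain both source and sink vertices. A minimal sketch with hypothetical component and ID values:

import numpy as np

source_graph_ids = np.array([0, 1])
sink_graph_ids = np.array([5, 6])

# Hypothetical components remaining after removing the cut edges.
components_after_cut = [np.array([0, 1, 2]), np.array([3, 4, 5, 6])]

for cc in components_after_cut:
    has_source = np.any(np.in1d(source_graph_ids, cc))
    has_sink = np.any(np.in1d(sink_graph_ids, cc))
    # A component containing both would mean the cut failed to separate the sets.
    assert not (has_source and has_sink)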
Example #8
def test_failed_graph_operation(mocker, root_lock_tracker):
    """Ensure that root locks got released after successful
        root lock acquisition + *unsuccessful* graph operation"""
    fake_operation_id = big_uint64()
    fake_locked_root_ids = np.array((big_uint64(), big_uint64()))

    cg = mocker.MagicMock()
    cg.get_unique_operation_id = mocker.MagicMock(
        return_value=fake_operation_id)
    cg.lock_root_loop = mocker.MagicMock(
        return_value=(True, fake_locked_root_ids),
        side_effect=root_lock_tracker.add_locks)
    cg.unlock_root = mocker.MagicMock(
        return_value=True, side_effect=root_lock_tracker.remove_lock)

    with pytest.raises(cg_exceptions.PreconditionError):
        with RootLock(cg, fake_locked_root_ids):
            raise cg_exceptions.PreconditionError("Something went wrong")

    assert not root_lock_tracker.active_locks[fake_operation_id]
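The test relies on RootLock releasing its locks even when the wrapped operation raises. A hypothetical, stripped-down sketch of that context-manager pattern; SimpleRootLock, its attributes, and the argument order of lock_root_loop/unlock_root are illustrative assumptions, not the real implementation:

class SimpleRootLock:
    """Illustrative only: acquire root locks on enter, always release on exit."""

    def __init__(self, cg, root_ids):
        self.cg = cg
        self.root_ids = root_ids
        self.operation_id = None

    def __enter__(self):
        self.operation_id = self.cg.get_unique_operation_id()
        # Argument order assumed for this sketch.
        success, self.root_ids = self.cg.lock_root_loop(self.root_ids, self.operation_id)
        if not success:
            raise RuntimeError("Could not acquire root locks")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # __exit__ runs even when the `with` body raises, so the locks are
        # always released, which is what the test above asserts.
        for root_id in self.root_ids:
            self.cg.unlock_root(root_id, self.operation_id)
        return False  # propagate any exception raised inside the block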
Example #9
def handle_l2_chunk_children(table_id, chunk_id, as_array):
    current_app.request_type = "l2_chunk_children"
    current_app.table_id = table_id

    # Convert seconds since epoch to UTC datetime
    try:
        timestamp = float(request.args.get("timestamp", time.time()))
        timestamp = datetime.fromtimestamp(timestamp, UTC)
    except (TypeError, ValueError):
        raise cg_exceptions.BadRequest("Timestamp parameter is not a valid"
                                       " unix timestamp")

    # Call ChunkedGraph
    cg = app_utils.get_cg(table_id)

    chunk_layer = cg.get_chunk_layer(chunk_id)
    if chunk_layer != 2:
        raise cg_exceptions.PreconditionError(
            f"This function only accepts level 2 chunks; the requested chunk is at level {chunk_layer}."
        )

    rr_chunk = cg.range_read_chunk(chunk_id=np.uint64(chunk_id),
                                   columns=column_keys.Hierarchy.Child,
                                   time_stamp=timestamp)

    if as_array:
        l2_chunk_array = []

        for l2 in rr_chunk:
            svs = rr_chunk[l2][0].value
            for sv in svs:
                l2_chunk_array.extend([l2, sv])

        return np.array(l2_chunk_array)
    else:
        # store in dict of keys to arrays to remove reliance on bigtable
        l2_chunk_dict = {}
        for k in rr_chunk:
            l2_chunk_dict[k] = rr_chunk[k][0].value

        return l2_chunk_dict
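A small sketch (hypothetical values) of how a caller might consume the flattened as_array result, since it interleaves [l2_id, supervoxel_id] pairs:

import numpy as np

# Hypothetical flattened result: [l2_id, sv_id, l2_id, sv_id, ...]
flat = np.array([10, 501, 10, 502, 11, 601], dtype=np.uint64)

# Reshape into (N, 2) rows of (l2_id, supervoxel_id).
pairs = flat.reshape(-1, 2)
print(pairs)
# [[ 10 501]
#  [ 10 502]
#  [ 11 601]]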
Example #10
    def read_byte_rows(
        self,
        start_key: Optional[bytes] = None,
        end_key: Optional[bytes] = None,
        end_key_inclusive: bool = False,
        row_keys: Optional[Iterable[bytes]] = None,
        columns: Optional[Union[Iterable[column_keys._Column],
                                column_keys._Column]] = None,
        start_time: Optional[datetime.datetime] = None,
        end_time: Optional[datetime.datetime] = None,
        end_time_inclusive: bool = False
    ) -> Dict[bytes, Union[Dict[column_keys._Column,
                                List[bigtable.row_data.Cell]],
                           List[bigtable.row_data.Cell]]]:
        """Main function for reading a row range or non-contiguous row sets from Bigtable using
        `bytes` keys.

        Keyword Arguments:
            start_key {Optional[bytes]} -- The first row to be read, ignored if `row_keys` is set.
                If None, no lower boundary is used. (default: {None})
            end_key {Optional[bytes]} -- The end of the row range, ignored if `row_keys` is set.
                If None, no upper boundary is used. (default: {None})
            end_key_inclusive {bool} -- Whether or not `end_key` itself should be included in the
                request, ignored if `row_keys` is set or `end_key` is None. (default: {False})
            row_keys {Optional[Iterable[bytes]]} -- An `Iterable` containing possibly
                non-contiguous row keys. Takes precedence over `start_key` and `end_key`.
                (default: {None})
            columns {Optional[Union[Iterable[column_keys._Column], column_keys._Column]]} --
                Optional filtering by columns to speed up the query. If `columns` is a single
                column (not iterable), the column key will be omitted from the result.
                (default: {None})
            start_time {Optional[datetime.datetime]} -- Ignore cells with timestamp before
                `start_time`. If None, no lower bound. (default: {None})
            end_time {Optional[datetime.datetime]} -- Ignore cells with timestamp after `end_time`.
                If None, no upper bound. (default: {None})
            end_time_inclusive {bool} -- Whether or not `end_time` itself should be included in the
                request, ignored if `end_time` is None. (default: {False})

        Returns:
            Dict[bytes, Union[Dict[column_keys._Column, List[bigtable.row_data.Cell]],
                              List[bigtable.row_data.Cell]]] --
                Returns a dictionary of `byte` rows as keys. Their value will be a mapping of
                columns to a List of cells (one cell per timestamp). Each cell has a `value`
                property, which returns the deserialized field, and a `timestamp` property, which
                returns the timestamp as `datetime.datetime` object.
                If only a single `column_keys._Column` was requested, the List of cells will be
                attached to the row dictionary directly (skipping the column dictionary).
        """

        # Create filters: Column and Time
        filter_ = get_time_range_and_column_filter(
            columns=columns,
            start_time=start_time,
            end_time=end_time,
            end_inclusive=end_time_inclusive)

        # Create filters: Rows
        row_set = RowSet()

        if row_keys is not None:
            for row_key in row_keys:
                row_set.add_row_key(row_key)
        elif start_key is not None and end_key is not None:
            row_set.add_row_range_from_keys(start_key=start_key,
                                            start_inclusive=True,
                                            end_key=end_key,
                                            end_inclusive=end_key_inclusive)
        else:
            raise cg_exceptions.PreconditionError(
                "Need to either provide a valid set of rows, or"
                " both, a start row and an end row.")

        # Bigtable read with retries
        rows = self._execute_read(row_set=row_set, row_filter=filter_)

        return rows
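A hedged usage sketch of the method above; the instance name and the column constant are assumptions, and only the keyword arguments and cell attributes described in the docstring are relied on:

# Read two specific rows, restricted to a single column; because a single
# column is passed, each row maps directly to a list of cells.
rows = instance.read_byte_rows(
    row_keys=[b"row-a", b"row-b"],
    columns=column_keys.Hierarchy.Child,
)
for row_key, cells in rows.items():
    for cell in cells:
        print(row_key, cell.timestamp, cell.value)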