Example #1
    def compute_prefetch_keys(self, missed_key):
        """From the missed key, determine what to prefetch.

        Args:
            missed_key (string): Cached-cuboid key.

        Returns:
            (list): List of cached-cuboid keys to fetch.
        """
        key_parts = missed_key.rsplit('&', 1)
        morton_id = key_parts[1]
        coords = ndlib.MortonXYZ(int(morton_id))
        z = coords[2]
        coords_above = coords.copy()
        coords_above[2] = z + 1
        mortonid_above = ndlib.XYZMorton(coords_above)
        key_above = '{}&{}'.format(key_parts[0], mortonid_above)

        if z - 1 < 0:
            return [key_above]

        coords_below = coords.copy()
        coords_below[2] = z - 1
        mortonid_below = ndlib.XYZMorton(coords_below)
        key_below = '{}&{}'.format(key_parts[0], mortonid_below)

        return [key_above, key_below]
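A minimal usage sketch, assuming the ndlib module and a cache-miss handler instance (here called cache_miss, as in the tests below) are available; the key prefix is illustrative:

# Build a cached-cuboid key for the cuboid at (x=5, y=10, z=3); the Morton ID is the last '&'-delimited field.
missed_key = 'CACHED-CUBOID&1&2&3&4&5&{}'.format(ndlib.XYZMorton([5, 10, 3]))

# For z > 0 this returns two keys: the cuboid directly above (z=4) first, then the one below (z=2).
prefetch_keys = cache_miss.compute_prefetch_keys(missed_key)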
Example #2
    def test_compute_prefetch_keys_at_z0(self):
        xyz = [5, 10, 0]
        mortonid = ndlib.XYZMorton(xyz)
        mortonid_above = ndlib.XYZMorton([5, 10, 1])
        key_prefix = 'CACHED-CUBOID&1&2&3&4&5&'
        missed_key = '{}{}'.format(key_prefix, mortonid)
        expected = [
            '{}{}'.format(key_prefix, mortonid_above),
        ]
        actual = self.cache_miss.compute_prefetch_keys(missed_key)

        self.assertEqual(expected, actual)
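A companion sketch (not part of the original suite) for the general z > 0 case, where compute_prefetch_keys returns both the cuboid above and the cuboid below:

    def test_compute_prefetch_keys_above_z0(self):
        # Hypothetical companion test: away from z=0 both vertical neighbors are prefetched.
        xyz = [5, 10, 3]
        mortonid = ndlib.XYZMorton(xyz)
        mortonid_above = ndlib.XYZMorton([5, 10, 4])
        mortonid_below = ndlib.XYZMorton([5, 10, 2])
        key_prefix = 'CACHED-CUBOID&1&2&3&4&5&'
        missed_key = '{}{}'.format(key_prefix, mortonid)
        expected = [
            '{}{}'.format(key_prefix, mortonid_above),
            '{}{}'.format(key_prefix, mortonid_below),
        ]
        actual = self.cache_miss.compute_prefetch_keys(missed_key)

        self.assertEqual(expected, actual)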
Example #3
    def test_add_to_prefetch(self):
        cuboid_dims = CUBOIDSIZE[0]
        # Cuboid dimensions.
        x_dim = cuboid_dims[0]
        y_dim = cuboid_dims[1]
        z_dim = cuboid_dims[2]

        cube_above = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube_above.random()

        # Write a cuboid that is stacked vertically above the origin cuboid.
        self.sp.write_cuboid(self.resource, (0, 0, z_dim * 2), 0,
                             cube_above.data)

        cube_above.morton_id = ndlib.XYZMorton([0, 0, z_dim * 2 // z_dim])

        cube_above_cache_key = self.sp.kvio.generate_cached_cuboid_keys(
            self.resource, 0, [0], [cube_above.morton_id])

        # Make sure the cuboid was saved.
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2),
                                  (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)

        # Clear cache so we can test prefetch.
        self.sp.kvio.cache_client.flushdb()

        # Also clear cache state before running test.
        self.sp.cache_state.status_client.flushdb()

        obj_keys = self.sp.objectio.cached_cuboid_to_object_keys(
            cube_above_cache_key)

        # Place a cuboid in the prefetch queue.
        self.sp.cache_state.status_client.rpush('PRE-FETCH', obj_keys[0])

        # This is the system under test.
        self.prefetch.process()

        # Wait for cube to be prefetched.
        i = 0
        while not self.sp.kvio.cube_exists(cube_above_cache_key[0]) and i < 30:
            time.sleep(1)
            i += 1

        # Confirm cuboid now in cache.
        self.assertTrue(self.sp.kvio.cube_exists(cube_above_cache_key[0]))

        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2),
                                  (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)
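The busy-wait loop above could be factored into a small helper; a minimal sketch, assuming the same kvio.cube_exists() API used in the test (the helper name wait_for_cube is hypothetical):

import time

def wait_for_cube(kvio, cache_key, timeout_sec=30):
    # Hypothetical helper: poll the cache until cache_key appears or the timeout expires.
    for _ in range(timeout_sec):
        if kvio.cube_exists(cache_key):
            return True
        time.sleep(1)
    return kvio.cube_exists(cache_key)

# e.g. in the test above: self.assertTrue(wait_for_cube(self.sp.kvio, cube_above_cache_key[0]))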
Example #4
    def write_cuboid(self,
                     resource,
                     corner,
                     resolution,
                     cuboid_data,
                     time_sample_start=0,
                     iso=False):
        """ Write a 3D/4D volume to the key-value store. Used by API/cache in consistent mode as it reconciles writes

        If cuboid_data.ndim == 4, data in time-series format - assume t,z,y,x
        If cuboid_data.ndim == 3, data not in time-series format - assume z,y,x

        Args:
            resource (project.BossResource): Data model info based on the request or target resource
            corner ((int, int, int)): the xyz location of the corner of the cutout
            resolution (int): the resolution level
            cuboid_data (numpy.ndarray): Matrix of data to write as cuboids
            time_sample_start (int): if cuboid_data.ndim == 3, the time sample for the data
                                     if cuboid_data.ndim == 4, the time sample for cuboid_data[0, :, :, :]
            iso (bool): Flag indicating if you want to write to the "isotropic" version of a channel, if available

        Returns:
            None
        """
        boss_logger = BossLogger()
        boss_logger.setLevel("info")
        blog = boss_logger.logger

        # Check if the resource is locked
        if self.resource_locked(resource.get_lookup_key()):
            raise SpdbError(
                'Resource Locked',
                'The requested resource is locked due to excessive write errors. Contact support.',
                ErrorCodes.RESOURCE_LOCKED)

        # Check to make sure the user is writing data at the BASE RESOLUTION
        channel = resource.get_channel()
        if channel.base_resolution != resolution:
            raise SpdbError(
                'Resolution Mismatch',
                "You can only write data to a channel's base resolution. Base Resolution: {}, Request Resolution: {}"
                .format(channel.base_resolution,
                        resolution), ErrorCodes.RESOLUTION_MISMATCH)

        # Check if time-series
        if cuboid_data.ndim == 4:
            # Time-series - coords in xyz, data in zyx so shuffle to be consistent and drop time value
            dim = cuboid_data.shape[::-1][:-1]
            time_sample_stop = time_sample_start + cuboid_data.shape[0]

        elif cuboid_data.ndim == 3:
            # Not time-series - coords in xyz, data in zyx so shuffle to be consistent
            dim = cuboid_data.shape[::-1]
            cuboid_data = np.expand_dims(cuboid_data, axis=0)
            time_sample_stop = time_sample_start + 1
        else:
            raise SpdbError('Invalid Data Shape', 'Matrix must be 4D or 3D',
                            ErrorCodes.SPDB_ERROR)

        # Get the size of cuboids
        [x_cube_dim, y_cube_dim,
         z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]

        # Round to the nearest larger cube in all dimensions
        [x_start, y_start, z_start] = list(map(floordiv, corner, cube_dim))

        z_num_cubes = (corner[2] + dim[2] + z_cube_dim -
                       1) // z_cube_dim - z_start
        y_num_cubes = (corner[1] + dim[1] + y_cube_dim -
                       1) // y_cube_dim - y_start
        x_num_cubes = (corner[0] + dim[0] + x_cube_dim -
                       1) // x_cube_dim - x_start

        [x_offset, y_offset, z_offset] = list(map(mod, corner, cube_dim))

        # Populate the data buffer
        data_buffer = np.zeros([time_sample_stop - time_sample_start] + [
            z_num_cubes * z_cube_dim, y_num_cubes * y_cube_dim,
            x_num_cubes * x_cube_dim
        ],
                               dtype=cuboid_data.dtype,
                               order="C")

        data_buffer[:, z_offset:z_offset + dim[2], y_offset:y_offset + dim[1],
                    x_offset:x_offset + dim[0]] = cuboid_data

        # Get keys ready
        experiment = resource.get_experiment()
        if iso is True and resolution > resource.get_isotropic_level(
        ) and experiment.hierarchy_method.lower() == "anisotropic":
            base_write_cuboid_key = "WRITE-CUBOID&ISO&{}&{}".format(
                resource.get_lookup_key(), resolution)
        else:
            base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(
                resource.get_lookup_key(), resolution)

        blog.info(
            "Writing Cuboid - Base Key: {}".format(base_write_cuboid_key))

        # Get current cube from db, merge with new cube, write back to the db
        # TODO: Move splitting up data and computing morton into c-lib as single method
        page_out_cnt = 0
        for z in range(z_num_cubes):
            for y in range(y_num_cubes):
                for x in range(x_num_cubes):
                    # Get the morton ID for the cube
                    morton_idx = ndlib.XYZMorton(
                        [x + x_start, y + y_start, z + z_start])

                    # Get sub-cube
                    temp_cube = Cube.create_cube(
                        resource, [x_cube_dim, y_cube_dim, z_cube_dim],
                        [time_sample_start, time_sample_stop])
                    temp_cube.data = np.ascontiguousarray(
                        data_buffer[:, z * z_cube_dim:(z + 1) * z_cube_dim,
                                    y * y_cube_dim:(y + 1) * y_cube_dim,
                                    x * x_cube_dim:(x + 1) * x_cube_dim],
                        dtype=data_buffer.dtype)

                    # For each time sample put cube into write-buffer and add to temp page out key
                    for t in range(time_sample_start, time_sample_stop):
                        # Add cuboid to write buffer
                        write_cuboid_key = self.kvio.insert_cube_in_write_buffer(
                            base_write_cuboid_key, t, morton_idx,
                            temp_cube.to_blosc_by_time_index(t))

                        # Page Out Attempt Loop
                        temp_page_out_key = "TEMP&{}".format(uuid.uuid4().hex)
                        # Check for page out
                        if self.cache_state.in_page_out(
                                temp_page_out_key, resource.get_lookup_key(),
                                resolution, morton_idx, t):
                            blog.info(
                                "Writing Cuboid - Delayed Write: {}".format(
                                    write_cuboid_key))
                            # Delay Write!
                            self.cache_state.add_to_delayed_write(
                                write_cuboid_key, resource.get_lookup_key(),
                                resolution, morton_idx, t, resource.to_json())
                            # You are done. continue
                        else:
                            # Attempt to get write slot by checking page out
                            in_page_out = self.cache_state.add_to_page_out(
                                temp_page_out_key, resource.get_lookup_key(),
                                resolution, morton_idx, t)

                            if not in_page_out:
                                # Good to trigger lambda!
                                self.objectio.trigger_page_out(
                                    {
                                        "kv_config":
                                        self.kv_config,
                                        "state_config":
                                        self.state_conf,
                                        "object_store_config":
                                        self.object_store_config
                                    }, write_cuboid_key, resource)
                                page_out_cnt += 1
                                # All done. continue.
                            else:
                                # Ended up in page out during transaction. Make delayed write.
                                blog.info("Writing Cuboid - Delayed Write: {}".
                                          format(write_cuboid_key))
                                self.cache_state.add_to_delayed_write(
                                    write_cuboid_key,
                                    resource.get_lookup_key(), resolution,
                                    morton_idx, t, resource.to_json())
        blog.info("Triggered {} Page Out Operations".format(page_out_cnt))
Example #5
    def cutout(self,
               resource,
               corner,
               extent,
               resolution,
               time_sample_range=None,
               filter_ids=None,
               iso=False,
               no_cache=False):
        """Extract a cube of arbitrary size. Need not be aligned to cuboid boundaries.

        corner represents the location of the cutout and extent the size.  As an example in 1D, if asking for
        a corner of 3 and extent of 2, this would be the values at 3 and 4.

        Provide a list of ids to filter the cutout contents if desired.  The list must be convertible to a numpy array
        via numpy.asarray().

        Args:
            resource (spdb.project.BossResource): Data model info based on the request or target resource
            corner ((int, int, int)): the xyz location of the corner of the cutout
            extent ((int, int, int)): the xyz extents
            resolution (int): the resolution level
            time_sample_range (list((int)):  a range of time samples to get [start, stop). Default is [0,1) if omitted
            filter_ids (optional[list]): Defaults to None. Otherwise, is a list of uint64 ids to filter cutout by.
            iso (bool): Flag indicating if you want to get to the "isotropic" version of a cuboid, if available
            no_cache (bool): True to read directly from S3 and bypass the cache.

        Returns:
            cube.Cube: The cutout data stored in a Cube instance

        Raises:
            (SPDBError):
        """
        boss_logger = BossLogger()
        boss_logger.setLevel("info")
        blog = boss_logger.logger

        if not time_sample_range:
            # If no time sample range was given, use the default of [0, 1)
            time_sample_range = [0, 1]

        # If the cutout is below the base resolution, get a smaller cube and scale up.
        # ANNOTATION CHANNELS ONLY - if data is missing at the requested resolution but exists elsewhere, extrapolate.
        # resource.get_channel().base_resolution is the "base" resolution; data is assumed to exist there.
        # If the channel is downsampled you don't have to worry about this.
        # Annotations are currently not upsampled when hardening the database, so there is no need to check for propagated status.

        # Create namedtuple for consistency with re-sampling paths through the code
        result_tuple = namedtuple(
            'ResampleCoords',
            ['corner', 'extent', 'x_pixel_offset', 'y_pixel_offset'])

        # Check if you need to scale a cutout due to off-base resolution cutout and the downsample state
        channel = resource.get_channel()
        if not channel.is_image():
            # The channel is an annotation so we can dynamically re-sample
            base_res = channel.base_resolution

            if base_res > resolution and not resource.is_downsampled():
                # Desired cutout is below base res in hierarchy (higher res image). Must up-sample cutout dynamically
                # Find the effective dimensions of the up-sampled cutout
                raise SpdbError(
                    'Not Implemented',
                    'Dynamic resolution up-sampling not yet implemented.',
                    ErrorCodes.FUTURE)

                # cutout_coords = self._up_sample_cutout(resource, corner, extent, resolution)

                # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
                # cutout_resolution = base_res

            elif not channel.is_image(
            ) and base_res < resolution and not resource.is_downsampled():
                # Currently, let's not support this. We can cutout a smaller cube and up-sample for the user, but do not
                # want to deal with cutting out large regions and down-sampling
                raise SpdbError(
                    'Not Implemented',
                    'Dynamic resolution down-sampling not yet implemented.',
                    ErrorCodes.FUTURE)
                # If cutout is an annotation channel, above base resolution (lower res), and NOT propagated, down-sample
                # cutout_coords = self._down_sample_cutout(resource, corner, extent, resolution)

                # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
                # cutout_resolution = base_res
            else:
                # this is the default path when not DYNAMICALLY scaling the resolution

                # get the size of the image and cube
                [x_cube_dim, y_cube_dim,
                 z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
                cutout_resolution = resolution

                # Create namedtuple for consistency with re-sampling paths through the code
                cutout_coords = result_tuple(corner, extent, None, None)
        else:
            # Resource is an image channel, so no re-sampling
            # get the size of the image and cube
            [x_cube_dim, y_cube_dim,
             z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
            cutout_resolution = resolution

            # Create namedtuple for consistency with re-sampling paths through the code
            cutout_coords = result_tuple(corner, extent, None, None)

        # Round to the nearest larger cube in all dimensions
        z_start = cutout_coords.corner[2] // z_cube_dim
        y_start = cutout_coords.corner[1] // y_cube_dim
        x_start = cutout_coords.corner[0] // x_cube_dim

        z_num_cubes = (cutout_coords.corner[2] + cutout_coords.extent[2] +
                       z_cube_dim - 1) // z_cube_dim - z_start
        y_num_cubes = (cutout_coords.corner[1] + cutout_coords.extent[1] +
                       y_cube_dim - 1) // y_cube_dim - y_start
        x_num_cubes = (cutout_coords.corner[0] + cutout_coords.extent[0] +
                       x_cube_dim - 1) // x_cube_dim - x_start

        # Initialize the final output cube (before trim operation since adding full cuboids)
        out_cube = Cube.create_cube(resource, [
            x_num_cubes * x_cube_dim, y_num_cubes * y_cube_dim,
            z_num_cubes * z_cube_dim
        ], time_sample_range)

        # Build a list of indexes to access
        # TODO: Move this for loop directly into c-lib
        list_of_idxs = []
        for z in range(z_num_cubes):
            for y in range(y_num_cubes):
                for x in range(x_num_cubes):
                    morton_idx = ndlib.XYZMorton(
                        [x + x_start, y + y_start, z + z_start])
                    list_of_idxs.append(morton_idx)

        # Sort the indexes in Morton order
        list_of_idxs.sort()

        # xyz offset stored for later use
        lowxyz = ndlib.MortonXYZ(list_of_idxs[0])

        # Get index of missing keys for cuboids to read
        missing_key_idx, cached_key_idx, all_keys = self.kvio.get_missing_read_cache_keys(
            resource,
            cutout_resolution,
            time_sample_range,
            list_of_idxs,
            iso=iso)
        # Wait for cuboids that are currently being written to finish
        start_time = datetime.now()
        dirty_keys = all_keys
        blog.debug(
            "Waiting for {} writes to finish before read can complete".format(
                len(dirty_keys)))
        while dirty_keys:
            dirty_flags = self.kvio.is_dirty(dirty_keys)
            dirty_keys_temp, clean_keys = [], []
            for key, flag in zip(dirty_keys, dirty_flags):
                (dirty_keys_temp if flag else clean_keys).append(key)
            dirty_keys = dirty_keys_temp

            if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
                # Took too long! Something must have crashed
                raise SpdbError(
                    '{} second timeout reached while waiting for dirty cubes to be flushed.'
                    .format(self.dirty_read_timeout), ErrorCodes.ASYNC_ERROR)
            # Sleep a bit so you don't kill the DB
            time.sleep(0.05)

        #
        # All dirty cubes flushed, can begin reading.
        #

        s3_key_idx = []
        cache_cuboids = []
        s3_cuboids = []
        zero_cuboids = []

        if no_cache:
            # If not using the cache, treat all keys as missing.
            blog.debug("Bypassing cache; loading all cuboids directly from S3")
            missing_key_idx = [i for i in range(len(all_keys))]

        if len(missing_key_idx) > 0:
            # There are keys that are missing in the cache
            # Get index of missing keys that are in S3
            s3_key_idx, zero_key_idx = self.objectio.cuboids_exist(
                all_keys, missing_key_idx)

            if len(s3_key_idx) > 0:
                if no_cache:
                    temp_keys = self.objectio.cached_cuboid_to_object_keys(
                        itemgetter(*s3_key_idx)(all_keys))

                    # Get objects
                    temp_cubes = self.objectio.get_objects(temp_keys)
                    # keys will be just the morton id and time sample.
                    keys_and_cubes = []
                    for key, cube in zip(temp_keys, temp_cubes):
                        vals = key.split("&")
                        keys_and_cubes.append(
                            (int(vals[-1]), int(vals[-2]), cube))
                    s3_cuboids = self.sort_cubes(resource, keys_and_cubes)
                else:
                    # Load data into cache.
                    blog.debug("Data missing from cache, but present in S3")

                    if len(s3_key_idx) > self.read_lambda_threshold:
                        # Trigger page-in of available blocks from object store and wait for completion
                        blog.debug("Triggering Lambda Page-in")
                        self.page_in_cubes(itemgetter(*s3_key_idx)(all_keys))
                    else:
                        # Read cuboids from S3 into cache directly
                        # Convert cuboid-cache keys to object keys
                        blog.debug("Paging-in Keys Directly")
                        temp_keys = self.objectio.cached_cuboid_to_object_keys(
                            itemgetter(*s3_key_idx)(all_keys))

                        # Get objects
                        temp_cubes = self.objectio.get_objects(temp_keys)

                        # write to cache
                        blog.debug("put keys on direct page in: {}".format(
                            itemgetter(*s3_key_idx)(all_keys)))
                        self.kvio.put_cubes(
                            itemgetter(*s3_key_idx)(all_keys), temp_cubes)

            if len(zero_key_idx) > 0:
                if not no_cache:
                    blog.debug("Data missing in cache, but not in S3")
                else:
                    blog.debug(
                        "No data for some keys, making cuboids with zeros")

                # Keys that don't exist in object store render as zeros
                [x_cube_dim, y_cube_dim, z_cube_dim] = CUBOIDSIZE[resolution]
                for idx in zero_key_idx:
                    parts, m_id = all_keys[idx].rsplit("&", 1)
                    _, t_start = parts.rsplit("&", 1)
                    temp_cube = Cube.create_cube(
                        resource, [x_cube_dim, y_cube_dim, z_cube_dim],
                        [int(t_start), int(t_start) + 1])
                    temp_cube.morton_id = int(m_id)
                    temp_cube.zeros()
                    zero_cuboids.append(temp_cube)

        # Get cubes from the cache database (either already there or freshly paged in)
        if not no_cache:
            # TODO: Optimize access to cache data and checking for dirty cubes
            if len(s3_key_idx) > 0:
                blog.debug("Get cubes from cache that were paged in from S3")
                blog.debug(itemgetter(*s3_key_idx)(all_keys))

                s3_cuboids = self.get_cubes(resource,
                                            itemgetter(*s3_key_idx)(all_keys))

                # Record misses that were found in S3 for possible pre-fetching
                self.cache_state.add_cache_misses(
                    itemgetter(*s3_key_idx)(all_keys))

            # Get previously cached cubes, waiting for dirty cubes to be updated if needed
            if len(cached_key_idx) > 0:
                blog.debug("Get cubes that were already present in the cache")

                # Get the cached keys once in list form
                cached_keys_list = itemgetter(*cached_key_idx)(all_keys)
                if isinstance(cached_keys_list, str):
                    cached_keys_list = [cached_keys_list]
                if isinstance(cached_keys_list, tuple):
                    cached_keys_list = list(cached_keys_list)

                # Split clean and dirty keys
                dirty_flags = self.kvio.is_dirty(cached_keys_list)
                dirty_keys, clean_keys = [], []
                for key, flag in zip(cached_keys_list, dirty_flags):
                    (dirty_keys if flag else clean_keys).append(key)

                # Get all the clean cubes immediately, removing them from the list of cached keys to get
                for k in clean_keys:
                    cached_keys_list.remove(k)
                cache_cuboids.extend(self.get_cubes(resource, clean_keys))

                # Get the dirty ones when you can with a timeout
                start_time = datetime.now()
                while dirty_keys:
                    dirty_flags = self.kvio.is_dirty(cached_keys_list)
                    dirty_keys, clean_keys = [], []
                    for key, flag in zip(cached_keys_list, dirty_flags):
                        (dirty_keys if flag else clean_keys).append(key)

                    if clean_keys:
                        # Some keys are ready now. Remove from list and get them
                        for k in clean_keys:
                            cached_keys_list.remove(k)
                        cache_cuboids.extend(
                            self.get_cubes(resource, clean_keys))

                    if (datetime.now() -
                            start_time).seconds > self.dirty_read_timeout:
                        # Took too long! Something must have crashed
                        raise SpdbError(
                            '{} second timeout reached while waiting for dirty cubes to be flushed.'
                            .format(self.dirty_read_timeout),
                            ErrorCodes.ASYNC_ERROR)

                    # Sleep a bit so you don't kill the DB
                    time.sleep(0.05)

        #
        # At this point, have all cuboids whether or not the cache was used.
        #

        # Add all cuboids (which have all time samples packed in already) to final cube of data
        for cube in cache_cuboids + s3_cuboids + zero_cuboids:
            # Compute offset so data inserted properly
            curxyz = ndlib.MortonXYZ(cube.morton_id)
            offset = [
                curxyz[0] - lowxyz[0], curxyz[1] - lowxyz[1],
                curxyz[2] - lowxyz[2]
            ]

            # add it to the output cube
            out_cube.add_data(cube, offset)

        # A smaller cube was cut out due to an off-base-resolution query: up-sample and trim
        base_res = channel.base_resolution
        if not channel.is_image(
        ) and base_res > resolution and not resource.is_downsampled():
            raise SpdbError(
                'Not Implemented',
                'Dynamic resolution up-sampling not yet implemented.',
                ErrorCodes.FUTURE)
            # TODO: implement dynamic re-sampling
            # out_cube.zoomData(base_res - resolution)

            # need to trim based on the cube cutout at new resolution
            # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.x_pixel_offset,
            #               extent[0],
            #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.y_pixel_offset,
            #               extent[1],
            #               corner[2] % z_cube_dim,
            #               extent[2])

        # A larger cube was cut out due to an off-base-resolution query: down-sample and trim
        elif not channel.is_image(
        ) and base_res < resolution and not resource.is_downsampled():
            raise SpdbError(
                'Not Implemented',
                'Dynamic resolution down-sampling not yet implemented.',
                ErrorCodes.FUTURE)
            # out_cube.downScale(resolution - base_res)
            # # need to trim based on the cube cutout at new resolution
            # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))),
            #               extent[0],
            #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))),
            #               extent[1],
            #               corner[2] % z_cube_dim,
            #               extent[2])

        # Trim the cube if the cutout was not cuboid aligned
        elif extent[0] % x_cube_dim == 0 and \
             extent[1] % y_cube_dim == 0 and \
             extent[2] % z_cube_dim == 0 and \
             corner[0] % x_cube_dim == 0 and \
             corner[1] % y_cube_dim == 0 and \
             corner[2] % z_cube_dim == 0:
            # Cube is already the correct dimensions
            pass
        else:
            out_cube.trim(corner[0] % x_cube_dim, extent[0],
                          corner[1] % y_cube_dim, extent[1],
                          corner[2] % z_cube_dim, extent[2])

        # Filter out ids not in list.
        if filter_ids is not None:
            try:
                out_cube.data = ndlib.filter_ctype_OMP(out_cube.data,
                                                       filter_ids)
            except ValueError as ve:
                raise SpdbError(
                    'filter_ids probably not convertible to numpy uint64 array: {}'
                    .format(ve), ErrorCodes.DATATYPE_MISMATCH) from ve
            except:
                raise SpdbError('unknown error filtering cutout',
                                ErrorCodes.SPDB_ERROR)

        return out_cube
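A hedged usage sketch of cutout, mirroring how the tests in this collection call it (sp and resource are the same illustrative names used there):

# Read a 128 x 128 x 16 region starting at (0, 0, 0) at resolution 0 with the
# default time range [0, 1). Per the docstring, corner=3 and extent=2 in 1D
# would return the values at positions 3 and 4.
cube = sp.cutout(resource, corner=(0, 0, 0), extent=(128, 128, 16), resolution=0)
data = cube.data    # numpy array with time samples packed in (t, z, y, x) order

# Optionally bypass the cache and read straight from S3:
cube_direct = sp.cutout(resource, (0, 0, 0), (128, 128, 16), 0, no_cache=True)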
Example #6
    def morton(self):
        return ndlib.XYZMorton((self.x, self.y, self.z))
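ndlib.XYZMorton and ndlib.MortonXYZ, used throughout these examples, form an invertible pair; a quick round-trip sketch (assuming MortonXYZ returns a plain [x, y, z] list, as Example #1 treats it):

xyz = [5, 10, 3]
morton_id = ndlib.XYZMorton(xyz)            # interleave the x, y, z bits into a single index
assert ndlib.MortonXYZ(morton_id) == xyz    # decoding recovers the original coordinates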
Example #7
    def test_sqs_watcher_send_message(self):
        """Inject message into queue and test that SqsWatcher kicks off a lambda and writes cuboid to s3."""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config,
                       self.object_store_config)

        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(
            self.resource.get_lookup_key(), 0)
        morton_idx = ndlib.XYZMorton([0, 0, 0])
        t = 0
        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(
            base_write_cuboid_key, t, morton_idx,
            cube1.to_blosc_by_time_index(t))

        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {
            "config": self.config_data,
            "write_cuboid_key": write_cuboid_key,
            "lambda-name": "s3_flush",
            "resource": self.resource.to_dict()
        }

        response = sqs.send_message(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            MessageBody=json.dumps(msg_data))
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200

        watcher = SqsWatcher(self.lambda_data)
        #  verify_queue() needs to be run multiple times to verify that the queue is not changing;
        #  only then does it send off a lambda message.
        time.sleep(5)
        watcher.verify_queue()
        time.sleep(5)
        lambdas_invoked = watcher.verify_queue()
        if lambdas_invoked < 1:
            time.sleep(5)
            watcher.verify_queue()
        time.sleep(15)

        client = boto3.client('sqs', region_name=get_region())
        response = client.get_queue_attributes(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            AttributeNames=[
                'ApproximateNumberOfMessages',
                'ApproximateNumberOfMessagesNotVisible'
            ])
        https_status_code = response['ResponseMetadata']['HTTPStatusCode']
        queue_count = int(
            response['Attributes']['ApproximateNumberOfMessages'])
        # test that the queue count is now 0
        assert queue_count == 0

        s3 = boto3.client('s3', region_name=get_region())
        objects_list = s3.list_objects(
            Bucket=self.object_store_config['cuboid_bucket'])
        # Test that the bucket listing includes some Contents.
        assert "Contents" in objects_list.keys()