def _upload_to_zoo(exp_cfg: dict, collect_dir: str, upload_dir: str) -> None:
    """Upload the collected eval, model bundle and sample predictions.

    Each artifact is looked up under ``collect_dir`` and sent to
    ``upload_dir``: files go through ``upload_or_copy`` and directories
    through ``sync_to_dir``.

    Args:
        exp_cfg: Not read by this function.
        collect_dir: Directory containing ``eval/eval.json``,
            ``bundle/model-bundle.zip`` and ``sample-predictions``.
        upload_dir: Destination that receives ``eval.json``,
            ``model-bundle.zip`` and ``sample-predictions``.

    Raises:
        ValueError: If a source is neither a file nor a directory.
    """
    # Keep each source next to its destination so the two can never get out
    # of sync (the original used two parallel dicts plus an assert).
    uris = {
        'eval': (join(collect_dir, 'eval', 'eval.json'),
                 join(upload_dir, 'eval.json')),
        'bundle': (join(collect_dir, 'bundle', 'model-bundle.zip'),
                   join(upload_dir, 'model-bundle.zip')),
        'sample_predictions': (join(collect_dir, 'sample-predictions'),
                               join(upload_dir, 'sample-predictions')),
    }

    for k, (src, dst) in uris.items():
        if not exists(src):
            console_failure(f'{k}: {src} not found.')

        if isfile(src):
            console_info(f'Uploading {k} file: {src} to {dst}.')
            upload_or_copy(src, dst)
        elif isdir(src):
            console_info(f'Syncing {k} dir: {src} to {dst}.')
            sync_to_dir(src, dst)
        else:
            raise ValueError(
                f'{k}: {src} is neither a file nor a directory.')
def test_download_if_needed_local(self):
    """download_if_needed hands back a local path unchanged."""
    # Nothing has been written to the path yet, so reading it must fail.
    self.assertRaises(NotReadableError, file_to_str, self.local_path)

    str_to_file(self.content_str, self.local_path)
    # Source and destination are deliberately the same local path.
    upload_or_copy(self.local_path, self.local_path)

    resolved_path = download_if_needed(self.local_path, self.tmp_dir.name)
    self.assertEqual(resolved_path, self.local_path)
def test_file_exists_s3_true(self):
    """file_exists is True for an S3 object that was just uploaded."""
    local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    make_dir(os.path.dirname(local_file), check_empty=False)
    str_to_file(self.lorem, local_file)

    remote_file = 's3://{}/lorem.txt'.format(self.bucket_name)
    upload_or_copy(local_file, remote_file)

    self.assertTrue(file_exists(remote_file))
def test_copy_to_local(self):
    """Copying between two local paths leaves one entry in the target dir."""
    src_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    dst_file = os.path.join(self.tmp_dir.name, 'yyy', 'ipsum.txt')
    src_dir = os.path.dirname(src_file)
    dst_dir = os.path.dirname(dst_file)

    # Both directories must exist before writing / copying.
    for directory in (src_dir, dst_dir):
        make_dir(directory, check_empty=False)

    str_to_file(self.lorem, src_file)
    upload_or_copy(src_file, dst_file)

    copied = list_paths(dst_dir)
    self.assertEqual(len(copied), 1)
def test_list_paths_s3(self):
    """list_paths on an S3 directory returns the single uploaded object."""
    path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    s3_path = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
    s3_directory = 's3://{}/xxx/'.format(self.bucket_name)
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    str_to_file(self.lorem, path)
    upload_or_copy(path, s3_path)

    # The listing is requested once; the original issued an extra call
    # whose result was thrown away.
    self.assertEqual(len(list_paths(s3_directory)), 1)
def test_last_modified_s3(self):
    """last_modified on an uploaded S3 object yields a datetime."""
    path = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum1.txt')
    s3_path = 's3://{}/lorem1.txt'.format(self.bucket_name)
    directory = os.path.dirname(path)
    make_dir(directory, check_empty=False)

    fs = FileSystem.get_file_system(s3_path, 'r')

    str_to_file(self.lorem, path)
    upload_or_copy(path, s3_path)
    stamp = fs.last_modified(s3_path)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(stamp, datetime.datetime)
def test_download_if_needed_s3(self):
    """Round-trip a file through S3 and reject an unwritable destination."""
    # The object has not been uploaded yet, so reading it must fail.
    self.assertRaises(NotReadableError, file_to_str, self.s3_path)

    str_to_file(self.content_str, self.local_path)
    upload_or_copy(self.local_path, self.s3_path)

    downloaded_path = download_if_needed(self.s3_path, self.tmp_dir.name)
    self.assertEqual(self.content_str, file_to_str(downloaded_path))

    # Uploading to this URI is expected to raise NotWritableError.
    bad_uri = 's3://wrongpath/x.txt'
    self.assertRaises(NotWritableError, upload_or_copy, downloaded_path,
                      bad_uri)
def __exit__(self, type, value, traceback):
    """Zip the sample directory, upload it, and remove the temp directory.

    This writes a zip file for a group of scenes at {output_uri}/{uuid}.zip.
    This method is called once per instance of the chip command.
    A number of instances of the chip command can run simultaneously to
    process chips in parallel. The uuid in the zip path above is what allows
    separate instances to avoid overwriting each others' output.

    Args:
        type: Exception type passed by the ``with`` protocol (unused).
        value: Exception instance passed by the ``with`` protocol (unused).
        traceback: Traceback passed by the ``with`` protocol (unused).

    Returns:
        None, so an exception raised inside the ``with`` body is not
        suppressed.
    """
    try:
        output_path = join(self.tmp_dir_obj.name, 'output.zip')
        zipdir(self.sample_dir, output_path)
        upload_or_copy(output_path, self.output_uri)
    finally:
        # Always remove the temp directory, even if zipping or uploading
        # fails; previously a failure skipped the cleanup.
        self.tmp_dir_obj.cleanup()
def test_file_exists(self):
    """Check file_exists on S3 for a file, its directory and a bare prefix.

    Covers both values of ``include_dir``.
    """
    local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    remote_file = 's3://{}/xxx/lorem.txt'.format(self.bucket_name)
    remote_prefix = 's3://{}/xxx/lorem'.format(self.bucket_name)
    remote_dir = 's3://{}/xxx/'.format(self.bucket_name)

    make_dir(local_file, check_empty=False, use_dirname=True)
    str_to_file(self.lorem, local_file)
    upload_or_copy(local_file, remote_file)

    # The directory counts only when include_dir is set; the file always
    # counts.
    self.assertTrue(file_exists(remote_dir, include_dir=True))
    self.assertTrue(file_exists(remote_file, include_dir=False))

    # A partial key is not a match, nor is a directory without include_dir.
    self.assertFalse(file_exists(remote_prefix, include_dir=True))
    self.assertFalse(file_exists(remote_dir, include_dir=False))

    missing_uri = remote_dir + 'NOTPOSSIBLE'
    self.assertFalse(file_exists(missing_uri, include_dir=False))
def crop_image(image_uri, window, crop_uri):
    """Crop a window out of a raster and save the crop to ``crop_uri``.

    Args:
        image_uri: URI of the source raster; opened with ``rasterio.open``.
        window: Object providing ``rasterio_format()``, ``get_width()`` and
            ``get_height()`` that describes the region to crop.
        crop_uri: Destination URI. The crop is written to a local path
            derived from it inside a temporary directory and then passed to
            ``upload_or_copy``.
    """
    # Read everything needed from the source inside a ``with`` block so the
    # dataset is closed deterministically (it was previously left open).
    with rasterio.open(image_uri) as im_dataset:
        rasterio_window = window.rasterio_format()
        im = im_dataset.read(window=rasterio_window)

        meta = im_dataset.meta
        meta['width'], meta['height'] = (window.get_width(),
                                         window.get_height())
        # Re-anchor the geotransform at the window's origin.
        meta['transform'] = rasterio.windows.transform(
            rasterio_window, im_dataset.transform)
        colorinterp = im_dataset.colorinterp

    with TemporaryDirectory() as tmp_dir:
        crop_path = get_local_path(crop_uri, tmp_dir)
        make_dir(crop_path, use_dirname=True)

        with rasterio.open(crop_path, 'w', **meta) as dst:
            dst.colorinterp = colorinterp
            dst.write(im)

        # Upload after the output dataset is closed and before the temp
        # directory is removed.
        upload_or_copy(crop_path, crop_uri)
def test_copy_to_http(self):
    """Uploading to an http destination raises NotWritableError."""
    local_file = os.path.join(self.tmp_dir.name, 'lorem', 'ipsum.txt')
    http_uri = 'http://localhost/'

    make_dir(os.path.dirname(local_file), check_empty=False)
    str_to_file(self.lorem, local_file)

    with self.assertRaises(NotWritableError):
        upload_or_copy(local_file, http_uri)

    os.remove(local_file)
def create_cog(source_uri,
               dest_uri,
               local_dir,
               block_size=DEFAULT_BLOCK_SIZE,
               resample_method=DEFAULT_RESAMPLE_METHOD,
               compression=DEFAULT_COMPRESSION,
               overviews=None):
    """Run the commands from ``gdal_cog_commands`` and upload their output.

    Args:
        source_uri: URI of the input; fetched into ``local_dir`` with
            ``download_or_copy``.
        dest_uri: URI that receives the generated output file.
        local_dir: Local working directory handed to ``gdal_cog_commands``.
        block_size: Forwarded to ``gdal_cog_commands``.
        resample_method: Forwarded to ``gdal_cog_commands``.
        compression: Forwarded to ``gdal_cog_commands``.
        overviews: Forwarded to ``gdal_cog_commands``.
    """
    source_path = download_or_copy(source_uri, local_dir)

    gdal_options = {
        'block_size': block_size,
        'resample_method': resample_method,
        'compression': compression,
        'overviews': overviews,
    }
    cmds, cog_path = gdal_cog_commands(source_path, local_dir,
                                       **gdal_options)

    # Run the commands in the order they were returned.
    for cmd in cmds:
        run_cmd(cmd)

    upload_or_copy(cog_path, dest_uri)
def write_vector_outputs(self, labels: SemanticSegmentationLabels) -> None:
    """Write vectorized outputs for all configs in self.vector_outputs.

    For each config, the mask of pixels equal to its ``class_id`` is
    optionally denoised, converted to GeoJSON, written to a local path and
    then passed to ``upload_or_copy``. Configs whose ``uri`` is None are
    skipped.

    Args:
        labels: Labels to vectorize.

    Raises:
        ValueError: If a config's mode is neither 'buildings' nor
            'polygons'.
    """
    import mask_to_polygons.vectorification as vectorification
    import mask_to_polygons.processing.denoise as denoise

    # The same for every output, so it is defined once outside the loop.
    def transform(x, y):
        return self.crs_transformer.pixel_to_map((x, y))

    log.info('Writing vector output to disk.')

    label_arr = self._labels_to_full_label_arr(labels)
    with click.progressbar(self.vector_outputs) as bar:
        for i, vo in enumerate(bar):
            if vo.uri is None:
                log.info(f'Skipping VectorOutputConfig at index {i} '
                         'due to missing uri.')
                continue
            local_path = get_local_path(vo.uri, self.tmp_dir)
            denoise_radius = vo.denoise
            mode = vo.get_mode()
            class_mask = (label_arr == vo.class_id).astype(np.uint8)

            if denoise_radius > 0:
                class_mask = denoise.denoise(class_mask, denoise_radius)

            if mode == 'buildings':
                geojson = vectorification.geojson_from_mask(
                    mask=class_mask,
                    transform=transform,
                    mode=mode,
                    min_aspect_ratio=vo.min_aspect_ratio,
                    min_area=vo.min_area,
                    width_factor=vo.element_width_factor,
                    thickness=vo.element_thickness)
            elif mode == 'polygons':
                geojson = vectorification.geojson_from_mask(
                    mask=class_mask, transform=transform, mode=mode)
            else:
                # Without this branch an unknown mode would either hit an
                # unbound ``geojson`` or silently re-write the previous
                # iteration's GeoJSON to this output.
                raise ValueError(
                    f'Unsupported vector output mode {mode!r} '
                    f'at index {i}.')

            str_to_file(geojson, local_path)
            upload_or_copy(local_path, vo.uri)
def save(self, labels):
    """Save labels as a GeoTIFF at self.uri and write any vector outputs.

    The raster has one band of label values, or three bands when
    ``self.class_trans`` is set, in which case each window is converted
    with ``class_to_rgb``. If ``self.vector_output`` is set, a full-extent
    mask is accumulated while writing and then vectorized per config.

    Args:
        labels - (SemanticSegmentationLabels) labels to be saved

    Raises:
        ValueError: If a vector output's mode is neither 'buildings' nor
            'polygons'.
    """
    local_path = get_local_path(self.uri, self.tmp_dir)
    make_dir(local_path, use_dirname=True)

    affine_transform = self.crs_transformer.get_affine_transform()
    crs = self.crs_transformer.get_image_crs()

    dtype = np.uint8
    band_count = 3 if self.class_trans else 1

    # Only allocate the full-extent mask when vector outputs need it.
    mask = (np.zeros((self.extent.ymax, self.extent.xmax), dtype=np.uint8)
            if self.vector_output else None)

    # https://github.com/mapbox/rasterio/blob/master/docs/quickstart.rst
    # https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html
    with rasterio.open(
            local_path,
            'w',
            driver='GTiff',
            height=self.extent.ymax,
            width=self.extent.xmax,
            count=band_count,
            dtype=dtype,
            transform=affine_transform,
            crs=crs) as dataset:
        for window in labels.get_windows():
            label_arr = labels.get_label_arr(window)
            # Clip the window to the extent and trim the array to match.
            window = window.intersection(self.extent)
            label_arr = label_arr[0:window.get_height(),
                                  0:window.get_width()]

            if mask is not None:
                mask[window.ymin:window.ymax,
                     window.xmin:window.xmax] = label_arr

            window = window.rasterio_format()
            if self.class_trans:
                rgb_labels = self.class_trans.class_to_rgb(label_arr)
                for chan in range(3):
                    dataset.write_band(
                        chan + 1, rgb_labels[:, :, chan], window=window)
            else:
                img = label_arr.astype(dtype)
                dataset.write_band(1, img, window=window)

    # The dataset is closed at this point, so the file is complete.
    upload_or_copy(local_path, self.uri)

    if self.vector_output:
        import mask_to_polygons.vectorification as vectorification
        import mask_to_polygons.processing.denoise as denoise

        # Defined once under its own name rather than re-defined per
        # output on top of the affine transform variable.
        def pixel_to_map(x, y):
            return self.crs_transformer.pixel_to_map((x, y))

        for vo in self.vector_output:
            uri = vo.uri
            if not uri:
                # No destination: previously nothing was computed here but
                # str_to_file still ran with an unbound or stale geojson.
                continue

            denoise_radius = vo.denoise
            mode = vo.get_mode()
            class_id = vo.class_id
            class_mask = np.array(mask == class_id, dtype=np.uint8)

            if denoise_radius > 0:
                class_mask = denoise.denoise(class_mask, denoise_radius)

            if mode == 'buildings':
                geojson = vectorification.geojson_from_mask(
                    mask=class_mask,
                    transform=pixel_to_map,
                    mode=mode,
                    min_aspect_ratio=vo.min_aspect_ratio,
                    min_area=vo.min_area,
                    width_factor=vo.element_width_factor,
                    thickness=vo.element_thickness)
            elif mode == 'polygons':
                geojson = vectorification.geojson_from_mask(
                    mask=class_mask, transform=pixel_to_map, mode=mode)
            else:
                raise ValueError(
                    f'Unsupported vector output mode {mode!r}.')

            str_to_file(geojson, uri)
def _zxy2geotiff(tile_schema, zoom, bounds, output_uri, make_cog=False):
    """Generates a GeoTIFF of a bounded region from a ZXY tile server.

    Args:
        tile_schema: (str) the URI schema for zxy tiles (ie. a slippy map tile
            server) of the form /tileserver-uri/{z}/{x}/{y}.png. If {-y} is
            used, the tiles are assumed to be indexed using TMS coordinates,
            where the y axis starts at the southernmost point. The URI can be
            for http, S3, or the local file system.
        zoom: (int) the zoom level to use when retrieving tiles
        bounds: (list) a list of length 4 containing min_lat, min_lng,
            max_lat, max_lng
        output_uri: (str) where to save the GeoTIFF. The URI can be for http,
            S3, or the local file system
        make_cog: (bool) if True, the local GeoTIFF is handed to create_cog
            with output_uri as its destination instead of being uploaded
            directly

    Raises:
        ValueError: if min_lat >= max_lat or min_lng >= max_lng
    """
    min_lat, min_lng, max_lat, max_lng = bounds
    if min_lat >= max_lat:
        raise ValueError('min_lat must be < max_lat')
    if min_lng >= max_lng:
        raise ValueError('min_lng must be < max_lng')

    is_tms = False
    if '{-y}' in tile_schema:
        tile_schema = tile_schema.replace('{-y}', '{y}')
        is_tms = True

    # Get range of tiles that cover bounds.
    tile_sz = 256
    t = mercantile.tile(min_lng, max_lat, zoom)
    xmin, ymin = t.x, t.y
    t = mercantile.tile(max_lng, min_lat, zoom)
    xmax, ymax = t.x, t.y

    # The supplied bounds are contained within the "tile bounds" -- ie. the
    # bounds of the set of tiles that covers the supplied bounds. Therefore,
    # we need to crop out the imagery that lies within the supplied bounds.
    # We do this by computing a top, bottom, left, and right offset in pixel
    # units of the supplied bounds against the tile bounds. Getting the offsets
    # in pixel units involves converting lng/lat to web mercator units since we
    # assume that is the CRS of the tiles. These offsets are then used to crop
    # individual tiles and place them correctly into the output raster.
    nw_merc_x, nw_merc_y = lnglat2merc(min_lng, max_lat)
    left_pix_offset, top_pix_offset = merc2pixel(xmin, ymin, zoom, nw_merc_x,
                                                 nw_merc_y)

    se_merc_x, se_merc_y = lnglat2merc(max_lng, min_lat)
    se_left_pix_offset, se_top_pix_offset = merc2pixel(
        xmax, ymax, zoom, se_merc_x, se_merc_y)
    right_pix_offset = tile_sz - se_left_pix_offset
    bottom_pix_offset = tile_sz - se_top_pix_offset

    uncropped_height = tile_sz * (ymax - ymin + 1)
    uncropped_width = tile_sz * (xmax - xmin + 1)
    height = uncropped_height - top_pix_offset - bottom_pix_offset
    width = uncropped_width - left_pix_offset - right_pix_offset

    transform = rasterio.transform.from_bounds(nw_merc_x, se_merc_y,
                                               se_merc_x, nw_merc_y, width,
                                               height)

    # The context manager removes the downloaded tiles and the local GeoTIFF
    # as soon as the upload finishes, including when an error occurs.
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_path = get_local_path(output_uri, tmp_dir)

        with rasterio.open(
                output_path,
                'w',
                driver='GTiff',
                height=height,
                width=width,
                count=3,
                crs='epsg:3857',
                transform=transform,
                dtype=rasterio.uint8) as dataset:
            out_x = 0
            for x in range(xmin, xmax + 1):
                # Only the first/last tile column is cropped horizontally.
                tile_xmin, tile_xmax = 0, tile_sz - 1
                if x == xmin:
                    tile_xmin += left_pix_offset
                if x == xmax:
                    tile_xmax -= right_pix_offset
                window_width = tile_xmax - tile_xmin + 1

                out_y = 0
                for y in range(ymin, ymax + 1):
                    # Only the first/last tile row is cropped vertically.
                    tile_ymin, tile_ymax = 0, tile_sz - 1
                    if y == ymin:
                        tile_ymin += top_pix_offset
                    if y == ymax:
                        tile_ymax -= bottom_pix_offset
                    window_height = tile_ymax - tile_ymin + 1

                    # Convert from xyz to tms if needed.
                    # https://gist.github.com/tmcw/4954720
                    tile_y = (2**zoom) - y - 1 if is_tms else y
                    tile_uri = tile_schema.format(x=x, y=tile_y, z=zoom)
                    tile_path = download_if_needed(tile_uri, tmp_dir)

                    # Close the tile file as soon as its pixels are loaded.
                    with Image.open(tile_path) as tile_img:
                        img = np.array(tile_img)
                    img = img[tile_ymin:tile_ymax + 1,
                              tile_xmin:tile_xmax + 1, :]

                    window = Window(out_x, out_y, window_width,
                                    window_height)
                    # Keep the first three channels and move them to the
                    # front axis for dataset.write.
                    dataset.write(
                        np.transpose(img[:, :, 0:3], (2, 0, 1)),
                        window=window)
                    out_y += window_height
                out_x += window_width

        if make_cog:
            create_cog(output_path, output_uri, tmp_dir)
        else:
            upload_or_copy(output_path, output_uri)