def generate_scenes(self):
    """Create a Scene and associated Image for each GeoTiff in self.s3_path

    For each file URL in self.files: download it from S3 into a temp dir,
    add overviews, convert it to a Cloud-Optimized GeoTiff (COG), upload
    the COG, and yield a Scene whose single Image points at the upload.

    Returns:
        Generator of Scenes
    """
    s3 = boto3.resource('s3')
    for infile in self.files:
        # NOTE(review): this comment looks stale — the code below uses a
        # `with get_tempdir()` context manager, not try/finally. Original text:
        # We can't use the temp file as a context manager because it'll be opened/closed multiple
        # times and by default is deleted when it's closed. So we use try/finally to ensure that
        # it gets cleaned up.
        bucket_name, key = s3_bucket_and_key_from_url(infile)
        filename = os.path.basename(key)
        logger.info('Downloading %s => %s', infile, filename)
        bucket = s3.Bucket(bucket_name)
        # Temp dir (and everything downloaded/converted inside it) lives only
        # for this loop iteration.
        with get_tempdir() as tempdir:
            tmp_fname = os.path.join(tempdir, filename)
            bucket.download_file(key, tmp_fname)
            cog.add_overviews(tmp_fname)
            cog_path = cog.convert_to_cog(tmp_fname, tempdir)
            # Scene name is the filename without its extension.
            scene = self.create_geotiff_scene(tmp_fname, os.path.splitext(filename)[0])
            # upload_tifs returns a list of uploaded locations; we uploaded
            # exactly one COG, so take the first.
            scene.ingestLocation = upload_tifs([cog_path], self.owner, scene.id)[0]
            images = [self.create_geotiff_image(
                tmp_fname, urllib.unquote(scene.ingestLocation), scene, cog_path
            )]
            scene.thumbnails = []
            scene.images = images
            yield scene
def generate_scenes(self):
    """Yield a Scene (with one attached Image) for every GeoTiff in self.s3_path.

    Each source file is fetched from S3 into a scratch directory, given
    overviews, converted to a Cloud-Optimized GeoTiff, and re-uploaded;
    the yielded Scene's ingestLocation points at that upload.

    Returns:
        Generator of Scenes
    """
    s3 = boto3.resource('s3')
    for source_url in self.files:
        src_bucket_name, src_key = s3_bucket_and_key_from_url(source_url)
        base_name = os.path.basename(src_key)
        logger.info('Downloading %s => %s', source_url, base_name)
        src_bucket = s3.Bucket(src_bucket_name)
        # Scratch space exists only for this iteration; the context manager
        # removes the directory and all intermediate files on exit.
        with get_tempdir() as workdir:
            local_path = os.path.join(workdir, base_name)
            src_bucket.download_file(src_key, local_path)
            cog.add_overviews(local_path)
            cog_path = cog.convert_to_cog(local_path, workdir)
            # The scene takes its name from the file, minus the extension.
            scene_name = os.path.splitext(base_name)[0]
            scene = self.create_geotiff_scene(local_path, scene_name)
            uploaded_locations = upload_tifs([cog_path], self.owner, scene.id)
            scene.ingestLocation = uploaded_locations[0]
            image = self.create_geotiff_image(
                local_path, urllib.unquote(scene.ingestLocation), scene, cog_path)
            scene.thumbnails = []
            scene.images = [image]
            yield scene
def fetch_image(location, filename, local_dir):
    """Download an image from S3 into local_dir under the given filename.

    Args:
        location (str): s3 URL of the image to fetch
        filename (str): name to give the downloaded file
        local_dir (str): directory to write the file into
    """
    bucket, key = s3_bucket_and_key_from_url(location)
    # Sentinel buckets are requester-pays, so the request must opt in.
    if bucket.startswith('sentinel'):
        extra_kwargs = {'RequestPayer': 'requester'}
    else:
        extra_kwargs = {}
    # both sentinel and landsat have these uris in bucket.s3.amazonaws.com/...,
    # so bucket and key from url does a bad job splitting correctly. follow up
    # by splitting on '.' and taking the first one
    bucket = bucket.split('.')[0]
    logger.info('Fetching image from bucket %s with key %s', bucket, key)
    dst = os.path.join(local_dir, filename)
    # BUGFIX: open in binary mode ('wb'), not text mode ('w'). The S3 body is
    # raw image bytes; text mode corrupts them via newline translation on
    # Windows and raises a TypeError on Python 3.
    with open(dst, 'wb') as outf:
        outf.write(
            s3client.get_object(Bucket=bucket, Key=key,
                                **extra_kwargs)['Body'].read())
def fetch_image(location, filename, local_dir):
    """Download an image from S3 into local_dir under the given filename.

    Args:
        location (str): s3 URL of the image to fetch
        filename (str): name to give the downloaded file
        local_dir (str): directory to write the file into
    """
    bucket, key = s3_bucket_and_key_from_url(location)
    # Sentinel buckets are requester-pays, so the request must opt in.
    if bucket.startswith('sentinel'):
        extra_kwargs = {'RequestPayer': 'requester'}
    else:
        extra_kwargs = {}
    # both sentinel and landsat have these uris in bucket.s3.amazonaws.com/...,
    # so bucket and key from url does a bad job splitting correctly. follow up
    # by splitting on '.' and taking the first one
    bucket = bucket.split('.')[0]
    logger.info('Fetching image from bucket %s with key %s', bucket, key)
    dst = os.path.join(local_dir, filename)
    # BUGFIX: open in binary mode ('wb'), not text mode ('w'). The S3 body is
    # raw image bytes; text mode corrupts them via newline translation on
    # Windows and raises a TypeError on Python 3.
    with open(dst, 'wb') as outf:
        outf.write(
            s3client.get_object(Bucket=bucket, Key=key,
                                **extra_kwargs)['Body'].read())
def process_jp2000(scene_id, jp2_source):
    """Converts a Jpeg 2000 file to a tif

    Downloads the JP2, runs gdal_translate (nodata + NBITS=16) and gdalwarp
    (reproject to web mercator), then uploads the result to the DATA_BUCKET.

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Return:
        str: s3 url to the converted tif
    """
    with get_tempdir() as temp_dir:
        s3client = boto3.client('s3')
        in_bucket, in_key = s3_bucket_and_key_from_url(jp2_source)
        # URL splitting leaves the region suffix on the bucket name; strip it.
        in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
        fname_part = os.path.split(in_key)[-1]
        out_bucket = os.getenv('DATA_BUCKET')
        out_key = os.path.join('sentinel-2-tifs', scene_id,
                               fname_part.replace('.jp2', '.tif'))
        jp2_fname = os.path.join(temp_dir, fname_part)
        temp_tif_fname = jp2_fname.replace('.jp2', '-temp.tif')
        tif_fname = jp2_fname.replace('.jp2', '.tif')
        # Explicitly setting nbits is necessary because geotrellis only likes
        # powers of 2, and for some reason the value on the jpeg 2000 files
        # after translation is 15
        temp_translate_cmd = [
            'gdal_translate',
            '-a_nodata', '0',  # set 0 to nodata value
            '-co', 'NBITS=16',  # explicitly set nbits = 16
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            jp2_fname,
            temp_tif_fname
        ]
        warp_cmd = [
            'gdalwarp',
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            '-t_srs', 'epsg:3857',
            temp_tif_fname,
            tif_fname
        ]
        dst_url = geotiff_io.s3_url(out_bucket, out_key)
        # Download the original jp2000 file (sentinel buckets are
        # requester-pays, hence RequestPayer).
        logger.info('Downloading JPEG2000 file locally (%s/%s => %s)',
                    in_bucket, in_key, jp2_fname)
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key,
                                       RequestPayer='requester')['Body']
            src.write(body.read())
        logger.info('Running translate command to convert to TIF')
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(temp_translate_cmd)
        logger.info('Running warp command to convert to web mercator')
        subprocess.check_call(warp_cmd)
        # Upload the converted tif
        logger.info('Uploading TIF to S3 (%s => %s/%s)',
                    tif_fname, out_bucket, out_key)
        # BUGFIX: open in binary mode ('rb'), not text mode ('r') — a TIF is
        # binary data; text mode corrupts the bytes on Windows and fails to
        # decode on Python 3. The download above already correctly uses 'wb'.
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)
        # Return the s3 url to the converted image
        return dst_url