def test_get_s3_public_url(self):
    """get_s3_signed_url returns (url, None) when no AWS credentials are set.

    With no access keys in the environment the helper cannot sign, so it
    should hand back the original URL and a None headers value.
    """
    envs = dict(os.environ)
    # Drop any credentials so the URL comes back unsigned; pop() with a
    # default avoids the check-then-delete dance.
    os.environ.pop('AWS_ACCESS_KEY_ID', None)
    os.environ.pop('AWS_BUCKET_ACCESS_KEY_ID', None)
    try:
        url = utils.get_s3_signed_url(self.remote_url)
        assert len(url) == 2
        assert url[0] == self.remote_url
        assert url[1] is None
    finally:
        # Restore the original environment even if an assertion fails,
        # so a failing test cannot pollute later tests in the process.
        os.environ.clear()
        os.environ.update(envs)
def test_get_s3_signed_url(self):
    """get_s3_signed_url returns a two-element (url, headers) pair."""
    signed = utils.get_s3_signed_url(self.remote_url)
    assert len(signed) == 2
def add_items(catalog, records, start_date=None, end_date=None, s3meta=False,
              prefix=None, publish=None):
    """Stream records into the 'sentinel-2-l1c' collection with a transform.

    Keyword arguments:
    start_date -- Process this date and after
    end_date -- Process this date and earlier
    s3meta -- Retrieve metadata from s3 rather than Sinergise URL (roda)
    prefix -- Only process records whose 'path' starts with this prefix
    publish -- SNS topic ARN; each ingested Item is published to it
    """
    # use existing collection or create new one if it doesn't exist
    cols = {c.id: c for c in catalog.collections()}
    if 'sentinel-2-l1c' not in cols:
        catalog.add_catalog(_collection)
        cols = {c.id: c for c in catalog.collections()}
    collection = cols['sentinel-2-l1c']

    client = None
    if publish:
        # ARN format is arn:aws:sns:<region>:<account>:<topic>, so the
        # region is the 4th colon-separated field.
        parts = publish.split(':')
        client = boto3.client('sns', region_name=parts[3])

    duration = []
    scanned = 0  # total records seen; avoids NameError on empty input
    # iterate through records
    for i, record in enumerate(records):
        start = datetime.now()
        scanned = i + 1
        if i % 50000 == 0:
            logger.info('%s: Scanned %s records' % (start, str(i)))
        dt = record['datetime'].date()
        # skip records outside the requested prefix / date window before
        # doing any further work
        if prefix is not None and not record['path'].startswith(prefix):
            continue
        if (start_date is not None and dt < start_date) or \
                (end_date is not None and dt > end_date):
            continue
        if s3meta:
            url = op.join(SETTINGS['s3_url'], record['path'])
        else:
            url = op.join(SETTINGS['roda_url'], record['path'])
        try:
            if s3meta:
                # direct bucket access requires a signed, requester-pays GET
                signed_url, headers = utils.get_s3_signed_url(
                    url, requestor_pays=True)
                resp = requests.get(signed_url, headers=headers)
                metadata = json.loads(resp.text)
            else:
                metadata = read_remote(url)
            item = transform(metadata)
        except Exception as err:
            logger.error('Error creating STAC Item %s: %s' %
                         (record['path'], err))
            continue
        try:
            collection.add_item(item, path=SETTINGS['path_pattern'],
                                filename=SETTINGS['fname_pattern'])
            if client:
                client.publish(TopicArn=publish, Message=json.dumps(item.data))
            duration.append((datetime.now() - start).total_seconds())
            logger.info('Ingested %s in %s' % (item.filename, duration[-1]))
        except Exception as err:
            logger.error('Error adding %s: %s' % (item.id, err))
    # np.mean/np.std on an empty list warn and return nan, and the original
    # summary raised NameError when records was empty — guard both cases.
    if duration:
        logger.info('Read in %s records averaging %4.2f sec (%4.2f stddev)' %
                    (scanned, np.mean(duration), np.std(duration)))
    else:
        logger.info('Read in %s records' % scanned)