def create_collections(collections):
    """Open or create child collections under the DGOpenData catalog.

    For each input collection dict: if a child with that id already exists
    it is opened; otherwise a new Collection is created, attached to the
    DGOpenData catalog, and the catalog is saved immediately.

    Returns a dict mapping collection id -> Collection object.
    """
    dg_collection = Collection.open(
        os.path.join(root_url, 'DGOpenData', 'catalog.json'))
    # Ids of the children already attached to the catalog; the id is the
    # second-to-last path component of each child link.
    existing_ids = [link.split('/')[-2]
                    for link in dg_collection.links(rel='child')]
    opened = {}
    for coll in collections:
        cid = coll['id']
        if cid in existing_ids:
            print("Opening existing collection: {}".format(cid))
            opened[cid] = Collection.open(
                os.path.join(root_url, 'DGOpenData', cid, 'catalog.json'))
        else:
            print("Creating new collection: {}".format(cid))
            created = Collection(coll)
            dg_collection.add_catalog(created)
            opened[cid] = created
            # Persist the parent catalog after each new child is added.
            dg_collection.save()
    return opened
def create_project_collections(self, projects, ds_name):
    """Attach a Collection for each GeoJSON feature to its year catalog.

    Reads the GeoJSON file at `projects`; for every feature with a
    temporal start value, opens the catalog for that start year under
    `self.root/ds_name/<year>/` and adds the feature as a Collection.
    """
    with open(projects, 'r') as geoj:
        data = json.load(geoj)
    for feat in data['features']:
        start = feat['extent']['temporal'][0]
        # Features without a temporal start cannot be placed in a year
        # catalog, so they are skipped.
        if not start:
            continue
        year = start.split('-')[0]
        year_cat = Catalog.open(
            os.path.join(self.root, ds_name, year, 'catalog.json'))
        year_cat.add_catalog(Collection(feat))
        year_cat.save()
def toSTAC(self, collectionDir: str) -> Collection:
    """Build a STAC Collection spec for this aggregation.

    Opens the aggregation's dataset to read its dimensions, assembles a
    metadata dict, and returns a Collection targeting a JSON file named
    after the aggregation id inside `collectionDir`.
    """
    dataset = Dataset(self.filePath())
    # Map each dimension name to its size.
    dims = {}
    for name, dim in dataset.dimensions.items():
        dims[name] = dim.size
    # The aggregation id is the directory portion of the base path.
    aid = os.path.dirname(self.base)
    stacFile = os.path.join(collectionDir, f"{aid}.json")
    print(f"Generating STAC spec for agg {aid} at '{stacFile}'")
    properties = dict(
        nFiles=self.nFiles,
        nTs=self.nTs,
        paths=self.paths,
        vars=self.vars,
        dims=dims,
    )
    metadata = dict(
        id=aid,
        description="",
        location=self.base,
        extent="",
        properties=properties,
    )
    return Collection(data=metadata, filename=stacFile)
def _update_spatial_extent(coll, item):
    """Grow coll's spatial extent [W, S, E, N] to include item's bbox.

    If the collection has no usable spatial extent yet, it is seeded
    from this item's bbox.
    """
    try:
        extent = coll.extent['spatial']
        bbox = item['bbox']
        # Min bounds (west, south) shrink; max bounds (east, north) grow.
        # The original compared the max bounds with '<', which could
        # never expand the extent eastward/northward.
        if bbox[0] < extent[0]:
            extent[0] = bbox[0]
        if bbox[1] < extent[1]:
            extent[1] = bbox[1]
        if bbox[2] > extent[2]:
            extent[2] = bbox[2]
        if bbox[3] > extent[3]:
            extent[3] = bbox[3]
    except (KeyError, IndexError, TypeError):
        coll.extent['spatial'] = item['bbox']


def _update_temporal_extent(coll, item):
    """Widen coll's temporal extent [min, max] to include item's datetime.

    If the collection has no usable temporal extent yet, it is seeded
    as [item datetime, item datetime].
    """
    item_datetime = item['properties']['datetime']
    try:
        item_dt = load_datetime(item_datetime)
        min_dt = load_datetime(coll.extent['temporal'][0])
        max_dt = load_datetime(coll.extent['temporal'][1])
        if item_dt < min_dt:
            coll.extent['temporal'][0] = item_datetime
        if item_dt > max_dt:
            # The original read item['properites'] (typo) here, which
            # always raised and reset the extent instead of updating it.
            coll.extent['temporal'][1] = item_datetime
    except (KeyError, IndexError, TypeError, ValueError):
        coll.extent['temporal'] = [item_datetime, item_datetime]


def build_stac_catalog(id_list=None, verbose=False):
    """Scrape NOAA Storm imagery, build a STAC catalog, and sync it to S3.

    Steps: run the NoaaStormCatalog scraper (optionally restricted to
    `id_list`), mirror the remote root and NOAAStorm catalogs into a
    temp dir, create one collection per scraped event, sort the scraped
    archives by type, download them, build items (updating each
    collection's spatial/temporal extent), then upload the catalog and
    thumbnails to S3 and clean up.
    """
    prefix = '/data/'
    tempdir = tempfile.mkdtemp(prefix=prefix)
    tempthumbs = tempfile.mkdtemp(prefix=prefix)
    print("Catalog tempdir: {}".format(tempdir))
    print("Thumbnails tempdir: {}".format(tempthumbs))

    NoaaStormCatalog.verbose = verbose

    print("Running web scraper.")
    with ScrapyRunner(NoaaStormCatalog) as runner:
        scraped_items = list(runner.execute(ids=id_list))
        collections = scraped_items.pop(0)
        # Second element is the scraped item count; not needed here.
        scraped_items.pop(0)
        collections = create_collections(collections, scraped_items, id_list)

        # Build stac catalog locally, mirroring the remote root.
        root_catalog = Catalog.open(os.path.join(ROOT_URL, 'catalog.json'))
        root_catalog.save_as(filename=os.path.join(tempdir, 'catalog.json'))

        # NOAA Storm catalog
        os.mkdir(os.path.join(tempdir, 'NOAAStorm'))
        noaa_storm_cat = Catalog.open(
            os.path.join(ROOT_URL, 'NOAAStorm', 'catalog.json'))
        noaa_storm_cat.save_as(
            filename=os.path.join(tempdir, 'NOAAStorm', 'catalog.json'))

        print("Creating collections.")
        d = {}
        for collection in collections:
            coll = Collection(collection)
            noaa_storm_cat.add_catalog(coll)
            d.update({collection['id']: coll})

        # Setup directories for thumbnails, one per collection.
        thumbdir = os.path.join(tempthumbs, 'thumbnails')
        os.mkdir(thumbdir)
        for coll in d.values():
            coll_dir = os.path.join(thumbdir, coll.id)
            if not os.path.exists(coll_dir):
                os.mkdir(coll_dir)

        # Sort assets by archive type (suffix determines the wrapper class).
        archive_assets = []
        for item in scraped_items:
            if 'archive' not in item:
                continue
            event_thumbdir = os.path.join(thumbdir, d[item['event_name']].id)
            if item['archive'].endswith('_RGB.tar'):
                archive_assets.append(RGBArchive(item, event_thumbdir))
            elif item['archive'].endswith(('GCS_NAD83.tar', 'GCS_NAD83.zip')):
                archive_assets.append(JpegTilesArchive(item, event_thumbdir))
            elif item['archive'].endswith(('Oblique.tar', 'Oblique.zip')):
                archive_assets.append(ObliqueArchive(item, event_thumbdir))
            else:
                print("Found a JPG with disconnected world file")

        # Download archives
        download_archives(archive_assets, prefix)

        print("Creating items and thumbnails.")
        # Add items, growing each collection's extents as we go.
        for item in build_stac_items(archive_assets):
            coll = d[item['collection']]
            coll.add_item(Item(item), path='${date}', filename='${id}')
            _update_spatial_extent(coll, item)
            _update_temporal_extent(coll, item)

        # Upload catalog to S3. NOTE(review): shell=True with interpolated
        # paths — safe here because tempdir comes from tempfile.mkdtemp,
        # but a list-args subprocess.run would be more robust.
        print("Uploading catalog to S3.")
        subprocess.call(
            f"aws s3 sync {tempdir} s3://cognition-disaster-data/",
            shell=True)

        print("Uploading thumbnails to S3.")
        # Upload thumbnails to S3
        subprocess.call(
            f"aws s3 sync {thumbdir} s3://cognition-disaster-data/thumbnails/",
            shell=True)

        cleanup(prefix)
def collection(cls, cid):
    """ Get a Collection record """
    # Query the API's collections endpoint and wrap the response.
    endpoint = op.join(config.API_URL, 'collections', cid)
    record = cls.query(url=endpoint)
    return Collection(record)
def collection(self, cid, headers=None):
    """ Get a Collection record """
    # Resolve the endpoint for this collection id and wrap the response.
    endpoint = urljoin(self.url, 'collections/%s' % cid)
    record = self.query(url=endpoint, headers=headers)
    return Collection(record)