Exemplo n.º 1
0
def create_collections(collections):
    """Create or open one collection per entry under the DGOpenData catalog.

    The DGOpenData root catalog is opened from ``root_url``. Each entry of
    ``collections`` (a mapping with at least an ``'id'`` key) is either
    registered as a new child of that catalog, or, when a child with the same
    id is already linked, opened from its existing ``catalog.json``.

    Args:
        collections: iterable of collection definitions (dict-like, keyed
            by ``'id'``) that are passed to ``Collection(...)`` as-is.

    Returns:
        dict mapping each collection id to its ``Collection`` object.
    """
    dg_root = os.path.join(root_url, 'DGOpenData')
    dg_collection = Collection.open(os.path.join(dg_root, 'catalog.json'))

    # Child links look like ".../<name>/catalog.json"; the second-to-last
    # path segment is the child's name.
    existing_names = []
    for link in dg_collection.links(rel='child'):
        existing_names.append(link.split('/')[-2])

    result = {}
    for spec in collections:
        coll_id = spec['id']

        if coll_id in existing_names:
            print("Opening existing collection: {}".format(coll_id))
            result[coll_id] = Collection.open(
                os.path.join(dg_root, coll_id, 'catalog.json'))
            continue

        print("Creating new collection: {}".format(coll_id))
        created = Collection(spec)
        dg_collection.add_catalog(created)
        result[coll_id] = created
        dg_collection.save()

    return result
Exemplo n.º 2
0
 def create_project_collections(self, projects, ds_name):
     """Add one collection per project feature to its year catalog.

     Reads the JSON file at ``projects`` and walks its ``'features'`` list.
     A feature whose first temporal-extent entry is empty/None is skipped.
     For every other feature the year is taken from the text before the
     first ``'-'`` of that entry, the catalog at
     ``<self.root>/<ds_name>/<year>/catalog.json`` is opened, the feature is
     added to it as a ``Collection``, and the catalog is saved.

     Args:
         projects: path to the JSON file holding the project features.
         ds_name: dataset directory name below ``self.root``.
     """
     with open(projects, 'r') as geoj:
         features = json.load(geoj)['features']

     for feat in features:
         start = feat['extent']['temporal'][0]
         if not start:
             # No start date means no year catalog to file this under.
             continue

         year = start.split('-')[0]
         catalog_path = os.path.join(self.root, ds_name, year, 'catalog.json')
         year_cat = Catalog.open(catalog_path)
         year_cat.add_catalog(Collection(feat))
         year_cat.save()
Exemplo n.º 3
0
 def toSTAC(self, collectionDir: str) -> Collection:
     """Build a ``Collection`` describing this aggregation.

     Opens the dataset at ``self.filePath()`` to read its dimension sizes,
     derives the aggregation id from the directory part of ``self.base``,
     and assembles a metadata record whose target file is
     ``<collectionDir>/<id>.json``.

     Args:
         collectionDir: directory in which the spec file is located.

     Returns:
         A ``Collection`` built from the metadata, with ``filename`` set to
         the spec path. Nothing in this method writes that file.
     """
     # NOTE(review): the dataset handle is not closed here - confirm whether
     # that is intentional.
     dataset = Dataset(self.filePath())
     dims = {}
     for dim_name, dim in dataset.dimensions.items():
         dims[dim_name] = dim.size

     agg_id = os.path.dirname(self.base)
     spec_path = os.path.join(collectionDir, f"{agg_id}.json")
     print(f"Generating STAC spec for agg {agg_id} at '{spec_path}'")

     metadata = {
         'id': agg_id,
         'description': "",
         'location': self.base,
         'extent': "",
         'properties': {
             'nFiles': self.nFiles,
             'nTs': self.nTs,
             'paths': self.paths,
             'vars': self.vars,
             'dims': dims,
         },
     }
     return Collection(data=metadata, filename=spec_path)
Exemplo n.º 4
0
def _grow_spatial_extent(extent, bbox):
    """Expand ``extent['spatial']`` in place so that it also covers ``bbox``.

    Both sequences are treated as ``[xmin, ymin, xmax, ymax]``: the first two
    entries only ever decrease and the last two only ever increase. If the
    extent has no usable spatial entry yet (missing key, wrong length,
    non-comparable values, ...) it is initialised from ``bbox``.
    """
    try:
        spatial = extent['spatial']
        if bbox[0] < spatial[0]:
            spatial[0] = bbox[0]
        if bbox[1] < spatial[1]:
            spatial[1] = bbox[1]
        # The upper corner must grow, so these two comparisons use ">".
        if bbox[2] > spatial[2]:
            spatial[2] = bbox[2]
        if bbox[3] > spatial[3]:
            spatial[3] = bbox[3]
    except Exception:
        # Best-effort initialisation. Store a copy so that later in-place
        # growth of the extent never mutates the item's own bbox list.
        extent['spatial'] = list(bbox) if bbox is not None else None


def _grow_temporal_extent(extent, item_datetime):
    """Widen ``extent['temporal']`` in place to include ``item_datetime``.

    ``extent['temporal']`` is treated as ``[min, max]`` datetime strings that
    ``load_datetime`` can parse. If the current value cannot be read or
    parsed, it is initialised to ``[item_datetime, item_datetime]``.
    """
    try:
        item_dt = load_datetime(item_datetime)
        temporal = extent['temporal']
        min_dt = load_datetime(temporal[0])
        max_dt = load_datetime(temporal[1])
        if item_dt < min_dt:
            temporal[0] = item_datetime
        if item_dt > max_dt:
            temporal[1] = item_datetime
    except Exception:
        extent['temporal'] = [item_datetime, item_datetime]


def build_stac_catalog(id_list=None, verbose=False):
    """Scrape, build a STAC catalog locally, and sync it to S3.

    Steps, in order:

    1. Create two temporary directories under ``/data/`` (catalog and
       thumbnails).
    2. Run the ``NoaaStormCatalog`` scraper; the first scraped entry is the
       collection list, the second is discarded, the rest are items.
    3. Copy the root catalog and the ``NOAAStorm`` catalog from ``ROOT_URL``
       into the catalog directory and attach every collection to the latter.
    4. Wrap each scraped item that has an ``'archive'`` in the archive class
       matching its file suffix, then download the archives.
    5. Add every built STAC item to its collection and widen that
       collection's spatial and temporal extent to include the item.
    6. ``aws s3 sync`` the catalog and thumbnail directories to the
       ``cognition-disaster-data`` bucket and call ``cleanup``.

    Args:
        id_list: forwarded to the scraper (``ids=``) and to
            ``create_collections``.
        verbose: assigned to ``NoaaStormCatalog.verbose``.
    """
    prefix = '/data/'
    tempdir = tempfile.mkdtemp(prefix=prefix)
    tempthumbs = tempfile.mkdtemp(prefix=prefix)

    print("Catalog tempdir: {}".format(tempdir))
    print("Thumbnails tempdir: {}".format(tempthumbs))

    NoaaStormCatalog.verbose = verbose

    # Archive class to use for each recognised archive-name suffix.
    archive_types = (
        (('_RGB.tar',), RGBArchive),
        (('GCS_NAD83.tar', 'GCS_NAD83.zip'), JpegTilesArchive),
        (('Oblique.tar', 'Oblique.zip'), ObliqueArchive),
    )

    print("Running web scraper.")
    with ScrapyRunner(NoaaStormCatalog) as runner:
        scraped_items = list(runner.execute(ids=id_list))
        collections = scraped_items.pop(0)
        # The second entry is not an item and is not used here; it is only
        # removed so that the remainder of the list holds items.
        scraped_items.pop(0)

        collections = create_collections(collections, scraped_items, id_list)

        # Build stac catalog locally
        root_catalog = Catalog.open(os.path.join(ROOT_URL, 'catalog.json'))
        root_catalog.save_as(filename=os.path.join(tempdir, 'catalog.json'))

        # NOAA Storm catalog
        os.mkdir(os.path.join(tempdir, 'NOAAStorm'))
        noaa_storm_cat = Catalog.open(
            os.path.join(ROOT_URL, 'NOAAStorm', 'catalog.json'))
        noaa_storm_cat.save_as(
            filename=os.path.join(tempdir, 'NOAAStorm', 'catalog.json'))

        print("Creating collections.")
        d = {}
        for collection in collections:
            coll = Collection(collection)
            noaa_storm_cat.add_catalog(coll)
            d[collection['id']] = coll

        # Setup directories for thumbnails
        thumbdir = os.path.join(tempthumbs, 'thumbnails')
        os.mkdir(thumbdir)
        for coll in d.values():
            coll_dir = os.path.join(thumbdir, coll.id)
            if not os.path.exists(coll_dir):
                os.mkdir(coll_dir)

        # Sort assets
        archive_assets = []
        for item in scraped_items:
            if 'archive' not in item:
                print("Found a JPG with disconnected world file")
                continue
            # Archives with an unrecognised suffix are silently skipped.
            for suffixes, archive_cls in archive_types:
                if item['archive'].endswith(suffixes):
                    archive_assets.append(
                        archive_cls(
                            item,
                            os.path.join(thumbdir, d[item['event_name']].id)))
                    break

        # Download archives
        download_archives(archive_assets, prefix)

        print("Creating items and thumbnails.")
        # Add items
        # NOTE(review): the extent changes below are made on the in-memory
        # collection after add_item(); no explicit save follows in this
        # function - confirm they are persisted before the S3 sync.
        for item in build_stac_items(archive_assets):
            coll = d[item['collection']]
            coll.add_item(Item(item), path='${date}', filename='${id}')

            # Update spatial extent of collection
            _grow_spatial_extent(coll.extent, item['bbox'])

            # Update temporal extent of collection
            _grow_temporal_extent(coll.extent, item['properties']['datetime'])

    # Upload catalog to S3
    # NOTE(review): the exit status of both sync commands is ignored, so
    # cleanup() runs even if an upload failed.
    print("Uploading catalog to S3.")
    subprocess.call(f"aws s3 sync {tempdir} s3://cognition-disaster-data/",
                    shell=True)

    print("Uploading thumbnails to S3.")
    # Upload thumbnails to S3
    subprocess.call(
        f"aws s3 sync {thumbdir} s3://cognition-disaster-data/thumbnails/",
        shell=True)

    cleanup(prefix)
Exemplo n.º 5
0
 def collection(cls, cid):
     """ Get a Collection record

     Queries ``<config.API_URL>/collections/<cid>`` through ``cls.query``
     and wraps the response in a ``Collection``.

     Args:
         cid: id of the collection to fetch.

     Returns:
         ``Collection`` built from the query response.
     """
     # URLs always use forward slashes; os.path.join would insert
     # backslashes on Windows, so join with posixpath explicitly.
     import posixpath
     url = posixpath.join(config.API_URL, 'collections', cid)
     return Collection(cls.query(url=url))
Exemplo n.º 6
0
 def collection(self, cid, headers=None):
     """ Fetch a single collection by id and return it as a Collection.

     The request URL is ``collections/<cid>`` resolved against ``self.url``
     with ``urljoin``; ``headers`` is passed through to ``self.query``.
     """
     endpoint = 'collections/%s' % cid
     record = self.query(url=urljoin(self.url, endpoint), headers=headers)
     return Collection(record)