def flow(self):

        with Flow(self.name,
                  storage=self.storage,
                  environment=self.environment) as _flow:
            # download to cache
            nc_sources = [
                download(x, cache_location=self.cache_location)
                for x in self.sources
            ]

            # the first chunk initializes the Zarr store; later chunks append
            first = True
            write_tasks = []
            for source_group in chunked_iterable(nc_sources,
                                                 self.files_per_chunk):
                write_task = combine_and_write(source_group,
                                               self.target_location,
                                               self.concat_dim,
                                               first=first)
                write_tasks.append(write_task)
                first = False
            # consolidate Zarr metadata once all chunks have been written
            cm = consolidate_metadata(self.target_location)

        return _flow
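
A sketch of the helper tasks the flow assumes. download and combine_and_write are defined elsewhere in the module; the bodies below are one plausible shape for them, not the actual pangeo-forge implementations (the fsspec and xarray calls are assumptions):

import os

import fsspec
import xarray as xr
from prefect import task


@task
def download(source_url, cache_location):
    # copy one remote file into the cache and return its cached path
    target = os.path.join(cache_location, os.path.basename(source_url))
    with fsspec.open(source_url, "rb") as src, fsspec.open(target, "wb") as dst:
        dst.write(src.read())
    return target


@task
def combine_and_write(sources, target, concat_dim, first=False):
    # combine one chunk of files along concat_dim; the first chunk
    # creates the Zarr store, later chunks append to it
    open_files = [fsspec.open(url).open() for url in sources]
    ds = xr.open_mfdataset(open_files, combine="nested", concat_dim=concat_dim)
    mapper = fsspec.get_mapper(target)
    if first:
        ds.to_zarr(mapper, mode="w")
    else:
        ds.to_zarr(mapper, mode="a", append_dim=concat_dim)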
Example #2
def main():
    # short_name, version, time_start, time_end, bounding_box, polygon,
    # and filename_filter are module-level CMR search parameters
    url_list = cmr_search(
        short_name,
        version,
        time_start,
        time_end,
        bounding_box=bounding_box,
        polygon=polygon,
        filename_filter=filename_filter,
    )

    # Earthdata credentials for the authenticated downloads below
    credentials = get_credentials(url_list[0])

    tasks = []
    # batch the URLs into groups of 100, one delayed download per group
    for url_group in chunked_iterable(url_list, 100):
        tasks.append(
            cmr_download(
                url_group,
                "gs://carbonplan-scratch/glas-cache/",
                credentials=credentials,
            ))

    dask.compute(tasks, retries=3, scheduler="single-threaded")
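
For dask.compute to schedule the items in tasks, cmr_download must return dask delayed objects. A minimal sketch under that assumption; the body is hypothetical, and real Earthdata endpoints need the credentials applied to each request, which is elided here:

import os

import dask
import fsspec


@dask.delayed
def cmr_download(urls, cache_location, credentials=None):
    # download each granule in the group into the cache bucket
    for url in urls:
        target = os.path.join(cache_location, os.path.basename(url))
        with fsspec.open(url, "rb") as src, fsspec.open(target, "wb") as dst:
            dst.write(src.read())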
Example #3
import fsspec
import pandas as pd
import zarr
from prefect import Flow, task

# download, combine_and_write, and chunked_iterable are defined
# elsewhere in the module


@task
def consolidate_metadata(target):
    # write consolidated Zarr metadata (.zmetadata) so the finished
    # store can be opened with a single metadata read
    mapper = fsspec.get_mapper(target)
    zarr.consolidate_metadata(mapper)


keys = pd.date_range("1981-09-01", "1981-09-10", freq="D")
target_path = "gs://pangeo-scratch/rabernat/pangeo_smithy/oisst-avhrr-v02r01-target/"
concat_dim = "time"
files_per_chunk = 5

with Flow("Pangeo-Forge") as flow:
    sources = [download(k) for k in keys]
    first = True
    write_tasks = []
    for source_group in chunked_iterable(sources, files_per_chunk):
        write_task = combine_and_write(source_group, target_path, concat_dim, first=first)
        write_tasks.append(write_task)
        first = False
    cm = consolidate_metadata(target_path)

# create dependencies in imperative mode: chain the write tasks so the
# Zarr appends happen in order, then consolidate metadata last
for n in range(1, len(write_tasks)):
    write_tasks[n].set_upstream(write_tasks[n - 1], flow=flow)
cm.set_upstream(write_tasks[-1], flow=flow)


# DO IT! #
flow.run()
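
flow.run() uses Prefect's default local executor, so the chained tasks execute one at a time in a single process. A possible variation that runs the same flow on a Dask cluster; the scheduler address is a placeholder, and on older Prefect releases the import path is prefect.engine.executors:

from prefect.executors import DaskExecutor

# the write tasks still run in order because of the set_upstream
# dependencies created above
flow.run(executor=DaskExecutor(address="tcp://dask-scheduler:8786"))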
Example #4
def chunk(sources, size):
    # TODO: move to pangeo_forge.
    return list(chunked_iterable(sources, size))
Example #5
import pytest


@pytest.mark.parametrize(
    "iterable, size, expected",
    [([1, 2, 3], 2, [(1, 2), (3,)])],  # illustrative case
)
def test_chunked_iterable(iterable, size, expected):
    actual = list(chunked_iterable(iterable, size))
    assert actual == expected
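
Every example above leans on chunked_iterable. A minimal sketch consistent with how the examples use it, assuming it yields successive tuples of at most size items (the actual pangeo-forge helper may differ):

import itertools


def chunked_iterable(iterable, size):
    # yield tuples of up to `size` items until the iterable is exhausted
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, size))
        if not chunk:
            break
        yield chunk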