Exemplo n.º 1
0
def cli(thredds_catalogue, skips, select, workers, outfile):
    """ Download Metadata from THREDDS server to tarball

    Example:

       \b
       Download files in directory that match `*yaml` and store them as a tar
        > thredds-to-tar -c "http://dapds00.nci.org.au/thredds/catalog/if87/2018-11-29/"
        -t ".*ARD-METADATA.yaml" -s '.*NBAR.*' -s '.*SUPPLEMENTARY.*'
         -s '.*NBART.*' -s '.*/QA/.*' -w 8 --outfile 2018-11-29.tar.gz

    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's --help text by a CLI decorator that is not
    # visible in this chunk -- confirm before rewording it.
    search_msg = "Searching {thredds_catalogue} for matching files"
    print(search_msg.format(thredds_catalogue=thredds_catalogue))

    # `select` is wrapped in a one-element list before being handed to the
    # finder, while `skips` is passed through unchanged.
    urls = thredds_find_glob(thredds_catalogue, skips, [select], workers)
    print("Found {0} metadata urls".format(len(urls)))

    documents = download_yamls(urls, workers)

    # Write every downloaded document into a single archive at `outfile`.
    # NOTE(review): both gzip=True and xz=True are requested here; which one
    # tar_mode honours cannot be seen from this chunk -- confirm.
    archive_mode = 'w' + tar_mode(gzip=True, xz=True, is_pipe=False)
    with tarfile.open(name=outfile, mode=archive_mode) as tar:
        for document in documents:
            # Item 0 is stored as the member's content, item 1 as its name.
            add_txt_file(tar=tar, content=document[0], fname=document[1])

    print("Done!")
Exemplo n.º 2
0
def cli(
    skip_lineage: bool,
    fail_on_missing_lineage: bool,
    verify_lineage: bool,
    uri: str,
    product: str,
):
    # Crawl the Thredds catalogue at `uri` for ARD metadata YAML files,
    # download them and hand them to dump_list_to_odc together with the
    # whitespace-separated product names from `product`.
    #
    # NOTE(review): documented with comments rather than a docstring, in case
    # a CLI decorator (not visible in this chunk) would surface a docstring
    # as help text -- confirm and convert to a docstring if that is desired.
    candidate_products = product.split()
    print(f"Crawling {uri} on Thredds")
    print(f"Matching to {candidate_products}")

    # Regex patterns: paths matching any exclude pattern are skipped, and
    # only the ARD metadata documents are selected.
    exclude_patterns = [".*NBAR.*", ".*SUPPLEMENTARY.*", ".*NBART.*", ".*/QA/.*"]
    include_patterns = [".*ARD-METADATA.yaml"]
    yaml_urls = thredds_find_glob(uri, exclude_patterns, include_patterns)
    print(f"Found {len(yaml_urls)} datasets")

    # Fetch the YAML documents behind the discovered URLs.
    documents = download_yamls(yaml_urls)

    # The lineage flags are forwarded unchanged as keyword arguments.
    lineage_options = {
        "skip_lineage": skip_lineage,
        "fail_on_missing_lineage": fail_on_missing_lineage,
        "verify_lineage": verify_lineage,
    }
    datacube = Datacube()
    added, failed = dump_list_to_odc(
        documents, datacube, candidate_products, **lineage_options
    )

    print(f"Added {added} Datasets, Failed {failed} Datasets")
Exemplo n.º 3
0
def test_thredds_crawl():
    """Check that crawling a known Thredds catalogue yields the expected URLs.

    This test talks to a live server: it will fail if NCI no longer hosts
    the sample data or if Thredds is unreachable.
    """
    skip_patterns = [".*NBAR.*", ".*SUPPLEMENTARY.*", ".*NBART.*", ".*/QA/.*"]
    select_patterns = [".*ARD-METADATA.yaml"]
    catalog_url = "http://dapds00.nci.org.au/thredds/catalog/if87/2018-11-29/"

    found = thredds_find_glob(catalog_url, skip_patterns, select_patterns)

    # Fail first on an empty result, then pin the exact number of matches
    # expected for this catalogue snapshot.
    assert found
    assert len(found) == 490