Exemplo n.º 1
0
def snapshot_etl():
    """Run ETL for snapshot of the RLIDGeo geodatabase.

    Creates a dated file geodatabase under the regional-data history folder
    from an XML schema backup of RLIDGeo, then copies every dataset into it
    except those matching the snapshot ignore-patterns.
    """
    name = "RLIDGeo_" + datestamp()
    # Schema-only XML backup: data is loaded dataset-by-dataset below.
    xml_path = arcetl.workspace.create_geodatabase_xml_backup(
        geodatabase_path=database.RLIDGEO.path,
        output_path=os.path.join(tempfile.gettempdir(), name + ".xml"),
        include_data=False,
        include_metadata=True,
    )
    snapshot_path = arcetl.workspace.create_file_geodatabase(
        geodatabase_path=os.path.join(path.REGIONAL_DATA, "history",
                                      name + ".gdb"),
        xml_workspace_path=xml_path,
        include_xml_data=False,
    )
    os.remove(xml_path)
    # Push datasets to snapshot (ignore certain patterns).
    # NOTE: loop variable renamed from `name` so it no longer shadows the
    # snapshot name defined above.
    for dataset_name in arcetl.workspace.dataset_names(database.RLIDGEO.path):
        # Strip any owner/schema qualifier (e.g. "owner.Dataset" -> "Dataset").
        copy_name = dataset_name.split(".")[-1]
        if any(pattern.lower() in copy_name.lower()
               for pattern in IGNORE_PATTERNS_RLIDGEO_SNAPSHOT):
            # Need to delete ignored dataset in snapshot (created by the XML).
            arcetl.dataset.delete(os.path.join(snapshot_path, copy_name))
            continue

        transform.etl_dataset(
            source_path=os.path.join(database.RLIDGEO.path, dataset_name),
            output_path=os.path.join(snapshot_path, copy_name),
        )
    arcetl.workspace.compress(snapshot_path)
Exemplo n.º 2
0
def weekly_datasets_etl():
    """Run ETL for map server datasets with weekly update cycle.

    This script should only be used for updating geodatabase datasets & other managed
    data stores. Purely file-based formats like shapefiles are best updated via
    `file_datasets_etl`, for reasons related to locking mechanisms.
    """
    share_credential = credential.UNCPathCredential(
        DATA_PATH, **credential.CPA_MAP_SERVER
    )
    with share_credential:
        for dataset_kwargs in DATASET_KWARGS_WEEKLY:
            # Entries without a source path are placeholders: skip them.
            if not dataset_kwargs.get("source_path"):
                continue
            transform.etl_dataset(**dataset_kwargs)
Exemplo n.º 3
0
def rlidgeo_datasets_etl():
    """Run ETL for map server datasets in the RLIDGeo replica geodatabase."""
    share_credential = credential.UNCPathCredential(
        DATA_PATH, **credential.CPA_MAP_SERVER
    )
    with share_credential:
        for dataset_name in arcetl.workspace.dataset_names(database.RLIDGEO.path):
            lowered = dataset_name.lower()
            ignored = any(
                pattern.lower() in lowered
                for pattern in IGNORE_PATTERNS_RLIDGEO_SNAPSHOT
            )
            if ignored:
                LOG.warning("%s matches ignore-pattern: Skipping.", dataset_name)
                continue

            # Output name drops any owner/schema qualifier from the source name.
            output_name = dataset_name.split(".")[-1]
            transform.etl_dataset(
                source_path=os.path.join(database.RLIDGEO.path, dataset_name),
                output_path=os.path.join(DATA_PATH, "RLIDGeo.gdb", output_name),
            )
Exemplo n.º 4
0
def file_datasets_etl():
    """Run ETL for map server file-based datasets.

    This script should only be used for updating shapefiles & other purely file-based
    datasets. Managed data store formats like geodatabases are best updated via
    `etl_gimap_dataset`, for reasons related to locking mechanisms.

    Essentially, the file-based formats will not append-load on shapefiles locked by a
    service. So we pre-load them to a staging copy, where a server-side batch script
    can clear the locks & wholly replace the files.
    """
    staging_credential = credential.UNCPathCredential(
        STAGING_PATH, **credential.CPA_MAP_SERVER
    )
    with staging_credential:
        for dataset_kwargs in DATASET_KWARGS_FILE:
            # Entries without a source path are placeholders: skip them.
            if not dataset_kwargs.get("source_path"):
                continue
            transform.etl_dataset(**dataset_kwargs)
Exemplo n.º 5
0
def oem_tillamook_delivery_etl():
    """Run ETL for OEM-Tillamook delivery."""
    name = "OEM_Tillamook"
    deliverables_path = PATH["tillamook_deliverables"]
    gdb_path = os.path.join(deliverables_path, name + ".gdb")
    # Load every deliverable dataset into the geodatabase.
    for dataset_name, kwargs in OEM_TILLAMOOK_DATASET_KWARGS.items():
        kwargs["output_path"] = os.path.join(gdb_path, dataset_name)
        transform.etl_dataset(**kwargs)
    # Zip the geodatabase (minus lock files) alongside it & send the link.
    zip_path = os.path.join(
        deliverables_path, "{}_{}.zip".format(name, datestamp())
    )
    path.archive_directory(
        directory_path=gdb_path,
        archive_path=zip_path,
        directory_as_base=True,
        archive_exclude_patterns=[".lock"],
    )
    send_links_email(urls=[zip_path], **OEM_TILLAMOOK_MESSAGE_KWARGS)
def service_datasets_monthly_etl():
    """Run ETL for GIMap datasets with monthly update cycle.

    This script should only be used for updating geodatabase datasets & other
    managed data stores. Purely file-based formats like shapefiles are best
    updated in another manner, for reasons related to locking mechanisms.
    """
    conn = credential.UNCPathCredential(path.RLID_MAPS_DATA_SHARE,
                                        **credential.CPA_MAP_SERVER)
    with conn:
        # Sorted for a deterministic, log-friendly processing order.
        for gdb_relpath in sorted(KWARGS_MONTHLY_DATASETS):
            LOG.info("Update datasets in %s", gdb_relpath)
            gdb_path = os.path.join(DATA_PATH, gdb_relpath)
            for kwargs in KWARGS_MONTHLY_DATASETS[gdb_relpath]:
                kwargs["output_path"] = os.path.join(gdb_path,
                                                     kwargs["output_name"])
                transform.etl_dataset(**kwargs)
Exemplo n.º 7
0
def lcso_cad_datasets_etl():
    """Run ETL for LSCO CAD delivery datasets."""
    # Load each deliverable shapefile.
    for dataset_name, kwargs in DATASET_KWARGS.items():
        kwargs["output_path"] = os.path.join(DELIVERABLES_PATH, dataset_name + ".shp")
        transform.etl_dataset(**kwargs)
    zip_name = "LCSO_CAD_{}.zip".format(datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    share_credential = credential.UNCPathCredential(
        path.RLID_MAPS_WWW_SHARE, **credential.CPA_MAP_SERVER
    )
    # Archive the deliverables (minus lock files & old zips) on the web share.
    with share_credential:
        path.archive_directory(
            directory_path=DELIVERABLES_PATH,
            archive_path=zip_path,
            directory_as_base=False,
            archive_exclude_patterns=[".lock", ".zip"],
        )
    # Email a download link to the published zip.
    send_links_email(
        urls=[url.RLID_MAPS + "Download/" + zip_name], **MESSAGE_KWARGS
    )
Exemplo n.º 8
0
def tillamook_delivery_etl():
    """Run ETL for Tillamook delivery."""
    name = "Tillamook"
    gdb_path = os.path.join(PATH["tillamook_deliverables"], name + ".gdb")
    # Load both the base & GIS dataset groups into the geodatabase.
    for kwargs_group in (TILLAMOOK_DATASET_KWARGS, TILLAMOOK_GIS_DATASET_KWARGS):
        for dataset_name, kwargs in kwargs_group.items():
            kwargs["output_path"] = os.path.join(gdb_path, dataset_name)
            transform.etl_dataset(**kwargs)
    zip_name = "{}_{}.zip".format(name, datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    share_credential = credential.UNCPathCredential(path.RLID_MAPS_WWW_SHARE,
                                                    **credential.CPA_MAP_SERVER)
    # Archive the geodatabase (minus lock files) on the web share.
    with share_credential:
        path.archive_directory(
            directory_path=gdb_path,
            archive_path=zip_path,
            directory_as_base=True,
            archive_exclude_patterns=[".lock"],
        )
    zip_url = url.RLID_MAPS + "Download/" + zip_name
    send_message_tillamook(zip_url,
                           metadata_where_sql="in_tillamook = 1",
                           **TILLAMOOK_MESSAGE_KWARGS)