def snapshot_etl():
    """Run ETL for snapshot of the RLIDGeo geodatabase."""
    name = "RLIDGeo_" + datestamp()
    xml_path = arcetl.workspace.create_geodatabase_xml_backup(
        geodatabase_path=database.RLIDGEO.path,
        output_path=os.path.join(tempfile.gettempdir(), name + ".xml"),
        include_data=False,
        include_metadata=True,
    )
    snapshot_path = arcetl.workspace.create_file_geodatabase(
        geodatabase_path=os.path.join(path.REGIONAL_DATA, "history", name + ".gdb"),
        xml_workspace_path=xml_path,
        include_xml_data=False,
    )
    os.remove(xml_path)
    # Push datasets to snapshot (ignore certain patterns).
    # Loop variable renamed from `name` to avoid shadowing the snapshot name above.
    for dataset_name in arcetl.workspace.dataset_names(database.RLIDGEO.path):
        copy_name = dataset_name.split(".")[-1]
        if any(
            pattern.lower() in copy_name.lower()
            for pattern in IGNORE_PATTERNS_RLIDGEO_SNAPSHOT
        ):
            # Need to delete ignored dataset in snapshot (created by the XML).
            arcetl.dataset.delete(os.path.join(snapshot_path, copy_name))
            continue
        transform.etl_dataset(
            source_path=os.path.join(database.RLIDGEO.path, dataset_name),
            output_path=os.path.join(snapshot_path, copy_name),
        )
    arcetl.workspace.compress(snapshot_path)

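# `datestamp` is an assumed helper defined or imported elsewhere in this
# project; judging by output names like "RLIDGeo_<stamp>.gdb", it likely
# formats the current date. A minimal sketch under that assumption
# (hypothetical name, so it cannot shadow the real helper):
def _datestamp_sketch():
    """Return the current date as an underscore-delimited string (assumed format)."""
    import datetime

    return datetime.date.today().strftime("%Y_%m_%d")
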
def weekly_datasets_etl():
    """Run ETL for map server datasets with weekly update cycle.

    This script should only be used for updating geodatabase datasets & other
    managed data stores. Purely file-based formats like shapefiles are best
    updated via `file_datasets_etl`, for reasons related to locking mechanisms.
    """
    conn = credential.UNCPathCredential(DATA_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for kwargs in DATASET_KWARGS_WEEKLY:
            if kwargs.get("source_path"):
                transform.etl_dataset(**kwargs)

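# The shape of `DATASET_KWARGS_WEEKLY` is inferred from the loop above: an
# iterable of keyword-argument mappings for `transform.etl_dataset`, where
# entries without a "source_path" act as placeholders and are skipped. A
# hypothetical example entry (dataset names & paths are illustrative, not real):
_EXAMPLE_WEEKLY_KWARGS = [
    {
        "source_path": os.path.join(database.RLIDGEO.path, "dbo.ExampleDataset"),
        "output_path": os.path.join(DATA_PATH, "Example.gdb", "ExampleDataset"),
    },
    # Placeholder entry: no "source_path", so the loop skips it.
    {"output_path": os.path.join(DATA_PATH, "Example.gdb", "RetiredDataset")},
]
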
def rlidgeo_datasets_etl():
    """Run ETL for map server datasets in the RLIDGeo replica geodatabase."""
    conn = credential.UNCPathCredential(DATA_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for name in arcetl.workspace.dataset_names(database.RLIDGEO.path):
            if any(
                pattern.lower() in name.lower()
                for pattern in IGNORE_PATTERNS_RLIDGEO_SNAPSHOT
            ):
                LOG.warning("%s matches ignore-pattern: Skipping.", name)
                continue
            transform.etl_dataset(
                source_path=os.path.join(database.RLIDGEO.path, name),
                output_path=os.path.join(
                    DATA_PATH, "RLIDGeo.gdb", name.split(".")[-1]
                ),
            )

def file_datasets_etl():
    """Run ETL for map server file-based datasets.

    This script should only be used for updating shapefiles & other purely
    file-based datasets. Managed data store formats like geodatabases are best
    updated via `etl_gimap_dataset`, for reasons related to locking mechanisms.

    Essentially, shapefiles locked by a service will not accept append-loads.
    So we pre-load them to a staging copy, where a server-side batch script can
    clear the locks & wholly replace the files.
    """
    conn = credential.UNCPathCredential(STAGING_PATH, **credential.CPA_MAP_SERVER)
    with conn:
        for kwargs in DATASET_KWARGS_FILE:
            if kwargs.get("source_path"):
                transform.etl_dataset(**kwargs)

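# The server-side batch step described in the docstring is not part of this
# module. A rough sketch of what it presumably does (hypothetical function &
# paths; the actual lock-clearing mechanism depends on the map server):
def _replace_staged_shapefiles_sketch(staging_path, live_path):
    """Copy staged shapefile parts over the live copies (illustrative only)."""
    import shutil

    for filename in os.listdir(staging_path):
        # A shapefile spans multiple sidecar files (.shp, .shx, .dbf, .prj,
        # ...), so every staged part gets copied across.
        shutil.copy2(
            os.path.join(staging_path, filename), os.path.join(live_path, filename)
        )
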
def oem_tillamook_delivery_etl():
    """Run ETL for OEM-Tillamook delivery."""
    name = "OEM_Tillamook"
    gdb_path = os.path.join(PATH["tillamook_deliverables"], name + ".gdb")
    for dataset_name, kwargs in OEM_TILLAMOOK_DATASET_KWARGS.items():
        kwargs["output_path"] = os.path.join(gdb_path, dataset_name)
        transform.etl_dataset(**kwargs)
    zip_name = "{}_{}.zip".format(name, datestamp())
    zip_path = os.path.join(PATH["tillamook_deliverables"], zip_name)
    path.archive_directory(
        directory_path=gdb_path,
        archive_path=zip_path,
        directory_as_base=True,
        archive_exclude_patterns=[".lock"],
    )
    send_links_email(urls=[zip_path], **OEM_TILLAMOOK_MESSAGE_KWARGS)

def service_datasets_monthly_etl():
    """Run ETL for GIMap datasets with monthly update cycle.

    This script should only be used for updating geodatabase datasets & other
    managed data stores. Purely file-based formats like shapefiles are best
    updated in another manner, for reasons related to locking mechanisms.
    """
    conn = credential.UNCPathCredential(
        path.RLID_MAPS_DATA_SHARE, **credential.CPA_MAP_SERVER
    )
    with conn:
        for gdb_relpath in sorted(KWARGS_MONTHLY_DATASETS):
            LOG.info("Update datasets in %s", gdb_relpath)
            gdb_path = os.path.join(DATA_PATH, gdb_relpath)
            for kwargs in KWARGS_MONTHLY_DATASETS[gdb_relpath]:
                kwargs["output_path"] = os.path.join(gdb_path, kwargs["output_name"])
                transform.etl_dataset(**kwargs)

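# The shape of `KWARGS_MONTHLY_DATASETS` is inferred from the loop above: a
# mapping of geodatabase relative path -> list of `transform.etl_dataset`
# kwargs, each carrying an "output_name" that gets joined onto the geodatabase
# path. A hypothetical example (names are illustrative, not real):
_EXAMPLE_MONTHLY_DATASETS = {
    "Example.gdb": [
        {
            "source_path": os.path.join(database.RLIDGEO.path, "dbo.ExampleDataset"),
            "output_name": "ExampleDataset",
        }
    ]
}
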
def lcso_cad_datasets_etl():
    """Run ETL for LCSO CAD delivery datasets."""
    for dataset_name, kwargs in DATASET_KWARGS.items():
        kwargs["output_path"] = os.path.join(DELIVERABLES_PATH, dataset_name + ".shp")
        transform.etl_dataset(**kwargs)
    zip_name = "LCSO_CAD_{}.zip".format(datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    conn = credential.UNCPathCredential(
        path.RLID_MAPS_WWW_SHARE, **credential.CPA_MAP_SERVER
    )
    with conn:
        path.archive_directory(
            directory_path=DELIVERABLES_PATH,
            archive_path=zip_path,
            directory_as_base=False,
            archive_exclude_patterns=[".lock", ".zip"],
        )
    zip_url = url.RLID_MAPS + "Download/" + zip_name
    send_links_email(urls=[zip_url], **MESSAGE_KWARGS)

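# `MESSAGE_KWARGS` and `send_links_email` are defined elsewhere in this
# project; from the call above, the kwargs presumably carry addressing &
# subject details for the notification email. A hypothetical example of the
# expected shape (values are illustrative, not real):
_EXAMPLE_MESSAGE_KWARGS = {
    "subject": "LCSO CAD Delivery Datasets",
    "recipients": ["gis@example.com"],
}
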
def tillamook_delivery_etl():
    """Run ETL for Tillamook delivery."""
    name = "Tillamook"
    gdb_path = os.path.join(PATH["tillamook_deliverables"], name + ".gdb")
    for dataset_name, kwargs in chain(
        TILLAMOOK_DATASET_KWARGS.items(), TILLAMOOK_GIS_DATASET_KWARGS.items()
    ):
        kwargs["output_path"] = os.path.join(gdb_path, dataset_name)
        transform.etl_dataset(**kwargs)
    zip_name = "{}_{}.zip".format(name, datestamp())
    zip_path = os.path.join(path.RLID_MAPS_WWW_SHARE, "Download", zip_name)
    conn = credential.UNCPathCredential(
        path.RLID_MAPS_WWW_SHARE, **credential.CPA_MAP_SERVER
    )
    with conn:
        path.archive_directory(
            directory_path=gdb_path,
            archive_path=zip_path,
            directory_as_base=True,
            archive_exclude_patterns=[".lock"],
        )
    zip_url = url.RLID_MAPS + "Download/" + zip_name
    send_message_tillamook(
        zip_url, metadata_where_sql="in_tillamook = 1", **TILLAMOOK_MESSAGE_KWARGS
    )