def facility_etl():
    """Run ETL for facilities.

    Currently only undertaken for other ETL purposes--not publication.
    """
    with arcetl.ArcETL("Facilities") as etl:
        etl.extract(dataset.FACILITY.path("maint"))
        etl.transform(
            arcetl.dataset.rename_field,
            field_name="geofeat_id",
            new_field_name="address_intid",
        )
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["address_intid"])
        transform.clean_whitespace(
            etl, field_names=["label", "label_full", "type", "type_full"]
        )
        transform.force_lowercase(etl, field_names=["type"])
        transform.force_uppercase(etl, field_names=["label"])
        transform.add_missing_fields(etl, dataset.FACILITY, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="address_uuid",
            mapping=address_intid_to_uuid_map,
            key_field_names=["address_intid"],
        )
        etl.load(dataset.FACILITY.path("pub"))
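
# A minimal sketch of how `address_intid_to_uuid_map` (referenced above, defined
# elsewhere in this module) could be derived, assuming it pairs each address
# internal ID with its published UUID. The source dataset and field names below
# are illustrative assumptions, not the confirmed origin of the real mapping.
def _address_intid_to_uuid_map_sketch():
    # Each yielded item is a (geofeat_id, site_address_gfid) tuple, so dict()
    # builds an ID-to-UUID lookup directly.
    return dict(
        arcetl.attributes.as_iters(
            dataset_path=dataset.SITE_ADDRESS.path("pub"),
            field_names=["geofeat_id", "site_address_gfid"],
        )
    )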
def plat_etl():
    """Run ETL for plats."""
    with arcetl.ArcETL("Plats") as etl:
        etl.extract(dataset.PLAT.path("maint"))
        transform.clean_whitespace(etl, field_names=["platname", "docnumber"])
        transform.force_uppercase(etl, field_names=["platname"])
        transform.clear_nonpositive(etl, field_names=["agencydocn"])
        pub_field_names = {
            field["name"]
            for field in dataset.PLAT.fields
            if "pub" in field["tags"]
        }
        etl.transform(
            arcetl.features.delete,
            dataset_where_sql=" and ".join(
                "{} is null".format(name) for name in pub_field_names
            ),
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=pub_field_names,
            tolerance=TOLERANCE["xy"],
        )
        etl.load(dataset.PLAT.path("pub"))
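
# Illustration of the delete clause built in plat_etl: with a hypothetical pub
# field set of {"platname", "docnumber"}, the generated SQL is
# "platname is null and docnumber is null" (clause order may vary, since
# pub_field_names is a set), so a feature is deleted only when every published
# attribute is missing.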
def zoning_county_etl():
    """Run ETL for county zoning."""
    overlay_field_names = [
        name
        for name in dataset.ZONING_COUNTY.field_names
        if name.lower().startswith("over")
    ]
    with arcetl.ArcETL("County Zoning") as etl:
        etl.extract(dataset.ZONING_COUNTY.path("maint"))
        etl.transform(
            arcetl.features.insert_from_path,
            insert_dataset_path=dataset.ZONING_COUNTY.path("insert"),
        )
        transform.add_missing_fields(etl, dataset.ZONING_COUNTY, tags=["pub"])
        for new_name, old_name in [("zonecode", "ZONE_"), ("zonename", "ZONE_NAME")]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name=new_name,
                function=(lambda x: x),
                field_as_first_arg=False,
                arg_field_names=[old_name],
            )
        # UGB zoning has slightly different names. We want to standardize on the
        # main zoning dataset names.
        etl.transform(
            arcetl.attributes.update_by_mapping,
            field_name="zonename",
            mapping=county_zone_name_map,
            key_field_names="zonecode",
        )
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["zonecode", "zonename"])
        etl.transform(arcetl.features.delete, dataset_where_sql="zonecode is null")
        # Remove county zoning where city ones exist.
        etl.transform(
            arcetl.features.erase, erase_dataset_path=dataset.ZONING_CITY.path("pub")
        )
        # Assign zoning overlays.
        identity_kwargs = [
            {
                "field_name": "coastalzonecode",
                "identity_field_name": "TYPE",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "coastal_zones.shp"
                ),
            },
            {
                "field_name": "overas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "aszone.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overcas",
                "identity_field_name": "AIRPORT",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "caszone.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overdms",
                "identity_field_name": "TYPE",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "dredge_sites.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overbd",
                "identity_field_name": "Shape_Leng",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "beach_dune.shp"
                ),
                "replacement_value": "Y",
            },
            {
                "field_name": "overu",
                "identity_field_name": "urban",
                "identity_dataset_path": os.path.join(
                    LANE_ZONING_STAGING_PATH, "interim_urban.shp"
                ),
                "replacement_value": "Y",
            },
        ]
        for kwargs in identity_kwargs:
            etl.transform(arcetl.geoset.identity, **kwargs)
        # Clean identity values.
        transform.clean_whitespace(etl, field_names=["coastalzonecode"])
        etl.transform(
            arcetl.attributes.update_by_value, field_name="zonejuris", value="LC"
        )
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=[
                field["name"]
                for field in dataset.ZONING_COUNTY.fields
                if "pub" in field["tags"]
            ],
            tolerance=TOLERANCE["xy"],
        )
        # Assign the overlay flags dependent on coastal zone code.
        for code in ["CE", "DE", "MD", "NE", "NRC", "PW", "RD", "SN"]:
            etl.transform(
                arcetl.attributes.update_by_function,
                field_name="over{}".format(code.lower()),
                function=(lambda czc, c=code: "Y" if czc == c else "N"),
                field_as_first_arg=False,
                arg_field_names=["coastalzonecode"],
            )
        transform.force_uppercase(etl, overlay_field_names)
        transform.force_yn(etl, overlay_field_names, default="N")
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="alloverlays",
            function=concatenate_zoning_overlays,
            field_as_first_arg=False,
            kwarg_field_names=overlay_field_names,
        )
        etl.load(dataset.ZONING_COUNTY.path("pub"))
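
# Note on the coastal overlay lambdas in zoning_county_etl: the `c=code` default
# argument freezes each loop iteration's code value at lambda creation. Without
# it, every lambda would close over the same loop variable and see only the
# final value ("SN"). A minimal demonstration:
#
#     >>> flags = [(lambda czc, c=code: "Y" if czc == c else "N") for code in ("CE", "DE")]
#     >>> flags[0]("CE"), flags[1]("CE")
#     ('Y', 'N')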
def zoning_city_etl():
    """Run ETL for city zoning."""
    overlay_field_names = [
        name
        for name in dataset.ZONING_CITY.field_names
        if name.lower().startswith("over")
    ]
    with arcetl.ArcETL("City Zoning") as etl:
        etl.init_schema(dataset.ZONING_CITY.path("pub"))
        for _path in dataset.ZONING_CITY.path("inserts"):
            etl.transform(arcetl.features.insert_from_path, insert_dataset_path=_path)
        # Clean maintenance values.
        transform.force_uppercase(etl, overlay_field_names)
        transform.force_yn(etl, overlay_field_names, default="N")
        etl.transform(arcetl.features.delete, dataset_where_sql="zonecode is null")
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.ZONING_CITY.field_names,
            tolerance=TOLERANCE["xy"],
        )
        juris_domain = {
            "COB": "CoburgZoning",
            "COT": "CottageGroveZoning",
            "CRE": "CreswellZoning",
            "DUN": "DunesCityZoning",
            "EUG": "EugeneZoning",
            "FLO": "FlorenceZoning",
            "JUN": "JunctionCityZoning",
            "LOW": "LowellZoning",
            "OAK": "OakridgeZoning",
            "SPR": "SpringfieldZoning",
            "VEN": "VenetaZoning",
            "WES": "WestfirZoning",
        }
        for juris_code, domain_name in juris_domain.items():
            etl.transform(
                arcetl.attributes.update_by_domain_code,
                field_name="zonename",
                code_field_name="zonecode",
                domain_name=domain_name,
                domain_workspace_path=database.LCOGGEO.path,
                dataset_where_sql="zonejuris = '{}'".format(juris_code),
            )
        etl.transform(
            arcetl.attributes.update_by_domain_code,
            field_name="subareaname",
            code_field_name="subarea",
            domain_name="EugeneZoningSubarea",
            domain_workspace_path=database.LCOGGEO.path,
        )
        # Clean domain-derived values.
        transform.clean_whitespace(etl, field_names=["zonename", "subareaname"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="alloverlays",
            function=concatenate_zoning_overlays,
            field_as_first_arg=False,
            kwarg_field_names=overlay_field_names,
        )
        etl.load(dataset.ZONING_CITY.path("pub"))
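
# A minimal sketch of what `concatenate_zoning_overlays` (defined elsewhere in
# this module) might do, assuming it lists the "over*" flag fields set to "Y"
# as a comma-separated string. It receives each overlay field as a keyword
# argument via `kwarg_field_names` above; the exact formatting of the real
# helper is an assumption here.
def _concatenate_zoning_overlays_sketch(**overlay_flags):
    # Keep only the overlay names whose flag value is "Y"; sort for a stable
    # output order.
    flagged = sorted(name for name, flag in overlay_flags.items() if flag == "Y")
    return ", ".join(flagged) if flagged else None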
def address_point_etl():
    """Run ETL for address points."""
    with arcetl.ArcETL("Address Points") as etl:
        etl.extract(dataset.TILLAMOOK_ADDRESS_POINT.path("maint"))
        # Remove addresses flagged in validation as "not OK to publish".
        etl.transform(
            arcetl.dataset.join_field,
            join_dataset_path=dataset.TILLAMOOK_ADDRESS_POINT_ISSUES.path(),
            join_field_name="ok_to_publish",
            on_field_name="address_id",
            on_join_field_name="address_id",
        )
        etl.transform(arcetl.features.delete, dataset_where_sql="ok_to_publish = 0")
        etl.transform(arcetl.dataset.delete_field, field_name="ok_to_publish")
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["stnum"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
                "postcomm",
                "zip",
                "county",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "stnumsuf",
                "predir",
                "name",
                "type",
                "unit_type",
                "unit",
                "postcomm",
                "county",
                "valid",
                "archived",
                "confidence",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["zip"])
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(etl, dataset.TILLAMOOK_ADDRESS_POINT, tags=["pub"])
        # Assign geometry attributes.
        for x_name, y_name, srid in [("lon", "lat", 4326)]:
            for name, axis in [(x_name, "x"), (y_name, "y")]:
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=name,
                    spatial_reference_item=srid,
                    geometry_properties=["centroid", axis],
                )
        # Assign joined values.
        etl.transform(
            arcetl.attributes.update_by_joined_value,
            field_name="join_id",
            join_dataset_path=dataset.TILLAMOOK_ALTERNATE_STREET_NAME.path(),
            join_field_name="join_id",
            on_field_pairs=[
                ("predir", "prime_predir"),
                ("name", "prime_name"),
                ("type", "prime_type"),
                ("sufdir", "prime_sufdir"),
            ],
        )
        # Assign overlays.
        overlay_kwargs = [
            {
                "field_name": "city_limit",
                "overlay_field_name": "city",
                "overlay_dataset_path": dataset.TILLAMOOK_CITY_LIMITS.path(),
            },
            {
                "field_name": "ems",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_EMS.path(),
            },
            {
                "field_name": "esn",
                "overlay_field_name": "esn",
                "overlay_dataset_path": dataset.TILLAMOOK_EMERGENCY_SERVICE_ZONE.path(),
            },
            {
                "field_name": "fire",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_FIRE.path(),
            },
            {
                "field_name": "police",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.TILLAMOOK_POLICE.path(),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Build values: Constants.
        value_kwargs = [{"field_name": "state", "value": "OR"}]
        transform.update_attributes_by_values(etl, value_kwargs)
        # Build values: Concatenations.
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="address",
            function=concatenate_arguments,
            field_as_first_arg=False,
            arg_field_names=[
                "stnum",
                "stnumsuf",
                "predir",
                "name",
                "type",
                "sufdir",
                "unit_type",
                "unit",
            ],
        )
        etl.load(dataset.TILLAMOOK_ADDRESS_POINT.path("pub"))
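
# A plausible sketch of `concatenate_arguments` (defined elsewhere), assuming it
# joins the non-empty argument values with single spaces to build an address
# string, e.g. (123, None, "N", "MAIN", "ST") -> "123 N MAIN ST". The real
# helper's handling of blanks and formatting may differ.
def _concatenate_arguments_sketch(*values):
    # Skip None and empty strings; stringify everything else (e.g. house numbers).
    parts = [str(value) for value in values if value not in (None, "")]
    return " ".join(parts) if parts else None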
def site_address_etl():
    """Run ETL for site addresses."""
    with arcetl.ArcETL("Site Addresses") as etl:
        etl.extract(dataset.SITE_ADDRESS.path("maint"))
        # Clean maintenance values.
        transform.clear_nonpositive(etl, field_names=["house_nbr"])
        transform.clean_whitespace(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "city_name",
                "landuse",
                "maptaxlot",
                "account",
            ],
        )
        transform.force_uppercase(
            etl,
            field_names=[
                "house_suffix_code",
                "pre_direction_code",
                "street_name",
                "street_type_code",
                "unit_type_code",
                "unit_id",
                "maptaxlot",
                "valid",
                "archived",
            ],
        )
        transform.clear_non_numeric_text(etl, field_names=["account"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: x if is_numeric(x) else "0"),
        )
        transform.force_yn(etl, field_names=["archived"], default="N")
        transform.force_yn(etl, field_names=["valid"], default="Y")
        transform.add_missing_fields(etl, dataset.SITE_ADDRESS, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "x_coordinate", "y": "y_coordinate"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "annexhist",
                "overlay_field_name": "annexnum",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            # Have to do overlay rather than join because some lack codes.
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name": "greenwy",
                "overlay_field_name": "greenway",
                "overlay_dataset_path": dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            {
                "field_name": "nodaldev",
                "overlay_field_name": "nodearea",
                "overlay_dataset_path": dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
            },
            {
                "field_name": "plandes_id",
                "overlay_field_name": "plandes_id",
                "overlay_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
            },
            {
                "field_name": "sprsvcbndy",
                "overlay_field_name": "is_inside",
                "overlay_dataset_path": dataset.SPRINGFIELD_HANSEN_EXTENT.path(),
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_district",
                "overlay_field_name": "asacode",
                "overlay_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
            },
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            {
                "field_name": "police_beat",
                "overlay_field_name": "CAD",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "Public_Safety\\PSAPS\\CLPSAP\\SunGard_CAD\\Maintained_Layers",
                    "Maintained_Layers.gdb\\Fire_Law_Tow\\law_beat",
                ),
            },
            {
                "field_name": "psap_code",
                "overlay_field_name": "psap_code",
                "overlay_dataset_path": dataset.PSAP_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "electionpr",
                "overlay_field_name": "precntnum",
                "overlay_dataset_path": dataset.ELECTION_PRECINCT.path("pub"),
            },
            {
                "field_name": "ccward",
                "overlay_field_name": "ward",
                "overlay_dataset_path": dataset.CITY_WARD.path(),
            },
            {
                "field_name": "clpud_subdivision",
                "overlay_field_name": "SUBDIVISIO",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\CentralLincolnPUD\\Redistricting2012",
                    "CLPUD_Subdivisions.shp",
                ),
            },
            {
                "field_name": "cocommdist",
                "overlay_field_name": "commrdist",
                "overlay_dataset_path": dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
            },
            {
                "field_name": "epud",
                "overlay_field_name": "boardid",
                "overlay_dataset_path": dataset.EPUD_SUBDISTRICT.path("pub"),
            },
            {
                "field_name": "hwpud_subdivision",
                "overlay_field_name": "BoardZone",
                "overlay_dataset_path": os.path.join(
                    path.LCOG_GIS_PROJECTS,
                    "UtilityDistricts\\HecetaWaterPUD\\NewBoardSubzones",
                    "HecetaData.gdb",
                    "ScenarioB",
                ),
            },
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            {
                "field_name": "senatedist",
                "overlay_field_name": "sendist",
                "overlay_dataset_path": dataset.STATE_SENATOR_DISTRICT.path("pub"),
            },
            {
                "field_name": "strepdist",
                "overlay_field_name": "repdist",
                "overlay_dataset_path": dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"),
            },
            {
                "field_name": "swcd",
                "overlay_field_name": "swcdist",
                "overlay_dataset_path": dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub"),
            },
            {
                "field_name": "swcdzone",
                "overlay_field_name": "swczone",
                "overlay_dataset_path": dataset.SOIL_WATER_CONSERVATION_DISTRICT.path("pub"),
            },
            # Education attributes.
            {
                "field_name": "schooldist",
                "overlay_field_name": "district",
                "overlay_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
            },
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name": "ltddist",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2012 LTD Boundary.shp"
                ),
            },
            {
                "field_name": "ltdridesrc",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2015 RideSource Boundary.shp"
                ),
            },
            {
                "field_name": "cats",
                "overlay_field_name": "CATSBNDY",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\eug\\catsbndy.shp"
                ),
            },
            {
                "field_name": "trans_analysis_zone",
                "overlay_field_name": "TAZ_NUM",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\MTAZ16.shp"
                ),
            },
            # Natural attributes.
            {
                "field_name": "firmnumber",
                "overlay_field_name": "firm_pan",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FIRMPanel"
                ),
            },
            {
                "field_name": "soilkey",
                "overlay_field_name": "mukey",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\soils\\Soils.gdb\\Soil"
                ),
            },
            {
                "field_name": "wetland",
                "overlay_field_name": "WET_TYPE",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\eug\\Wetland\\wetlands.shp"
                ),
            },
            # Census attributes.
            {
                "field_name": "ctract",
                "overlay_field_name": "TRACT",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name": "blockgr",
                "overlay_field_name": "BlockGroup",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010",
                    "lc_census2010.gdb\\lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name": "neighbor",
                "overlay_field_name": "NEIBORHD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug",
                    "Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Override overlays for special cases.
        for override in OVERRIDE_ATTRS:
            for kwargs in OVERRIDE_ATTRS[override].get("overlay_kwargs", []):
                etl.transform(
                    arcetl.attributes.update_by_value,
                    dataset_where_sql=OVERRIDE_ATTRS[override].get("where_sql"),
                    **kwargs
                )
        # Clean overlay values.
        transform.clean_whitespace(
            etl,
            field_names=["police_beat", "wetland", "ctract", "blockgr", "neighbor"],
        )
        transform.force_uppercase(etl, field_names=["cats", "ltddist", "ltdridesrc"])
        # Set default overlay values where missing.
        transform.force_yn(
            etl,
            field_names=["greenwy", "sprsvcbndy", "cats", "ltddist", "ltdridesrc"],
            default="N",
        )
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "pre_direction",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_DIRECTION.path(),
                "on_field_pairs": [("pre_direction_code", "code")],
            },
            {
                "field_name": "street_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.STREET_TYPE.path(),
                "on_field_pairs": [("street_type_code", "code")],
            },
            {
                "field_name": "unit_type",
                "join_field_name": "description",
                "join_dataset_path": dataset.UNIT_TYPE.path(),
                "on_field_pairs": [("unit_type_code", "code")],
            },
            {
                "field_name": "city_name_abbr",
                "join_field_name": "CityNameAbbr",
                "join_dataset_path": dataset.CITY.path(),
                "on_field_pairs": [("city_name", "CityName")],
            },
            # Extended attributes.
            {
                "field_name": "five_digit_zip_code",
                "join_field_name": "zip_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # Any address not assigned a ZIP from USPS gets an overlay ZIP.
            {
                "field_name": "five_digit_zip_code",
                "dataset_where_sql": "five_digit_zip_code is null",
                "join_field_name": "zip_code_overlay",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "four_digit_zip_code",
                "join_field_name": "plus_four_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_delivery_point_code",
                "join_field_name": "delivery_point_code",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "postal_carrier_route",
                "join_field_name": "carrier_route",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_cmra",
                "join_field_name": "is_cmra",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_is_vacant",
                "join_field_name": "is_vacant",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "usps_has_mail_service",
                "join_field_name": "has_mail_service",
                "join_dataset_path": dataset.ADDRESS_POSTAL_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            {
                "field_name": "landuse_desc",
                "join_field_name": "ludesc",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landusec")],
            },
            {
                "field_name": "usedesc",
                "join_field_name": "ucname",
                "join_dataset_path": dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
            # A&T attributes.
            {
                "field_name": "tca",
                "join_field_name": "tax_code_overlay",
                "join_dataset_path": dataset.ADDRESS_ASSESS_TAX_INFO.path(),
                "on_field_pairs": [("geofeat_id", "geofeat_id")],
            },
            # City attributes.
            {
                "field_name": "geocity_name",
                "join_field_name": "inccityname",
                "join_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
                "on_field_pairs": [("geocity", "inccityabbr")],
            },
            {
                "field_name": "ugb_city_name",
                "join_field_name": "ugbcityname",
                "join_dataset_path": dataset.UGB.path("pub"),
                "on_field_pairs": [("ugb", "ugbcity")],
            },
            # Planning & zoning attributes.
            {
                "field_name": "nodaldev_name",
                "join_field_name": "nodename",
                "join_dataset_path": dataset.NODAL_DEVELOPMENT_AREA.path("pub"),
                "on_field_pairs": [("nodaldev", "nodearea")],
            },
            {
                "field_name": "plandesjuris",
                "join_field_name": "planjuris",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandes",
                "join_field_name": "plandes",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            {
                "field_name": "plandesdesc",
                "join_field_name": "plandesnam",
                "join_dataset_path": dataset.PLAN_DESIGNATION.path("pub"),
                "on_field_pairs": [("plandes_id", "plandes_id")],
            },
            # Public safety attributes.
            {
                "field_name": "ambulance_service_area",
                "join_field_name": "asa",
                "join_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "ambulance_service_provider",
                "join_field_name": "provider",
                "join_dataset_path": dataset.AMBULANCE_SERVICE_AREA.path("pub"),
                "on_field_pairs": [("ambulance_district", "asacode")],
            },
            {
                "field_name": "fire_protection_provider",
                "join_field_name": "fpprovname",
                "join_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
                "on_field_pairs": [("firedist", "fireprotprov")],
            },
            {
                "field_name": "psap_name",
                "join_field_name": "psap_name",
                "join_dataset_path": dataset.PSAP_AREA.path("pub"),
                "on_field_pairs": [("psap_code", "psap_code")],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                    ("psap_code", "psap_code"),
                ],
            },
            {
                "field_name": "emergency_service_number",
                "join_field_name": "emergency_service_number",
                "join_dataset_path": dataset.EMERGENCY_SERVICE_NUMBER.path(),
                "on_field_pairs": [
                    # City used as proxy for police.
                    ("geocity", "city_limits"),
                    ("ambulance_district", "asa_code"),
                    ("firedist", "fire_district"),
                ],
                "dataset_where_sql": "emergency_service_number is null",
            },
            # Election attributes.
            {
                "field_name": "city_councilor",
                "join_field_name": "councilor",
                "join_dataset_path": dataset.CITY_WARD.path(),
                "on_field_pairs": [("ccward", "ward")],
            },
            {
                "field_name": "cocommdist_name",
                "join_field_name": "cmdistname",
                "join_dataset_path": dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name": "county_commissioner",
                "join_field_name": "commrname",
                "join_dataset_path": dataset.COUNTY_COMMISSIONER_DISTRICT.path("pub"),
                "on_field_pairs": [("cocommdist", "commrdist")],
            },
            {
                "field_name": "eweb_commissioner_name",
                "join_field_name": "eweb_commissioner_name",
                "join_dataset_path": dataset.EWEB_COMMISSIONER.path("pub"),
                "on_field_pairs": [("ccward", "city_council_ward")],
            },
            {
                "field_name": "state_representative",
                "join_field_name": "repname",
                "join_dataset_path": dataset.STATE_REPRESENTATIVE_DISTRICT.path("pub"),
                "on_field_pairs": [("strepdist", "repdist")],
            },
            {
                "field_name": "state_senator",
                "join_field_name": "senname",
                "join_dataset_path": dataset.STATE_SENATOR_DISTRICT.path("pub"),
                "on_field_pairs": [("senatedist", "sendist")],
            },
            # Education attributes.
            {
                "field_name": "schooldist_name",
                "join_field_name": "names",
                "join_dataset_path": dataset.SCHOOL_DISTRICT.path("pub"),
                "on_field_pairs": [("schooldist", "district")],
            },
            {
                "field_name": "elem_name",
                "join_field_name": "elem_school",
                "join_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("elem", "attend")],
            },
            {
                "field_name": "middle_name",
                "join_field_name": "middle_school",
                "join_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("middle", "attend")],
            },
            {
                "field_name": "high_name",
                "join_field_name": "high_school",
                "join_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
                "on_field_pairs": [("high", "attend")],
            },
            # Natural attributes.
            {
                "field_name": "firmprinted",
                "join_field_name": "panel_printed",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FIRMPanel"
                ),
                "on_field_pairs": [("firmnumber", "firm_pan")],
            },
            {
                "field_name": "firm_community_id",
                "join_field_name": "com_nfo_id",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\CommunityInfo"
                ),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name": "firm_community_post_firm_date",
                "join_field_name": "in_frm_dat",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\CommunityInfo"
                ),
                "on_field_pairs": [("geocity", "community_code")],
            },
            {
                "field_name": "soiltype",
                "join_field_name": "musym",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\soils\\Soils.gdb\\MUAggAtt"
                ),
                "on_field_pairs": [("soilkey", "mukey")],
            },
            # Other district attributes.
            {
                "field_name": "neighborhood_name",
                "join_field_name": "NAME",
                "join_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
                "on_field_pairs": [("neighbor", "NEIBORHD")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Clean join values.
        transform.clean_whitespace(etl, field_names=["neighborhood_name"])
        # Remove Metro Plan designations, per City of Eugene request.
        transform.clear_all_values(
            etl,
            field_names=["plandes", "plandesdesc"],
            dataset_where_sql="plandesjuris = 'MTP'",
        )
        # Remove +4 ZIP where initial ZIP is missing.
        transform.clear_all_values(
            etl,
            field_names=["four_digit_zip_code"],
            dataset_where_sql="five_digit_zip_code is null",
        )
        # Assign constants.
        constant_kwargs = [
            {"field_name": "state_code", "value": "OR"},
            {"field_name": "state_name", "value": "Oregon"},
            {"field_name": "county_name", "value": "Lane"},
        ]
        for kwargs in constant_kwargs:
            etl.transform(arcetl.attributes.update_by_value, **kwargs)
        # Override constants for special cases.
        for override in OVERRIDE_ATTRS:
            for kwargs in OVERRIDE_ATTRS[override].get("constant_kwargs", []):
                etl.transform(
                    arcetl.attributes.update_by_value,
                    dataset_where_sql=OVERRIDE_ATTRS[override].get("where_sql"),
                    **kwargs
                )
        # Build values from functions.
        function_kwargs = [
            {
                "field_name": "street_name_full",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "pre_direction_code",
                    "street_name",
                    "street_type_code",
                ],
            },
            {
                "field_name": "city_state_zip",
                "function": city_state_zip,
                "kwarg_field_names": ["city_name", "state_code", "five_digit_zip_code"],
            },
            {
                "field_name": "concat_address_no_unit",
                "function": concatenate_arguments,
                "arg_field_names": ["house_nbr", "house_suffix_code", "street_name_full"],
            },
            {
                "field_name": "concat_address",
                "function": concatenate_arguments,
                "arg_field_names": ["concat_address_no_unit", "unit_type_code", "unit_id"],
            },
            {
                "field_name": "concat_address_no_direction",
                "function": concatenate_arguments,
                "arg_field_names": [
                    "house_nbr",
                    "house_suffix_code",
                    "street_name",
                    "street_type_code",
                    "unit_type_code",
                    "unit_id",
                ],
            },
            {
                "field_name": "concat_address_full",
                "function": concat_address_full,
                "kwarg_field_names": [
                    "concat_address",
                    "city_name",
                    "state_code",
                    "five_digit_zip_code",
                    "four_digit_zip_code",
                ],
            },
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
        for kwargs in function_kwargs:
            etl.transform(
                arcetl.attributes.update_by_function, field_as_first_arg=False, **kwargs
            )
        # Take care of addresses flagged not to update in publication.
        ids = {}
        id_set_kwargs = {
            "in_publication": {"dataset_path": dataset.SITE_ADDRESS.path("pub")},
            "in_transform": {"dataset_path": etl.transform_path},
            "no_update": {
                "dataset_path": dataset.ADDRESS_ISSUES.path(),
                "dataset_where_sql": "update_publication = 0",
            },
        }
        for key, kwargs in id_set_kwargs.items():
            ids[key] = set(
                _id
                for _id, in arcetl.attributes.as_iters(
                    field_names="site_address_gfid", **kwargs
                )
            )
        ids["rollback"] = ids["no_update"] & ids["in_transform"] & ids["in_publication"]
        ids["hold"] = ids["no_update"] & (ids["in_transform"] - ids["in_publication"])
        rollback_features = [
            feat
            for feat in arcetl.attributes.as_dicts(dataset.SITE_ADDRESS.path("pub"))
            if feat["site_address_gfid"] in ids["rollback"]
        ]
        # Strip OIDs (not part of update).
        for feat in rollback_features:
            del feat["oid@"]
        if rollback_features:
            etl.transform(
                arcetl.features.update_from_dicts,
                update_features=rollback_features,
                id_field_names="site_address_gfid",
                field_names=rollback_features[0].keys(),
                delete_missing_features=False,
            )
        etl.transform(
            arcetl.features.delete_by_id,
            delete_ids=ids["hold"],
            id_field_names="site_address_gfid",
        )
        LOG.info("%s addresses held from publication", len(ids["hold"]))
        LOG.info("%s addresses rolled-back from publication", len(ids["rollback"]))
        if any([ids["hold"], ids["rollback"]]):
            send_publication_issues_message()
        etl.load(dataset.SITE_ADDRESS.path("pub"))
        send_new_lincom_address_message()
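
# Worked example of the hold/rollback set logic in site_address_etl, using
# hypothetical IDs: with no_update = {1, 2, 3}, in_transform = {1, 2, 4}, and
# in_publication = {1, 5}, the rollback set is {1} (flagged, present in both
# transform and publication, so the already-published copy is restored) and the
# hold set is {2} (flagged and new to the transform, so it is deleted before
# load rather than published for the first time).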
def land_use_area_etl():
    """Run ETL for land use areas."""
    with arcetl.ArcETL("Land Use Areas") as etl:
        etl.extract(dataset.LAND_USE_AREA.path("maint"))
        # Clean maintenance values.
        transform.clean_whitespace(etl, field_names=["maptaxlot"])
        transform.clear_non_numeric_text(etl, field_names=["maptaxlot"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="landuse",
            function=(lambda x: 0 if x is None or x < 0 else x),
        )
        # Remove features with missing core identifiers.
        for name in dataset.LAND_USE_AREA.id_field_names:
            etl.transform(
                arcetl.features.delete, dataset_where_sql="{} is null".format(name)
            )
        # Dissolve on core maintenance fields that are used in publication.
        etl.transform(
            arcetl.features.dissolve,
            dissolve_field_names=dataset.LAND_USE_AREA.id_field_names,
            tolerance=TOLERANCE["xy"],
        )
        transform.add_missing_fields(etl, dataset.LAND_USE_AREA, tags=["pub"])
        # Assign geometry attributes.
        coordinate_system_xy_keys = {
            2914: {"x": "xcoord", "y": "ycoord"},
            4326: {"x": "longitude", "y": "latitude"},
        }
        for spatial_reference_id, xy_key in coordinate_system_xy_keys.items():
            for axis, key in xy_key.items():
                etl.transform(
                    arcetl.attributes.update_by_geometry,
                    field_name=key,
                    spatial_reference_item=spatial_reference_id,
                    geometry_properties=["centroid", axis],
                )
        # Assign overlays.
        overlay_kwargs = [
            # City attributes.
            {
                "field_name": "geocity",
                "overlay_field_name": "inccityabbr",
                "overlay_dataset_path": dataset.INCORPORATED_CITY_LIMITS.path(),
            },
            {
                "field_name": "yearanx",
                "overlay_field_name": "annexyear",
                "overlay_dataset_path": dataset.ANNEXATION_HISTORY.path("pub"),
            },
            {
                "field_name": "ugb",
                "overlay_field_name": "ugbcity",
                "overlay_dataset_path": dataset.UGB.path("pub"),
            },
            # Planning & zoning attributes.
            {
                "field_name": "greenwy",
                "overlay_field_name": "greenway",
                "overlay_dataset_path": dataset.WILLAMETTE_RIVER_GREENWAY.path("pub"),
            },
            # Public safety attributes.
            {
                "field_name": "firedist",
                "overlay_field_name": "fireprotprov",
                "overlay_dataset_path": dataset.FIRE_PROTECTION_AREA.path("pub"),
            },
            # Election attributes.
            {
                "field_name": "lcczone",
                "overlay_field_name": "lccbrdzone",
                "overlay_dataset_path": dataset.LCC_BOARD_ZONE.path("pub"),
            },
            # Education attributes.
            {
                "field_name": "elem",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.ELEMENTARY_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "middle",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.MIDDLE_SCHOOL_AREA.path("pub"),
            },
            {
                "field_name": "high",
                "overlay_field_name": "attend",
                "overlay_dataset_path": dataset.HIGH_SCHOOL_AREA.path("pub"),
            },
            # Transportation attributes.
            {
                "field_name": "ltddist",
                "overlay_field_name": "LTD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "transport\\ltd\\2012 LTD Boundary.shp"
                ),
            },
            # Natural attributes.
            {
                "field_name": "flood",
                "overlay_field_name": "fld_zone",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA, "natural\\flood\\Flood.gdb\\FloodHazardArea"
                ),
            },
            # Census attributes.
            {
                "field_name": "ctract",
                "overlay_field_name": "TRACT",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb\\lc_tracts2010",
                ),
            },
            {
                "field_name": "blockgr",
                "overlay_field_name": "BlockGroup",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "federal\\census\\lane\\2010\\lc_census2010.gdb",
                    "lc_blockgroups2010",
                ),
            },
            # Other district attributes.
            {
                "field_name": "neighbor",
                "overlay_field_name": "NEIBORHD",
                "overlay_dataset_path": os.path.join(
                    path.REGIONAL_DATA,
                    "boundary\\districts\\eug\\Boundary.gdb\\EugNeighborhoods",
                ),
            },
        ]
        for kwargs in overlay_kwargs:
            etl.transform(
                arcetl.attributes.update_by_overlay,
                overlay_central_coincident=True,
                **kwargs
            )
        # Clean overlay values.
        transform.clean_whitespace(etl, field_names=["ctract", "blockgr", "neighbor"])
        transform.force_uppercase(etl, field_names=["ltddist"])
        # Set default overlay values where missing.
        transform.force_yn(etl, field_names=["greenwy", "ltddist"], default="N")
        # Remove invalid overlay values.
        transform.clear_nonpositive(etl, field_names=["ctract", "blockgr"])
        etl.transform(
            arcetl.attributes.update_by_function,
            field_name="neighbor",
            function=(lambda x: x if x and int(x) != 99 else None),
        )
        # Assign joinable field values after overlays.
        join_kwargs = [
            # Core attributes.
            {
                "field_name": "landusedes",
                "join_field_name": "ludesc",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecode",
                "join_field_name": "usecode",
                "join_dataset_path": dataset.LAND_USE_CODES_DETAILED.path("pub"),
                "on_field_pairs": [("landuse", "landuse")],
            },
            {
                "field_name": "usecodedes",
                "join_field_name": "ucname",
                "join_dataset_path": dataset.LAND_USE_CODES_USE_CODES.path("pub"),
                "on_field_pairs": [("usecode", "usecode")],
            },
        ]
        for kwargs in join_kwargs:
            etl.transform(arcetl.attributes.update_by_joined_value, **kwargs)
        # Build values from functions.
        function_kwargs = [
            {
                "field_name": "mapnumber",
                "function": (lambda x: x[:8] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "taxlot",
                "function": (lambda x: x[-5:] if x else None),
                "arg_field_names": ["maptaxlot"],
            },
            {
                "field_name": "maptaxlot_hyphen",
                "function": maptaxlot_separated,
                "arg_field_names": ["maptaxlot"],
            },
        ]
        for kwargs in function_kwargs:
            etl.transform(
                arcetl.attributes.update_by_function, field_as_first_arg=False, **kwargs
            )
        # Build values from mappings.
        mapping_kwargs = [
            {
                "field_name": "units",
                "mapping": total_units,
                "key_field_names": ["maptaxlot", "landuse"],
            },
            {
                "field_name": "acres",
                "mapping": total_acres,
                "key_field_names": ["maptaxlot", "landuse"],
            },
        ]
        for kwargs in mapping_kwargs:
            etl.transform(arcetl.attributes.update_by_mapping, **kwargs)
        etl.transform(
            arcetl.attributes.update_by_feature_match,
            field_name="landusecount",
            id_field_names=["maptaxlot"],
            update_type="match_count",
        )
        etl.load(dataset.LAND_USE_AREA.path("pub"))