def main(): """Summary Args: jobs (TYPE): Description **kwargs: Description Returns: TYPE: Descriptio """ script_name = os.path.basename(__file__).replace(".py", "") # job_agol = jobutil.Job( # name=f"{script_name}_agol", # url=JOB_DB_API_URL, # source="dropbox", # destination="agol", # auth=JOB_DB_API_TOKEN, # ) # job_agol.start() # job_socrata = jobutil.Job( # name=f"{script_name}_socrata", # url=JOB_DB_API_URL, # source="dropbox", # destination="socrata", # auth=JOB_DB_API_TOKEN, # ) # job_socrata.start() data = get_data(dropbox_path, DROPBOX_BCYCLE_TOKEN) data = handle_data(data) data = datautil.upper_case_keys(data) data = datautil.replace_keys(data, {"STATUS": "KIOSK_STATUS"}) layer = agolutil.get_item(auth=AGOL_CREDENTIALS, service_id=service_id) res = layer.manager.truncate() agolutil.handle_response(res) adds = agolutil.feature_collection(data) res = layer.edit_features(adds=adds) agolutil.handle_response(res) socratautil.Soda( auth=SOCRATA_CREDENTIALS, records=data, resource=socrata_resource_id, lat_field="latitude", lon_field="longitude", location_field="location", replace=True, ) return len(data)
def get_layer(service_id, layer_id, auth):
    """Return an ArcGIS Online layer item for the given service and layer ids."""
    return agolutil.get_item(
        auth=auth,
        service_id=service_id,
        layer_id=layer_id,
        item_type="layer",
    )
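# Example invocation of get_layer (a sketch; the service id below is a
# hypothetical placeholder, not a real item in this org):
#
#   layer = get_layer(
#       service_id="0123456789abcdef0123456789abcdef",
#       layer_id=0,
#       auth=AGOL_CREDENTIALS,
#   )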
def agol_pub(records, cfg_dataset, replace):
    """Upsert or replace records on an ArcGIS Online feature service.

    Args:
        records (list): Records to publish.
        cfg_dataset (dict): Dataset config, including service_id and,
            optionally, location_fields and primary_key.
        replace (bool): If True, truncate the layer before adding records;
            otherwise delete matching records by primary key, then add.

    Returns:
        bool: True on success.

    Raises:
        Exception: If the layer truncate fails.
    """
    if cfg_dataset.get("location_fields"):
        lat_field = cfg_dataset["location_fields"]["lat"]
        lon_field = cfg_dataset["location_fields"]["lon"]
    else:
        lat_field = None
        lon_field = None

    layer = agolutil.get_item(
        auth=AGOL_CREDENTIALS, service_id=cfg_dataset["service_id"]
    )

    if replace:
        res = layer.manager.truncate()

        if not res.get("success"):
            raise Exception("AGOL truncate failed.")

    else:
        """
        Delete objects by primary key. The ArcGIS API does not currently
        support an upsert method. The Python API defines one via the
        layer.append method, but it is apparently still under development.
        So our "upsert" consists of a delete by primary key followed by an
        add.
        """
        primary_key = cfg_dataset.get("primary_key")

        delete_ids = [record[primary_key] for record in records]
        delete_ids = ", ".join(f"'{x}'" for x in delete_ids)

        # generate a SQL-like where statement to identify records for deletion
        where = "{} in ({})".format(primary_key, delete_ids)
        res = layer.delete_features(where=where)
        agolutil.handle_response(res)

    for i in range(0, len(records), 1000):
        # upload features in chunks of 1,000
        print(i)
        adds = agolutil.feature_collection(
            records[i : i + 1000], lat_field=lat_field, lon_field=lon_field
        )
        res = layer.edit_features(adds=adds)
        agolutil.handle_response(res)

    return True
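# The 1,000-record batching in agol_pub reflects AGOL's practical limit on
# edit payload sizes. A minimal, generic sketch of that chunking pattern (the
# helper name `_chunked` is ours, not part of this repo):

def _chunked(seq, size=1000):
    # yield successive size-length slices of seq
    for i in range(0, len(seq), size):
        yield seq[i : i + size]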
def main():
    global config

    args = cli_args()

    auth = KNACK_CREDENTIALS[args.app_name]

    records_processed = 0

    last_run_date = args.last_run_date

    if not last_run_date or args.replace:
        # replace the dataset by setting the last run date to a long, long
        # time ago; the arrow package needs a specific date and time format
        last_run_date = "1970-01-01"

    """
    We include a filter in our API call to limit to records which have been
    modified on or after the date the last time this job ran successfully.
    The Knack API supports filter requests by date only (not time), so we
    must apply an additional filter on the data after we receive it.
    """
    for cfg in config.items():
        # fetch data for all config objects
        cfg[1]["records"] = fetch_records(cfg[1], last_run_date, auth)

    config["work_order_signs_locations"]["records"] = process_locations(
        config["work_order_signs_locations"]["records"],
        config["work_order_signs_locations"]["geometry_field_name"],
        config["work_order_signs_locations"]["primary_key"],
    )

    config["work_orders_signs_asset_spec_actuals"]["records"] = append_locations_to_specs(config)

    config["work_orders_signs"]["records"] = append_locations_work_orders(config)

    # drop work orders with no locations
    config["work_orders_signs"]["records"] = [
        x for x in config["work_orders_signs"]["records"] if x.get("points")
    ]

    # extract the attachment url from each attachment record
    for record in config["work_orders_attachments"]["records"]:
        if record.get("ATTACHMENT"):
            record["ATTACHMENT_URL"] = record.get("ATTACHMENT")
            record.pop("ATTACHMENT")

    for name, cfg in config.items():
        if not cfg.get("service_id"):
            # ignore config objects that do not have service ids, i.e., do
            # not have agol content, e.g., the locations object, which was
            # merged into other layers
            continue

        update_layer = agolutil.get_item(
            auth=AGOL_CREDENTIALS,
            service_id=cfg["service_id"],
            layer_id=cfg["layer_id"],
            item_type=cfg["item_type"],
        )

        if args.replace:
            res = update_layer.delete_features(where="1=1")
            agolutil.handle_response(res)

        else:
            """
            Delete objects by primary key in chunks. The ArcGIS API does not
            currently support an upsert method. The Python API defines one
            via the layer.append method, but it is apparently still under
            development. So our "upsert" consists of a delete by primary key
            followed by an add.
            """
            primary_key = cfg.get("primary_key")

            for i in range(0, len(cfg["records"]), 1000):
                delete_ids = [
                    record.get(primary_key)
                    for record in cfg["records"][i : i + 1000]
                ]

                delete_ids = ", ".join(f"'{x}'" for x in delete_ids)

                # generate a SQL-like where statement to identify records
                # for deletion
                where = "{} in ({})".format(primary_key, delete_ids)
                res = update_layer.delete_features(where=where)
                agolutil.handle_response(res)

        for i in range(0, len(cfg["records"]), 1000):
            # insert agol features in chunks:
            # assemble an arcgis feature collection from the records
            records = agolutil.feature_collection(
                cfg["records"][i : i + 1000], lat_field="y", lon_field="x"
            )

            # insert new features
            res = update_layer.edit_features(adds=records)
            agolutil.handle_response(res)

            records_processed += len(records)

    return records_processed
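# The delete-by-primary-key steps above build a SQL-like `where` clause from
# quoted record ids. A standalone sketch of that pattern (hypothetical helper
# name; the single-quoting assumes string-valued keys):

def _delete_where_sketch(primary_key, ids):
    # e.g., _delete_where_sketch("ATD_WORK_ORDER_ID", ["a1", "b2"])
    # returns "ATD_WORK_ORDER_ID in ('a1', 'b2')"
    return "{} in ({})".format(primary_key, ", ".join(f"'{x}'" for x in ids))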
def main():
    # just reading all the data into memory because we expect < 1 MB of data
    with open(PERMITS_FILE, "r") as fin:
        reader = csv.DictReader(fin)
        permits = [row for row in reader]

    permits = index_by_key(permits, "PERMIT_RSN")

    with open(SEMGENTS_FILE, "r") as fin:
        reader = csv.DictReader(fin)
        segments = [row for row in reader]

    # **number of segments scoring**
    permits_with_segments = segments_by_permit(segments)

    permits_weighted_with_seg_count = score_permits_by_segment_count(
        permits_with_segments
    )

    # join segment weight to permits
    permits = append_key(
        permits,
        permits_weighted_with_seg_count,
        FIELD_CFG["street_segments"]["score_key"],
    )

    # join segment id list to permits
    permits = append_key(permits, permits_with_segments, "street_segments")

    # **duration scoring**
    permits = score_permits_by_duration(permits)

    # **segment road class scoring**
    segment_ids = [
        segment[FIELD_CFG["street_segments"]["segment_id_key"]]
        for segment in segments
    ]

    # remove dupes
    segment_ids = list(set(segment_ids))

    # query segment data from ArcGIS Online in chunks
    chunksize = 500
    segment_features = []
    segments_with_zones = {}

    # get arcgis feature layers
    segment_layer = agolutil.get_item(
        auth=AGOL_CREDENTIALS,
        service_id=SEGMENT_LAYER_CFG["service_id"],
        layer_id=SEGMENT_LAYER_CFG["layer_id"],
    )

    inspector_layer = agolutil.get_item(
        auth=AGOL_CREDENTIALS,
        service_id=INSPECTOR_LAYER_CFG["service_id"],
        layer_id=INSPECTOR_LAYER_CFG["layer_id"],
    )

    segments_with_road_class = {}
    segments_with_zones = {}
    segments_with_zones_and_road_class = {}

    for i in range(0, len(segment_ids), chunksize):
        # get road class
        segment_features_subset = get_segment_data(
            segment_ids[i : i + chunksize], segment_layer, SEGMENT_LAYER_CFG
        )

        segments_with_road_class_subset = parse_road_class(
            segment_features_subset, SEGMENT_LAYER_CFG["primary_key"]
        )

        segments_with_road_class.update(segments_with_road_class_subset)

        # get inspector area via intersect query
        segment_with_zones_subset = get_inspector_areas(
            segment_features_subset, inspector_layer, INSPECTOR_LAYER_CFG
        )

        segments_with_zones.update(segment_with_zones_subset)

    # merge zones and road class data
    for key in segments_with_zones:
        segments_with_zones_and_road_class[key] = {}

        segments_with_zones_and_road_class[key]["inspector_zones"] = (
            segments_with_zones[key]["inspector_zones"]
        )

        segments_with_zones_and_road_class[key]["road_class"] = (
            segments_with_road_class[key]["road_class"]
        )

    # TODO: only doing small chunks now; fix this to play with
    # segments_with_zones_and_road_class
    # segment_road_class = index_by_key(segment_road_class, SEGMENT_LAYER_CFG["primary_key"])

    permits = get_max_road_class(permits, segments_with_zones_and_road_class)

    permits = score_permits_by_road_class(
        permits,
        FIELD_CFG["road_classes"]["source_key"],
        FIELD_CFG["road_classes"]["score_key"],
    )

    # add inspector zones to permits dict
    permits = merge_inspector_zones(permits, segments_with_zones_and_road_class)

    # **score DAPCZ segments**
    permits = score_dapcz_segments(permits, DAPCZ_SEGMENTS)

    permits = stringify_list(permits, "inspector_zones")
    permits = stringify_list(permits, "road_classes")
    permits = stringify_list(permits, "street_segments")

    # **total up the score**
    score_keys = get_all_score_keys(FIELD_CFG)
    permits = get_total_scores(permits, score_keys)

    # **write to csv**
    output_data = [permits[permit_id] for permit_id in permits.keys()]

    with open(OUTPUT_FILE, "w") as fout:
        # assume fieldnames are consistent across all records, so just take
        # the keys from the first entry as fieldnames
        writer = csv.DictWriter(fout, fieldnames=output_data[0].keys())
        writer.writeheader()

        for row in output_data:
            writer.writerow(row)
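# index_by_key is used above to re-key a list of csv rows by a column value.
# A sketch of the assumed behavior (an illustration, not necessarily the
# repo's exact implementation; later rows would silently overwrite earlier
# rows that share the same key):

def _index_by_key_sketch(rows, key):
    return {row[key]: row for row in rows}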
def main():
    args = cli_args()

    auth = KNACK_CREDENTIALS[args.app_name]

    records_processed = 0

    last_run_date = args.last_run_date

    if not last_run_date or args.replace:
        # replace the dataset by setting the last run date to a long, long
        # time ago; the arrow package needs a specific date and time format
        last_run_date = "1970-01-01"

    """
    We include a filter in our API call to limit to records which have been
    modified on or after the date the last time this job ran successfully.
    The Knack API supports filter requests by date only (not time), so we
    must apply an additional filter on the data after we receive it.
    """
    for cfg in config:
        print(cfg["name"])

        filters = knackutil.date_filter_on_or_after(
            last_run_date, cfg["modified_date_field_id"]
        )

        kn = knackpy_wrapper(cfg, auth, filters=filters)

        if kn.data:
            # filter data for records that have been modified after the last
            # job run (see comment above)
            last_run_timestamp = arrow.get(last_run_date).timestamp * 1000
            kn.data = filter_by_date(
                kn.data, cfg["modified_date_field"], last_run_timestamp
            )

        if not kn.data:
            records_processed += 0
            continue

        records = kn.data

        if cfg.get("name") == "markings_work_orders":
            # markings work order geometries are retrieved from AGOL;
            # reduce to unique segment ids from all records
            segment_ids = datautil.unique_from_list_field(
                records, list_field=cfg["geometry_record_id_field"]
            )

            if segment_ids:
                geometry_layer = agolutil.get_item(
                    auth=AGOL_CREDENTIALS,
                    service_id=cfg["geometry_service_id"],
                    layer_id=cfg["geometry_layer_id"],
                )

                source_geometries_all = []
                chunksize = 200

                for i in range(0, len(segment_ids), chunksize):
                    # fetch agol source geometries in chunks
                    where_ids = ", ".join(
                        f"'{x}'" for x in segment_ids[i : i + chunksize]
                    )

                    if where_ids:
                        where = "{} in ({})".format(
                            cfg["geometry_record_id_field"], where_ids
                        )

                        source_geometries_chunk = geometry_layer.query(
                            where=where,
                            outFields=cfg["geometry_record_id_field"],
                        )

                        if not source_geometries_chunk:
                            raise Exception(
                                "No features returned from source geometry layer query"
                            )

                        source_geometries_all.extend(source_geometries_chunk)

                records = append_paths(
                    kn.data,
                    source_geometries_all,
                    path_id_field=cfg["geometry_record_id_field"],
                )

                global work_order_geometries
                work_order_geometries = copy.deepcopy(records)

        elif cfg.get("name") == "markings_jobs":
            # get data from markings records
            records = get_paths_from_work_orders(records)

        if cfg.get("extract_attachment_url"):
            for record in records:
                if record.get("ATTACHMENT"):
                    record["ATTACHMENT_URL"] = record.get("ATTACHMENT")
                    record.pop("ATTACHMENT")

        # AGOL has unexpected handling of empty values
        records = remove_empty_strings(records)

        update_layer = agolutil.get_item(
            auth=AGOL_CREDENTIALS,
            service_id=cfg["service_id"],
            layer_id=cfg["layer_id"],
            item_type=cfg["item_type"],
        )

        if args.replace:
            # we used to delete all features with a `where="1=1"` statement,
            # but that fails with a large number of features, so we now fetch
            # the OIDs of existing features and pass them to the delete
            existing_features = update_layer.query(
                return_geometry=False, out_fields="OBJECTID"
            )

            oids = [
                str(f.attributes.get("OBJECTID"))
                for f in existing_features.features
            ]

            if oids:
                oid_chunksize = 500

                for i in range(0, len(oids), oid_chunksize):
                    # delete in chunks because Esri doesn't like deleting
                    # lots of features at once
                    deletes = ", ".join(oids[i : i + oid_chunksize])
                    res = update_layer.delete_features(deletes=deletes)
                    agolutil.handle_response(res)

        else:
            """
            Delete objects by primary key. The ArcGIS API does not currently
            support an upsert method. The Python API defines one via the
            layer.append method, but it is apparently still under
            development. So our "upsert" consists of a delete by primary key
            followed by an add.
            """
            primary_key = cfg.get("primary_key")

            delete_ids = [record.get(primary_key) for record in records]
            delete_ids = ", ".join(f"'{x}'" for x in delete_ids)

            # generate a SQL-like where statement to identify records for
            # deletion
            where = "{} in ({})".format(primary_key, delete_ids)
            res = update_layer.delete_features(where=where)
            agolutil.handle_response(res)

        for i in range(0, len(records), 1000):
            # insert agol features in chunks
            adds = agolutil.feature_collection(
                records[i : i + 1000], spatial_ref=102739
            )
            res = update_layer.edit_features(adds=adds)
            agolutil.handle_response(res)

            records_processed += len(adds)

    return records_processed
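# filter_by_date compares each record's modified-date value (Knack stores
# timestamps in milliseconds) against the last-run timestamp computed above.
# A sketch of the assumed behavior (an illustration, not the repo's actual
# implementation; records without a modified date are dropped here):

def _filter_by_date_sketch(data, date_field, timestamp_ms):
    return [row for row in data if row.get(date_field, 0) >= timestamp_ms]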
def main():
    SOCRATA_RESOURCE_ID = "jqhg-imb3"

    FIELDNAMES = [
        "COMMENT_FIELD2",
        "START_DATE",
        "SITE_CODE",
        "COMMENT_FIELD1",
        "GLOBALID",
        "DATA_FILE",
        "COMMENT_FIELD4",
        "COMMENT_FIELD3",
        "LATITUDE",
        "LONGITUDE",
    ]

    CONFIG = {
        "service_url": "http://services.arcgis.com/0L95CJ0VTaxqcmED/arcgis/rest/services/Traffic_Count_Location/FeatureServer/0/",
        "service_id": "3c56025e645045998ee499c0725dfebb",
        "params": {
            "f": "json",
            "where": "1=1",
            "outFields": "*",
            "returnGeometry": True,
            "outSr": 4326,  # return WGS84
        },
    }

    layer = agolutil.get_item(
        auth=AGOL_CREDENTIALS, service_id=CONFIG["service_id"]
    )

    features = layer.query(**CONFIG["params"])

    features_add = []

    for feature in features:
        feature_add = {
            key.upper(): value
            for key, value in feature.attributes.items()
            if key.upper() in FIELDNAMES
        }

        # truncate coordinates
        feature_add["LONGITUDE"] = float(str(feature.geometry["x"])[:10])
        feature_add["LATITUDE"] = float(str(feature.geometry["y"])[:10])

        if feature_add.get("START_DATE"):
            feature_add["START_DATE"] = parse_mills(feature_add["START_DATE"])

        features_add.append(feature_add)

    socratautil.Soda(
        auth=SOCRATA_CREDENTIALS,
        resource=SOCRATA_RESOURCE_ID,
        records=features_add,
        lat_field="latitude",
        lon_field="longitude",
        location_field="location",
        replace=True,
    )

    return len(features_add)
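# Note on the coordinate handling in main() above: slicing str(x)[:10] keeps
# at most 10 characters (including sign and decimal point), so the retained
# precision varies with the coordinate's magnitude. A fixed-precision
# alternative, if that behavior were ever revisited (hypothetical helper,
# not part of this repo):

def _truncate_coord(value, places=6):
    return round(float(value), places)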