def upload_zip(source_name, output_directory):
    """CLI option to upload zip files from an update process run with load_to_drive=False."""
    feature = spec_manager.get_feature(source_name)
    output_name = feature['name']

    # Paths to the zips produced by the earlier update run
    new_gdb_zip = os.path.join(output_directory, '{}_gdb.zip'.format(output_name))
    new_shape_zip = os.path.join(output_directory, '{}_shp.zip'.format(output_name))
    new_hash_zip = os.path.join(output_directory, '{}_hash.zip'.format(output_name))

    if not os.path.exists(new_gdb_zip) and \
       not os.path.exists(new_shape_zip) and \
       not os.path.exists(new_hash_zip):
        raise Exception('Required zip files do not exist at {}'.format(output_directory))

    # Upload to drive
    load_zip_to_drive(feature, 'gdb_id', new_gdb_zip, feature['parent_ids'])
    print 'GDB loaded'
    load_zip_to_drive(feature, 'shape_id', new_shape_zip, feature['parent_ids'])
    print 'Shape loaded'
    load_zip_to_drive(feature, 'hash_id', new_hash_zip, [HASH_DRIVE_FOLDER])
    print 'Hash loaded'

    spec_manager.save_spec_json(feature)

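# Example usage of upload_zip (a minimal sketch; the SGID name and output
# directory below are hypothetical and assume an earlier
# update_feature(..., load_to_drive=False) run left the three zips in place):
#
#   upload_zip('SGID10.RECREATION.Trails', r'C:\temp\drive_output')
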
def get_feature_download_links():
    features = spec_manager.get_feature_specs()
    feature_links = {}
    for feature in features:
        feature_links[feature['sgid_name'].lower()] = {
            'gdb': driver.get_download_link(feature['gdb_id']),
            'shp': driver.get_download_link(feature['shape_id'])
        }

    spec_manager.save_spec_json(feature_links, 'data/feature_downloads.json')

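# The feature_downloads.json written above maps lowercased SGID names to
# download links for the gdb and shapefile zips. A sketch of the expected
# shape (the link values are placeholders, not real Drive URLs):
#
#   {
#       "sgid10.recreation.trails": {
#           "gdb": "<download link for gdb_id>",
#           "shp": "<download link for shape_id>"
#       }
#   }
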
def set_cycle_by_date_in_name():
    dated = re.compile(r'\d{4}')
    for feature in spec_manager.get_feature_specs():
        sgid_name = feature['sgid_name']
        matches = dated.findall(sgid_name)
        if len(matches) == 1 and feature['update_cycle'] == 'day':
            print sgid_name, matches
            feature['update_cycle'] = spec_manager.UPDATE_CYCLES.NEVER
            spec_manager.save_spec_json(feature)

def set_spec_update_types(feature_name, update_type):
    import arcpy
    if not arcpy.Exists(
            os.path.join(
                r'Database Connections\Connection to sgid.agrc.utah.gov.sde',
                feature_name)):
        print feature_name, 'does not exist'
        return

    feature = spec_manager.get_feature(feature_name)
    feature['update_cycle'] = update_type
    spec_manager.save_spec_json(feature)

def reassign_feature_parents():
    ided_feature_specs = get_spec_catnames(
        spec_manager.get_feature_spec_path_list(), True)
    for spec_name in ided_feature_specs:
        print spec_name
        spec = ided_feature_specs[spec_name]
        old_parent_id = spec['parent_ids'][0]
        new_parent_id = get_name_folder_id(spec['name'], old_parent_id)
        user_drive.change_file_parent(spec['gdb_id'], old_parent_id, new_parent_id)
        user_drive.change_file_parent(spec['shape_id'], old_parent_id, new_parent_id)
        spec['parent_ids'] = [new_parent_id]
        spec_manager.save_spec_json(spec)

def sync_feature_to_package(feature_spec, package_spec):
    """Remove the package from the feature if the feature is not listed in the package."""
    feature_list = [f.lower() for f in package_spec['feature_classes']]
    if feature_spec['sgid_name'].lower() not in feature_list:
        feature_spec['packages'].remove(package_spec['name'])

        if package_spec['gdb_id'] in drive.get_parents(feature_spec['gdb_id']):
            get_user_drive().remove_file_parent(feature_spec['gdb_id'],
                                                package_spec['gdb_id'])
            print 'remove package gdb_id'
        if package_spec['shape_id'] in drive.get_parents(feature_spec['shape_id']):
            get_user_drive().remove_file_parent(feature_spec['shape_id'],
                                                package_spec['shape_id'])
            print 'remove package shape_id'

        spec_manager.save_spec_json(feature_spec)

def sync_package_and_features(package_spec):
    """Add the package to its features if it is not already there."""
    feature_list = [f.lower() for f in package_spec['feature_classes']]
    current_gdb_ids = []
    current_shp_ids = []
    for feature_spec in [spec_manager.get_feature(f) for f in feature_list]:
        package_list = [p.lower() for p in feature_spec['packages']]
        if package_spec['name'].lower() not in package_list:
            feature_spec['packages'].append(package_spec['name'])

        if package_spec['gdb_id'] not in drive.get_parents(feature_spec['gdb_id']):
            get_user_drive().add_file_parent(feature_spec['gdb_id'],
                                             package_spec['gdb_id'])
            print 'add package gdb_id'
        if package_spec['shape_id'] not in drive.get_parents(feature_spec['shape_id']):
            get_user_drive().add_file_parent(feature_spec['shape_id'],
                                             package_spec['shape_id'])
            print 'add package shape_id'

        current_gdb_ids.append(feature_spec['gdb_id'])
        current_shp_ids.append(feature_spec['shape_id'])
        spec_manager.save_spec_json(feature_spec)

    # Remove any files in the package folders that are no longer in the package
    folder_gdb_ids = [
        name_id[1]
        for name_id in drive.list_files_in_directory(package_spec['gdb_id'])
    ]
    for gdb_id in folder_gdb_ids:
        if gdb_id not in current_gdb_ids:
            get_user_drive().remove_file_parent(gdb_id, package_spec['gdb_id'])
            print 'remove package gdb_id'

    folder_shp_ids = [
        name_id[1]
        for name_id in drive.list_files_in_directory(package_spec['shape_id'])
    ]
    for shp_id in folder_shp_ids:
        if shp_id not in current_shp_ids:
            get_user_drive().remove_file_parent(shp_id, package_spec['shape_id'])
            print 'remove package shp_id'

def set_cycle_by_csv():
    update_csv = 'data/update_cycle.csv'
    update_cycles = {}
    with open(update_csv, 'rb') as cycles:
        reader = csv.DictReader(cycles)
        for row in reader:
            name = row['SGID name']
            update = row['Update frequency']
            if update == 'on-demand':
                update = 'demand'
            update_cycles[name] = update

    for feature in spec_manager.get_feature_specs():
        sgid_name = feature['sgid_name']
        if sgid_name in update_cycles:
            feature['update_cycle'] = update_cycles[sgid_name]
            spec_manager.save_spec_json(feature)
            # print sgid_name, feature['update_cycle'], update_cycles[sgid_name]
        else:
            print sgid_name, 'not found!!!'

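# set_cycle_by_csv expects data/update_cycle.csv to contain at least the two
# columns read above. A hypothetical example of the layout (the rows shown are
# illustrative only):
#
#   SGID name,Update frequency
#   SGID10.RECREATION.Trails,day
#   SGID10.BOUNDARIES.Counties,on-demand
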
def init_drive_package(package):
    """
    Create Drive folders for a package and store their Drive ids.

    package: package spec
    """
    category_id = get_category_folder_id(package['category'], UTM_DRIVE_FOLDER)
    category_packages_id = get_category_folder_id('packages', category_id)
    drive_folder_id = get_category_folder_id(package['name'], category_packages_id)
    gdb_folder_id = get_category_folder_id(package['name'] + '_gdb', drive_folder_id)
    shp_folder_id = get_category_folder_id(package['name'] + '_shp', drive_folder_id)

    if drive_folder_id not in package['parent_ids']:
        package['parent_ids'].append(drive_folder_id)
    if gdb_folder_id != package['gdb_id']:
        package['gdb_id'] = gdb_folder_id
    if shp_folder_id != package['shape_id']:
        package['shape_id'] = shp_folder_id

    spec_manager.save_spec_json(package)

def update_package(workspace, package_name, output_directory, load_to_drive=True, force_update=False):
    """Update a package on drive."""
    print '\nStarting package:', package_name
    package = spec_manager.get_package(package_name)

    # Check for category folder
    category_id = get_category_folder_id(package['category'], UTM_DRIVE_FOLDER)
    category_packages_id = get_category_folder_id('packages', category_id)
    drive_folder_id = get_category_folder_id(package['name'], category_packages_id)
    if drive_folder_id not in package['parent_ids']:
        package['parent_ids'].append(drive_folder_id)

    package_gdb = arcpy.CreateFileGDB_management(output_directory, package['name'])[0]
    package_shape = os.path.join(output_directory, package['name'])
    os.makedirs(package_shape)

    print 'Copying...'
    for feature_class in package['feature_classes']:
        spec_name = spec_manager.create_feature_spec_name(feature_class)
        feature_spec = os.path.join('features', spec_name)
        if not os.path.exists(feature_spec):
            print 'New feature'
            update_feature(workspace, feature_class,
                           os.path.join(output_directory, '..'),
                           load_to_drive, force_update)
        spec = spec_manager.get_feature(feature_class, [package_name], create=True)

        is_table = arcpy.Describe(os.path.join(
            workspace, feature_class)).datasetType.lower() == 'table'
        copier = _get_copier(is_table)

        feature_output_name = spec['name']
        out_fc_path = os.path.join(package_gdb, feature_output_name)
        shape_directory_path = os.path.join(output_directory, '..', feature_output_name)
        fc_path = os.path.join(shape_directory_path + '.gdb', feature_output_name)
        if os.path.exists(shape_directory_path) and arcpy.Exists(fc_path):
            # print feature_class, 'local'
            copier(fc_path, out_fc_path)
            shutil.copytree(shape_directory_path,
                            os.path.join(package_shape, feature_output_name))
        else:
            # print feature_class, 'workspace'
            copier(os.path.join(workspace, feature_class), out_fc_path)
            s_dir = os.path.join(package_shape, feature_output_name)
            os.makedirs(s_dir)
            copier(os.path.join(workspace, feature_class),
                   os.path.join(s_dir, feature_output_name))

    # Zip up outputs
    new_gdb_zip = os.path.join(output_directory, '{}_gdb.zip'.format(package['name']))
    new_shape_zip = os.path.join(output_directory, '{}_shp.zip'.format(package['name']))
    print 'Zipping...'
    zip_folder(package_gdb, new_gdb_zip)
    zip_folder(package_shape, new_shape_zip)

    if load_to_drive:
        # Upload to drive
        load_zip_to_drive(package, 'gdb_id', new_gdb_zip, package['parent_ids'])
        load_zip_to_drive(package, 'shape_id', new_shape_zip, package['parent_ids'])
        print 'All zips loaded'

    spec_manager.save_spec_json(package)

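# Example usage of update_package (a sketch; the package name and scratch
# directory are hypothetical, and the SDE connection path is the one used
# elsewhere in this module):
#
#   update_package(r'Database Connections\Connection to sgid.agrc.utah.gov.sde',
#                  'some_package_name',
#                  r'C:\temp\package_temp',
#                  load_to_drive=False)
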
def update_feature(workspace, feature_name, output_directory, load_to_drive=True, force_update=False):
    """
    Update a feature class on drive if it has changed.

    workspace: string path or connection to a workspace that contains feature_name
    feature_name: string SGID name such as SGID10.RECREATION.Trails
    """
    print '\nStarting feature:', feature_name
    feature_time = clock()

    input_feature_path = os.path.join(workspace, feature_name)
    feature = spec_manager.get_feature(feature_name, create=True)

    if not src_data_exists(input_feature_path):
        now = datetime.now()
        log_sheet_values = [[
            '{}.{}'.format(feature['category'], feature['name']), 'Does not exist',
            now.strftime('%m/%d/%Y'), now.strftime('%H:%M:%S.%f'),
            clock() - feature_time
        ]]
        sheets.append_row(LOG_SHEET_ID, LOG_SHEET_NAME, log_sheet_values)
        return []

    # Handle new packages and changes to feature['packages'] list
    for package in [spec_manager.get_package(p) for p in feature['packages']]:
        sync_feature_to_package(feature, package)

    category_id = get_category_folder_id(feature['category'], UTM_DRIVE_FOLDER)
    # Check for name folder
    name_id = get_category_folder_id(feature['name'], category_id)
    if name_id not in feature['parent_ids']:
        feature['parent_ids'].append(name_id)

    output_name = feature['name']

    # Get the last hash from drive to check changes
    past_hash_directory = os.path.join(output_directory, 'pasthashes')
    hash_field = 'hash'
    past_hash_zip = os.path.join(output_directory, output_name + '_hash' + '.zip')
    past_hash_store = os.path.join(past_hash_directory, output_name + '_hash',
                                   output_name + '_hashes.csv')
    past_hashes = None
    if feature['hash_id']:
        drive.download_file(feature['hash_id'], past_hash_zip)
        print 'Past hashes downloaded'
        unzip(past_hash_zip, past_hash_directory)
        past_hashes = get_hash_lookup(past_hash_store, hash_field)
    else:
        past_hashes = {}

    # Check for changes
    # Create directory for feature hashes
    hash_directory = os.path.join(output_directory, output_name + '_hash')
    if not os.path.exists(hash_directory):
        os.makedirs(hash_directory)
    hash_store = os.path.join(hash_directory, '{}_hashes.csv'.format(output_name))

    # Get fields for hashing
    fields = set([fld.name for fld in arcpy.ListFields(input_feature_path)])
    fields = _filter_fields(fields)

    shape_token = None
    if arcpy.Describe(input_feature_path).datasetType.lower() != 'table':
        shape_token = 'SHAPE@WKT'

    try:
        change_count = detect_changes(input_feature_path, fields, past_hashes,
                                      hash_store, shape_token)
    except RuntimeError:
        change_count = -1

    packages = []
    if change_count != 0 or force_update:
        packages = feature['packages']

        # Copy data local
        print 'Copying...'
        fc_directory, shape_directory = create_outputs(output_directory,
                                                       input_feature_path,
                                                       output_name)

        # Zip up outputs
        new_gdb_zip = os.path.join(output_directory, '{}_gdb.zip'.format(output_name))
        new_shape_zip = os.path.join(output_directory, '{}_shp.zip'.format(output_name))
        new_hash_zip = os.path.join(output_directory, '{}_hash.zip'.format(output_name))
        print 'Zipping...'
        zip_folder(fc_directory, new_gdb_zip)
        zip_folder(shape_directory, new_shape_zip)
        zip_folder(hash_directory, new_hash_zip)

        # Upload to drive
        if load_to_drive:
            load_zip_to_drive(feature, 'gdb_id', new_gdb_zip, feature['parent_ids'])
            load_zip_to_drive(feature, 'shape_id', new_shape_zip, feature['parent_ids'])
            load_zip_to_drive(feature, 'hash_id', new_hash_zip, [HASH_DRIVE_FOLDER])
            print 'All zips loaded'

        spec_manager.save_spec_json(feature)

    now = datetime.now()
    if change_count == -1:
        change_count = 'Change detection error'
    log_sheet_values = [[
        '{}.{}'.format(feature['category'], feature['name']), change_count,
        now.strftime('%m/%d/%Y'), now.strftime('%H:%M:%S.%f'),
        clock() - feature_time
    ]]
    sheets.append_row(LOG_SHEET_ID, LOG_SHEET_NAME, log_sheet_values)

    return packages

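# Example usage of update_feature (a sketch; the scratch directories are
# hypothetical). It returns the list of package names containing the feature
# when a change was detected (or force_update is set), and an empty list
# otherwise, so the caller can refresh the affected packages:
#
#   sde = r'Database Connections\Connection to sgid.agrc.utah.gov.sde'
#   packages = update_feature(sde, 'SGID10.RECREATION.Trails', r'C:\temp\feature_temp')
#   for package_name in packages:
#       update_package(sde, package_name, r'C:\temp\package_temp')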