def delete_saved_search(search_name, owner):
    """Deletes an existing saved search. This is used when overwriting a saved search."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        get_url = "{0}/api/rest/display/ssearch/export".format(voyager_server)
        get_response = requests.get(get_url, verify=verify_ssl, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        if get_response.status_code == 200:
            delete_url = ''
            saved_searches = get_response.json()['searches']
            for ss in saved_searches:
                if ss['title'] == search_name:
                    search_id = ss['id']
                    delete_url = "{0}/api/rest/display/ssearch/{1}".format(voyager_server, search_id)
                    break
            if delete_url:
                res = requests.delete(delete_url, verify=verify_ssl, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
                if not res.status_code == 200:
                    if hasattr(res, 'content'):
                        return False, res.json()['error']
                    else:
                        return False, 'Error deleting saved search: {0}: {1}'.format(search_name, res.reason)
                else:
                    return True, ''
            else:
                return True, ''
        else:
            return False, get_response.json()['message']
    except requests.HTTPError as http_error:
        return False, http_error
    except requests.exceptions.InvalidURL as url_error:
        return False, url_error
    except requests.RequestException as re:
        return False, re
def get_existing_saved_search_query(search_name, owner):
    """Retrieves the query from an existing saved search."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        get_url = "{0}/api/rest/display/ssearch/export".format(voyager_server)
        get_response = requests.get(get_url, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        search_query = ''
        if get_response.status_code == 200:
            saved_searches = get_response.json()['searches']
            for ss in saved_searches:
                if ss['title'] == search_name:
                    search_query = ss['path']
        return True, search_query
    except requests.HTTPError as http_error:
        return False, http_error
    except requests.exceptions.InvalidURL as url_error:
        return False, url_error
    except requests.RequestException as re:
        return False, re
def delete_items(fq_query, q_query, thumbs, metadata, layers, owner):
    """Delete items from the index using the Voyager API."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        # voyager_server = "http://localhost:8888"
        if not q_query and fq_query:
            query = fq_query
            fq = ""
        else:
            query = q_query
            fq = "&fq={0}".format(fq_query)
        url = "{0}/api/rest/index/records?query={1}{2}&items=true&thumbnails={3}&metadata={4}&layers={5}".format(voyager_server, query, fq, thumbs, metadata, layers)
        response = requests.delete(url, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        if response.status_code == 200:
            return True, 'Deleted items: {0}'.format(response.json())
        else:
            return False, 'Error deleting items: {0}: {1}'.format('delete_items', response.reason)
    except requests.HTTPError as http_error:
        return False, http_error
    except requests.exceptions.InvalidURL as url_error:
        return False, url_error
    except requests.RequestException as re:
        return False, re
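# Illustrative only (not part of the original module): a minimal sketch of how
# delete_items might be wired into a task's execute() function. The parameter
# names 'fq' and 'q' and the 'true' flags passed for thumbnails/metadata/layers
# are hypothetical; only the request dict shape ('params', 'owner') follows the
# other tasks in this section.
def _delete_items_example(request):
    params = request['params']
    fq_query = task_utils.get_parameter_value(params, 'fq', 'value')
    q_query = task_utils.get_parameter_value(params, 'q', 'value')
    ok, message = delete_items(fq_query, q_query, 'true', 'true', 'true', request['owner'])
    if not ok:
        status_writer.send_state(status.STAT_FAILED, '{0}'.format(message))
    return ok, message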
def execute(request): """Move files to a target folder. :param request: json as a dict. """ moved = 0 skipped = 0 errors = 0 new_folder = False parameters = request['params'] target_folder = task_utils.get_parameter_value(parameters, 'target_folder', 'value') flatten_results = task_utils.get_parameter_value(parameters, 'flatten_results', 'value') if not os.path.exists(request['folder']): os.makedirs(request['folder']) if target_folder: if not os.path.exists(target_folder): os.makedirs(target_folder) new_folder = True num_results, response_index = task_utils.get_result_count(parameters) task_utils.CHUNK_SIZE = num_results # Query the index for results in groups of 25. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'move_files') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]),headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs'], True, True) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = move_files(input_items, target_folder, flatten_results) moved += result[0] errors += result[1] skipped += result[2] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'move_files') # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), moved, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ copied = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] target_dirs = '' target_folder = task_utils.get_parameter_value(parameters, 'target_folder', 'value') flatten_results = task_utils.get_parameter_value(parameters, 'flatten_results', 'value') if not flatten_results: target_dirs = os.path.splitdrive(target_folder)[1] flatten_results = 'false' if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Query the index for results in groups of 25. result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'copy_files') i = 0. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs'], list_components=True) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = copy_files(input_items, target_folder, flatten_results, target_dirs) copied += result[0] errors += result[1] skipped += result[2] # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), copied, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ copied = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] target_dirs = '' target_folder = task_utils.get_parameter_value(parameters, 'target_folder', 'value') flatten_results = task_utils.get_parameter_value(parameters, 'flatten_results', 'value') if not flatten_results: target_dirs = os.path.splitdrive(target_folder)[1] flatten_results = 'false' if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Query the index for results in groups of 25. result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'copy_files') i = 0. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs'], list_components=True) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = copy_files(input_items, target_folder, flatten_results, target_dirs) copied += result[0] errors += result[1] skipped += result[2] # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), copied, skipped, errors, errors_reasons, skipped_reasons)
def create_saved_search(search_name, groups, owner, query, has_q):
    """Create the saved search using Voyager API."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        url = "{0}/api/rest/display/ssearch".format(voyager_server)
        if query:
            template_id = get_display_tempate_id(owner)
            if has_q:
                if query.endswith('/'):
                    path = "/q=" + query + 'disp={0}'.format(template_id)
                else:
                    path = "/q=" + query + '/disp={0}'.format(template_id)
            else:
                if query.endswith('/'):
                    path = "/" + query + 'disp={0}'.format(template_id)
                else:
                    path = "/" + query + '/disp={0}'.format(template_id)
            query = {
                "title": str(search_name),
                "owner": str(owner['name']),
                "path": str(path),
                "share": groups,
                "overwrite": True
            }
        else:
            query = {
                "title": search_name,
                "owner": owner['name'],
                "path": "",
                "share": groups
            }
        response = requests.post(url, json.dumps(query), verify=verify_ssl, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        if response.status_code == 200:
            return True, 'Created saved search: {0}'.format(response.json()['title'])
        else:
            if hasattr(response, 'content'):
                return False, response.json()['error']
            else:
                return False, 'Error creating saved search: {0}: {1}'.format(search_name, response.reason)
    except requests.HTTPError as http_error:
        return False, http_error
    except requests.exceptions.InvalidURL as url_error:
        return False, url_error
    except requests.RequestException as re:
        return False, re
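# Illustrative only (not part of the original module): a sketch of the overwrite
# flow these helpers support -- delete any saved search with the same title,
# then recreate it -- assuming delete_saved_search and create_saved_search live
# in the same task module, as they appear here. The owner dict shape
# ({'name': ...}) follows create_saved_search above; the title, share groups
# and query string below are hypothetical.
def _overwrite_saved_search_example(owner):
    deleted, message = delete_saved_search('My Search', owner)
    if not deleted:
        return False, message
    # has_q=True because the hypothetical query below is a q= expression.
    return create_saved_search('My Search', ['_ALL'], owner, 'q=format:(application/pdf)', True)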
def get_display_tempate_id(owner):
    """Returns the id of the default display template, or '' on failure."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        get_url = "{0}/api/rest/display/config/default".format(voyager_server)
        get_response = requests.get(get_url, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        if get_response.status_code == 200:
            return get_response.json()['id']
        else:
            return ''
    except requests.HTTPError:
        return ''
    except requests.exceptions.InvalidURL:
        return ''
    except requests.RequestException:
        return ''
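# Illustrative only (not part of the original module): the functions above all
# derive the Voyager base URL from the Solr URL passed on the command line,
# assuming sys.argv[2] looks like "solr=http://host:8888/solr/v0". A helper
# like the sketch below could centralize that parsing; it simply mirrors the
# split('=')/split('solr') expression used throughout this section.
def _voyager_base_url_example(solr_arg):
    # "solr=http://host:8888/solr/v0" -> "http://host:8888"
    solr_url = solr_arg.split('=')[1]
    return solr_url.split('solr')[0][:-1]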
def execute(request): """Exports search results a CSV, shapefile or XML document. :param request: json as a dict. """ chunk_size = task_utils.CHUNK_SIZE file_name = task_utils.get_parameter_value(request['params'], 'file_name', 'value') fields = task_utils.get_parameter_value(request['params'], 'fields', 'value') out_format = task_utils.get_parameter_value(request['params'], 'output_format', 'value') if not 'path' in fields and 'path:[absolute]' in fields: fields.append('path') if 'geo' in fields: i_geo = fields.index('geo') fields.remove('geo') fields.insert(i_geo, '[geo]') # Create the temporary workspace. task_folder = os.path.join(request['folder'], 'temp') if not os.path.exists(task_folder): os.makedirs(task_folder) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} num_results, response_index = task_utils.get_result_count(request['params']) query = '{0}/select?&wt=json&fl={1}'.format(sys.argv[2].split('=')[1], ','.join(fields)) if 'query' in request['params'][response_index]: # Voyager Search Traditional UI for p in request['params']: if 'query' in p: request_qry = p['query'] break if 'voyager.list' in request_qry: query += '&voyager.list={0}'.format(request_qry['voyager.list']) # Replace spaces with %20 & remove \\ to avoid HTTP Error 400. if 'fq' in request_qry: try: query += '&fq={0}'.format(request_qry['fq'].replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['fq']: query += '&fq={0}'.format(qry).replace("\\", "").replace(' ', '%20') if 'q' in request_qry: try: query += '&q={0}'.format(request_qry['q'].replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['q']: query += '&q={0}'.format(qry).replace("\\", "").replace(' ', '%20') if 'place' in request_qry: try: query += '&place={0}'.format(request_qry['place'].replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['place']: query += '&place={0}'.format(qry).replace("\\", "").replace(' ', '%20') if 'place.op' in request_qry: query += '&place.op={0}'.format(request_qry['place.op']) query += '&rows={0}&start={1}' exported_cnt = 0. 
for i in xrange(0, num_results, chunk_size): req = urllib2.Request(query.replace('{0}', str(chunk_size)).replace('{1}', str(i)), headers=headers) for n in urllib2.urlopen(req): jobs = eval(n.replace('null', '"null"'))['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) exported_cnt += chunk_size if exported_cnt > num_results: status_writer.send_percent(100, 'exported: 100%', 'export_results') else: percent_done = exported_cnt / num_results status_writer.send_percent(percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') else: # Voyager Search Portal/Cart UI ids = [] for p in request['params']: if 'ids' in p: ids = p['ids'] break groups = task_utils.grouper(list(ids), chunk_size, '') i = 0 for group in groups: i += len([v for v in group if not v == '']) req = urllib2.Request(query + '&ids={0}'.format(','.join(group)), headers=headers) results = urllib2.urlopen(req) jobs = eval(results.read())['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) percent_done = float(i) / num_results status_writer.send_percent(percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') # Zip up outputs. if exported_count == 0: task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) else: task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) zip_file = task_utils.zip_data(task_folder, 'output.zip') shutil.move(zip_file, os.path.join(os.path.dirname(task_folder), os.path.basename(zip_file)))
def execute(request): """Replace the workspace path for layer files and map document layers. :param request: json as a dict. """ updated = 0 skipped = 0 parameters = request['params'] backup = task_utils.get_parameter_value(parameters, 'create_backup', 'value') old_data_source = task_utils.get_parameter_value(parameters, 'old_data_source', 'value').lower() new_data_source = task_utils.get_parameter_value(parameters, 'new_data_source', 'value') if not os.path.exists(request['folder']): os.makedirs(request['folder']) if not arcpy.Exists(new_data_source): status_writer.send_state( status.STAT_FAILED, _('{0} does not exist').format(new_data_source)) return if os.path.splitext(new_data_source)[1] not in ('.gdb', '.mdb', '.sde'): new_dataset = os.path.basename(new_data_source) dsc = arcpy.Describe(os.path.dirname(new_data_source)) else: dsc = arcpy.Describe(new_data_source) new_dataset = '' wks_type = 'NONE' if dsc.dataType == 'FeatureDataset': new_workspace = dsc.path wks_type = get_workspace_type(new_workspace) elif dsc.dataType == 'Workspace': new_workspace = dsc.catalogPath wks_type = get_workspace_type(new_workspace) elif dsc.dataType == 'Folder': dsc = arcpy.Describe(new_data_source) new_workspace = dsc.catalogPath if new_dataset.endswith('.shp'): wks_type = 'SHAPEFILE_WORKSPACE' new_dataset = new_dataset.rsplit('.shp')[0] else: if arcpy.Describe(new_data_source).dataType == 'RasterDataset': wks_type = 'RASTER_WORKSPACE' elif dsc.dataType == 'CadDrawingDataset': new_workspace = dsc.path wks_type = 'CAD_WORKSPACE' else: new_workspace = os.path.dirname(new_data_source) num_results, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}'.format(sys.argv[2].split('=')[1], '/select?&wt=json') fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'replace_data_source') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "{0}&rows={1}&start={2}".format( fl, task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get(query + "{0}&rows={1}&start={2}".format( fl, task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) input_items = task_utils.get_input_items( results.json()['response']['docs'], True) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) result = replace_data_source(input_items, old_data_source, new_workspace, new_dataset, wks_type, backup, headers) updated += result[0] skipped += result[1] status_writer.send_percent( i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'replace_data_source') # Update state if necessary. 
if skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), updated, skipped, skipped_details=skipped_reasons)
def execute(request): """Exports search results a CSV, shapefile or XML document. :param request: json as a dict. """ # Get SSL trust setting. verify_ssl = task_utils.get_ssl_mode() chunk_size = task_utils.CHUNK_SIZE file_name = task_utils.get_parameter_value(request['params'], 'file_name', 'value') fields = task_utils.get_parameter_value(request['params'], 'fields', 'value') out_format = task_utils.get_parameter_value(request['params'], 'output_format', 'value') if not 'path' in fields and 'path:[absolute]' in fields: fields.append('path') if 'geo' in fields: i_geo = fields.index('geo') fields.remove('geo') fields.insert(i_geo, '[geo]') # Create the temporary workspace. task_folder = os.path.join(request['folder'], 'temp') if not os.path.exists(task_folder): os.makedirs(task_folder) headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } num_results, response_index = task_utils.get_result_count( request['params']) if len(sys.argv) == 2: query = '{0}/solr/v0/select?&wt=json&fl={1}'.format( 'http://localhost:8888', ','.join(fields)) else: query = '{0}/select?&wt=json&fl={1}'.format(sys.argv[2].split('=')[1], ','.join(fields)) if 'query' in request['params'][response_index]: # Voyager Search Traditional UI for p in request['params']: if 'query' in p: request_qry = p['query'] break if 'voyager.list' in request_qry: query += '&voyager.list={0}'.format(request_qry['voyager.list']) # Replace spaces with %20 & remove \\ to avoid HTTP Error 400. if 'fq' in request_qry: try: if isinstance(request_qry['fq'], list): for fq in request_qry['fq']: try: query += '&fq={0}'.format(str(fq)) except UnicodeEncodeError: query += '&fq={0}'.format(str(fq.encode('utf-8'))) else: query += '&fq={0}'.format(request_qry['fq']) if '{!expand}' in query: query = query.replace('{!expand}', '') if '{!tag' in query: tag = re.findall('{!(.*?)}', query) if tag: tag_str = "{!" + tag[0] + "}" query = query.replace(tag_str, '') query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['fq']: query += '&fq={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'q' in request_qry: try: query += '&q={0}'.format(request_qry['q'].replace("\\", "")) query = query.replace(' ', '%20') except UnicodeEncodeError: query += '&q={0}'.format( request_qry['q'].encode('utf-8').replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['q']: query += '&q={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'place' in request_qry: try: query += '&place={0}'.format(request_qry['place'].replace( "\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['place']: query += '&place={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'place.op' in request_qry: query += '&place.op={0}'.format(request_qry['place.op']) query += '&rows={0}&start={1}' exported_cnt = 0. 
for i in range(0, num_results, chunk_size): url = query.replace('{0}', str(chunk_size)).replace('{1}', str(i)) res = requests.get(url, verify=verify_ssl, headers=headers) jobs = res.json()['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) exported_cnt += chunk_size if exported_cnt > num_results: status_writer.send_percent(100, 'exported: 100%', 'export_results') else: percent_done = exported_cnt / num_results status_writer.send_percent( percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') else: # Voyager Search Portal/Cart UI ids = [] for p in request['params']: if 'ids' in p: ids = p['ids'] break groups = task_utils.grouper(list(ids), chunk_size, '') i = 0 for group in groups: i += len([v for v in group if not v == '']) results = requests.get(query + '&ids={0}'.format(','.join(group)), verify=verify_ssl, headers=headers) jobs = eval(results.text)['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) percent_done = float(i) / num_results status_writer.send_percent( percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') # Zip up outputs. if exported_count == 0: status_writer.send_state(status.STAT_FAILED) task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) else: task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) zip_file = task_utils.zip_data(task_folder, '{0}.zip'.format(file_name)) shutil.move( zip_file, os.path.join(os.path.dirname(task_folder), os.path.basename(zip_file)))
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ created = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } if not os.path.exists(request['folder']): os.makedirs(request['folder']) # meta_folder = task_utils.get_parameter_value(parameters, 'meta_data_folder', 'value') voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1] url = "{0}/api/rest/system/settings".format(voyager_server) response = requests.get(url, verify=verify_ssl, headers=headers) meta_folder = response.json()['folders']['meta'] result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'create_layer_files') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) docs = results.json()['response']['docs'] # docs = eval(results.read().replace('false', 'False').replace('true', 'True'))['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = [] for doc in docs: if 'path' in doc: input_items.append( (doc['id'], doc['path'], doc['name'], doc['location'])) result = create_layer_file(input_items, meta_folder, voyager_server, headers) created += result[0] errors += result[1] skipped += result[2] try: shutil.copy2( os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', '_thumb.png'), request['folder']) except IOError: pass # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), created, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Zips all input files to output.zip. :param request: json as a dict. """ zipped = 0 skipped = 0 parameters = request['params'] flatten_results = task_utils.get_parameter_value(parameters, 'flatten_results', 'value') if not flatten_results: flatten_results = False zip_file_location = request['folder'] if not os.path.exists(zip_file_location): os.makedirs(request['folder']) output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'output' zip_file = os.path.join(zip_file_location, '{0}.zip'.format(output_file_name)) zipper = task_utils.ZipFileManager(zip_file, 'w', zipfile.ZIP_DEFLATED) num_results, response_index = task_utils.get_result_count(parameters) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) #query = '{0}{1}{2}'.format("http://localhost:8888/solr/v0", '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'zip_files') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs'], list_components=True) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs'], list_components=True) result = zip_files(zipper, input_items, zip_file_location, flatten_results) zipped += result[0] skipped += result[1] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'zip_files') zipper.close() if zipped == 0: status_writer.send_state(status.STAT_FAILED, _('No results were zipped.')) return # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), zipped, skipped, skipped_details=skipped_reasons)
def execute(request): """Builds raster pyramids for input raster datasets. :param request: json as a dict. """ processed = 0 skipped = 0 parameters = request['params'] # Get the extent for for which to use to calculate statistics. extent = '' try: try: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if ext: sr = task_utils.get_spatial_reference("4326") extent = task_utils.from_wkt(ext, sr) except KeyError: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'feature') if ext: extent = arcpy.Describe(ext).extent except KeyError: pass horizontal_skip_factor = task_utils.get_parameter_value( parameters, 'horizontal_skip_factor', 'value') vertical_skip_factor = task_utils.get_parameter_value( parameters, 'vertical_skip_factor', 'value') ignore_pixel_values = task_utils.get_parameter_value( parameters, 'ignore_pixel_values', 'value') # Create the task folder to hold report files. task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } num_results, response_index = task_utils.get_result_count(parameters) if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper( list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'calculate_raster_statistics') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format( task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get( query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format( task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) input_items = task_utils.get_input_items( results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) result = calculate_raster_statistics(input_items, extent, horizontal_skip_factor, vertical_skip_factor, ignore_pixel_values) processed += result[0] skipped += result[1] status_writer.send_percent( i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'calculate_raster_statistics') else: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) processed, skipped = calculate_raster_statistics( input_items, extent, horizontal_skip_factor, vertical_skip_factor, ignore_pixel_values, True) # Update state if necessary. if skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), processed, skipped, skipped_details=skipped_reasons)
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ created = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] if not os.path.exists(request['folder']): os.makedirs(request['folder']) meta_folder = task_utils.get_parameter_value(parameters, 'meta_data_folder', 'value') result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'create_layer_files') i = 0. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] # docs = eval(results.read().replace('false', 'False').replace('true', 'True'))['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = [] for doc in docs: if 'path' in doc: input_items.append((doc['id'], doc['path'], doc['name'], doc['location'])) result = create_layer_file(input_items, meta_folder) created += result[0] errors += result[1] skipped += result[2] try: shutil.copy2(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', '_thumb.png'), request['folder']) except IOError: pass # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), created, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Clips selected search results using the clip geometry. :param request: json as a dict. """ clipped = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] # Retrieve the clip features. clip_features = task_utils.get_parameter_value(parameters, 'clip_features', 'value') # Retrieve the coordinate system code. out_coordinate_system = int(task_utils.get_parameter_value(parameters, 'output_projection', 'code')) # Retrieve the output format, create mxd and output file name parameter values. out_format = task_utils.get_parameter_value(parameters, 'output_format', 'value') create_mxd = task_utils.get_parameter_value(parameters, 'create_mxd', 'value') output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'clip_results' # Create the temporary workspace if clip_feature_class: out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Set the output coordinate system. if not out_coordinate_system == 0: # Same as Input out_sr = task_utils.get_spatial_reference(out_coordinate_system) arcpy.env.outputCoordinateSystem = out_sr # Set the output workspace. status_writer.send_status(_('Setting the output workspace...')) if not out_format == 'SHP': out_workspace = arcpy.CreateFileGDB_management(out_workspace, 'output.gdb').getOutput(0) arcpy.env.workspace = out_workspace # Query the index for results in groups of 25. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl # Get the Clip features by id. id = clip_features['id'] clip_query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', "&fl=id,path,fullpath:[absolute],absolute_path:[absolute],[lyrFile],[geo]&q=id:{0}".format(id)) clip_result = requests.get(clip_query, headers=headers) clipper = clip_result.json()['response']['docs'][0] if 'absolute_path' in clipper and not clipper['absolute_path'].startswith('s3'): clip_features = clipper['absolute_path'] elif '[lyrFile]' in clipper: clip_features = clipper['[lyrFile]'] elif '[geo]' in clipper: clip_features = arcpy.AsShape(clipper['[geo]']).projectAs(arcpy.SpatialReference(4326)) elif 'absolute_path' in clipper and clipper['absolute_path'].startswith('s3'): base_name = os.path.basename(clipper['path']) temp_folder = tempfile.mkdtemp() if '[downloadURL]' in clipper: download = os.path.join(temp_folder, os.path.basename(clipper['[downloadURL]'])) response = requests.get(clipper['[downloadURL]']) with open(download, 'wb') as fp: fp.write(response.content) if download.endswith('.zip'): zip = zipfile.ZipFile(download) zip.extractall(temp_folder) clip_features = os.path.join(temp_folder, base_name) else: clip_features = download else: bbox = clipper['bbox'].split() extent = arcpy.Extent(*bbox) pt_array = arcpy.Array([extent.lowerLeft, extent.upperLeft, extent.upperRight, extent.lowerRight]) clip_features = arcpy.Polygon(pt_array, 4326) query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), 
task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'clip_data') for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['name']].append(doc) if input_rows: result = clip_data(input_rows, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if input_items: result = clip_data(input_items, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _('No items to process. Check if items exist.')) return if arcpy.env.workspace.endswith('.gdb'): out_workspace = os.path.dirname(arcpy.env.workspace) if clipped > 0: try: if out_format == 'MPK': mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') mxd = task_utils.create_mxd(out_workspace, mxd_template, 'output') status_writer.send_status(_("Packaging results...")) task_utils.create_mpk(out_workspace, mxd, files_to_package) shutil.move(os.path.join(out_workspace, 'output.mpk'), os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name))) elif out_format == 'LPK': status_writer.send_status(_("Packaging results...")) task_utils.create_lpk(out_workspace, output_file_name, files_to_package) elif out_format == 'KML': task_utils.convert_to_kml(os.path.join(out_workspace, "output.gdb")) arcpy.env.workspace = '' arcpy.RefreshCatalog(os.path.join(out_workspace, "output.gdb")) try: arcpy.Delete_management(os.path.join(out_workspace, "output.gdb")) except arcpy.ExecuteError: pass zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) else: if create_mxd: mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') task_utils.create_mxd(out_workspace, mxd_template, 'output') zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) except arcpy.ExecuteError as ee: status_writer.send_state(status.STAT_FAILED, _(ee)) sys.exit(1) else: status_writer.send_state(status.STAT_FAILED, _('No output created. Zero inputs were clipped.')) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), clipped, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Exports search results a CSV, shapefile or XML document. :param request: json as a dict. """ chunk_size = task_utils.CHUNK_SIZE file_name = task_utils.get_parameter_value(request['params'], 'file_name', 'value') fields = task_utils.get_parameter_value(request['params'], 'fields', 'value') out_format = task_utils.get_parameter_value(request['params'], 'output_format', 'value') if not 'path' in fields and 'path:[absolute]' in fields: fields.append('path') if 'geo' in fields: i_geo = fields.index('geo') fields.remove('geo') fields.insert(i_geo, '[geo]') # Create the temporary workspace. task_folder = os.path.join(request['folder'], 'temp') if not os.path.exists(task_folder): os.makedirs(task_folder) headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } num_results, response_index = task_utils.get_result_count( request['params']) query = '{0}/select?&wt=json&fl={1}'.format(sys.argv[2].split('=')[1], ','.join(fields)) if 'query' in request['params'][response_index]: # Voyager Search Traditional UI for p in request['params']: if 'query' in p: request_qry = p['query'] break if 'voyager.list' in request_qry: query += '&voyager.list={0}'.format(request_qry['voyager.list']) # Replace spaces with %20 & remove \\ to avoid HTTP Error 400. if 'fq' in request_qry: try: query += '&fq={0}'.format(request_qry['fq'].replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['fq']: query += '&fq={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'q' in request_qry: try: query += '&q={0}'.format(request_qry['q'].replace("\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['q']: query += '&q={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'place' in request_qry: try: query += '&place={0}'.format(request_qry['place'].replace( "\\", "")) query = query.replace(' ', '%20') except AttributeError: for qry in request_qry['place']: query += '&place={0}'.format(qry).replace("\\", "").replace( ' ', '%20') if 'place.op' in request_qry: query += '&place.op={0}'.format(request_qry['place.op']) query += '&rows={0}&start={1}' exported_cnt = 0. 
for i in xrange(0, num_results, chunk_size): req = urllib2.Request(query.replace('{0}', str(chunk_size)).replace( '{1}', str(i)), headers=headers) for n in urllib2.urlopen(req): jobs = eval(n.replace('null', '"null"'))['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) exported_cnt += chunk_size if exported_cnt > num_results: status_writer.send_percent(100, 'exported: 100%', 'export_results') else: percent_done = exported_cnt / num_results status_writer.send_percent( percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') else: # Voyager Search Portal/Cart UI ids = [] for p in request['params']: if 'ids' in p: ids = p['ids'] break groups = task_utils.grouper(list(ids), chunk_size, '') i = 0 for group in groups: i += len([v for v in group if not v == '']) req = urllib2.Request(query + '&ids={0}'.format(','.join(group)), headers=headers) results = urllib2.urlopen(req) jobs = eval(results.read())['response']['docs'] if out_format == 'CSV': export_to_csv(jobs, file_name, task_folder, fields) elif out_format == 'XML': export_to_xml(jobs, file_name, task_folder) elif out_format == 'SHP': export_to_shp(jobs, file_name, task_folder) percent_done = float(i) / num_results status_writer.send_percent( percent_done, '{0}: {1:.0f}%'.format("exported", percent_done * 100), 'export_results') # Zip up outputs. if exported_count == 0: task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) else: task_utils.report(os.path.join(request['folder'], '__report.json'), exported_count, 0, errors_count, errors_reasons) zip_file = task_utils.zip_data(task_folder, 'output.zip') shutil.move( zip_file, os.path.join(os.path.dirname(task_folder), os.path.basename(zip_file)))
def execute(request): """Copies data to an existing geodatabase or feature dataset. :param request: json as a dict. """ added = 0 errors = 0 skipped = 0 global result_count parameters = request["params"] # Get the target workspace location. out_gdb = task_utils.get_parameter_value(parameters, "target_workspace", "value") # Retrieve the coordinate system code. out_coordinate_system = task_utils.get_parameter_value(parameters, "output_projection", "code") if not out_coordinate_system == "0": # Same as Input arcpy.env.outputCoordinateSystem = task_utils.get_spatial_reference(out_coordinate_system) task_folder = request["folder"] if not os.path.exists(task_folder): os.makedirs(task_folder) # Check if the geodatabase exists or if it is a feature dataset. is_fds = False if not os.path.exists(out_gdb): if out_gdb.endswith(".gdb"): arcpy.CreateFileGDB_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status(_("Created output workspace: {0}").format(out_gdb)) elif out_gdb.endswith(".mdb"): arcpy.CreatePersonalGDB_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status(_("Created output workspace: {0}").format(out_gdb)) elif out_gdb.endswith(".sde"): status_writer.send_state(status.STAT_FAILED, _("{0} does not exist").format(out_gdb)) return else: # Possible feature dataset. is_fds = is_feature_dataset(out_gdb) if not is_fds: if os.path.dirname(out_gdb).endswith(".gdb"): if not os.path.exists(os.path.dirname(out_gdb)): arcpy.CreateFileGDB_management( os.path.dirname(os.path.dirname(out_gdb)), os.path.basename(os.path.dirname(out_gdb)) ) arcpy.CreateFeatureDataset_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) elif os.path.dirname(out_gdb).endswith(".mdb"): if not os.path.exists(os.path.dirname(out_gdb)): arcpy.CreatePersonalGDB_management( os.path.dirname(os.path.dirname(out_gdb)), os.path.basename(os.path.dirname(out_gdb)) ) arcpy.CreateFeatureDataset_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status(_("Setting the output workspace...")) arcpy.env.workspace = out_gdb headers = {"x-access-token": task_utils.get_security_token(request["owner"])} result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. 
query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = "{0}{1}{2}".format(sys.argv[2].split("=")[1], "/select?&wt=json", fl) # query = '{0}{1}{2}'.format("http://localhost:8888/solr/v0", '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") query += fq elif "ids" in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]["ids"]), task_utils.CHUNK_SIZE, "") else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") status_writer.send_percent(0.0, _("Starting to process..."), "add_to_geodatabase") for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) elif "ids" in parameters[response_index]: results = requests.get(query + "{0}&ids={1}".format(fl, ",".join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) docs = results.json()["response"]["docs"] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]["response"]["docs"]) input_rows = collections.defaultdict(list) for doc in docs: if "path" not in doc: input_rows[doc["title"]].append(doc) if input_rows: result = add_to_geodatabase(input_rows, out_gdb, is_fds) added += result[0] errors += result[1] skipped += result[2] if input_items: result = add_to_geodatabase(input_items, out_gdb, is_fds) added += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _("No items to process. Check if items exist.")) return # Update state if necessary. if skipped > 0 or errors > 0: status_writer.send_state(status.STAT_WARNING, _("{0} results could not be processed").format(skipped + errors)) task_utils.report( os.path.join(task_folder, "__report.json"), added, skipped, errors, errors_reasons, skipped_reasons )
def execute(request): """Converts each input dataset to kml (.kmz). :param request: json as a dict. """ converted = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Get the boundary box extent for input to KML tools. extent = '' try: try: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if ext: sr = task_utils.get_spatial_reference("4326") extent = task_utils.from_wkt(ext, sr) except KeyError: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'feature') if ext: extent = arcpy.Describe(ext).extent except KeyError: pass # Get the output file name. output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'kml_results' result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'convert_to_kml') headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['name']].append(doc) if input_rows: result = convert_to_kml(input_rows, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if input_items: result = convert_to_kml(input_items, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _('No items to process. Check if items exist.')) return # Zip up kmz files if more than one. 
if converted > 1: status_writer.send_status("Converted: {}".format(converted)) zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) shutil.copy2(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', '_thumb.png'), request['folder']) elif converted == 1: try: kml_file = glob.glob(os.path.join(out_workspace, '*.kmz'))[0] tmp_lyr = arcpy.KMLToLayer_conversion(kml_file, out_workspace, 'kml_layer') task_utils.make_thumbnail(tmp_lyr.getOutput(0), os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: pass shutil.move(kml_file, os.path.join(request['folder'], os.path.basename(kml_file))) # Update state if necessary. if skipped > 0 or errors > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), converted, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Builds raster pyramids for input raster datasets. :param request: json as a dict. """ processed = 0 skipped = 0 parameters = request['params'] resampling_method = task_utils.get_parameter_value(parameters, 'resampling_method', 'value') # Advanced options compression_method = task_utils.get_parameter_value(parameters, 'compression_method', 'value') compression_quality = task_utils.get_parameter_value(parameters, 'compression_quality', 'value') # Create the task folder to hold report files. task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} num_results, response_index = task_utils.get_result_count(parameters) if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'build_raster_pyramids') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = build_pyramids(input_items, compression_method, compression_quality, resampling_method) processed += result[0] skipped += result[1] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'build_raster_pyramids') else: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) processed, skipped = build_pyramids(input_items, compression_method, compression_quality, resampling_method, True) # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), processed, skipped, skipped_details=skipped_reasons)
def execute(request): """Report duplicate items. :param request: json as a dict. """ if not os.path.exists(request['folder']): os.makedirs(request['folder']) date_str = str(datetime.date.today()) report_file = os.path.join(request['folder'], 'Duplicates_{0}.csv'.format(date_str)) request_owner = request['owner'] headers = {'x-access-token': task_utils.get_security_token(request_owner)} fields = list(task_utils.get_parameter_value(request['params'], 'fields', 'value')) required_fields = ("name", "title", "bytes", "id", "format", "absolute_path") [fields.remove(f) for f in required_fields if f in fields] fields_str = "" if fields: fields_str = ",".join(fields) voyager_instance = sys.argv[2].split('=')[1] query = voyager_instance + "/select?q={!func}joindf(md5,md5)&f.md5.facet.mincount=2&f.contentHash.facet.mincount=2&f.schemaHash.facet.mincount=2&sort=md5 desc&start=0&rows=1&fl=id,title,name:[name],format,fullpath:[absolute],absolute_path:[absolute],download:[downloadURL],format_type,bytes,layerURL:[lyrURL],md5,path,name&fq={!frange l=2}{!func}joindf(md5,md5)&wt=json" results = requests.get(query, auth=("admin", "admin"), headers=headers) result_count = results.json()['response']['numFound'] if result_count == 0: status_writer.send_state(status.STAT_WARNING, "No duplicates found.") return duplicates = collections.defaultdict(list) groups = grouper(range(0, result_count), 25, '') url = voyager_instance.split("/solr")[0] req = requests.get("{0}/api/rest/i18n/field/format".format(url), headers=headers) formats = req.json()['VALUE']['format'] for group in groups: query = "%s/select?q={!func}joindf(md5,md5)&f.md5.facet.mincount=2&f.contentHash.facet.mincount=2&f.schemaHash.facet.mincount=2&sort=md5 desc&start=%s&rows=25&fl=id,title,name:[name],format,fullpath:[absolute],absolute_path:[absolute],download:[downloadURL],format_type,bytes,layerURL:[lyrURL],md5,path,name,%s&fq={!frange l=2}{!func}joindf(md5,md5)&wt=json" % (voyager_instance, group[0], fields_str) results = requests.get(query, auth=("admin", "admin"), headers=headers) docs = results.json()['response']['docs'] for doc in docs: file_size = 0 if "bytes" in doc: file_size = float(doc['bytes']) format_type = "" if "format" in doc and not doc["format"] in ("application/vnd.esri.gdb.file.data", "application/vnd.esri.gdb.personal.data"): try: format_type = formats[doc["format"]] except KeyError: format_type = doc["format"] else: continue file_path = "" if "absolute_path" in doc: file_path = doc["absolute_path"] id = "" if "id" in doc: id = doc["id"] name = "" if "name" in doc: name = doc["name"] elif "title" in doc: name = doc["title"] field_dict = {"FILE NAME": name, "FILE SIZE": file_size, "FORMAT": format_type, "ID": id, "FILE PATH": file_path} extra_fields = {} if fields: for fld in fields: if fld in doc: extra_fields[fld.upper()] = doc[fld] field_dict.update(extra_fields) duplicates[doc['md5']].append(field_dict) # Find total number of items in the index. all_query = "%s/select?disp=default&sort=score desc&place.op=within&start=0&fl=id&voyager.config.id=ace4bb77&wt=json" % (voyager_instance) results = requests.get(all_query, auth=("admin", "admin"), headers=headers) index_count = results.json()['response']['numFound'] duplicate_count = 0 total_file_size = 0 # Write all the duplicates to the report file. 
with open(report_file, "wb") as f: if extra_fields: keys = ["MD5", "FILE NAME", "FILE SIZE", "FORMAT", "ID", "FILE PATH"] + extra_fields.keys() else: keys = ["MD5", "FILE NAME", "FILE SIZE", "FORMAT", "ID", "FILE PATH"] writer = csv.DictWriter(f, fieldnames=keys) writer.writeheader() for md5, values in duplicates.items(): writer.writerow({}) val_count = len(values) if val_count > 1: duplicate_count += val_count - 1 for val in values: total_file_size += val["FILE SIZE"] * (val_count - 1) val["MD5"] = md5 val["FILE SIZE"] = convert_size(val["FILE SIZE"]) writer.writerow(val) # Report a summary to the report file. pct_dups = float(duplicate_count) / index_count with open(report_file, "ab") as f: writer = csv.DictWriter(f, fieldnames=["DUPLICATE COUNT", "INDEX COUNT", "PERCENT DUPLICATES", "TOTAL DUPLICATE FILE SIZE"]) writer.writerow({}) writer.writerow({}) writer.writeheader() writer.writerow({"DUPLICATE COUNT": duplicate_count, "INDEX COUNT": index_count, "PERCENT DUPLICATES": '{:.0f}%'.format(pct_dups * 100), "TOTAL DUPLICATE FILE SIZE": convert_size(total_file_size)}) status_writer.send_status("DUPLICATE COUNT: {0}".format(duplicate_count)) status_writer.send_status("INDEX COUNT: {0}".format(index_count)) status_writer.send_status("PERCENT DUPLICATES: {0}".format('{:.0f}%'.format(pct_dups * 100))) status_writer.send_status("TOTAL DUPLICATE FILE SIZE: {0}".format(convert_size(total_file_size))) status_writer.send_state(status.STAT_SUCCESS)
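# convert_size() is called above to render byte counts in the duplicates report
# but is not defined in this snippet; the sketch below is an assumption about
# what it does (human-readable size string), not the module's own helper.
import math


def convert_size(size_bytes):
    """Return a human-readable file size, e.g. 1536 -> '1.5 KB'."""
    if not size_bytes:
        return '0 B'
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    i = min(int(math.floor(math.log(size_bytes, 1024))), len(units) - 1)
    return '{0} {1}'.format(round(size_bytes / math.pow(1024, i), 2), units[i])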
def delete_items(fq_query, q_query, thumbs, metadata, layers, owner): """Delete items from the index using the Voyager API.""" try: voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1] # voyager_server = "http://localhost:8888" if not q_query and fq_query: query = fq_query fq = "" else: query = q_query fq = "&fq={0}".format(fq_query) url = "{0}/api/rest/index/records?query={1}{2}&items=true&thumbnails={3}&metadata={4}&layers={5}".format(voyager_server, query, fq, thumbs, metadata, layers) response = requests.delete(url, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)}) if response.status_code == 200: return True, 'Deleted items: {0}'.format(response.json()) else: return False, 'Error deleting items: {0}: {1}'.format('delete_items', response.reason) except requests.HTTPError as http_error: return False, http_error except requests.exceptions.InvalidURL as url_error: return False, url_error except requests.RequestException as re: return False, re
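# A usage sketch for delete_items(); the query strings, flag values and owner
# below are placeholders, not values taken from this task.
# deleted, message = delete_items(fq_query='format:application/pdf',
#                                 q_query='',
#                                 thumbs='true',
#                                 metadata='true',
#                                 layers='true',
#                                 owner='admin')
# if not deleted:
#     status_writer.send_state(status.STAT_FAILED, '{0}'.format(message))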
def execute(request): """Replace the workspace path for layer files and map document layers. :param request: json as a dict. """ updated = 0 skipped = 0 parameters = request['params'] backup = task_utils.get_parameter_value(parameters, 'create_backup', 'value') old_data_source = task_utils.get_parameter_value(parameters, 'old_data_source', 'value').lower() new_data_source = task_utils.get_parameter_value(parameters, 'new_data_source', 'value') if not os.path.exists(request['folder']): os.makedirs(request['folder']) if not arcpy.Exists(new_data_source): status_writer.send_state(status.STAT_FAILED, _('{0} does not exist').format(new_data_source)) return if os.path.splitext(new_data_source)[1] not in ('.gdb', '.mdb', '.sde'): new_dataset = os.path.basename(new_data_source) dsc = arcpy.Describe(os.path.dirname(new_data_source)) else: dsc = arcpy.Describe(new_data_source) new_dataset = '' wks_type = 'NONE' if dsc.dataType == 'FeatureDataset': new_workspace = dsc.path wks_type = get_workspace_type(new_workspace) elif dsc.dataType == 'Workspace': new_workspace = dsc.catalogPath wks_type = get_workspace_type(new_workspace) elif dsc.dataType == 'Folder': dsc = arcpy.Describe(new_data_source) new_workspace = dsc.catalogPath if new_dataset.endswith('.shp'): wks_type = 'SHAPEFILE_WORKSPACE' new_dataset = new_dataset.rsplit('.shp')[0] else: if arcpy.Describe(new_data_source).dataType == 'RasterDataset': wks_type = 'RASTER_WORKSPACE' elif dsc.dataType == 'CadDrawingDataset': new_workspace = dsc.path wks_type = 'CAD_WORKSPACE' else: new_workspace = os.path.dirname(new_data_source) num_results, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}'.format(sys.argv[2].split('=')[1], '/select?&wt=json') fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'replace_data_source') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "{0}&rows={1}&start={2}".format(fl, task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers= headers) else: results = requests.get(query + "{0}&rows={1}&start={2}".format(fl, task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs'], True) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = replace_data_source(input_items, old_data_source, new_workspace, new_dataset, wks_type, backup, headers) updated += result[0] skipped += result[1] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'replace_data_source') # Update state if necessary. 
    if skipped > 0:
        status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped))
    task_utils.report(os.path.join(request['folder'], '__report.json'), updated, skipped, skipped_details=skipped_reasons)
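# replace_data_source() itself is not included in this snippet. As an
# illustration only (an assumption about intent, not this module's code), the
# new_workspace / new_dataset / wks_type values computed above line up with the
# arguments of arcpy.mapping.Layer.replaceDataSource:
import arcpy


def repoint_layer_file(lyr_path, new_workspace, new_dataset, wks_type):
    """Repoint a saved layer file at a new workspace and save it."""
    lyr = arcpy.mapping.Layer(lyr_path)
    lyr.replaceDataSource(new_workspace, wks_type, new_dataset, False)
    lyr.save()


# repoint_layer_file(r'C:\data\roads.lyr', r'C:\data\new.gdb', 'roads', 'FILEGDB_WORKSPACE')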
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ extracted = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] output_type = task_utils.get_parameter_value(parameters, 'output_format', 'value') task_folder = os.path.join(request['folder'], 'temp') if not os.path.exists(task_folder): os.makedirs(task_folder) if output_type == 'FGDB': arcpy.CreateFileGDB_management(task_folder, 'output.gdb') result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) # query = '{0}{1}{2}'.format("http://localhost:8888/solr/v0", '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') headers = {'x-access-token': task_utils.get_security_token(request['owner'])} status_writer.send_percent(0.0, _('Starting to process...'), 'locate_xt_arcgis_tool') for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = task_utils.get_input_items(docs) if input_items: result = extract(input_items, output_type, task_folder) extracted += result[0] errors += result[1] skipped += result[2] else: status_writer.send_state(status.STAT_FAILED, _('No items to process. Check if items exist.')) return # Zip up outputs. zip_file = task_utils.zip_data(task_folder, 'output.zip') shutil.move(zip_file, os.path.join(os.path.dirname(task_folder), os.path.basename(zip_file))) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), extracted, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Builds raster pyramids for input raster datasets. :param request: json as a dict. """ processed = 0 skipped = 0 parameters = request['params'] resampling_method = task_utils.get_parameter_value(parameters, 'resampling_method', 'value') # Advanced options compression_method = task_utils.get_parameter_value(parameters, 'compression_method', 'value') compression_quality = task_utils.get_parameter_value(parameters, 'compression_quality', 'value') # Create the task folder to hold report files. task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} num_results, response_index = task_utils.get_result_count(parameters) if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'build_raster_pyramids') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = build_pyramids(input_items, compression_method, compression_quality, resampling_method) processed += result[0] skipped += result[1] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'build_raster_pyramids') else: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) processed, skipped = build_pyramids(input_items, compression_method, compression_quality, resampling_method, True) # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), processed, skipped, skipped_details=skipped_reasons)
def execute(request): """Builds raster pyramids for input raster datasets. :param request: json as a dict. """ processed = 0 skipped = 0 parameters = request['params'] # Get the extent for for which to use to calculate statistics. extent = '' try: try: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if ext: sr = task_utils.get_spatial_reference("4326") extent = task_utils.from_wkt(ext, sr) except KeyError: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'feature') if ext: extent = arcpy.Describe(ext).extent except KeyError: pass horizontal_skip_factor = task_utils.get_parameter_value(parameters, 'horizontal_skip_factor', 'value') vertical_skip_factor = task_utils.get_parameter_value(parameters, 'vertical_skip_factor', 'value') ignore_pixel_values = task_utils.get_parameter_value(parameters, 'ignore_pixel_values', 'value') # Create the task folder to hold report files. task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} num_results, response_index = task_utils.get_result_count(parameters) if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'calculate_raster_statistics') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) # input_items = task_utils.get_input_items(eval(results.read().replace('false', 'False').replace('true', 'True'))['response']['docs']) input_items = task_utils.get_input_items(results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) result = calculate_raster_statistics(input_items, extent, horizontal_skip_factor, vertical_skip_factor, ignore_pixel_values) processed += result[0] skipped += result[1] status_writer.send_percent(i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'calculate_raster_statistics') else: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) processed, skipped = calculate_raster_statistics(input_items, extent, horizontal_skip_factor, vertical_skip_factor, ignore_pixel_values, True) # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), processed, skipped, skipped_details=skipped_reasons)
def execute(request): """Adds a field and calculates it to some value. :param request: json as a dict. """ created = 0 skipped = 0 errors = 0 warnings = 0 global result_count parameters = request['params'] if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Parameter values field_name = task_utils.get_parameter_value(parameters, 'field_name', 'value') field_type = task_utils.get_parameter_value(parameters, 'field_type', 'value') field_value = task_utils.get_parameter_value(parameters, 'field_value', 'value') # Query the index for results in groups of 25. headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl + ',links' query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'add_field') for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) docs = results.json()['response']['docs'] input_items = [] for doc in docs: if 'path' in doc: if 'links' in doc: links = eval(doc['links']) input_items.append((doc['id'], doc['absolute_path'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['id'], doc['absolute_path'])) result = add_field(input_items, field_name, field_type, field_value) created += result[0] errors += result[1] skipped += result[2] warnings += result[3] # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), created, skipped, errors, errors_reasons, skipped_reasons, warnings, warnings_reasons)
def create_snapshot(snapshot_name, owner, query=''):
    """Exports a named snapshot of the index (optionally limited to a query) using the Voyager API."""
    try:
        voyager_server = sys.argv[2].split('=')[1].split('solr')[0][:-1]
        if query:
            url = "{0}/api/rest/snapshot/export?name={1}&query={2}&format=CORE".format(voyager_server, snapshot_name, query)
        else:
            url = "{0}/api/rest/snapshot/export?name={1}&format=CORE".format(voyager_server, snapshot_name)
        response = requests.post(url, headers={'Content-type': 'application/json', 'x-access-token': task_utils.get_security_token(owner)})
        if response.status_code == 200:
            return True, 'Created snapshot: {0}'.format(response.json()['target']['file'])
        else:
            return False, 'Error creating snapshot: {0}: {1}'.format(snapshot_name, 'Error {0}: {1}'.format(response.status_code, response.reason))
    except requests.HTTPError as http_error:
        return False, http_error
    except requests.exceptions.InvalidURL as url_error:
        return False, url_error
    except requests.RequestException as re:
        return False, re
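# A usage sketch for create_snapshot(); the snapshot name, owner and query are
# placeholders.
# ok, message = create_snapshot('nightly_export', owner='admin', query='format:application/pdf')
# status_writer.send_status(message)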
def execute(request): """Package inputs to an Esri map or layer package. :param request: json as a dict. """ errors = 0 skipped = 0 layers = [] files = [] app_folder = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parameters = request['params'] out_format = task_utils.get_parameter_value(parameters, 'output_format', 'value') summary = task_utils.get_parameter_value(parameters, 'summary') tags = task_utils.get_parameter_value(parameters, 'tags') output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name') if not output_file_name: output_file_name = 'package_results' # Get the clip region as an extent object. clip_area = None try: clip_area_wkt = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') clip_area = task_utils.get_clip_region(clip_area_wkt) except (KeyError, ValueError): pass out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) num_results, response_index = task_utils.get_result_count(parameters) # if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } status_writer.send_status(_('Starting to process...')) for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) input_items = task_utils.get_input_items( results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) layers, files, errors, skipped = get_items(input_items, out_workspace) # else: # input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) # layers, files, errors, skipped = get_items(input_items, out_workspace) if errors == num_results: status_writer.send_state(status.STAT_FAILED, _('No results to package')) return try: if out_format == 'MPK': shutil.copyfile( os.path.join(app_folder, 'supportfiles', 'MapTemplate.mxd'), os.path.join(out_workspace, 'output.mxd')) mxd = arcpy.mapping.MapDocument( os.path.join(out_workspace, 'output.mxd')) if mxd.description == '': mxd.description = os.path.basename(mxd.filePath) df = arcpy.mapping.ListDataFrames(mxd)[0] for layer in layers: arcpy.mapping.AddLayer(df, layer) mxd.save() status_writer.send_status( _('Generating {0}. Large input {1} will take longer to process.' 
.format('MPK', 'results'))) if arcpy.GetInstallInfo()['Version'] == '10.0': arcpy.PackageMap_management( mxd.filePath, os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name)), 'PRESERVE', extent=clip_area) elif arcpy.GetInstallInfo()['Version'] == '10.1': arcpy.PackageMap_management( mxd.filePath, os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name)), 'PRESERVE', extent=clip_area, ArcGISRuntime='RUNTIME', version='10', additional_files=files, summary=summary, tags=tags) else: arcpy.PackageMap_management( mxd.filePath, os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name)), 'PRESERVE', extent=clip_area, arcgisruntime='RUNTIME', version='10', additional_files=files, summary=summary, tags=tags) # Create a thumbnail size PNG of the mxd. task_utils.make_thumbnail( mxd, os.path.join(request['folder'], '_thumb.png')) else: status_writer.send_status( _('Generating {0}. Large input {1} will take longer to process.' .format('LPK', 'results'))) for layer in layers: if layer.description == '': layer.description = layer.name if arcpy.GetInstallInfo()['Version'] == '10.0': arcpy.PackageLayer_management( layers, os.path.join(os.path.dirname(out_workspace), '{0}.lpk'.format(output_file_name)), 'PRESERVE', extent=clip_area, version='10') else: arcpy.PackageLayer_management( layers, os.path.join(os.path.dirname(out_workspace), '{0}.lpk'.format(output_file_name)), 'PRESERVE', extent=clip_area, version='10', additional_files=files, summary=summary, tags=tags) # Create a thumbnail size PNG of the mxd. task_utils.make_thumbnail( layers[0], os.path.join(request['folder'], '_thumb.png')) except (RuntimeError, ValueError, arcpy.ExecuteError) as ex: status_writer.send_state(status.STAT_FAILED, repr(ex)) return # Update state if necessary. if errors > 0 or skipped: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), num_results - (skipped + errors), skipped, errors, errors_reasons, skipped_reasons)
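# The packaging branches above key off the installed ArcGIS Desktop release; a
# standalone sketch of that check (the set of versions handled is this task's,
# the variable names are illustrative):
import arcpy

install_version = arcpy.GetInstallInfo()['Version']       # e.g. '10.0', '10.1', '10.3.1'
supports_runtime_args = install_version not in ('10.0',)  # 10.1+ signatures accept the runtime/summary/tags arguments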
def execute(request): """Mosaics input raster datasets into a new raster dataset or mosaic dataset. :param request: json as a dict. """ status_writer = status.Writer() parameters = request['params'] target_workspace = task_utils.get_parameter_value(parameters, 'target_workspace', 'value') output_name = task_utils.get_parameter_value(parameters, 'output_dataset_name', 'value') out_coordinate_system = task_utils.get_parameter_value(parameters, 'output_projection', 'code') # Advanced options output_raster_format = task_utils.get_parameter_value(parameters, 'raster_format', 'value') compression_method = task_utils.get_parameter_value(parameters, 'compression_method', 'value') compression_quality = task_utils.get_parameter_value(parameters, 'compression_quality', 'value') arcpy.env.compression = '{0} {1}'.format(compression_method, compression_quality) if output_raster_format in ('FileGDB', 'MosaicDataset'): if not os.path.splitext(target_workspace)[1] in ('.gdb', '.mdb', '.sde'): status_writer.send_state(status.STAT_FAILED, _('Target workspace must be a geodatabase')) return task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) clip_area = None if not output_raster_format == 'MosaicDataset': # Get the clip region as an extent object. try: clip_area_wkt = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if not clip_area_wkt: clip_area_wkt = 'POLYGON ((-180 -90, -180 90, 180 90, 180 -90, -180 -90))' if not out_coordinate_system == '0': clip_area = task_utils.get_clip_region(clip_area_wkt, out_coordinate_system) else: clip_area = task_utils.get_clip_region(clip_area_wkt) except KeyError: pass status_writer.send_status(_('Setting the output workspace...')) if not os.path.exists(target_workspace): status_writer.send_state(status.STAT_FAILED, _('Target workspace does not exist')) return arcpy.env.workspace = target_workspace status_writer.send_status(_('Starting to process...')) num_results, response_index = task_utils.get_result_count(parameters) raster_items = None if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. 
query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) raster_items, pixels, bands, skipped = get_items(input_items) else: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) raster_items, pixels, bands, skipped = get_items(input_items) if not raster_items: if skipped == 0: status_writer.send_state(status.STAT_FAILED, _('Invalid input types')) skipped_reasons['All Items'] = _('Invalid input types') task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), num_results, skipped_details=skipped_reasons) return else: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), skipped, skipped_details=skipped_reasons) return # Get most common pixel type. pixel_type = pixel_types[max(set(pixels), key=pixels.count)] if output_raster_format in ('FileGDB', 'GRID', 'MosaicDataset'): output_name = arcpy.ValidateTableName(output_name, target_workspace) else: output_name = '{0}.{1}'.format(arcpy.ValidateTableName(output_name, target_workspace), output_raster_format.lower()) if arcpy.Exists(os.path.join(target_workspace, output_name)): status_writer.send_state(status.STAT_FAILED, _('Output dataset already exists.')) return if output_raster_format == 'MosaicDataset': try: status_writer.send_status(_('Generating {0}. Large input {1} will take longer to process.'.format('Mosaic', 'rasters'))) if out_coordinate_system == '0': out_coordinate_system = raster_items[0] else: out_coordinate_system = None mosaic_ds = arcpy.CreateMosaicDataset_management(target_workspace, output_name, out_coordinate_system, max(bands), pixel_type) arcpy.AddRastersToMosaicDataset_management(mosaic_ds, 'Raster Dataset', raster_items) arcpy.MakeMosaicLayer_management(mosaic_ds, 'mosaic_layer') layer_object = arcpy.mapping.Layer('mosaic_layer') task_utils.make_thumbnail(layer_object, os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: skipped += 1 skipped_reasons['All Items'] = arcpy.GetMessages(2) else: try: if len(bands) > 1: status_writer.send_state(status.STAT_FAILED, _('Input rasters must have the same number of bands')) return if out_coordinate_system == '0': out_coordinate_system = None status_writer.send_status(_('Generating {0}. 
Large input {1} will take longer to process.'.format('Mosaic', 'rasters'))) if clip_area: ext = '{0} {1} {2} {3}'.format(clip_area.XMin, clip_area.YMin, clip_area.XMax, clip_area.YMax) tmp_mosaic = arcpy.MosaicToNewRaster_management( raster_items, target_workspace, 'tmpMosaic', out_coordinate_system, pixel_type, number_of_bands=bands.keys()[0] ) status_writer.send_status(_('Clipping...')) out_mosaic = arcpy.Clip_management(tmp_mosaic, ext, output_name) arcpy.Delete_management(tmp_mosaic) else: out_mosaic = arcpy.MosaicToNewRaster_management(raster_items, target_workspace, output_name, out_coordinate_system, pixel_type, number_of_bands=bands.keys()[0], mosaic_method='BLEND') arcpy.MakeRasterLayer_management(out_mosaic, 'mosaic_layer') layer_object = arcpy.mapping.Layer('mosaic_layer') task_utils.make_thumbnail(layer_object, os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: skipped += 1 skipped_reasons['All Items'] = arcpy.GetMessages(2) # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), skipped, skipped_details=skipped_reasons)
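# pixel_types is referenced above but not defined in this snippet; it is assumed
# to map pixel depths reported for the input rasters to the pixel-type keywords
# the mosaic tools expect. The "most common value" idiom it feeds is shown
# standalone:
pixels = ['8_BIT_UNSIGNED', '8_BIT_UNSIGNED', '32_BIT_FLOAT']   # placeholder values
most_common_pixel_type = max(set(pixels), key=pixels.count)     # -> '8_BIT_UNSIGNED'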
def execute(request): """Copies files to a target folder. :param request: json as a dict. """ extracted = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] output_type = task_utils.get_parameter_value(parameters, 'output_format', 'value') task_folder = os.path.join(request['folder'], 'temp') if not os.path.exists(task_folder): os.makedirs(task_folder) if output_type == 'FGDB': arcpy.CreateFileGDB_management(task_folder, 'output.gdb') result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) # query = '{0}{1}{2}'.format("http://localhost:8888/solr/v0", '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } status_writer.send_percent(0.0, _('Starting to process...'), 'locate_xt_arcgis_tool') for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = task_utils.get_input_items(docs) if input_items: result = extract(input_items, output_type, task_folder) extracted += result[0] errors += result[1] skipped += result[2] else: status_writer.send_state( status.STAT_FAILED, _('No items to process. Check if items exist.')) return # Zip up outputs. zip_file = task_utils.zip_data(task_folder, 'output.zip') shutil.move( zip_file, os.path.join(os.path.dirname(task_folder), os.path.basename(zip_file))) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), extracted, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Converts each input dataset to kml (.kmz). :param request: json as a dict. """ converted = 0 skipped = 0 errors = 0 global result_count parameters = request["params"] out_workspace = os.path.join(request["folder"], "temp") if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Get the boundary box extent for input to KML tools. extent = "" try: try: ext = task_utils.get_parameter_value(parameters, "processing_extent", "wkt") if ext: sr = task_utils.get_spatial_reference("4326") extent = task_utils.from_wkt(ext, sr) except KeyError: ext = task_utils.get_parameter_value(parameters, "processing_extent", "feature") if ext: extent = arcpy.Describe(ext).extent except KeyError: pass # Get the output file name. output_file_name = task_utils.get_parameter_value(parameters, "output_file_name", "value") if not output_file_name: output_file_name = "kml_results" result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = "{0}{1}{2}".format(sys.argv[2].split("=")[1], "/select?&wt=json", fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") query += fq elif "ids" in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]["ids"]), task_utils.CHUNK_SIZE, "") else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") # Begin processing status_writer.send_percent(0.0, _("Starting to process..."), "convert_to_kml") headers = {"x-access-token": task_utils.get_security_token(request["owner"])} for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) elif "ids" in parameters[response_index]: results = requests.get(query + "{0}&ids={1}".format(fl, ",".join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) docs = results.json()["response"]["docs"] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]["response"]["docs"]) input_rows = collections.defaultdict(list) for doc in docs: if "path" not in doc: input_rows[doc["name"]].append(doc) if input_rows: result = convert_to_kml(input_rows, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if input_items: result = convert_to_kml(input_items, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _("No items to process. Check if items exist.")) return # Zip up kmz files if more than one. 
if converted > 1: status_writer.send_status("Converted: {}".format(converted)) zip_file = task_utils.zip_data(out_workspace, "{0}.zip".format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) shutil.copy2( os.path.join(os.path.dirname(os.path.dirname(__file__)), "supportfiles", "_thumb.png"), request["folder"] ) elif converted == 1: try: kml_file = glob.glob(os.path.join(out_workspace, "*.kmz"))[0] tmp_lyr = arcpy.KMLToLayer_conversion(kml_file, out_workspace, "kml_layer") task_utils.make_thumbnail(tmp_lyr.getOutput(0), os.path.join(request["folder"], "_thumb.png")) except arcpy.ExecuteError: pass shutil.move(kml_file, os.path.join(request["folder"], os.path.basename(kml_file))) # Update state if necessary. if skipped > 0 or errors > 0: status_writer.send_state(status.STAT_WARNING, _("{0} results could not be processed").format(errors + skipped)) task_utils.report( os.path.join(request["folder"], "__report.json"), converted, skipped, errors, errors_reasons, skipped_reasons )
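# The input_rows grouping above collects result docs that have no file path
# (for example database rows) under their 'name' key so they can be converted
# together; the idiom standalone (docs are placeholders):
import collections

docs = [{'name': 'wells', 'id': 1}, {'name': 'wells', 'id': 2}, {'name': 'roads', 'id': 3, 'path': r'C:\roads.shp'}]
input_rows = collections.defaultdict(list)
for doc in docs:
    if 'path' not in doc:
        input_rows[doc['name']].append(doc)
# -> {'wells': [<doc 1>, <doc 2>]}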
def execute(request): """Clips selected search results using the clip geometry. :param request: json as a dict. """ clipped = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] # Retrieve the clip features. clip_features = task_utils.get_parameter_value(parameters, 'clip_features', 'value') # Retrieve the coordinate system code. out_coordinate_system = int(task_utils.get_parameter_value(parameters, 'output_projection', 'code')) # Retrieve the output format, create mxd and output file name parameter values. out_format = task_utils.get_parameter_value(parameters, 'output_format', 'value') create_mxd = task_utils.get_parameter_value(parameters, 'create_mxd', 'value') output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'clip_results' # Create the temporary workspace if clip_feature_class: out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Set the output coordinate system. if not out_coordinate_system == 0: # Same as Input out_sr = task_utils.get_spatial_reference(out_coordinate_system) arcpy.env.outputCoordinateSystem = out_sr # Set the output workspace. status_writer.send_status(_('Setting the output workspace...')) if not out_format == 'SHP': out_workspace = arcpy.CreateFileGDB_management(out_workspace, 'output.gdb').getOutput(0) arcpy.env.workspace = out_workspace # Query the index for results in groups of 25. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl # Get the Clip features by id. id = clip_features['id'] clip_query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', "&fl=id,path,fullpath:[absolute],absolute_path:[absolute],[lyrFile],[geo]&q=id:{0}".format(id)) clip_result = requests.get(clip_query, verify=verify_ssl, headers=headers) clipper = clip_result.json()['response']['docs'][0] if 'absolute_path' in clipper and not clipper['absolute_path'].startswith('s3'): clip_features = clipper['absolute_path'] elif '[lyrFile]' in clipper: clip_features = clipper['[lyrFile]'] elif '[geo]' in clipper: clip_features = arcpy.AsShape(clipper['[geo]']).projectAs(arcpy.SpatialReference(4326)) elif 'absolute_path' in clipper and clipper['absolute_path'].startswith('s3'): base_name = os.path.basename(clipper['path']) temp_folder = tempfile.mkdtemp() if '[downloadURL]' in clipper: download = os.path.join(temp_folder, os.path.basename(clipper['[downloadURL]'])) response = requests.get(clipper['[downloadURL]'], verify=verify_ssl) with open(download, 'wb') as fp: fp.write(response.content) if download.endswith('.zip'): zip = zipfile.ZipFile(download) zip.extractall(temp_folder) clip_features = os.path.join(temp_folder, base_name) else: clip_features = download else: bbox = clipper['bbox'].split() extent = arcpy.Extent(*bbox) pt_array = arcpy.Array([extent.lowerLeft, extent.upperLeft, extent.upperRight, extent.lowerRight]) clip_features = arcpy.Polygon(pt_array, 4326) query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = 
task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'clip_data') for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['name']].append(doc) if input_rows: result = clip_data(input_rows, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if input_items: result = clip_data(input_items, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _('No items to process. Check if items exist.')) return if arcpy.env.workspace.endswith('.gdb'): out_workspace = os.path.dirname(arcpy.env.workspace) if clipped > 0: try: if out_format == 'MPK': mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') mxd = task_utils.create_mxd(out_workspace, mxd_template, 'output') status_writer.send_status(_("Packaging results...")) task_utils.create_mpk(out_workspace, mxd, files_to_package) shutil.move(os.path.join(out_workspace, 'output.mpk'), os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name))) elif out_format == 'LPK': status_writer.send_status(_("Packaging results...")) task_utils.create_lpk(out_workspace, output_file_name, files_to_package) elif out_format == 'KML': task_utils.convert_to_kml(os.path.join(out_workspace, "output.gdb")) arcpy.env.workspace = '' arcpy.RefreshCatalog(os.path.join(out_workspace, "output.gdb")) try: arcpy.Delete_management(os.path.join(out_workspace, "output.gdb")) except arcpy.ExecuteError: pass zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) else: if create_mxd: mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') task_utils.create_mxd(out_workspace, mxd_template, 'output') zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) except arcpy.ExecuteError as ee: status_writer.send_state(status.STAT_FAILED, _(ee)) sys.exit(1) else: status_writer.send_state(status.STAT_FAILED, _('No output created. Zero inputs were clipped.')) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), clipped, skipped, errors, errors_reasons, skipped_reasons)
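# When the selected clip item only carries a bbox, the code above turns its
# corner coordinates into a clip polygon; that construction is shown standalone
# (coordinates are placeholders):
import arcpy

bbox = '-120.5 34.0 -119.5 35.0'.split()                      # xmin ymin xmax ymax
extent = arcpy.Extent(*[float(v) for v in bbox])
corners = arcpy.Array([extent.lowerLeft, extent.upperLeft, extent.upperRight, extent.lowerRight])
clip_polygon = arcpy.Polygon(corners, arcpy.SpatialReference(4326))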
def execute(request): """Writes existing metadata for summary, description and tags. If overwrite is false, existing metadata is untouched unless any field is empty or does not exist, then it is created. :param request: json as a dict. """ updated = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] summary = task_utils.get_parameter_value(parameters, 'summary', 'value') description = task_utils.get_parameter_value(parameters, 'description', 'value') tags = task_utils.get_parameter_value(parameters, 'tags', 'value') data_credits = task_utils.get_parameter_value(parameters, 'credits', 'value') constraints = task_utils.get_parameter_value(parameters, 'constraints', 'value') # Handle commas, spaces, and/or new line separators. tags = [tag for tag in re.split(' |,|\n', tags) if not tag == ''] overwrite = task_utils.get_parameter_value(parameters, 'overwrite', 'value') if not overwrite: overwrite = False if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Stylesheet xslt_file = os.path.join( arcpy.GetInstallInfo()['InstallDir'], 'Metadata/Stylesheets/gpTools/exact copy of.xslt') # Template metadata file. template_xml = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'metadata_template.xml') result_count, response_index = task_utils.get_result_count(parameters) headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl + ',links' query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'write_metadata') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = [] for doc in docs: if 'path' in doc: if 'links' in doc: links = eval(doc['links']) input_items.append( (doc['path'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['path'], doc['id'])) result = write_metadata(input_items, template_xml, xslt_file, summary, description, tags, data_credits, constraints, overwrite, headers) updated += result[0] errors += result[1] skipped += result[2] status_writer.send_percent( i / result_count, '{0}: {1:.0f}%'.format("Processed", i / result_count * 100), 'write_metadata') # Report state. if skipped > 0 or errors > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) else: status_writer.send_state(status.STAT_SUCCESS) task_utils.report(os.path.join(request['folder'], '__report.json'), updated, skipped, errors, errors_reasons, skipped_reasons)
def execute(request): """Writes existing metadata for summary, description and tags. If overwrite is false, existing metadata is untouched unless any field is empty or does not exist, then it is created. :param request: json as a dict. """ updated = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] summary = task_utils.get_parameter_value(parameters, 'summary', 'value') description = task_utils.get_parameter_value(parameters, 'description', 'value') tags = task_utils.get_parameter_value(parameters, 'tags', 'value') data_credits = task_utils.get_parameter_value(parameters, 'credits', 'value') constraints = task_utils.get_parameter_value(parameters, 'constraints', 'value') # Handle commas, spaces, and/or new line separators. tags = [tag for tag in re.split(' |,|\n', tags) if not tag == ''] overwrite = task_utils.get_parameter_value(parameters, 'overwrite', 'value') if not overwrite: overwrite = False if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Stylesheet xslt_file = os.path.join(arcpy.GetInstallInfo()['InstallDir'], 'Metadata/Stylesheets/gpTools/exact copy of.xslt') # Template metadata file. template_xml = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'metadata_template.xml') result_count, response_index = task_utils.get_result_count(parameters) headers = {'x-access-token': task_utils.get_security_token(request['owner'])} # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl + ',links' query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'write_metadata') i = 0. 
for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] if not docs: docs = parameters[response_index]['response']['docs'] input_items = [] for doc in docs: if 'path' in doc and os.path.exists(doc['path']): if 'links' in doc: links = eval(doc['links']) input_items.append((doc['path'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['path'], doc['id'])) elif 'absolute_path' in doc and os.path.exists(doc['absolute_path']): if 'links' in doc: links = eval(doc['links']) input_items.append((doc['absolute_path'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['absolute_path'], doc['id'])) elif '[downloadURL]' in doc: if 'links' in doc: links = eval(doc['links']) input_items.append((doc['[downloadURL]'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['[downloadURL]'], doc['id'])) result = write_metadata(input_items, template_xml, xslt_file, summary, description, tags, data_credits, constraints, overwrite, headers) updated += result[0] errors += result[1] skipped += result[2] status_writer.send_percent(i / result_count, '{0}: {1:.0f}%'.format("Processed", i / result_count * 100), 'write_metadata') # Report state. if skipped > 0 or errors > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) else: status_writer.send_state(status.STAT_SUCCESS) task_utils.report(os.path.join(request['folder'], '__report.json'), updated, skipped, errors, errors_reasons, skipped_reasons)
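# doc['links'] is parsed above with eval(); a safer drop-in for that specific
# step is ast.literal_eval, shown here as a deliberate substitution rather than
# what this task does:
import ast


def first_linked_id(links_field):
    """Return the id of the first linked item from the raw 'links' field string."""
    links = ast.literal_eval(links_field)
    return links['links'][0]['link'][0]['id']


# first_linked_id("{'links': [{'link': [{'id': 'abc123'}]}]}")  # -> 'abc123'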
def execute(request): """Mosaics input raster datasets into a new raster dataset. :param request: json as a dict. """ parameters = request['params'] out_coordinate_system = task_utils.get_parameter_value(parameters, 'output_projection', 'code') # Advanced options output_raster_format = task_utils.get_parameter_value(parameters, 'raster_format', 'value') compression_method = task_utils.get_parameter_value(parameters, 'compression_method', 'value') compression_quality = task_utils.get_parameter_value(parameters, 'compression_quality', 'value') output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'output' arcpy.env.compression = '{0} {1}'.format(compression_method, compression_quality) clip_area = None if not output_raster_format == 'MosaicDataset': # Get the clip region as an extent object. try: clip_area_wkt = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if not clip_area_wkt: clip_area_wkt = 'POLYGON ((-180 -90, -180 90, 180 90, 180 -90, -180 -90))' if not out_coordinate_system == '0': clip_area = task_utils.get_clip_region(clip_area_wkt, out_coordinate_system) else: clip_area = task_utils.get_clip_region(clip_area_wkt) except KeyError: pass status_writer.send_status(_('Setting the output workspace...')) out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) if output_raster_format == 'FileGDB' or output_raster_format == 'MosaicDataset': out_workspace = arcpy.CreateFileGDB_management(out_workspace, 'output.gdb').getOutput(0) arcpy.env.workspace = out_workspace status_writer.send_status(_('Starting to process...')) num_results, response_index = task_utils.get_result_count(parameters) raster_items = None if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. 
query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') headers = {'x-access-token': task_utils.get_security_token(request['owner'])} for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items(results.json()['response']['docs']) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) raster_items, pixels, bands, skipped = get_items(input_items) else: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) raster_items, pixels, bands, skipped = get_items(input_items) if not raster_items: if skipped == 0: status_writer.send_state(status.STAT_FAILED, _('Invalid input types')) skipped_reasons['All Items'] = _('Invalid input types') task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), num_results, skipped_details=skipped_reasons) return else: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), skipped, skipped_details=skipped_reasons) return # Get most common pixel type. pixel_type = pixel_types[max(set(pixels), key=pixels.count)] if output_raster_format in ('FileGDB', 'GRID', 'MosaicDataset'): output_name = arcpy.ValidateTableName('mosaic', out_workspace) else: output_name = '{0}.{1}'.format(arcpy.ValidateTableName('mosaic', out_workspace)[:9], output_raster_format.lower()) status_writer.send_status(output_name) if output_raster_format == 'MosaicDataset': try: status_writer.send_status(_('Generating {0}. Large input {1} will take longer to process.'.format('Mosaic', 'rasters'))) if out_coordinate_system == '0': out_coordinate_system = raster_items[0] else: out_coordinate_system = None mosaic_ds = arcpy.CreateMosaicDataset_management(out_workspace, output_name, out_coordinate_system, max(bands), pixel_type) arcpy.AddRastersToMosaicDataset_management(mosaic_ds, 'Raster Dataset', raster_items) arcpy.MakeMosaicLayer_management(mosaic_ds, 'mosaic_layer') layer_object = arcpy.mapping.Layer('mosaic_layer') task_utils.make_thumbnail(layer_object, os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: status_writer.send_state(status.STAT_FAILED, arcpy.GetMessages(2)) return else: try: if len(bands) > 1: status_writer.send_state(status.STAT_FAILED, _('Input rasters must have the same number of bands')) return status_writer.send_status(_('Generating {0}. 
Large input {1} will take longer to process.'.format('Mosaic', 'rasters'))) if out_coordinate_system == '0': out_coordinate_system = None if clip_area: ext = '{0} {1} {2} {3}'.format(clip_area.XMin, clip_area.YMin, clip_area.XMax, clip_area.YMax) tmp_mosaic = arcpy.MosaicToNewRaster_management( raster_items, out_workspace, 'tm', out_coordinate_system, pixel_type, number_of_bands=bands.keys()[0] ) status_writer.send_status(_('Clipping...')) out_mosaic = arcpy.Clip_management(tmp_mosaic, ext, output_name) arcpy.Delete_management(tmp_mosaic) else: out_mosaic = arcpy.MosaicToNewRaster_management(raster_items, out_workspace, output_name, out_coordinate_system, pixel_type, number_of_bands=bands.keys()[0]) arcpy.MakeRasterLayer_management(out_mosaic, 'mosaic_layer') layer_object = arcpy.mapping.Layer('mosaic_layer') task_utils.make_thumbnail(layer_object, os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: status_writer.send_state(status.STAT_FAILED, arcpy.GetMessages(2)) return if arcpy.env.workspace.endswith('.gdb'): out_workspace = os.path.dirname(arcpy.env.workspace) zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) # Update state if necessary. if skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), len(raster_items), skipped, skipped_details=skipped_reasons)
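# Illustrative sketch (not part of the task above): the "most common pixel
# type" step is a majority vote over the pixel-type strings collected by
# get_items(). collections.Counter gives the same answer as
# max(set(pixels), key=pixels.count) and makes the intent explicit.
def _example_most_common_pixel_type(pixels):
    """Return the most frequently occurring pixel type string.

    A minimal sketch, assuming `pixels` is the list built by get_items()
    above (e.g. ['8_BIT_UNSIGNED', '8_BIT_UNSIGNED', '32_BIT_FLOAT']).
    """
    import collections
    return collections.Counter(pixels).most_common(1)[0][0]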
def execute(request): """Adds a field and calculates it to some value. :param request: json as a dict. """ created = 0 skipped = 0 errors = 0 warnings = 0 global result_count parameters = request['params'] if not os.path.exists(request['folder']): os.makedirs(request['folder']) # Parameter values field_name = task_utils.get_parameter_value(parameters, 'field_name', 'value') field_type = task_utils.get_parameter_value(parameters, 'field_type', 'value') field_value = task_utils.get_parameter_value(parameters, 'field_value', 'value') # Query the index for results in groups of 25. headers = {'x-access-token': task_utils.get_security_token(request['owner'])} result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl + ',links' query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'add_field') for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] input_items = [] for doc in docs: if 'path' in doc: if 'links' in doc: links = eval(doc['links']) input_items.append((doc['id'], doc['path'], links['links'][0]['link'][0]['id'])) else: input_items.append((doc['id'], doc['path'])) result = add_field(input_items, field_name, field_type, field_value) created += result[0] errors += result[1] skipped += result[2] warnings += result[3] # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(request['folder'], '__report.json'), created, skipped, errors, errors_reasons, skipped_reasons, warnings, warnings_reasons)
def execute(request): """Converts each input dataset to kml (.kmz). :param request: json as a dict. """ converted = 0 skipped = 0 errors = 0 global result_count parameters = request['params'] out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Get the boundary box extent for input to KML tools. extent = '' try: try: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'wkt') if ext: sr = task_utils.get_spatial_reference("4326") extent = task_utils.from_wkt(ext, sr) except KeyError: ext = task_utils.get_parameter_value(parameters, 'processing_extent', 'feature') if ext: extent = arcpy.Describe(ext).extent except KeyError: pass # Get the output file name. output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'kml_results' result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'convert_to_kml') headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['name']].append(doc) if input_rows: result = convert_to_kml(input_rows, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if input_items: result = convert_to_kml(input_items, out_workspace, extent) converted += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state( status.STAT_FAILED, _('No items to process. Check if items exist.')) return # Zip up kmz files if more than one. 
if converted > 1: status_writer.send_status("Converted: {}".format(converted)) zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move( zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) shutil.copy2( os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', '_thumb.png'), request['folder']) elif converted == 1: try: kml_file = glob.glob(os.path.join(out_workspace, '*.kmz'))[0] tmp_lyr = arcpy.KMLToLayer_conversion(kml_file, out_workspace, 'kml_layer') task_utils.make_thumbnail( tmp_lyr.getOutput(0), os.path.join(request['folder'], '_thumb.png')) except arcpy.ExecuteError: pass shutil.move( kml_file, os.path.join(request['folder'], os.path.basename(kml_file))) # Update state if necessary. if skipped > 0 or errors > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), converted, skipped, errors, errors_reasons, skipped_reasons)
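# Illustrative sketch (not part of the task above): task_utils.zip_data is
# not shown here; from its usage it is assumed to zip the contents of a
# workspace folder and return the path to the archive. A minimal sketch of
# that assumed behavior using only the standard library:
def _example_zip_data(workspace, zip_name):
    """Zip everything under `workspace` into `zip_name` inside that folder and
    return the archive path (hypothetical stand-in for task_utils.zip_data).
    """
    import os
    import zipfile
    zip_path = os.path.join(workspace, zip_name)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, dirs, files in os.walk(workspace):
            for name in files:
                full_path = os.path.join(root, name)
                if full_path == zip_path:
                    continue  # don't add the archive to itself
                zf.write(full_path, os.path.relpath(full_path, workspace))
    return zip_path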
def execute(request): """Deletes files. :param request: json as a dict. """ deleted = 0 skipped = 0 parameters = request['params'] if not os.path.exists(request['folder']): os.makedirs(request['folder']) headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } num_results, response_index = task_utils.get_result_count(parameters) if num_results > task_utils.CHUNK_SIZE: # Query the index for results in groups of 25. query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper( list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, num_results), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'delete_files') i = 0. for group in groups: i += len(group) - group.count('') if fq: results = requests.get(query + "&rows={0}&start={1}".format( task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get( query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format( task_utils.CHUNK_SIZE, group[0]), headers=headers) input_items = task_utils.get_input_items( results.json()['response']['docs'], True, True) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) result = delete_files(input_items) deleted += result[0] skipped += result[1] status_writer.send_percent( i / num_results, '{0}: {1:.0f}%'.format("Processed", i / num_results * 100), 'delete_files') else: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs'], True, True) deleted, skipped = delete_files(input_items, True) # Update state if necessary. if skipped > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), deleted, skipped, skipped_details=skipped_reasons)
def execute(request): """Clips selected search results using the clip geometry. :param request: json as a dict. """ clipped = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] # Retrieve clip geometry. try: clip_area = task_utils.get_parameter_value(parameters, 'clip_geometry', 'wkt') if not clip_area: clip_area = 'POLYGON ((-180 -90, -180 90, 180 90, 180 -90, -180 -90))' except KeyError: clip_area = 'POLYGON ((-180 -90, -180 90, 180 90, 180 -90, -180 -90))' # Retrieve the coordinate system code. out_coordinate_system = int(task_utils.get_parameter_value(parameters, 'output_projection', 'code')) # Retrieve the output format, create mxd parameter and output file name values. out_format = task_utils.get_parameter_value(parameters, 'output_format', 'value') create_mxd = task_utils.get_parameter_value(parameters, 'create_mxd', 'value') output_file_name = task_utils.get_parameter_value(parameters, 'output_file_name', 'value') if not output_file_name: output_file_name = 'clip_results' # Create the temporary workspace if clip_feature_class: out_workspace = os.path.join(request['folder'], 'temp') if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Set the output coordinate system. if not out_coordinate_system == 0: # Same as Input out_sr = task_utils.get_spatial_reference(out_coordinate_system) arcpy.env.outputCoordinateSystem = out_sr # Create the clip polygon geometry object in WGS84 projection. gcs_sr = task_utils.get_spatial_reference(4326) gcs_clip_poly = task_utils.from_wkt(clip_area, gcs_sr) if not gcs_clip_poly.area > 0: gcs_clip_poly = task_utils.from_wkt('POLYGON ((-180 -90, -180 90, 180 90, 180 -90, -180 -90))', gcs_sr) # Set the output workspace. status_writer.send_status(_('Setting the output workspace...')) if not out_format == 'SHP': out_workspace = arcpy.CreateFileGDB_management(out_workspace, 'output.gdb').getOutput(0) arcpy.env.workspace = out_workspace # Query the index for results in groups of 25. 
headers = {'x-access-token': task_utils.get_security_token(request['owner'])} result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') # Begin processing status_writer.send_percent(0.0, _('Starting to process...'), 'clip_data') for group in groups: if fq: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), headers=headers) else: results = requests.get(query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers) # docs = eval(results.read().replace('false', 'False').replace('true', 'True').replace('null', 'None'))['response']['docs'] docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['name']].append(doc) if input_rows: result = clip_data(input_rows, out_workspace, out_coordinate_system, gcs_sr, gcs_clip_poly, out_format) clipped += result[0] errors += result[1] skipped += result[2] if input_items: result = clip_data(input_items, out_workspace, out_coordinate_system, gcs_sr, gcs_clip_poly, out_format) clipped += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _('No items to process. 
Check if items exist.')) return if arcpy.env.workspace.endswith('.gdb'): out_workspace = os.path.dirname(arcpy.env.workspace) if clipped > 0: try: if out_format == 'MPK': mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') mxd = task_utils.create_mxd(out_workspace, mxd_template, 'output') status_writer.send_status(_("Packaging results...")) task_utils.create_mpk(out_workspace, mxd, files_to_package) shutil.move(os.path.join(out_workspace, 'output.mpk'), os.path.join(os.path.dirname(out_workspace), '{0}.mpk'.format(output_file_name))) elif out_format == 'LPK': status_writer.send_status(_("Packaging results...")) task_utils.create_lpk(out_workspace,output_file_name, files_to_package) elif out_format == 'KML': task_utils.convert_to_kml(os.path.join(out_workspace, "output.gdb")) arcpy.env.workspace = '' try: arcpy.Delete_management(os.path.join(out_workspace, "output.gdb")) except arcpy.ExecuteError: pass zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) else: if create_mxd: mxd_template = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'supportfiles', 'MapTemplate.mxd') task_utils.create_mxd(out_workspace, mxd_template, 'output') zip_file = task_utils.zip_data(out_workspace, '{0}.zip'.format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) except arcpy.ExecuteError as ee: status_writer.send_state(status.STAT_FAILED, _(ee)) sys.exit(1) else: status_writer.send_state(status.STAT_FAILED, _('No output created. Zero inputs were clipped.')) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _('{0} results could not be processed').format(errors + skipped)) task_utils.report(os.path.join(request['folder'], '__report.json'), clipped, skipped, errors, errors_reasons, skipped_reasons)
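# Illustrative sketch (not part of the task above): task_utils.from_wkt is
# not shown in this file; from its usage it is assumed to turn a WKT string
# into an arcpy geometry in the given spatial reference. arcpy.FromWKT
# provides exactly that, so a stand-in could look like this:
def _example_from_wkt(wkt, spatial_reference):
    """Build an arcpy geometry from WKT (hypothetical stand-in for
    task_utils.from_wkt), e.g. the default world polygon used above.
    """
    return arcpy.FromWKT(wkt, spatial_reference)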
def execute(request): """Copies data to an existing geodatabase or feature dataset. :param request: json as a dict. """ added = 0 errors = 0 skipped = 0 global result_count parameters = request['params'] # Get the target workspace location. out_gdb = task_utils.get_parameter_value(parameters, 'target_workspace', 'value') # Retrieve the coordinate system code. out_coordinate_system = task_utils.get_parameter_value( parameters, 'output_projection', 'code') if not out_coordinate_system == '0': # Same as Input arcpy.env.outputCoordinateSystem = task_utils.get_spatial_reference( out_coordinate_system) task_folder = request['folder'] if not os.path.exists(task_folder): os.makedirs(task_folder) # Check if the geodatabase exists or if it is a feature dataset. is_fds = False if not os.path.exists(out_gdb): if out_gdb.endswith('.gdb'): arcpy.CreateFileGDB_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status( _('Created output workspace: {0}').format(out_gdb)) elif out_gdb.endswith('.mdb'): arcpy.CreatePersonalGDB_management(os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status( _('Created output workspace: {0}').format(out_gdb)) elif out_gdb.endswith('.sde'): status_writer.send_state(status.STAT_FAILED, _('{0} does not exist').format(out_gdb)) return else: # Possible feature dataset. is_fds = is_feature_dataset(out_gdb) if not is_fds: if os.path.dirname(out_gdb).endswith('.gdb'): if not os.path.exists(os.path.dirname(out_gdb)): arcpy.CreateFileGDB_management( os.path.dirname(os.path.dirname(out_gdb)), os.path.basename(os.path.dirname(out_gdb))) arcpy.CreateFeatureDataset_management( os.path.dirname(out_gdb), os.path.basename(out_gdb)) elif os.path.dirname(out_gdb).endswith('.mdb'): if not os.path.exists(os.path.dirname(out_gdb)): arcpy.CreatePersonalGDB_management( os.path.dirname(os.path.dirname(out_gdb)), os.path.basename(os.path.dirname(out_gdb))) arcpy.CreateFeatureDataset_management( os.path.dirname(out_gdb), os.path.basename(out_gdb)) status_writer.send_status(_('Setting the output workspace...')) arcpy.env.workspace = out_gdb headers = { 'x-access-token': task_utils.get_security_token(request['owner']) } result_count, response_index = task_utils.get_result_count(parameters) # Query the index for results in groups of 25. 
query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl query = '{0}{1}{2}'.format(sys.argv[2].split('=')[1], '/select?&wt=json', fl) # query = '{0}{1}{2}'.format("http://localhost:8888/solr/v0", '/select?&wt=json', fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') query += fq elif 'ids' in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]['ids']), task_utils.CHUNK_SIZE, '') else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, '') status_writer.send_percent(0.0, _('Starting to process...'), 'add_to_geodatabase') for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) elif 'ids' in parameters[response_index]: results = requests.get(query + '{0}&ids={1}'.format(fl, ','.join(group)), verify=verify_ssl, headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), verify=verify_ssl, headers=headers) docs = results.json()['response']['docs'] input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items( parameters[response_index]['response']['docs']) input_rows = collections.defaultdict(list) for doc in docs: if 'path' not in doc: input_rows[doc['title']].append(doc) if input_rows: result = add_to_geodatabase(input_rows, out_gdb, is_fds) added += result[0] errors += result[1] skipped += result[2] if input_items: result = add_to_geodatabase(input_items, out_gdb, is_fds) added += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state( status.STAT_FAILED, _('No items to process. Check if items exist.')) return # Update state if necessary. if skipped > 0 or errors > 0: status_writer.send_state( status.STAT_WARNING, _('{0} results could not be processed').format(skipped + errors)) task_utils.report(os.path.join(task_folder, '__report.json'), added, skipped, errors, errors_reasons, skipped_reasons)
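# Illustrative sketch (not part of the task above): is_feature_dataset is a
# helper defined elsewhere in this module; from its usage it is assumed to
# answer "does this path point at an existing feature dataset?". One way to
# express that check with arcpy (an assumption, not the module's actual code):
def _example_is_feature_dataset(path):
    """Return True if `path` exists and describes as a FeatureDataset."""
    if arcpy.Exists(path):
        return arcpy.Describe(path).dataType == 'FeatureDataset'
    return False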
def execute(request): """Clips selected search results using the clip geometry. :param request: json as a dict. """ clipped = 0 errors = 0 skipped = 0 global result_count parameters = request["params"] # Retrieve the clip features. clip_features = task_utils.get_parameter_value(parameters, "clip_features", "value") # Retrieve the coordinate system code. out_coordinate_system = int(task_utils.get_parameter_value(parameters, "output_projection", "code")) # Retrieve the output format, create mxd and output file name parameter values. out_format = task_utils.get_parameter_value(parameters, "output_format", "value") create_mxd = task_utils.get_parameter_value(parameters, "create_mxd", "value") output_file_name = task_utils.get_parameter_value(parameters, "output_file_name", "value") if not output_file_name: output_file_name = "clip_results" # Create the temporary workspace if clip_feature_class: out_workspace = os.path.join(request["folder"], "temp") if not os.path.exists(out_workspace): os.makedirs(out_workspace) # Set the output coordinate system. if not out_coordinate_system == 0: # Same as Input out_sr = task_utils.get_spatial_reference(out_coordinate_system) arcpy.env.outputCoordinateSystem = out_sr # Set the output workspace. status_writer.send_status(_("Setting the output workspace...")) if not out_format == "SHP": out_workspace = arcpy.CreateFileGDB_management(out_workspace, "output.gdb").getOutput(0) arcpy.env.workspace = out_workspace # Query the index for results in groups of 25. headers = {"x-access-token": task_utils.get_security_token(request["owner"])} result_count, response_index = task_utils.get_result_count(parameters) query_index = task_utils.QueryIndex(parameters[response_index]) fl = query_index.fl # Get the Clip features by id. id = clip_features["id"] clip_query = "{0}{1}{2}".format( sys.argv[2].split("=")[1], "/select?&wt=json", "&fl=id,path:[absolute],[lyrFile],[geo]&q=id:{0}".format(id) ) clip_result = requests.get(clip_query, headers=headers) clipper = clip_result.json()["response"]["docs"][0] if "path" in clipper: clip_features = clipper["path"] elif "[lyrFile]" in clipper: clip_features = clipper["[lyrFile]"] elif "[geo]" in clipper: clip_features = arcpy.AsShape(clipper["[geo]"]).projectAs(arcpy.SpatialReference(4326)) else: bbox = clipper["bbox"].split() extent = arcpy.Extent(*bbox) pt_array = arcpy.Array([extent.lowerLeft, extent.upperLeft, extent.upperRight, extent.lowerRight]) clip_features = arcpy.Polygon(pt_array, 4326) query = "{0}{1}{2}".format(sys.argv[2].split("=")[1], "/select?&wt=json", fl) fq = query_index.get_fq() if fq: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") query += fq elif "ids" in parameters[response_index]: groups = task_utils.grouper(list(parameters[response_index]["ids"]), task_utils.CHUNK_SIZE, "") else: groups = task_utils.grouper(range(0, result_count), task_utils.CHUNK_SIZE, "") status_writer.send_percent(0.0, _("Starting to process..."), "clip_data") for group in groups: if fq: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) elif "ids" in parameters[response_index]: results = requests.get(query + "{0}&ids={1}".format(fl, ",".join(group)), headers=headers) else: results = requests.get( query + "&rows={0}&start={1}".format(task_utils.CHUNK_SIZE, group[0]), headers=headers ) # docs = eval(results.read().replace('false', 'False').replace('true', 'True').replace('null', 'None'))['response']['docs'] docs = results.json()["response"]["docs"] 
input_items = task_utils.get_input_items(docs) if not input_items: input_items = task_utils.get_input_items(parameters[response_index]["response"]["docs"]) input_rows = collections.defaultdict(list) for doc in docs: if "path" not in doc: input_rows[doc["name"]].append(doc) if input_rows: result = clip_data(input_rows, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if input_items: result = clip_data(input_items, out_workspace, clip_features, out_format) clipped += result[0] errors += result[1] skipped += result[2] if not input_items and not input_rows: status_writer.send_state(status.STAT_FAILED, _("No items to process. Check if items exist.")) return if arcpy.env.workspace.endswith(".gdb"): out_workspace = os.path.dirname(arcpy.env.workspace) if clipped > 0: try: if out_format == "MPK": mxd_template = os.path.join( os.path.dirname(os.path.dirname(__file__)), "supportfiles", "MapTemplate.mxd" ) mxd = task_utils.create_mxd(out_workspace, mxd_template, "output") status_writer.send_status(_("Packaging results...")) task_utils.create_mpk(out_workspace, mxd, files_to_package) shutil.move( os.path.join(out_workspace, "output.mpk"), os.path.join(os.path.dirname(out_workspace), "{0}.mpk".format(output_file_name)), ) elif out_format == "LPK": status_writer.send_status(_("Packaging results...")) task_utils.create_lpk(out_workspace, output_file_name, files_to_package) elif out_format == "KML": task_utils.convert_to_kml(os.path.join(out_workspace, "output.gdb")) arcpy.env.workspace = "" arcpy.RefreshCatalog(os.path.join(out_workspace, "output.gdb")) try: arcpy.Delete_management(os.path.join(out_workspace, "output.gdb")) except arcpy.ExecuteError: pass zip_file = task_utils.zip_data(out_workspace, "{0}.zip".format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) else: if create_mxd: mxd_template = os.path.join( os.path.dirname(os.path.dirname(__file__)), "supportfiles", "MapTemplate.mxd" ) task_utils.create_mxd(out_workspace, mxd_template, "output") zip_file = task_utils.zip_data(out_workspace, "{0}.zip".format(output_file_name)) shutil.move(zip_file, os.path.join(os.path.dirname(out_workspace), os.path.basename(zip_file))) except arcpy.ExecuteError as ee: status_writer.send_state(status.STAT_FAILED, _(ee)) sys.exit(1) else: status_writer.send_state(status.STAT_FAILED, _("No output created. Zero inputs were clipped.")) # Update state if necessary. if errors > 0 or skipped > 0: status_writer.send_state(status.STAT_WARNING, _("{0} results could not be processed").format(errors + skipped)) task_utils.report( os.path.join(request["folder"], "__report.json"), clipped, skipped, errors, errors_reasons, skipped_reasons )
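# Illustrative sketch (not part of the task above): when the clip document
# only carries a 'bbox' field ("xmin ymin xmax ymax"), the code above turns it
# into a WGS84 polygon. The same construction, written out as a standalone
# helper with the string parts explicitly cast to floats:
def _example_bbox_to_polygon(bbox_value):
    """Convert a 'xmin ymin xmax ymax' string into an arcpy WGS84 polygon."""
    xmin, ymin, xmax, ymax = (float(v) for v in bbox_value.split())
    extent = arcpy.Extent(xmin, ymin, xmax, ymax)
    corners = arcpy.Array([extent.lowerLeft, extent.upperLeft, extent.upperRight, extent.lowerRight])
    return arcpy.Polygon(corners, arcpy.SpatialReference(4326))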
def execute(request): """Report duplicate items. :param request: json as a dict. """ if not os.path.exists(request['folder']): os.makedirs(request['folder']) date_str = str(datetime.date.today()) report_file = os.path.join(request['folder'], 'Duplicates_{0}.csv'.format(date_str)) request_owner = request['owner'] headers = {'x-access-token': task_utils.get_security_token(request_owner)} fields = list(task_utils.get_parameter_value(request['params'], 'fields', 'value')) required_fields = ("name", "title", "bytes", "id", "format", "absolute_path", "format_category") [fields.remove(f) for f in required_fields if f in fields] fields_str = "" if fields: fields_str = ",".join(fields) voyager_instance = 'http://localhost:8888/solr/v0' #sys.argv[2].split('=')[1] #voyager_instance = 'http://ec2amaz-7h0t5qu.tnc.sdi.org:8888/solr/v0' query = "%s/select?q={!func}joindf(md5,md5)&f.md5.facet.mincount=2&f.contentHash.facet.mincount=2&f.schemaHash.facet.mincount=2&sort=md5 desc&start=0&rows=1&fl=id,title,name:[name],format,fullpath:[absolute],absolute_path:[absolute],format_type,bytes,md5,path,name&fq={!frange l=2}{!func}joindf(md5,md5)&wt=json" % (voyager_instance) results = requests.get(query, auth=('admin', 'admin')) result_count = results.json()['response']['numFound'] if result_count == 0: status_writer.send_state(status.STAT_WARNING, "No duplicates found.") return duplicates = collections.defaultdict(list) groups = grouper(range(0, result_count), 150, '') url = voyager_instance.split("/solr")[0] req = requests.get("{0}/api/rest/i18n/field/format".format(url), headers=headers) formats = req.json()['VALUE']['format'] processed_count = 0 status_writer.send_status("Generating list of documents with children...") parent_docs = create_listcount_data_in_doc(voyager_instance, headers) increment = task_utils.get_increment(result_count) for group in groups: query = "%s/select?q={!func}joindf(md5,md5)&f.md5.facet.mincount=2&f.contentHash.facet.mincount=2&f.schemaHash.facet.mincount=2&sort=md5 desc&start=%s&rows=150&fl=md5,id,title,name:[name],format,fullpath:[absolute],absolute_path:[absolute],format_type,format_category,bytes,linkcount_data,linkcount_md5,path,name,%s&fq={!frange l=2}{!func}joindf(md5,md5)&wt=json" % (voyager_instance, group[0], fields_str) results = requests.get(query, headers=headers) docs = results.json()['response']['docs'] for doc in docs: file_path = "" if "absolute_path" in doc: file_path = doc["absolute_path"] if os.path.splitext(file_path)[1].lower() in ('.cpg', '.ini'): continue file_size = 0 if "bytes" in doc: file_size = float(doc['bytes']) format_type = "" if "format" in doc and not doc["format"] in ("application/vnd.esri.gdb.file.data", "application/vnd.esri.gdb.personal.data"): try: format_type = formats[doc["format"]] except KeyError: format_type = doc["format"] else: continue format_category = "" if "format_category" in doc: format_category = doc["format_category"] id = "" if "id" in doc: id = doc["id"] name = "" if "name" in doc: name = doc["name"] elif "title" in doc: name = doc["title"] field_dict = {"FILE NAME": name, "FILE SIZE": file_size, "FORMAT CATEGORY": format_category, "FORMAT": format_type, "ID": id, "FILE PATH": file_path} extra_fields = {} if fields: for fld in fields: if fld in doc: extra_fields[fld.upper()] = doc[fld] field_dict.update(extra_fields) duplicates[doc['md5']].append(field_dict) processed_count += len(group) if (processed_count % increment) == 0: status_writer.send_percent(processed_count / float(result_count), 'Grouping duplicates by MD5...', 
'report_duplicate_files') # Find total number of items in the index. all_query = "%s/select?&disp=default&sort=score desc&place.op=within&start=0&fl=id&voyager.config.id=ace4bb77&wt=json" % (voyager_instance) results = requests.get(all_query, headers=headers) index_count = results.json()['response']['numFound'] duplicate_count = 0 total_file_size = 0 # Write all the duplicates to the report file. status_writer.send_percent(0, 'Creating the duplicate report and comparing data...', '') processed_count = 0 md5_count = len(duplicates) increment = task_utils.get_increment(md5_count) with open(report_file, "wb") as f: if extra_fields: keys = ["MD5", "FILE NAME", "FILE SIZE", "FORMAT", "FORMAT CATEGORY", "ID", "FILE PATH", "MXD COUNT", "LYR COUNT"] + extra_fields.keys() else: keys = ["MD5", "FILE NAME", "FILE SIZE", "FORMAT", "FORMAT CATEGORY", "ID", "FILE PATH", "MXD COUNT", "LYR COUNT"] writer = csv.DictWriter(f, fieldnames=keys) writer.writeheader() # Loop through each group of MD5 (duplicates). for md5, values in duplicates.items(): try: val_count = len(values) if val_count > 1: # If the files sizes are all 0 bytes, return them as duplicates. file_size = convert_size(values[0]['FILE SIZE']) for z in values: if not convert_size(z['FILE SIZE']) == file_size: values.remove(z) if len(values) == 1: processed_count += 1 continue # Perform data comparison (Feature, Table, File, raster, etc.). If different, don't report and continue. max_severity = compare_data(values) if max_severity > 0: continue writer.writerow({}) duplicate_count += val_count - 1 for val in values: used_in_mxd = 0 used_in_lyr = 0 total_file_size += val["FILE SIZE"] * (val_count - 1) val["MD5"] = md5 val["FILE SIZE"] = convert_size(val["FILE SIZE"]) new_val = {k: change(v, 'utf-8') for (k, v) in val.items()} if new_val["FORMAT CATEGORY"].lower() in ("gis", "cad", "imagery"): for k,v in parent_docs.items(): if new_val['ID'] in v: if k.endswith('.mxd'): used_in_mxd += 1 elif k.endswith('.lyr'): used_in_lyr += 1 new_val['MXD COUNT'] = used_in_mxd val['MXD COUNT'] = used_in_mxd new_val['LYR COUNT'] = used_in_lyr val['LYR COUNT'] = used_in_lyr try: writer.writerow(new_val) except UnicodeEncodeError: try: new_val = {k: change(v, 'latin-1') for (k, v) in new_val.items()} writer.writerow(new_val) except Exception as we: status_writer.send_status('WRITE ERROR: {0}'.format(repr(we))) pass processed_count += 1 except Exception as ex: status_writer.send_status(repr(ex)) processed_count += 1 continue try: if (processed_count % increment) == 0: status_writer.send_percent(processed_count / float(md5_count), 'Creating report and comparing data...', 'report_duplicate_files') except Exception: status_writer.send_status("error reporting progress.") continue try: # Report a summary to the report file. 
pct_dups = float(duplicate_count) / index_count with open(report_file, "ab") as f: writer = csv.DictWriter(f, fieldnames=["DUPLICATE COUNT", "INDEX COUNT", "PERCENT DUPLICATES", "TOTAL DUPLICATE FILE SIZE"]) writer.writerow({}) writer.writerow({}) writer.writeheader() writer.writerow({"DUPLICATE COUNT": duplicate_count, "INDEX COUNT": index_count, "PERCENT DUPLICATES": '{:.0f}%'.format(pct_dups * 100), "TOTAL DUPLICATE FILE SIZE": convert_size(total_file_size)}) status_writer.send_status("DUPLICATE COUNT: {0}".format(duplicate_count)) status_writer.send_status("INDEX COUNT: {0}".format(index_count)) status_writer.send_status("PERCENT DUPLICATES: {0}".format('{:.0f}%'.format(pct_dups * 100))) status_writer.send_status("TOTAL DUPLICATE FILE SIZE: {0}".format(convert_size(total_file_size))) status_writer.send_state(status.STAT_SUCCESS) except Exception: status_writer.send_status("Error writing summary.")
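# Illustrative sketch (not part of the task above): convert_size is a helper
# defined elsewhere in this module; from its usage it is assumed to turn a
# byte count into a human-readable string. A common implementation of that
# idea (an assumption, not necessarily the module's exact formatting):
def _example_convert_size(size_bytes):
    """Format a byte count, e.g. 1536 -> '1.5 KB'."""
    import math
    if size_bytes == 0:
        return '0 B'
    units = ('B', 'KB', 'MB', 'GB', 'TB', 'PB')
    i = min(int(math.floor(math.log(size_bytes, 1024))), len(units) - 1)
    return '{0} {1}'.format(round(size_bytes / math.pow(1024, i), 1), units[i])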