def transfer_backlog(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.ingest.views.transfer_backlog',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 10
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = elasticSearchFunctions.connect_and_create_index('transfers')

    try:
        query = advanced_search.assemble_query(
            queries,
            ops,
            fields,
            types,
            must_haves=[pyes.TermQuery('status', 'backlog')]
        )

        # use all results to pull transfer facets if not in file mode
        if not file_mode:
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
            )
        else:
            # otherwise use paged results
            results = conn.search_raw(
                query,
                indices='transfers',
                type='transferfile',
                start=start - 1,
                size=items_per_page
            )
    except:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    file_extension_usage = results['facets']['fileExtension']['terms']
    transfer_uuids = results['facets']['sipuuid']['terms']

    if not file_mode:
        # run through transfers to see if they've been created yet
        awaiting_creation = {}
        for transfer_instance in transfer_uuids:
            try:
                awaiting_creation[transfer_instance.term] = transfer_awaiting_sip_creation_v2(transfer_instance.term)
                transfer = models.Transfer.objects.get(uuid=transfer_instance.term)
                transfer_basename = os.path.basename(transfer.currentlocation[:-1])
                transfer_instance.name = transfer_basename[:-37]
                transfer_instance.type = transfer.type
                if transfer.accessionid is not None:
                    transfer_instance.accession = transfer.accessionid
                else:
                    transfer_instance.accession = ''
            except:
                awaiting_creation[transfer_instance.term] = False

        # page data
        number_of_results = len(transfer_uuids)
        page_data = helpers.pager(transfer_uuids, items_per_page, page + 1)
        transfer_uuids = page_data['objects']
    else:
        # page data
        number_of_results = results.hits.total
        results = transfer_backlog_augment_search_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results
    )

    # make sure results is set; the bare except catches the NameError raised
    # if it was never assigned
    try:
        if results:
            pass
    except:
        results = False

    form = StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'ingest/backlog/search.html', locals())


def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()
    results = None

    # Return files which are in the backlog
    backlog_filter = {
        'bool': {
            'must': {
                'term': {
                    'status': 'backlog',
                }
            }
        }
    }

    # Omit files without UUIDs (metadata and logs directories):
    # - When the `hidemetadatalogs` param is sent from SIP arrange.
    # - Always from the appraisal tab.
    if ui == 'appraisal' or request.GET.get('hidemetadatalogs'):
        backlog_filter['bool']['must_not'] = {
            'term': {
                'fileuuid': '',
            }
        }

    # Get search parameters from request
    if 'query' not in request.GET:
        # Use backlog boolean filter as boolean query
        query = {'query': backlog_filter}
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)
        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                filters=[backlog_filter],
            )
        except:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client,
            body=query,
            index='transferfiles',
        )
    except:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]
    #     }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        if ui == 'legacy':
            _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)
        else:
            _es_results_to_appraisal_tab_format(path, directory_map, return_list, not_draggable=not_draggable)

    if ui == 'legacy':
        response = return_list
    else:
        response = {
            'formats': [],  # TODO populate this
            'transfers': return_list,
        }

    # return JSON response
    return helpers.json_response(response)


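# _es_results_to_directory_tree is defined elsewhere in this module. A minimal
# sketch of the tree building it performs, assuming sorted relative paths and
# the node shape documented in the comment above (the name and call signature
# come from the call site; this body is illustrative, and the rule that a
# non-draggable child pins its parent directories is an assumption):
def _es_results_to_directory_tree_sketch(path, return_list, not_draggable=False):
    parts = path.split('/')
    children = return_list
    # Walk or create one directory node per intermediate path component.
    for directory in parts[:-1]:
        node = next((n for n in children if n['name'] == directory), None)
        if node is None:
            node = {
                'name': directory,
                'properties': {'not_draggable': not_draggable},
                'children': [],
            }
            children.append(node)
        # Assumed rule: any non-draggable descendant pins the directory.
        node['properties']['not_draggable'] = (
            node['properties']['not_draggable'] or not_draggable)
        children = node['children']
    # The final path component is the file itself.
    children.append({'name': parts[-1], 'properties': {'not_draggable': not_draggable}})

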
def search(request):
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)

        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                fields='uuid'
            )
        else:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                start=start - 1,
                size=items_per_page,
                fields='AIPUUID,filePath,FILEUUID'
            )
    except:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)
        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results
    )

    # make sure results is set; the bare except catches the NameError raised
    # if it was never assigned
    try:
        if results:
            pass
    except:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'archival_storage/archival_storage_search.html', locals())


def transfer_backlog(request):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    # Get search parameters from request
    results = None
    conn = elasticSearchFunctions.connect_and_create_index('transfers')
    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)
        try:
            query = advanced_search.assemble_query(
                queries,
                ops,
                fields,
                types,
                # Specify this as a filter, not a must_have, for performance,
                # and so that it doesn't cause the "should" queries in a
                # should-only query to be ignored.
                filters={'term': {'status': 'backlog'}},
            )
        except:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            conn,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = _transfer_backlog_augment_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]
    #     }
    #   ]
    #  },
    # ]
    return_list = []
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)

    # return JSON response
    return helpers.json_response(return_list)


def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()
    results = None

    # Return files which are in the backlog
    backlog_filter = {"bool": {"must": {"term": {"status": "backlog"}}}}

    # Omit files without UUIDs (metadata and logs directories):
    # - When the `hidemetadatalogs` param is sent from SIP arrange.
    if request.GET.get("hidemetadatalogs"):
        backlog_filter["bool"]["must_not"] = {"term": {"fileuuid": ""}}

    # Get search parameters from request
    if "query" not in request.GET:
        # Use backlog boolean filter as boolean query
        query = {"query": backlog_filter}
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)
        try:
            query = advanced_search.assemble_query(
                queries, ops, fields, types, filters=[backlog_filter]
            )
        except:
            logger.exception("Error accessing index.")
            return HttpResponse("Error accessing index.")

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client, body=query, index="transferfiles"
        )
    except:
        logger.exception("Error accessing index.")
        return HttpResponse("Error accessing index.")

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]
    #     }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x["relative_path"])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
            original_path__endswith=path["relative_path"]
        ).exists():
            not_draggable = True
        if ui == "legacy":
            _es_results_to_directory_tree(
                path["relative_path"], return_list, not_draggable=not_draggable
            )
        else:
            _es_results_to_appraisal_tab_format(
                path, directory_map, return_list, not_draggable=not_draggable
            )

    if ui == "legacy":
        response = return_list
    else:
        if not request.GET.get("hidemetadatalogs"):
            # If metadata and log files are shown in the appraisal tab,
            # directories should not be draggable if they contain
            # non-draggable children.
            adjust_non_draggable_nodes(return_list)
        response = {"formats": [], "transfers": return_list}  # TODO populate this

    # return JSON response
    return helpers.json_response(response)


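# adjust_non_draggable_nodes is defined elsewhere in this module. A minimal
# sketch of the post-processing described in the comment above, assuming the
# node shape documented in transfer_backlog (the function name comes from the
# call above; this body is illustrative): walk the tree bottom-up and pin any
# directory whose subtree contains a non-draggable node.
def adjust_non_draggable_nodes_sketch(nodes):
    """Return True if any node in ``nodes``, recursively, is not draggable."""
    any_not_draggable = False
    for node in nodes:
        properties = node.setdefault("properties", {})
        if node.get("children"):
            # Recurse first so a deeply nested arranged file pins every
            # ancestor directory on the way back up.
            if adjust_non_draggable_nodes_sketch(node["children"]):
                properties["not_draggable"] = True
        any_not_draggable = any_not_draggable or properties.get("not_draggable", False)
    return any_not_draggable

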
def search(request):
    """A JSON end point that returns results for AIPs and their files.

    :param request: Django request object.
    :return: A JSON object including required metadata for the datatable
        and the search results.
    """
    REQUEST_FILE = "requestFile"
    MIMETYPE = "mimeType"
    RETURN_ALL = "returnAll"
    FILE_NAME = "fileName"

    request_file = request.GET.get(REQUEST_FILE, "").lower() == "true"
    file_mime = request.GET.get(MIMETYPE, "")
    file_name = request.GET.get(FILE_NAME, "")

    # Configure page-size requirements for the search.
    DEFAULT_PAGE_SIZE = 10
    page_size = None
    if request.GET.get(RETURN_ALL, "").lower() == "true":
        page_size = es.MAX_QUERY_SIZE
    if page_size is None:
        page_size = int(request.GET.get("iDisplayLength", DEFAULT_PAGE_SIZE))

    # Get search parameters from the request.
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])
    query = advanced_search.assemble_query(queries, ops, fields, types)

    file_mode = request.GET.get("file_mode") == "true"

    # Configure other aspects of the search including starting page and sort
    # order.
    start = int(request.GET.get("iDisplayStart", 0))
    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode
    )
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = es.get_client()

    try:
        if file_mode:
            index = es.AIP_FILES_INDEX
            source = "filePath,FILEUUID,AIPUUID,accessionid,status"
        else:
            # Fetch all unique AIP UUIDs in the returned set of files.
            # ES query will limit to 10 aggregation results by default;
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "aip_uuids": {"terms": {"field": "AIPUUID", "size": "10000"}}
            }
            # Don't return results, just the aggregation.
            query["size"] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of. To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP.
            results = es_client.search(body=query, index=es.AIP_FILES_INDEX)
            # Given these AIP UUIDs, now fetch the actual information we want
            # from AIPs/AIP.
            buckets = results["aggregations"]["aip_uuids"]["buckets"]
            uuids = [bucket["key"] for bucket in buckets]
            uuid_file_counts = {
                bucket["key"]: bucket["doc_count"] for bucket in buckets
            }
            query = {"query": {"terms": {"uuid": uuids}}}
            index = es.AIPS_INDEX
            source = "name,uuid,size,accessionids,created,status,encrypted,AICID,isPartOf,countAIPsinAIC,location"

        results = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )

        if file_mode:
            augmented_results = search_augment_file_results(es_client, results)
        else:
            augmented_results = search_augment_aip_results(results, uuid_file_counts)

        if request_file and not file_mode:
            return search_as_file(
                augmented_results, file_name=file_name, mime_type=file_mime
            )

        hit_count = results["hits"]["total"]
        return helpers.json_response(
            {
                "iTotalRecords": hit_count,
                "iTotalDisplayRecords": hit_count,
                "sEcho": int(
                    request.GET.get("sEcho", 0)
                ),  # It was recommended we convert sEcho to int to prevent XSS.
                "aaData": augmented_results,
            }
        )
    except ElasticsearchException:
        err_desc = "Error accessing AIPs index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)


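# For reference: an Elasticsearch terms aggregation like the one used above
# returns buckets of key/doc_count pairs, e.g. (values illustrative):
#
#     {"aggregations": {"aip_uuids": {"buckets": [
#         {"key": "5a4e07c4-1111-2222-3333-444455556666", "doc_count": 42},
#     ]}}}
#
# which the bucket handling in search() flattens into the parallel ``uuids``
# list and ``uuid_file_counts`` dict before the second query against the AIPs
# index.

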
def search(request):
    """
    A JSON end point that returns results for various backlog transfers and
    their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and
        the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    file_mode = request.GET.get("file_mode") == "true"
    page_size = int(request.GET.get("iDisplayLength", 10))
    start = int(request.GET.get("iDisplayStart", 0))
    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode
    )
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = es.get_client()

    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])

    query = advanced_search.assemble_query(
        queries, ops, fields, types, filters=[{"term": {"status": "backlog"}}]
    )

    try:
        if file_mode:
            index = es.TRANSFER_FILES_INDEX
            source = "filename,sipuuid,relative_path,accessionid,pending_deletion"
        else:
            # Transfer mode:
            # Query to transferfile, but only fetch & aggregate transfer UUIDs.
            # Based on transfer UUIDs, query to transfers.
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "transfer_uuid": {"terms": {"field": "sipuuid", "size": "10000"}}
            }
            hits = es_client.search(
                index=es.TRANSFER_FILES_INDEX,
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [x["key"] for x in hits["aggregations"]["transfer_uuid"]["buckets"]]

            # Recreate query to search over transfers
            query = {"query": {"terms": {"uuid": uuids}}}
            index = es.TRANSFERS_INDEX
            source = (
                "name,uuid,file_count,ingest_date,accessionid,size,pending_deletion"
            )

        hits = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )
        hit_count = hits["hits"]["total"]
    except Exception:
        err_desc = "Error accessing transfers index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    search_results = []
    es_results = [x["_source"] for x in hits["hits"]["hits"]]
    for result in es_results:
        # Format size
        size = result.get("size")
        if size is not None:
            result["size"] = filesizeformat(size)
        if file_mode:
            # We only check status against the Storage Service for
            # transfers, so include all files in search results.
            search_results.append(result)
        else:
            pending_deletion = result.get("pending_deletion")
            keep_in_results = sync_es_transfer_status_with_storage_service(
                result["uuid"], pending_deletion
            )
            # Only return details for transfers that haven't been
            # deleted from the Storage Service in the search results.
            if keep_in_results:
                search_results.append(result)

    return helpers.json_response(
        {
            "iTotalRecords": hit_count,
            "iTotalDisplayRecords": hit_count,
            "sEcho": int(
                request.GET.get("sEcho", 0)
            ),  # It was recommended we convert sEcho to int to prevent XSS
            "aaData": search_results,
        }
    )


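# A sketch of the DataTables round trip this endpoint serves (the parameter
# names are DataTables' own, taken from the code above; the URL prefix and
# the values are illustrative only):
#
#     GET .../search/?file_mode=false&iDisplayStart=0&iDisplayLength=10
#             &iSortCol_0=0&sSortDir_0=asc&sEcho=1
#
# answered with:
#
#     {"iTotalRecords": 3, "iTotalDisplayRecords": 3, "sEcho": 1,
#      "aaData": [{"name": "...", "uuid": "...", "file_count": 12,
#                  "ingest_date": "...", "size": "1.5 MB"}]}

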
def search(request):
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ('checked', 'yes', 'true', 'on')
    if request.GET.get('filemode', '') in yes_options:
        file_mode = True
        checked_if_in_file_mode = 'checked'
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ''
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ''
    if request.GET.get('show_aics', '') in yes_options:
        show_aics = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug('Queries: %s, Ops: %s, Fields: %s, Types: %s', queries, ops, fields, types)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get('page', 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(
        es_client, queries, ops, fields, types,
        search_index='aips', doc_type='aipfile')

    try:
        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            query['aggs'] = {'aip_uuids': {'terms': {'field': 'AIPUUID', 'size': 0}}}
            # Don't return results, just the aggregation
            query['size'] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of. To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP in
            # elasticSearchFunctions.index_mets_file_metadata
            results = es_client.search(
                body=query,
                index='aips',
                doc_type='aipfile',
                sort='sipName:desc',
            )
            # Given these AIP UUIDs, now fetch the actual information we want
            # from aips/aip
            buckets = results['aggregations']['aip_uuids']['buckets']
            uuids = [bucket['key'] for bucket in buckets]
            uuid_file_counts = {bucket['key']: bucket['doc_count'] for bucket in buckets}
            query = {
                'query': {
                    'terms': {
                        'uuid': uuids,
                    },
                },
            }
            index = 'aips'
            doc_type = 'aip'
            fields = 'name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted'
            sort = 'name:desc'
        else:
            index = 'aips'
            doc_type = 'aipfile'
            fields = 'AIPUUID,filePath,FILEUUID,encrypted'
            sort = 'sipName:desc'

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                doc_type=doc_type,
                fields=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)

        count = es_client.count(index=index, doc_type=doc_type, body={'query': query['query']})['count']
        results = LazyPagedSequence(es_pager, items_per_page, count)
    except ElasticsearchException:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={'results': uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(request, 'archival_storage/search.html',
        {
            'file_mode': file_mode,
            'show_aics': show_aics,
            'checked_if_in_file_mode': checked_if_in_file_mode,
            'aic_creation_form': aic_creation_form,
            'results': page_data.object_list,
            'search_params': search_params,
            'page': page_data,
        }
    )


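# LazyPagedSequence is imported from elsewhere in the dashboard. A minimal
# sketch of the contract the code above relies on, under the assumption that
# it behaves like a length-aware sequence that calls the pager lazily (the
# constructor arguments match the call site above; this body is illustrative,
# not the real implementation):
class LazyPagedSequenceSketch(object):
    def __init__(self, page_fetcher, page_size, length):
        self.page_fetcher = page_fetcher  # callable(page, page_size) -> list
        self.page_size = page_size
        self.length = length
        self._cache = {}  # 1-indexed page number -> fetched page of items

    def __len__(self):
        # helpers.pager() needs the total count without fetching everything.
        return self.length

    def _page(self, number):
        # Fetch each page from Elasticsearch at most once.
        if number not in self._cache:
            self._cache[number] = self.page_fetcher(number, self.page_size)
        return self._cache[number]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return [self[i] for i in range(*index.indices(self.length))]
        page, offset = divmod(index, self.page_size)
        return self._page(page + 1)[offset]  # es_pager pages are 1-indexed

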
def search(request):
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ("checked", "yes", "true", "on")
    if request.GET.get("filemode", "") in yes_options:
        file_mode = True
        checked_if_in_file_mode = "checked"
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ""
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ""
    if request.GET.get("show_aics", "") in yes_options:
        show_aics = "checked"

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug(
        "Queries: %s, Ops: %s, Fields: %s, Types: %s", queries, ops, fields, types
    )

    # redirect if no search params have been set
    if "query" not in request.GET:
        return helpers.redirect_with_get_params(
            "components.archival_storage.views.search", query="", field="", type=""
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get("page", 1))

    # perform search
    es_client = elasticSearchFunctions.get_client()
    results = None
    query = advanced_search.assemble_query(queries, ops, fields, types)

    try:
        # Use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries).
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files.
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "aip_uuids": {"terms": {"field": "AIPUUID", "size": "10000"}}
            }
            # Don't return results, just the aggregation
            query["size"] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of. To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP.
            results = es_client.search(body=query, index="aipfiles")
            # Given these AIP UUIDs, now fetch the actual information we want
            # from aips/aip
            buckets = results["aggregations"]["aip_uuids"]["buckets"]
            uuids = [bucket["key"] for bucket in buckets]
            uuid_file_counts = {
                bucket["key"]: bucket["doc_count"] for bucket in buckets
            }
            query = {"query": {"terms": {"uuid": uuids}}}
            index = "aips"
            fields = (
                "name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC,encrypted"
            )
            sort = "name.raw:desc"
        else:
            index = "aipfiles"
            fields = "AIPUUID,filePath,FILEUUID,encrypted"
            sort = "sipName.raw:desc"

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = es_client.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                _source=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(es_client, results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)

        count = es_client.count(index=index, body={"query": query["query"]})["count"]
        results = LazyPagedSequence(es_pager, items_per_page, count)
    except ElasticsearchException:
        logger.exception("Error accessing index.")
        return HttpResponse("Error accessing index.")

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={"results": uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(
        request,
        "archival_storage/search.html",
        {
            "file_mode": file_mode,
            "show_aics": show_aics,
            "checked_if_in_file_mode": checked_if_in_file_mode,
            "aic_creation_form": aic_creation_form,
            "results": page_data.object_list,
            "search_params": search_params,
            "page": page_data,
        },
    )


def transfer_backlog(request, ui):
    """
    AJAX endpoint to query for and return transfer backlog items.
    """
    es_client = elasticSearchFunctions.get_client()

    # Get search parameters from request
    results = None
    # GET params in SIP arrange can control whether files in metadata/ and
    # logs/ are returned. Appraisal tab always hides these dirs and their
    # files (for now).
    backlog_filter = elasticSearchFunctions.BACKLOG_FILTER
    if ui == 'appraisal' or request.GET.get('hidemetadatalogs'):
        backlog_filter = elasticSearchFunctions.BACKLOG_FILTER_NO_MD_LOGS
    if 'query' not in request.GET:
        query = elasticSearchFunctions.MATCH_ALL_QUERY.copy()
        query['filter'] = backlog_filter
    else:
        queries, ops, fields, types = advanced_search.search_parameter_prep(request)
        try:
            query = advanced_search.assemble_query(
                es_client,
                queries,
                ops,
                fields,
                types,
                filters=backlog_filter,
            )
        except:
            logger.exception('Error accessing index.')
            return HttpResponse('Error accessing index.')

    # perform search
    try:
        results = elasticSearchFunctions.search_all_results(
            es_client,
            body=query,
            index='transfers',
            doc_type='transferfile',
        )
    except:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    # Convert results into a more workable form
    results = elasticSearchFunctions.augment_raw_search_results(results)

    # Convert to a form JS can use:
    # [{'name': <filename>,
    #   'properties': {'not_draggable': False}},
    #  {'name': <directory name>,
    #   'properties': {'not_draggable': True, 'object count': 3, 'display_string': '3 objects'},
    #   'children': [
    #     {'name': <filename>,
    #      'properties': {'not_draggable': True}},
    #     {'name': <directory name>,
    #      'children': [...]
    #     }
    #   ]
    #  },
    # ]
    return_list = []
    directory_map = {}
    # _es_results_to_directory_tree requires that paths MUST be sorted
    results.sort(key=lambda x: x['relative_path'])
    for path in results:
        # If a path is in SIPArrange.original_path, then it shouldn't be draggable
        not_draggable = False
        if models.SIPArrange.objects.filter(
                original_path__endswith=path['relative_path']).exists():
            not_draggable = True
        if ui == 'legacy':
            _es_results_to_directory_tree(path['relative_path'], return_list, not_draggable=not_draggable)
        else:
            _es_results_to_appraisal_tab_format(path, directory_map, return_list, not_draggable=not_draggable)

    if ui == 'legacy':
        response = return_list
    else:
        response = {
            'formats': [],  # TODO populate this
            'transfers': return_list,
        }

    # return JSON response
    return helpers.json_response(response)


def search(request):
    """
    A JSON end point that returns results for various backlog transfers and
    their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and
        the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    file_mode = request.GET.get('file_mode') == 'true'
    page_size = int(request.GET.get('iDisplayLength', 10))
    start = int(request.GET.get('iDisplayStart', 0))
    order_by = get_es_property_from_column_index(
        int(request.GET.get('iSortCol_0', 0)), file_mode)
    sort_direction = request.GET.get('sSortDir_0', 'asc')

    es_client = elasticSearchFunctions.get_client()

    if 'query' not in request.GET:
        queries, ops, fields, types = (['*'], ['or'], [''], ['term'])

    query = advanced_search.assemble_query(
        es_client,
        queries,
        ops,
        fields,
        types,
        search_index='transfers',
        doc_type='transferfile',
        filters={'term': {'status': 'backlog'}})

    try:
        if file_mode:
            doc_type = 'transferfile'
            source = 'filename,sipuuid,relative_path'
        else:
            # Transfer mode
            # Query to transfers/transferfile, but only fetch & aggregate
            # transfer UUIDs. Based on transfer UUIDs, query to
            # transfers/transfer.
            # ES query will limit to 10 aggregation results by default, add
            # size parameter in terms to override
            # (https://stackoverflow.com/questions/22927098/show-all-elasticsearch-aggregation-results-buckets-and-not-just-10)
            query['aggs'] = {
                'transfer_uuid': {
                    'terms': {
                        'field': 'sipuuid',
                        'size': '10000'
                    }
                }
            }
            hits = es_client.search(
                index='transfers',
                doc_type='transferfile',
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [x['key'] for x in hits['aggregations']['transfer_uuid']['buckets']]
            query['query'] = {
                'terms': {
                    'uuid': uuids,
                },
            }
            doc_type = 'transfer'
            source = 'name,uuid,file_count,ingest_date'

        hit_count = es_client.search(
            index='transfers',
            doc_type=doc_type,
            body=query,
            search_type='count')['hits']['total']
        hits = es_client.search(
            index='transfers',
            doc_type=doc_type,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ':' + sort_direction if order_by else '',
            _source=source,
        )
    except Exception:
        err_desc = 'Error accessing transfers index'
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    results = [x['_source'] for x in hits['hits']['hits']]

    return helpers.json_response({
        'iTotalRecords': hit_count,
        'iTotalDisplayRecords': hit_count,
        # It was recommended we convert sEcho to int to prevent XSS
        'sEcho': int(request.GET.get('sEcho', 0)),
        'aaData': results,
    })


def search(request):
    """
    A JSON end point that returns results for various backlog transfers and
    their files.

    :param request: The Django request object
    :return: A JSON object including required metadata for the datatable and
        the backlog search results.
    """
    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    file_mode = request.GET.get("file_mode") == "true"
    page_size = int(request.GET.get("iDisplayLength", 10))
    start = int(request.GET.get("iDisplayStart", 0))
    order_by = get_es_property_from_column_index(
        int(request.GET.get("iSortCol_0", 0)), file_mode)
    sort_direction = request.GET.get("sSortDir_0", "asc")

    es_client = elasticSearchFunctions.get_client()

    if "query" not in request.GET:
        queries, ops, fields, types = (["*"], ["or"], [""], ["term"])

    query = advanced_search.assemble_query(
        queries, ops, fields, types, filters=[{"term": {"status": "backlog"}}])

    try:
        if file_mode:
            index = "transferfiles"
            source = "filename,sipuuid,relative_path"
        else:
            # Transfer mode:
            # Query to transferfile, but only fetch & aggregate transfer UUIDs.
            # Based on transfer UUIDs, query to transfers.
            # ES query will limit to 10 aggregation results by default,
            # add size parameter in terms to override.
            # TODO: Use composite aggregation when it gets out of beta.
            query["aggs"] = {
                "transfer_uuid": {"terms": {"field": "sipuuid", "size": "10000"}}
            }
            hits = es_client.search(
                index="transferfiles",
                body=query,
                size=0,  # Don't return results, only aggregation
            )
            uuids = [x["key"] for x in hits["aggregations"]["transfer_uuid"]["buckets"]]

            # Recreate query to search over transfers
            query = {"query": {"terms": {"uuid": uuids}}}
            index = "transfers"
            source = "name,uuid,file_count,ingest_date"

        hits = es_client.search(
            index=index,
            body=query,
            from_=start,
            size=page_size,
            sort=order_by + ":" + sort_direction if order_by else "",
            _source=source,
        )
        hit_count = hits["hits"]["total"]
    except Exception:
        err_desc = "Error accessing transfers index"
        logger.exception(err_desc)
        return HttpResponse(err_desc)

    results = [x["_source"] for x in hits["hits"]["hits"]]

    return helpers.json_response({
        "iTotalRecords": hit_count,
        "iTotalDisplayRecords": hit_count,
        # It was recommended we convert sEcho to int to prevent XSS
        "sEcho": int(request.GET.get("sEcho", 0)),
        "aaData": results,
    })


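# get_es_property_from_column_index is defined elsewhere in this module. A
# minimal sketch of the mapping it performs, assuming the DataTables column
# order mirrors the _source field lists used above (the field names are from
# this module; the exact column order is a guess for illustration):
def get_es_property_from_column_index_sketch(index, file_mode):
    columns = (
        ["filename", "sipuuid", "relative_path"]
        if file_mode
        else ["name", "uuid", "file_count", "ingest_date"]
    )
    try:
        return columns[index]
    except IndexError:
        # A falsy return makes the caller drop the sort clause entirely.
        return None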