def document_json_response(document_id_modified, type):
    # Document IDs arrive with '-' escaped as '____' so they survive URL
    # routing; undo that here before querying Elasticsearch.
    document_id = document_id_modified.replace('____', '-')
    conn = httplib.HTTPConnection(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    conn.request("GET", "/aips/" + type + "/" + document_id)
    response = conn.getresponse()
    data = response.read()
    pretty_json = json.dumps(json.loads(data), sort_keys=True, indent=2)
    return HttpResponse(pretty_json, content_type='application/json')
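# Usage sketch (illustrative only): callers are expected to escape '-' as
# '____' in the document ID before building the URL, the inverse of the
# replace() in document_json_response() above. The UUID below is made up.
#
#   >>> document_id = '550e8400-e29b-41d4-a716-446655440000'
#   >>> document_id.replace('-', '____')
#   '550e8400____e29b____41d4____a716____446655440000'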
def indexed_count(index, types=None, query=None):
    if types is not None:
        types = ','.join(types)
    try:
        conn = Elasticsearch(hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())
        return conn.count(index=index, doc_type=types, body=query)['count']
    except ElasticsearchException:
        return 0
def indexed_count(index):
    aip_indexed_file_count = 0
    try:
        conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
        count_data = conn.count(indices=index)
        aip_indexed_file_count = count_data.count
    except Exception:
        pass
    return aip_indexed_file_count
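# Minimal usage sketch for the elasticsearch-py indexed_count() variant
# above. The query body and the some_aip_uuid variable are illustrative
# assumptions, not values taken from this codebase; the 'aips' index and
# 'aipfile' type match those used elsewhere in this module.
#
#   total_files = indexed_count('aips', types=['aipfile'])
#   in_one_aip = indexed_count(
#       'aips',
#       types=['aipfile'],
#       query={'query': {'term': {'AIPUUID': some_aip_uuid}}},
#   )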
def create_aic(request, *args, **kwargs):
    aic_form = forms.CreateAICForm(request.POST or None)
    if aic_form.is_valid():
        aip_uuids = ast.literal_eval(aic_form.cleaned_data['results'])
        logger.info("AIC AIP UUIDs: {}".format(aip_uuids))

        # The form was passed a raw list of all AIP UUIDs matching the user's
        # query; use those to fetch their names, which are used to produce
        # the files below.
        query = {
            "query": {
                "terms": {
                    "uuid": aip_uuids,
                }
            }
        }
        conn = Elasticsearch(hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())
        results = conn.search(
            body=query,
            index='aips',
            doc_type='aip',
            fields='uuid,name',
            size=elasticSearchFunctions.MAX_QUERY_SIZE,  # return all records
        )

        # Create files in staging directory with AIP information
        shared_dir = helpers.get_server_config_value('sharedDirectory')
        staging_dir = os.path.join(shared_dir, 'tmp')

        # Create SIP (AIC) directory in staging directory
        temp_uuid = str(uuid.uuid4())
        destination = os.path.join(staging_dir, temp_uuid)
        try:
            os.mkdir(destination)
            os.chmod(destination, 0o770)
        except os.error:
            messages.error(request, "Error creating AIC")
            logger.exception("Error creating AIC: Error creating directory {}".format(destination))
            return redirect('archival_storage_index')

        # Create SIP in DB
        mcp_destination = destination.replace(shared_dir, '%sharedPath%') + '/'
        databaseFunctions.createSIP(mcp_destination, UUID=temp_uuid, sip_type='AIC')

        # Create files with filename = AIP UUID, and contents = AIP name
        for aip in results['hits']['hits']:
            filepath = os.path.join(destination, aip['fields']['uuid'][0])
            with open(filepath, 'w') as f:
                os.chmod(filepath, 0o660)
                f.write(str(aip['fields']['name'][0]))

        return redirect('components.ingest.views.aic_metadata_add', temp_uuid)
    else:
        messages.error(request, "Error creating AIC")
        logger.error("Error creating AIC: Form not valid: {}".format(aic_form))
        return redirect('archival_storage_index')
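# For reference, the staging layout that create_aic() produces looks like
# the following (path components are placeholders, not real values):
#
#   <sharedDirectory>/tmp/<temp_uuid>/
#       <aip_uuid_1>   # file contents: that AIP's name
#       <aip_uuid_2>   # one file per AIP matched by the user's query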
def archival_storage_file_json(request, document_id_modified):
    document_id = document_id_modified.replace('____', '-')
    conn = httplib.HTTPConnection(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    conn.request("GET", "/aips/aipfile/" + document_id)
    response = conn.getresponse()
    data = response.read()
    pretty_json = simplejson.dumps(simplejson.loads(data), sort_keys=True, indent=2)
    return HttpResponse(pretty_json, content_type='application/json')
def preservation_planning_fpr_search(request, current_page_number=None):
    if current_page_number is None:
        current_page_number = 1

    query = request.GET.get('query', '')
    if query == '':
        # No query in the URL parameters list; try to reuse an existing
        # query from a previous page...
        query = request.session['fpr_query']
        # No query from a previous page either
        if query == '':
            query = '*'
            return HttpResponse('No query.')

    request.session['fpr_query'] = query  # Save this for pagination...

    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    indexes = conn.get_indices()

    if 'fpr_file' not in indexes:
        # Grab relevant FPR data from the DB
        results = get_fpr_table()
        request.session['fpr_results'] = results
        # Set up indexing for some Elasticsearch action.
        for row in results:
            conn.index(row, 'fpr_file', 'fpr_files')
    else:
        results = request.session['fpr_results']

    # Do a full-text search
    q = pyes.StringQuery(query)
    s = pyes.Search(q)
    try:
        results = conn.search_raw(s, size=len(results), indices='fpr_file')
    except Exception:
        return HttpResponse('Error accessing index.')

    form = FPRSearchForm()
    search_hits = []
    for row in results.hits.hits:
        search_hits.append(row['_source'].copy())

    page = helpers.pager(search_hits, results_per_page, current_page_number)
    hit_count = len(search_hits)

    return render(request, 'main/preservation_planning_fpr.html', locals())
def search(request):
    # Deal with file mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # Get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # Redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # Get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # Set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20
    page = advanced_search.extract_page_number_from_url(request)
    start = page * items_per_page + 1

    # Perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)
        # Use all results to pull transfer facets if not in file mode,
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                fields='uuid'
            )
        else:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                start=start - 1,
                size=items_per_page,
                fields='AIPUUID,filePath,FILEUUID'
            )
    except Exception:
        return HttpResponse('Error accessing index.')

    # Take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)
        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # Set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page,
        page,
        start,
        number_of_results
    )

    # Make sure results is set
    try:
        if results:
            pass
    except NameError:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})

    return render(request, 'archival_storage/archival_storage_search.html', locals())
def search(request):
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ('checked', 'yes', 'true', 'on')
    if request.GET.get('filemode', '') in yes_options:
        file_mode = True
        checked_if_in_file_mode = 'checked'
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ''
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ''
    if request.GET.get('show_aics', '') in yes_options:
        show_aics = 'checked'

    # Get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug('Queries: %s, Ops: %s, Fields: %s, Types: %s', queries, ops, fields, types)

    # Redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # Get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get('page', 1))

    # Perform search
    conn = Elasticsearch(hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())
    results = None
    query = advanced_search.assemble_query(queries, ops, fields, types, search_index='aips', doc_type='aipfile')
    try:
        # Use all results to pull transfer facets if not in file mode,
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            query['aggs'] = {'aip_uuids': {'terms': {'field': 'AIPUUID', 'size': 0}}}
            # Don't return results, just the aggregation
            query['size'] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of. To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP in
            # elasticSearchFunctions.index_mets_file_metadata
            results = conn.search(
                body=query,
                index='aips',
                doc_type='aipfile',
                sort='sipName:desc',
            )
            # Given these AIP UUIDs, now fetch the actual information we want
            # from aips/aip
            buckets = results['aggregations']['aip_uuids']['buckets']
            uuids = [bucket['key'] for bucket in buckets]
            uuid_file_counts = {bucket['key']: bucket['doc_count'] for bucket in buckets}
            query = {
                'query': {
                    'terms': {
                        'uuid': uuids,
                    },
                },
            }
            index = 'aips'
            doc_type = 'aip'
            fields = 'name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC'
            sort = 'name:desc'
        else:
            index = 'aips'
            doc_type = 'aipfile'
            fields = 'AIPUUID,filePath,FILEUUID'
            sort = 'sipName:desc'

        # To reduce the amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = conn.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                doc_type=doc_type,
                fields=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)

        count = conn.count(index=index, doc_type=doc_type, body={'query': query['query']})['count']
        results = LazyPagedSequence(es_pager, items_per_page, count)
    except ElasticsearchException:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={'results': uuids})
    else:  # file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(request, 'archival_storage/archival_storage_search.html',
        {
            'file_mode': file_mode,
            'show_aics': show_aics,
            'checked_if_in_file_mode': checked_if_in_file_mode,
            'aic_creation_form': aic_creation_form,
            'results': page_data.object_list,
            'search_params': search_params,
            'page': page_data,
        }
    )
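# LazyPagedSequence is a helper imported from elsewhere in this codebase; the
# class below is a minimal sketch of the pattern it implements, under the
# assumption that its constructor takes (page_fetcher, page_size, length) as
# used above. It is not the real implementation, and the name is hypothetical
# to avoid clashing with the import. The idea: present paged backend results
# as a sequence, fetching a page from Elasticsearch only when an item on it
# is first accessed, so helpers.pager() touches just one page of data.
class LazyPagedSequenceSketch(object):
    def __init__(self, page_fetcher, page_size, length):
        self.page_fetcher = page_fetcher  # callable: (page, page_size) -> list
        self.page_size = page_size
        self.length = length
        self._pages = {}  # cache of pages already fetched from the backend

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # Sketch only: negative indices and slices are not handled.
        page_number = index // self.page_size + 1  # pages are 1-indexed
        if page_number not in self._pages:
            self._pages[page_number] = self.page_fetcher(page_number, self.page_size)
        return self._pages[page_number][index % self.page_size]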
import os
import sys

os.environ['DJANGO_SETTINGS_MODULE'] = 'settings.common'
sys.path.append("/usr/lib/archivematica/archivematicaCommon")

from elasticSearchFunctions import getElasticsearchServerHostAndPort
from elasticsearch import Elasticsearch, ConnectionError, TransportError

# Allow "-f" to override the confirmation prompt
options = sys.argv[1:]
if '-f' not in options:
    proceed = raw_input("Are you sure you want to erase the ElasticSearch indexes? (y/N)\n")
    if proceed.lower() != 'y':
        print 'Not going to erase the indexes.'
        sys.exit(0)

conn = Elasticsearch(hosts=getElasticsearchServerHostAndPort())
try:
    conn.info()
except (ConnectionError, TransportError):
    print "Connection error: Elasticsearch may not be running."
    sys.exit(1)

# Delete the transfers and aips Elasticsearch indexes.
# Ignore 404, in case an index is missing (e.g. already deleted)
conn.indices.delete('transfers', ignore=404)
conn.indices.delete('aips', ignore=404)
print "ElasticSearch indexes deleted."
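# Example invocation (the script filename here is hypothetical):
#
#   python eraseElasticSearchIndexes.py       # prompts for confirmation
#   python eraseElasticSearchIndexes.py -f    # force: skips the prompt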