示例#1
0
def document_json_response(document_id_modified, type):
    """Fetch a document from the 'aips' Elasticsearch index and return it
    as a pretty-printed JSON HTTP response.

    :param document_id_modified: document ID with '-' encoded as '____'
    :param type: Elasticsearch document type within the 'aips' index
        (name shadows the builtin, but is kept for caller compatibility)
    :return: HttpResponse with content_type 'application/json'
    """
    document_id = document_id_modified.replace('____', '-')
    conn = httplib.HTTPConnection(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    try:
        conn.request("GET", "/aips/" + type + "/" + document_id)
        response = conn.getresponse()
        data = response.read()
    finally:
        # Fix: the connection was previously never closed (socket leak).
        conn.close()
    pretty_json = json.dumps(json.loads(data), sort_keys=True, indent=2)
    return HttpResponse(pretty_json, content_type='application/json')
示例#2
0
def document_json_response(document_id_modified, type):
    """Fetch a document from the 'aips' Elasticsearch index and return it
    as a pretty-printed JSON HTTP response.

    :param document_id_modified: document ID with '-' encoded as '____'
    :param type: Elasticsearch document type within the 'aips' index
        (name shadows the builtin, but is kept for caller compatibility)
    :return: HttpResponse with content_type 'application/json'
    """
    document_id = document_id_modified.replace('____', '-')
    conn = httplib.HTTPConnection(elasticSearchFunctions.getElasticsearchServerHostAndPort())
    try:
        conn.request("GET", "/aips/" + type + "/" + document_id)
        response = conn.getresponse()
        data = response.read()
    finally:
        # Fix: the connection was previously never closed (socket leak).
        conn.close()
    pretty_json = simplejson.dumps(simplejson.loads(data), sort_keys=True, indent=2)
    return HttpResponse(pretty_json, content_type='application/json')
def indexed_count(index, types=None, query=None):
    """Return the number of documents in an Elasticsearch index.

    :param index: name of the index to count documents in
    :param types: optional iterable of doc types; joined with commas for ES
    :param query: optional query body restricting the count
    :return: the document count, or 0 if the count request fails
    """
    if types is not None:
        types = ','.join(types)
    try:
        conn = Elasticsearch(
            hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())
        return conn.count(index=index, doc_type=types, body=query)['count']
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return 0
示例#4
0
def indexed_count(index):
    """Return the number of documents in an Elasticsearch index.

    :param index: name of the index to count documents in
    :return: the document count, or 0 if the count request fails
    """
    aip_indexed_file_count = 0
    try:
        conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())
        count_data = conn.count(indices=index)
        aip_indexed_file_count = count_data.count
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        pass
    return aip_indexed_file_count
示例#5
0
def create_aic(request, *args, **kwargs):
    """Create an AIC (Archival Information Collection) from a set of AIPs.

    Expects a POSTed CreateAICForm whose 'results' field is a string
    repr of a list of AIP UUIDs.  Looks up the AIP names in the 'aips'
    Elasticsearch index, creates a staging directory containing one file
    per AIP (filename = UUID, contents = AIP name), registers a new SIP
    of type 'AIC' in the database, then redirects to the AIC metadata
    page.  On any failure, redirects back to the archival storage index
    with an error message.
    """
    aic_form = forms.CreateAICForm(request.POST or None)
    if aic_form.is_valid():
        # 'results' arrives as a string representation of a Python list;
        # literal_eval parses it without executing arbitrary code.
        aip_uuids = ast.literal_eval(aic_form.cleaned_data['results'])
        logger.info("AIC AIP UUIDs: {}".format(aip_uuids))

        # The form was passed a raw list of all AIP UUIDs mapping the user's query;
        # use those to fetch their names, which is used to produce files below.
        query = {
            "query": {
                "terms": {
                    "uuid": aip_uuids,
                }
            }
        }
        conn = Elasticsearch(hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())
        results = conn.search(
            body=query,
            index='aips',
            doc_type='aip',
            fields='uuid,name',
            size=elasticSearchFunctions.MAX_QUERY_SIZE,  # return all records
        )

        # Create files in staging directory with AIP information
        shared_dir = helpers.get_server_config_value('sharedDirectory')
        staging_dir = os.path.join(shared_dir, 'tmp')

        # Create SIP (AIC) directory in staging directory
        temp_uuid = str(uuid.uuid4())
        destination = os.path.join(staging_dir, temp_uuid)
        try:
            os.mkdir(destination)
            # Group-writable so other Archivematica processes can use it.
            os.chmod(destination, 0o770)
        except os.error:
            messages.error(request, "Error creating AIC")
            logger.exception("Error creating AIC: Error creating directory {}".format(destination))
            return redirect('archival_storage_index')

        # Create SIP in DB
        # MCP stores paths relative to the shared directory placeholder.
        mcp_destination = destination.replace(shared_dir, '%sharedPath%') + '/'
        databaseFunctions.createSIP(mcp_destination, UUID=temp_uuid, sip_type='AIC')

        # Create files with filename = AIP UUID, and contents = AIP name
        for aip in results['hits']['hits']:
            filepath = os.path.join(destination, aip['fields']['uuid'][0])
            with open(filepath, 'w') as f:
                os.chmod(filepath, 0o660)
                f.write(str(aip['fields']['name'][0]))

        return redirect('components.ingest.views.aic_metadata_add', temp_uuid)
    else:
        messages.error(request, "Error creating AIC")
        logger.error("Error creating AIC: Form not valid: {}".format(aic_form))
        return redirect('archival_storage_index')
示例#6
0
def archival_storage_file_json(request, document_id_modified):
    """Fetch an 'aipfile' document from the 'aips' Elasticsearch index
    and return it as a pretty-printed JSON HTTP response.

    :param request: Django request (unused, kept for URL-dispatch signature)
    :param document_id_modified: document ID with '-' encoded as '____'
    :return: HttpResponse with content_type 'application/json'
    """
    document_id = document_id_modified.replace('____', '-')
    conn = httplib.HTTPConnection(
        elasticSearchFunctions.getElasticsearchServerHostAndPort())
    try:
        conn.request("GET", "/aips/aipfile/" + document_id)
        response = conn.getresponse()
        data = response.read()
    finally:
        # Fix: the connection was previously never closed (socket leak).
        conn.close()
    pretty_json = simplejson.dumps(simplejson.loads(data),
                                   sort_keys=True,
                                   indent=2)
    return HttpResponse(pretty_json, content_type='application/json')
示例#7
0
def preservation_planning_fpr_search(request, current_page_number=None):
    """Full-text search over FPR (Format Policy Registry) data.

    Uses the 'query' GET parameter, falling back to the query saved in
    the session by a previous page.  Lazily indexes FPR rows into the
    'fpr_file' Elasticsearch index on first use.  Renders a paginated
    results page; the template is fed via locals(), so local variable
    names here are part of the contract with the template.
    """
    if current_page_number is None:
        current_page_number = 1

    query = request.GET.get('query', '')

    if query == '':
        # No query in the URL parameters list, try to see if we've got an existing query going from a previous page...
        # Fix: direct indexing raised KeyError when the session had no
        # saved query, making the "No query." branch unreachable.
        query = request.session.get('fpr_query', '')

        # No query from a previous page either
        if query == '':
            query = '*'
            return HttpResponse('No query.')

    request.session['fpr_query'] = query  # Save this for pagination...
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    indexes = conn.get_indices()

    if 'fpr_file' not in indexes:
        # Grab relevant FPR data from the DB
        results = get_fpr_table()
        request.session['fpr_results'] = results

        # Setup indexing for some Elastic Search action.
        for row in results:
            conn.index(row, 'fpr_file', 'fpr_files')
    else:
        results = request.session['fpr_results']

    # do fulltext search
    q = pyes.StringQuery(query)
    s = pyes.Search(q)

    try:
        results = conn.search_raw(s, size=len(results), indices='fpr_file')
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return HttpResponse('Error accessing index.')

    form = FPRSearchForm()

    search_hits = []

    for row in results.hits.hits:
        search_hits.append(row['_source'].copy())

    page = helpers.pager(search_hits, results_per_page, current_page_number)
    hit_count = len(search_hits)

    return render(request, 'main/preservation_planning_fpr.html', locals())
示例#8
0
def preservation_planning_fpr_search(request, current_page_number=None):
    """Full-text search over FPR (Format Policy Registry) data.

    Uses the 'query' GET parameter, falling back to the query saved in
    the session by a previous page.  Lazily indexes FPR rows into the
    'fpr_file' Elasticsearch index on first use.  Renders a paginated
    results page; the template is fed via locals(), so local variable
    names here are part of the contract with the template.
    """
    if current_page_number is None:
        current_page_number = 1

    query = request.GET.get("query", "")

    if query == "":
        # No query in the URL parameters list, try to see if we've got an existing query going from a previous page...
        # Fix: direct indexing raised KeyError when the session had no
        # saved query, making the "No query." branch unreachable.
        query = request.session.get("fpr_query", "")

        # No query from a previous page either
        if query == "":
            query = "*"
            return HttpResponse("No query.")

    request.session["fpr_query"] = query  # Save this for pagination...
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    indexes = conn.get_indices()

    if "fpr_file" not in indexes:
        # Grab relevant FPR data from the DB
        results = get_fpr_table()
        request.session["fpr_results"] = results

        # Setup indexing for some Elastic Search action.
        for row in results:
            conn.index(row, "fpr_file", "fpr_files")
    else:
        results = request.session["fpr_results"]

    # do fulltext search
    q = pyes.StringQuery(query)
    s = pyes.Search(q)

    try:
        results = conn.search_raw(s, size=len(results), indices="fpr_file")
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return HttpResponse("Error accessing index.")

    form = FPRSearchForm()

    search_hits = []

    for row in results.hits.hits:
        search_hits.append(row["_source"].copy())

    page = helpers.pager(search_hits, results_per_page, current_page_number)
    hit_count = len(search_hits)

    return render(request, "main/preservation_planning_fpr.html", locals())
示例#9
0
def search(request):
    """Search the 'aips' index, in AIP mode (facet over AIP UUIDs) or
    file mode ('mode' GET parameter present).

    Renders the archival storage search template via locals(), so local
    variable names here are part of the contract with the template.
    Redirects to an empty search when no 'query' parameter is given.
    """
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(
        request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type='')

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(
        request)

    # set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)

        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(query=query,
                                      indices='aips',
                                      type='aipfile',
                                      fields='uuid')
        else:
            results = conn.search_raw(query=query,
                                      indices='aips',
                                      type='aipfile',
                                      start=start - 1,
                                      size=items_per_page,
                                      fields='AIPUUID,filePath,FILEUUID')
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)

        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
        items_per_page, page, start, number_of_results)

    # make sure results is set (defensive; results is bound on every
    # path that reaches here, but the template expects it to exist)
    try:
        if results:
            pass
    except Exception:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'archival_storage/archival_storage_search.html',
                  locals())
示例#10
0
def search(request):
    """Search the 'aips' index, in AIP mode or file mode.

    AIP mode aggregates matching files by their AIP UUID, then fetches
    the AIP records themselves; file mode pages directly over aipfile
    documents.  Results are fetched lazily per page via
    LazyPagedSequence.  Renders the archival storage search template
    with an explicit context dict.
    """
    # FIXME there has to be a better way of handling checkboxes than parsing
    # them by hand here, and displaying 'checked' in
    # _archival_storage_search_form.html
    # Parse checkbox for file mode
    yes_options = ('checked', 'yes', 'true', 'on')
    if request.GET.get('filemode', '') in yes_options:
        file_mode = True
        checked_if_in_file_mode = 'checked'
        items_per_page = 20
    else:  # AIP list
        file_mode = False
        checked_if_in_file_mode = ''
        items_per_page = 10

    # Parse checkbox for show AICs
    show_aics = ''
    if request.GET.get('show_aics', '') in yes_options:
        show_aics = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)
    logger.debug('Queries: %s, Ops: %s, Fields: %s, Types: %s', queries, ops, fields, types)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    current_page_number = int(request.GET.get('page', 1))

    # perform search
    conn = Elasticsearch(hosts=elasticSearchFunctions.getElasticsearchServerHostAndPort())

    results = None
    query = advanced_search.assemble_query(queries, ops, fields, types, search_index='aips', doc_type='aipfile')
    try:
        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            # Fetch all unique AIP UUIDs in the returned set of files
            query['aggs'] = {'aip_uuids': {'terms': {'field': 'AIPUUID', 'size': 0}}}
            # Don't return results, just the aggregation
            query['size'] = 0
            # Searching for AIPs still actually searches type 'aipfile', and
            # returns the UUID of the AIP the files are a part of.  To search
            # for an attribute of an AIP, the aipfile must index that
            # information about their AIP in
            # elasticSearchFunctions.index_mets_file_metadata
            results = conn.search(
                body=query,
                index='aips',
                doc_type='aipfile',
                sort='sipName:desc',
            )
            # Given these AIP UUIDs, now fetch the actual information we want from aips/aip
            buckets = results['aggregations']['aip_uuids']['buckets']
            uuids = [bucket['key'] for bucket in buckets]
            # Map AIP UUID -> number of matching files, for display.
            uuid_file_counts = {bucket['key']: bucket['doc_count'] for bucket in buckets}
            # Replace the file query with one fetching the AIP records.
            query = {
                'query': {
                    'terms': {
                        'uuid': uuids,
                    },
                },
            }
            index = 'aips'
            doc_type = 'aip'
            fields = 'name,uuid,size,created,status,AICID,isPartOf,countAIPsinAIC'
            sort = 'name:desc'
        else:
            index = 'aips'
            doc_type = 'aipfile'
            fields = 'AIPUUID,filePath,FILEUUID'
            sort = 'sipName:desc'

        # To reduce amount of data fetched from ES, use LazyPagedSequence
        def es_pager(page, page_size):
            """
            Fetch one page of normalized aipfile entries from Elasticsearch.

            :param page: 1-indexed page to fetch
            :param page_size: Number of entries on a page
            :return: List of dicts for each entry with additional information
            """
            start = (page - 1) * page_size
            results = conn.search(
                body=query,
                from_=start,
                size=page_size,
                index=index,
                doc_type=doc_type,
                fields=fields,
                sort=sort,
            )
            if file_mode:
                return search_augment_file_results(results)
            else:
                return search_augment_aip_results(results, uuid_file_counts)
        count = conn.count(index=index, doc_type=doc_type, body={'query': query['query']})['count']
        results = LazyPagedSequence(es_pager, items_per_page, count)

    except ElasticsearchException:
        logger.exception('Error accessing index.')
        return HttpResponse('Error accessing index.')

    if not file_mode:
        aic_creation_form = forms.CreateAICForm(initial={'results': uuids})
    else:  # if file_mode
        aic_creation_form = None

    page_data = helpers.pager(results, items_per_page, current_page_number)

    return render(request, 'archival_storage/archival_storage_search.html',
        {
            'file_mode': file_mode,
            'show_aics': show_aics,
            'checked_if_in_file_mode': checked_if_in_file_mode,
            'aic_creation_form': aic_creation_form,
            'results': page_data.object_list,
            'search_params': search_params,
            'page': page_data,
        }
    )
示例#11
0
def search(request):
    """Search the 'aips' index, in AIP mode (facet over AIP UUIDs) or
    file mode ('mode' GET parameter present).

    Renders the archival storage search template via locals(), so local
    variable names here are part of the contract with the template.
    Redirects to an empty search when no 'query' parameter is given.
    """
    # deal with transfer mode
    file_mode = False
    checked_if_in_file_mode = ''
    if request.GET.get('mode', '') != '':
        file_mode = True
        checked_if_in_file_mode = 'checked'

    # get search parameters from request
    queries, ops, fields, types = advanced_search.search_parameter_prep(request)

    # redirect if no search params have been set
    if 'query' not in request.GET:
        return helpers.redirect_with_get_params(
            'components.archival_storage.views.search',
            query='',
            field='',
            type=''
        )

    # get string of URL parameters that should be passed along when paging
    search_params = advanced_search.extract_url_search_params_from_request(request)

    # set paging variables
    if not file_mode:
        items_per_page = 2
    else:
        items_per_page = 20

    page = advanced_search.extract_page_number_from_url(request)

    start = page * items_per_page + 1

    # perform search
    conn = pyes.ES(elasticSearchFunctions.getElasticsearchServerHostAndPort())

    try:
        query = advanced_search.assemble_query(queries, ops, fields, types)

        # use all results to pull transfer facets if not in file mode
        # pulling only one field (we don't need field data as we augment
        # the results using separate queries)
        if not file_mode:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                fields='uuid'
            )
        else:
            results = conn.search_raw(
                query=query,
                indices='aips',
                type='aipfile',
                start=start - 1,
                size=items_per_page,
                fields='AIPUUID,filePath,FILEUUID'
            )
    # Fix: bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return HttpResponse('Error accessing index.')

    # take note of facet data
    aip_uuids = results['facets']['AIPUUID']['terms']

    if not file_mode:
        number_of_results = len(aip_uuids)

        page_data = helpers.pager(aip_uuids, items_per_page, page + 1)
        aip_uuids = page_data['objects']
        search_augment_aip_results(conn, aip_uuids)
    else:
        number_of_results = results.hits.total
        results = search_augment_file_results(results)

    # set remaining paging variables
    end, previous_page, next_page = advanced_search.paging_related_values_for_template_use(
       items_per_page,
       page,
       start,
       number_of_results
    )

    # make sure results is set (defensive; results is bound on every
    # path that reaches here, but the template expects it to exist)
    try:
        if results:
            pass
    except Exception:
        results = False

    form = forms.StorageSearchForm(initial={'query': queries[0]})
    return render(request, 'archival_storage/archival_storage_search.html', locals())
import sys

# Script (Python 2: print statement, raw_input) that erases Archivematica's
# ElasticSearch indexes after prompting the operator for confirmation.
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings.common'
sys.path.append("/usr/lib/archivematica/archivematicaCommon")
from elasticSearchFunctions import getElasticsearchServerHostAndPort

from elasticsearch import Elasticsearch, ConnectionError, TransportError

# allow "-f" to override prompt
options = sys.argv[1:]
if len(sys.argv) < 2 or not '-f' in options:
    proceed = raw_input(
        "Are you sure you want to erase the ElasticSearch indexes? (y/N)\n")
    if proceed.lower() != 'y':
        print 'Not going to erase the indexes.'
        sys.exit(0)

conn = Elasticsearch(hosts=getElasticsearchServerHostAndPort())
try:
    # Quick connectivity check before attempting any deletions.
    conn.info()
except (ConnectionError, TransportError):
    print "Connection error: Elasticsearch may not be running."
    sys.exit(1)

# delete transfers ElasticSearch index
# Ignore 404, in case the index is missing (e.g. already deleted)
conn.indices.delete('transfers', ignore=404)
conn.indices.delete('aips', ignore=404)

print "ElasticSearch indexes deleted."