def get_results_by_instance_keyword(request): print 'BEGIN def getResultsKeyword(request)' resultsByKeyword = [] results = [] resultString = "" #Instance json_instances = [] if 'HTTPS' in request.META['SERVER_PROTOCOL']: protocol = "https" else: protocol = "http" instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token") json_instances.append(instance.to_json()) request.session['instancesExplore'] = json_instances sessionName = "resultsExploreOaiPMh" + instance['name'] try: keyword = request.GET['keyword'] schemas = request.GET.getlist('schemas[]') userSchemas = request.GET.getlist('userSchemas[]') refinements = refinements_to_mongo(request.GET.getlist('refinements[]')) if 'onlySuggestions' in request.GET: onlySuggestions = json.loads(request.GET['onlySuggestions']) else: onlySuggestions = False except: keyword = '' schemas = [] userSchemas = [] refinements = {} onlySuggestions = True #We get all template versions for the given schemas #First, we take care of user defined schema templatesIDUser = Template.objects(title__in=userSchemas).distinct(field="id") templatesIDUser = [str(x) for x in templatesIDUser] #Take care of the rest, with versions templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion") #We get all templates ID, for all versions allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="versions") #We remove the removed version allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions") templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved)) templatesID = templatesIDUser + templatesIDCommon #We retrieve deactivated registries so as not to get their metadata formats deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')] metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id") instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements) if len(instanceResults) > 0: if not onlySuggestions: xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl') xslt = etree.parse(xsltPath) transform = etree.XSLT(xslt) template = loader.get_template('oai_pmh/explore/explore_result_keyword.html') #Retrieve schema and registries. Avoid to retrieve the information for each result registriesName = {} objMetadataFormats = {} listRegistriesID = set([x['registry'] for x in instanceResults]) for registryId in listRegistriesID: obj = OaiRegistry.objects(pk=registryId).get() registriesName[str(registryId)] = obj.name listSchemaId = set([x['metadataformat'] for x in instanceResults]) for schemaId in listSchemaId: obj = OaiMetadataFormat.objects(pk=schemaId).get() objMetadataFormats[str(schemaId)] = obj listItems = [] xmltodictunparse = xmltodict.unparse appendResult = results.append toXML = etree.XML parse = etree.parse XSLT = etree.XSLT if not onlySuggestions: for instanceResult in instanceResults: custom_xslt = False appendResult({'title':instanceResult['identifier'], 'content':xmltodictunparse(instanceResult['metadata']),'id':str(instanceResult['_id'])}) dom = toXML(str(xmltodictunparse(instanceResult['metadata']).encode('utf-8'))) #Check if a custom list result XSLT has to be used try: metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])] if metadataFormat.template.ResultXsltList: listXslt = parse(BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8'))) listTransform = XSLT(listXslt) newdom = listTransform(dom) custom_xslt = True else: newdom = transform(dom) except Exception, e: #We use the default one newdom = transform(dom) custom_xslt = False context = RequestContext(request, {'id':str(instanceResult['_id']), 'xml': str(newdom), 'title': instanceResult['identifier'], 'custom_xslt': custom_xslt, 'schema_name': metadataFormat.metadataPrefix, 'registry_name': registriesName[instanceResult['registry']], 'oai_pmh': True}) resultString+= template.render(context) else: for instanceResult in instanceResults[:20]: wordList = re.sub("[^\w]", " ", keyword).split() wordList = [x + "|" + x +"\w+" for x in wordList] wordList = '|'.join(wordList) listWholeKeywords = re.findall("\\b("+ wordList +")\\b", xmltodict.unparse(instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE) labels = list(set(listWholeKeywords)) for label in labels: label = label.lower() result_json = {} result_json['label'] = label result_json['value'] = label if not result_json in resultsByKeyword: resultsByKeyword.append(result_json)
def get_results_by_instance_keyword(request): print 'BEGIN def getResultsKeyword(request)' resultsByKeyword = [] results = [] resultString = "" #Instance json_instances = [] if 'HTTPS' in request.META['SERVER_PROTOCOL']: protocol = "https" else: protocol = "http" instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token") json_instances.append(instance.to_json()) request.session['instancesExplore'] = json_instances sessionName = "resultsExploreOaiPMh" + instance['name'] try: keyword = request.GET['keyword'] schemas = request.GET.getlist('schemas[]') mergedSchemas = [] for schema in schemas: t = json.loads(schema) mergedSchemas += t['oai-pmh'] if 'onlySuggestions' in request.GET: onlySuggestions = json.loads(request.GET['onlySuggestions']) else: onlySuggestions = False except: keyword = '' schemas = [] onlySuggestions = True mergedSchemas = [] instanceResults = OaiRecord.executeFullTextQuery(keyword, mergedSchemas) if len(instanceResults) > 0: if not onlySuggestions: xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl') xslt = etree.parse(xsltPath) transform = etree.XSLT(xslt) #Retrieve schema and registries. Avoid to retrieve the information for each result registriesName = {} schemasName = {} listRegistriesID = set([x['registry'] for x in instanceResults]) for registryId in listRegistriesID: obj = OaiRegistry.objects(pk=registryId).get() registriesName[str(registryId)] = obj.name listSchemaId = set([x['metadataformat'] for x in instanceResults]) for schemaId in listSchemaId: obj = OaiMetadataFormat.objects(pk=schemaId).get() schemasName[str(schemaId)] = obj listItems = [] xmltodictunparse = unparse appendResult = results.append toXML = etree.XML parse = etree.parse XSLT = etree.XSLT if not onlySuggestions: for instanceResult in instanceResults: custom_xslt = False appendResult({ 'title': instanceResult['identifier'], 'content': xmltodictunparse(instanceResult['metadata']), 'id': str(instanceResult['_id']) }) dom = toXML( str( xmltodictunparse( instanceResult['metadata']).encode('utf-8'))) #Check if a custom list result XSLT has to be used try: schema = schemasName[str(instanceResult['metadataformat'])] if schema.ResultXsltList: listXslt = parse( BytesIO( schema.ResultXsltList.content.encode('utf-8'))) listTransform = XSLT(listXslt) newdom = listTransform(dom) custom_xslt = True else: newdom = transform(dom) except Exception, e: #We use the default one newdom = transform(dom) custom_xslt = False item = { 'id': str(instanceResult['_id']), 'xml': str(newdom), 'title': instanceResult['identifier'], 'custom_xslt': custom_xslt, 'schema_name': schema.metadataPrefix, 'registry_name': registriesName[instanceResult['registry']] } listItems.append(item) context = RequestContext(request, {'list_results': listItems}) else: for instanceResult in instanceResults[:20]: wordList = re.sub("[^\w]", " ", keyword).split() wordList = [x + "|" + x + "\w+" for x in wordList] wordList = '|'.join(wordList) listWholeKeywords = re.findall( "\\b(" + wordList + ")\\b", unparse(instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE) labels = list(set(listWholeKeywords)) for label in labels: label = label.lower() result_json = {} result_json['label'] = label result_json['value'] = label if not result_json in resultsByKeyword: resultsByKeyword.append(result_json) if not onlySuggestions: template = loader.get_template( 'oai_pmh/explore/explore_result_keyword.html') resultString += template.render(context)
def get_results_by_instance_keyword(request): print 'BEGIN def getResultsKeyword(request)' resultsByKeyword = [] results = [] resultString = "" #Instance json_instances = [] if 'HTTPS' in request.META['SERVER_PROTOCOL']: protocol = "https" else: protocol = "http" instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token") json_instances.append(instance.to_json()) request.session['instancesExplore'] = json_instances sessionName = "resultsExploreOaiPMh" + instance['name'] keyword = request.POST.get('keyword', '') schemas = request.POST.getlist('schemas[]', []) user_schemas = request.POST.getlist('userSchemas[]', []) refinements = refinements_to_mongo( json.loads(request.POST.get('refinements', '{}'))) registries = request.POST.getlist('registries[]', []) if 'onlySuggestions' in request.POST: onlySuggestions = json.loads(request.POST['onlySuggestions']) else: onlySuggestions = False metadata_format_ids = _get_metadata_formats_id(schemas=schemas, user_schemas=user_schemas, registries=registries) instanceResults = OaiRecord.executeFullTextQuery(keyword, metadata_format_ids, refinements) if len(instanceResults) > 0: if not onlySuggestions: xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl') xslt = etree.parse(xsltPath) transform = etree.XSLT(xslt) template = loader.get_template( 'oai_pmh/explore/explore_result_keyword.html') #Retrieve schema and registries. Avoid to retrieve the information for each result registriesName = {} objMetadataFormats = {} listRegistriesID = set([x['registry'] for x in instanceResults]) registriesURL = {} for registryId in listRegistriesID: obj = OaiRegistry.objects(pk=registryId).get() registriesName[str(registryId)] = obj.name registriesURL[str(registryId)] = obj.url listSchemaId = set([x['metadataformat'] for x in instanceResults]) for schemaId in listSchemaId: obj = OaiMetadataFormat.objects(pk=schemaId).get() objMetadataFormats[str(schemaId)] = obj listItems = [] xmltodictunparse = XMLdata.unparse appendResult = results.append toXML = etree.XML parse = etree.parse XSLT = etree.XSLT if not onlySuggestions: for instanceResult in instanceResults: custom_xslt = False appendResult({ 'title': instanceResult['identifier'], 'content': xmltodictunparse(instanceResult['metadata']), 'id': str(instanceResult['_id']) }) dom = toXML( str( xmltodictunparse( instanceResult['metadata']).encode('utf-8'))) #Check if a custom list result XSLT has to be used try: metadataFormat = objMetadataFormats[str( instanceResult['metadataformat'])] if metadataFormat.template.ResultXsltList: listXslt = parse( BytesIO( metadataFormat.template.ResultXsltList.content. encode('utf-8'))) listTransform = XSLT(listXslt) newdom = listTransform(dom) custom_xslt = True else: newdom = transform(dom) except Exception, e: #We use the default one newdom = transform(dom) custom_xslt = False registry_name = registriesName[instanceResult['registry']] if len(registry_name) > 30: registry_name = "{0}...".format(registry_name[:30]) url = urlparse(registriesURL[instanceResult['registry']]) context = RequestContext( request, { 'id': str(instanceResult['_id']), 'xml': str(newdom), 'title': instanceResult['identifier'], 'custom_xslt': custom_xslt, 'template_name': metadataFormat.template.title, 'registry_name': registry_name, 'registry_url': "{0}://{1}".format( url.scheme, url.netloc), 'oai_pmh': True }) resultString += template.render(context) else: for instanceResult in instanceResults[:20]: wordList = re.sub("[^\w]", " ", keyword).split() wordList = [x + "|" + x + "\w+" for x in wordList] wordList = '|'.join(wordList) listWholeKeywords = re.findall( "\\b(" + wordList + ")\\b", XMLdata.unparse( instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE) labels = list(set(listWholeKeywords)) for label in labels: label = label.lower() result_json = {} result_json['label'] = label result_json['value'] = label if not result_json in resultsByKeyword: resultsByKeyword.append(result_json)
def get_results_by_instance_keyword(request): print 'BEGIN def getResultsKeyword(request)' resultsByKeyword = [] results = [] resultString = "" #Instance json_instances = [] if 'HTTPS' in request.META['SERVER_PROTOCOL']: protocol = "https" else: protocol = "http" instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token") json_instances.append(instance.to_json()) request.session['instancesExplore'] = json_instances sessionName = "resultsExploreOaiPMh" + instance['name'] try: keyword = request.GET['keyword'] schemas = request.GET.getlist('schemas[]') userSchemas = request.GET.getlist('userSchemas[]') refinements = refinements_to_mongo( request.GET.getlist('refinements[]')) if 'onlySuggestions' in request.GET: onlySuggestions = json.loads(request.GET['onlySuggestions']) else: onlySuggestions = False except: keyword = '' schemas = [] userSchemas = [] refinements = {} onlySuggestions = True #We get all template versions for the given schemas #First, we take care of user defined schema templatesIDUser = Template.objects(title__in=userSchemas).distinct( field="id") templatesIDUser = [str(x) for x in templatesIDUser] #Take care of the rest, with versions templatesVersions = Template.objects(title__in=schemas).distinct( field="templateVersion") #We get all templates ID, for all versions allTemplatesIDCommon = TemplateVersion.objects( pk__in=templatesVersions, isDeleted=False).distinct(field="versions") #We remove the removed version allTemplatesIDCommonRemoved = TemplateVersion.objects( pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions") templatesIDCommon = list( set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved)) templatesID = templatesIDUser + templatesIDCommon #We retrieve deactivated registries so as not to get their metadata formats deactivatedRegistries = [ str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id') ] metadataFormatsID = OaiMetadataFormat.objects( template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id") instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements) if len(instanceResults) > 0: if not onlySuggestions: xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl') xslt = etree.parse(xsltPath) transform = etree.XSLT(xslt) template = loader.get_template( 'oai_pmh/explore/explore_result_keyword.html') #Retrieve schema and registries. Avoid to retrieve the information for each result registriesName = {} objMetadataFormats = {} listRegistriesID = set([x['registry'] for x in instanceResults]) for registryId in listRegistriesID: obj = OaiRegistry.objects(pk=registryId).get() registriesName[str(registryId)] = obj.name listSchemaId = set([x['metadataformat'] for x in instanceResults]) for schemaId in listSchemaId: obj = OaiMetadataFormat.objects(pk=schemaId).get() objMetadataFormats[str(schemaId)] = obj listItems = [] xmltodictunparse = xmltodict.unparse appendResult = results.append toXML = etree.XML parse = etree.parse XSLT = etree.XSLT if not onlySuggestions: for instanceResult in instanceResults: custom_xslt = False appendResult({ 'title': instanceResult['identifier'], 'content': xmltodictunparse(instanceResult['metadata']), 'id': str(instanceResult['_id']) }) dom = toXML( str( xmltodictunparse( instanceResult['metadata']).encode('utf-8'))) #Check if a custom list result XSLT has to be used try: metadataFormat = objMetadataFormats[str( instanceResult['metadataformat'])] if metadataFormat.template.ResultXsltList: listXslt = parse( BytesIO( metadataFormat.template.ResultXsltList.content. encode('utf-8'))) listTransform = XSLT(listXslt) newdom = listTransform(dom) custom_xslt = True else: newdom = transform(dom) except Exception, e: #We use the default one newdom = transform(dom) custom_xslt = False context = RequestContext( request, { 'id': str(instanceResult['_id']), 'xml': str(newdom), 'title': instanceResult['identifier'], 'custom_xslt': custom_xslt, 'schema_name': metadataFormat.metadataPrefix, 'registry_name': registriesName[instanceResult['registry']], 'oai_pmh': True }) resultString += template.render(context) else: for instanceResult in instanceResults[:20]: wordList = re.sub("[^\w]", " ", keyword).split() wordList = [x + "|" + x + "\w+" for x in wordList] wordList = '|'.join(wordList) listWholeKeywords = re.findall( "\\b(" + wordList + ")\\b", xmltodict.unparse( instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE) labels = list(set(listWholeKeywords)) for label in labels: label = label.lower() result_json = {} result_json['label'] = label result_json['value'] = label if not result_json in resultsByKeyword: resultsByKeyword.append(result_json)