def _get_metadata_formats_id(schemas, user_schemas, registries):
    """Return the distinct ids of the metadata formats matching the schemas.

    Template ids are gathered from two sources: templates whose title is in
    ``user_schemas`` (their own ids, as strings), and templates whose title is
    in ``schemas`` (every version listed by their non-deleted
    ``TemplateVersion`` documents, minus the versions listed as deleted).

    When ``registries`` is empty, metadata formats belonging to deactivated
    registries are excluded; otherwise only formats belonging to the given
    registries are returned.
    """
    # User-defined schemas: the template ids themselves, stringified.
    user_template_ids = [
        str(template_id)
        for template_id in Template.objects(
            title__in=user_schemas).distinct(field="id")
    ]

    # Remaining schemas: go through their version documents.
    version_ids = Template.objects(title__in=schemas).distinct(
        field="templateVersion")
    every_version = TemplateVersion.objects(
        pk__in=version_ids, isDeleted=False).distinct(field="versions")
    deleted_versions = TemplateVersion.objects(
        pk__in=version_ids, isDeleted=False).distinct(field="deletedVersions")
    # Keep only the versions that have not been removed.
    common_template_ids = list(set(every_version) - set(deleted_versions))

    template_ids = user_template_ids + common_template_ids

    if len(registries) != 0:
        # Registries come from the refinement: restrict the search to them.
        return OaiMetadataFormat.objects(
            template__in=template_ids,
            registry__in=registries).distinct(field="id")

    # No registry requested: skip formats owned by deactivated registries.
    deactivated_ids = [
        str(registry.id)
        for registry in OaiRegistry.objects(isDeactivated=True).order_by('id')
    ]
    return OaiMetadataFormat.objects(
        template__in=template_ids,
        registry__not__in=deactivated_ids).distinct(field="id")
def get_metadata_formats_detail(request):
    """Render the detail view for a selection of OAI-PMH metadata formats.

    The ``metadataFormats`` GET parameter is expected to hold a JSON object
    whose ``oai-pmh`` entry lists metadata format ids and whose optional
    ``local`` entry is passed to the template as ``local``. A missing or
    malformed parameter results in an empty list being rendered.

    Returns an ``HttpResponse`` containing the rendered template, JSON-encoded.
    """
    template = loader.get_template("oai_pmh/explore/explore_metadata_formats_detail.html")
    try:
        # Get metadata formats
        infos = json.loads(request.GET["metadataFormats"])
        metadataFormats = infos["oai-pmh"]
        localTemplate = infos.get("local")
    except (KeyError, ValueError, TypeError):
        # KeyError: parameter or "oai-pmh" entry missing; ValueError: invalid
        # JSON; TypeError: the JSON payload is not an object. Anything else is
        # a real error and must not be hidden.
        metadataFormats = []
        localTemplate = None

    list_metadata_formats_info = []
    for metadataFormat in OaiMetadataFormat.objects(pk__in=metadataFormats).all():
        # Only active registries are looked up; .get() raises if the format
        # belongs to a deactivated (or missing) registry.
        registry = OaiRegistry.objects(isDeactivated=False).only("name").get(
            pk=metadataFormat.registry)
        list_metadata_formats_info.append({
            "registry": registry.name,
            "metadataPrefix": metadataFormat.metadataPrefix,
            "schema": metadataFormat.schema,
        })

    context = RequestContext(
        request,
        {"list_metadata_formats_info": list_metadata_formats_info,
         "local": localTemplate}
    )
    return HttpResponse(json.dumps(template.render(context)),
                        content_type="application/javascript")
def all_metadataprefix(request, registry):
    """Return the metadata prefixes of a registry as a JSON array.

    The prefixes are read from the registry's metadata formats, ordered by
    ``metadataPrefix``.
    """
    registry_formats = OaiMetadataFormat.objects(registry=registry).order_by("metadataPrefix")
    prefixes = [metadata_format.metadataPrefix for metadata_format in registry_formats]
    return HttpResponse(json.dumps(prefixes), content_type="application/javascript")
def get_metadata_formats_detail(request):
    """Render the detail view for a selection of OAI-PMH metadata formats.

    The ``metadataFormats`` GET parameter is expected to hold a JSON object
    whose ``oai-pmh`` entry lists metadata format ids and whose optional
    ``local`` entry is passed to the template as ``local``. A missing or
    malformed parameter results in an empty list being rendered.

    Returns an ``HttpResponse`` containing the rendered template, JSON-encoded.
    """
    template = loader.get_template('oai_pmh/explore/explore_metadata_formats_detail.html')
    try:
        # Get metadata formats
        infos = json.loads(request.GET['metadataFormats'])
        metadataFormats = infos['oai-pmh']
        localTemplate = infos.get('local')
    except (KeyError, ValueError, TypeError):
        # Missing parameter/entry, invalid JSON, or a payload that is not a
        # JSON object: fall back to an empty selection. Other exceptions are
        # genuine errors and are allowed to propagate.
        metadataFormats = []
        localTemplate = None

    list_metadata_formats_info = []
    for metadataFormat in OaiMetadataFormat.objects(pk__in=metadataFormats).all():
        # Only active registries are looked up; .get() raises if the format
        # belongs to a deactivated (or missing) registry.
        registry = OaiRegistry.objects(isDeactivated=False).only('name').get(
            pk=metadataFormat.registry)
        list_metadata_formats_info.append({
            'registry': registry.name,
            'metadataPrefix': metadataFormat.metadataPrefix,
            'schema': metadataFormat.schema,
        })

    context = RequestContext(request, {
        'list_metadata_formats_info': list_metadata_formats_info,
        'local': localTemplate,
    })
    return HttpResponse(json.dumps(template.render(context)),
                        content_type='application/javascript')
def all_metadataprefix(request, registry):
    """Send back, as JSON, every metadata prefix known for ``registry``.

    Metadata formats are fetched for the registry and sorted by their
    ``metadataPrefix`` before the prefixes are extracted.
    """
    ordered_formats = OaiMetadataFormat.objects(
        registry=registry).order_by("metadataPrefix")
    payload = json.dumps([entry.metadataPrefix for entry in ordered_formats])
    return HttpResponse(payload, content_type="application/javascript")
def oai_pmh_detail_registry(request):
    """Render the admin detail page of the registry given by the ``id`` GET parameter.

    The template receives the registry itself, its metadata formats, its
    sets and the number of records attached to it.
    """
    registry_id = request.GET['id']
    page = loader.get_template('oai_pmh/admin/oai_pmh_detail_registry.html')

    # Everything shown on the page is keyed on the same registry id.
    details = {}
    details['registry'] = OaiRegistry.objects.get(pk=registry_id)
    details['metadataformats'] = OaiMetadataFormat.objects(registry=registry_id)
    details['sets'] = OaiSet.objects(registry=registry_id)
    details['nbRecords'] = OaiRecord.objects(registry=registry_id).count()

    return HttpResponse(page.render(RequestContext(request, details)))
def __init__(self, *args, **kwargs):
    """Initialise the form, optionally pre-filled from a registry.

    When an ``id`` keyword argument is given it is removed from ``kwargs``
    and used to load the registry's metadata formats and sets: both become
    the querysets of the corresponding fields, and the entries flagged with
    ``harvest`` are pre-selected.

    The parent initialiser is now always called; previously it was skipped
    when ``id`` was absent, leaving the form uninitialised.
    """
    has_registry = 'id' in kwargs
    registryId = kwargs.pop('id', None)

    super(SettingHarvestForm, self).__init__(*args, **kwargs)
    if not has_registry:
        return

    metadataFormats = OaiMetadataFormat.objects(registry=str(registryId)).all()
    sets = OaiSet.objects(registry=str(registryId)).all()

    self.fields['id'].initial = registryId

    self.fields['metadataFormats'].initial = [mf.id for mf in metadataFormats if mf.harvest]
    # The queryset is reset before being assigned, as in the original code.
    # NOTE(review): presumably forces the field to refresh its choices; confirm.
    self.fields['metadataFormats'].queryset = []
    self.fields['metadataFormats'].queryset = metadataFormats

    self.fields['sets'].initial = [oai_set.id for oai_set in sets if oai_set.harvest]
    self.fields['sets'].queryset = []
    self.fields['sets'].queryset = sets
def __init__(self, listRegistriesId=None):
    """Build the metadata format choices for the given registries.

    Metadata formats of the registries are grouped by their ``hash``; each
    group becomes one choice of the ``my_schemas`` field. The choice value is
    a JSON object listing the ids of the grouped formats under ``oai-pmh``
    (plus the linked template title under ``local`` when the first format of
    the group has a template); the label shows the metadata prefix and the
    number of registries using it.

    Args:
        listRegistriesId: ids of the registries to consider. Defaults to no
            registry.
    """
    if listRegistriesId is None:
        listRegistriesId = []
    self.SCHEMAS_OPTIONS = []

    # The registry names are not needed below; the lookup is kept so that an
    # unknown registry id still fails here, as it did before.
    for registryId in listRegistriesId:
        OaiRegistry.objects(pk=registryId).get()

    # We retrieve all common schemas
    schemas = OaiMetadataFormat.objects(registry__in=listRegistriesId).order_by('metadataPrefix')

    # Group by hash. The data is sorted by metadataPrefix, not by hash, so
    # itertools.groupby could split formats sharing a hash into several
    # groups; grouping through a dict does not depend on adjacency. Groups
    # keep the order in which their hash is first seen.
    groups_by_hash = {}
    groups = []
    for schema in schemas:
        group = groups_by_hash.get(schema.hash)
        if group is None:
            group = []
            groups_by_hash[schema.hash] = group
            groups.append(group)
        group.append(schema)

    for group in groups:
        first = group[0]
        template = first.template
        listValues = [str(elt.id) for elt in group]

        # Provide information about the number of registries using this MF.
        # The prefix is passed as a format_html argument so that it is
        # escaped and cannot be mistaken for a format placeholder.
        if len(listValues) == 1:
            label_format = "{0}<br> (in 1 Registry)"
        else:
            label_format = "{0}<br> (in {1} Registries)"

        if template is not None:
            # The format is linked to a local template
            label_format += " <text class='local'> + Local </text>"
            template = Template.objects.only('id', 'title').get(pk=template.id)
            value = json.dumps({'oai-pmh': listValues, 'local': template.title})
        else:
            value = json.dumps({'oai-pmh': listValues})

        name = format_html(label_format, first.metadataPrefix, len(listValues))
        self.SCHEMAS_OPTIONS.append((value, name))

    super(MetadataFormatsForm, self).__init__()
    # Choices are reset before being assigned, as in the original code.
    self.fields['my_schemas'].choices = []
    self.fields['my_schemas'].choices = self.SCHEMAS_OPTIONS

    self.my_schemas_nb = len(self.SCHEMAS_OPTIONS)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]`` and
    ``onlySuggestions`` from ``request.GET``, resolves the schema titles to
    template ids and then to metadata format ids (formats of deactivated
    registries are excluded), and calls ``OaiRecord.executeFullTextQuery``.

    * Suggestion mode: words matching the keyword are collected from the
      first 20 results into ``resultsByKeyword``.
    * Otherwise: each result is transformed with XSLT and rendered with the
      result template, the HTML being accumulated in ``resultString``.

    Side effect: stores a "Local" instance description in
    ``request.session['instancesExplore']``.

    NOTE(review): no return statement is visible in this section of the file.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and keep it in the session
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not used in the visible part of the function
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        # NOTE(review): bare except - any failure above (including programming
        # errors) silently falls back to an empty, suggestions-only search.
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True

    # We get all template versions for the given schemas
    # First, we take care of user-defined schemas
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]
    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(field="templateVersion")
    # We get all template ids, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the deleted versions
    allTemplatesIDCommonRemoved = TemplateVersion.objects(pk__in=templatesVersions, isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))
    templatesID = templatesIDUser + templatesIDCommon
    # We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [str(x.id) for x in OaiRegistry.objects(isDeactivated=True).order_by('id')]
    metadataFormatsID = OaiMetadataFormat.objects(template__in=templatesID, registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword, metadataFormatsID, refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT, used when a result has no custom list XSLT
            xsltPath = os.path.join(settings.SITE_ROOT, 'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template('oai_pmh/explore/explore_result_keyword.html')
            # Retrieve schemas and registries once, to avoid fetching them for each result
            registriesName = {}
            objMetadataFormats = {}
            listRegistriesID = set([x['registry'] for x in instanceResults])
            for registryId in listRegistriesID:
                obj = OaiRegistry.objects(pk=registryId).get()
                registriesName[str(registryId)] = obj.name
            listSchemaId = set([x['metadataformat'] for x in instanceResults])
            for schemaId in listSchemaId:
                obj = OaiMetadataFormat.objects(pk=schemaId).get()
                objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not used in the visible part of the function
        listItems = []
        # Local aliases for the callables used inside the result loop
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({'title':instanceResult['identifier'], 'content':xmltodictunparse(instanceResult['metadata']),'id':str(instanceResult['_id'])})
                dom = toXML(str(xmltodictunparse(instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(BytesIO(metadataFormat.template.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # Any failure: fall back to the default XSLT.
                    # NOTE(review): if the lookup itself failed, metadataFormat
                    # below is unbound or left over from a previous iteration.
                    newdom = transform(dom)
                    custom_xslt = False

                context = RequestContext(request, {'id':str(instanceResult['_id']),
                                                   'xml': str(newdom),
                                                   'title': instanceResult['identifier'],
                                                   'custom_xslt': custom_xslt,
                                                   'schema_name': metadataFormat.metadataPrefix,
                                                   'registry_name': registriesName[instanceResult['registry']],
                                                   'oai_pmh': True})

                resultString+= template.render(context)
        else:
            # Suggestion mode: only the first 20 results are scanned
            for instanceResult in instanceResults[:20]:
                # Split the keyword on non-word characters, then match each
                # word either alone or followed by more word characters.
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x +"\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall("\\b("+ wordList +")\\b", xmltodict.unparse(instanceResult['metadata']).encode('utf-8'), flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    # Suggestions are de-duplicated across results
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
def dump_oai_metadata_format(self):
    """Restore the ``oai_metadata_format`` dump into an empty collection.

    Asserts that no metadata format exists beforehand and that at least one
    exists once the dump has been restored.
    """
    # assertEqual/assertGreater replace the deprecated assertEquals alias and
    # the bare assertTrue, and report the compared values on failure.
    self.assertEqual(len(OaiMetadataFormat.objects()), 0)
    self.restoreDump(join(DUMP_OAI_PMH_TEST_PATH, 'oai_metadata_format.bson'), 'oai_metadata_format')
    self.assertGreater(len(OaiMetadataFormat.objects()), 0)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements``
    (JSON), ``registries[]`` and ``onlySuggestions`` from ``request.POST``,
    resolves them to metadata format ids with ``_get_metadata_formats_id``
    and calls ``OaiRecord.executeFullTextQuery``.

    * Suggestion mode: words matching the keyword are collected from the
      first 20 results into ``resultsByKeyword``.
    * Otherwise: each result is transformed with XSLT and rendered with the
      result template, the HTML being accumulated in ``resultString``.

    Side effect: stores a "Local" instance description in
    ``request.session['instancesExplore']``.

    NOTE(review): no return statement is visible in this section of the file.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and keep it in the session
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not used in the visible part of the function
    sessionName = "resultsExploreOaiPMh" + instance['name']

    # Search parameters, each with an empty default when absent
    keyword = request.POST.get('keyword', '')
    schemas = request.POST.getlist('schemas[]', [])
    user_schemas = request.POST.getlist('userSchemas[]', [])
    refinements = refinements_to_mongo(
        json.loads(request.POST.get('refinements', '{}')))
    registries = request.POST.getlist('registries[]', [])
    if 'onlySuggestions' in request.POST:
        onlySuggestions = json.loads(request.POST['onlySuggestions'])
    else:
        onlySuggestions = False

    metadata_format_ids = _get_metadata_formats_id(schemas=schemas,
                                                   user_schemas=user_schemas,
                                                   registries=registries)
    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadata_format_ids,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT, used when a result has no custom list XSLT
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')
            # Retrieve schemas and registries once, to avoid fetching them for each result
            registriesName = {}
            objMetadataFormats = {}
            listRegistriesID = set([x['registry'] for x in instanceResults])
            registriesURL = {}
            for registryId in listRegistriesID:
                obj = OaiRegistry.objects(pk=registryId).get()
                registriesName[str(registryId)] = obj.name
                registriesURL[str(registryId)] = obj.url
            listSchemaId = set([x['metadataformat'] for x in instanceResults])
            for schemaId in listSchemaId:
                obj = OaiMetadataFormat.objects(pk=schemaId).get()
                objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not used in the visible part of the function
        listItems = []
        # Local aliases for the callables used inside the result loop
        xmltodictunparse = XMLdata.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title': instanceResult['identifier'],
                    'content': xmltodictunparse(instanceResult['metadata']),
                    'id': str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # Any failure: fall back to the default XSLT.
                    # NOTE(review): if the lookup itself failed, metadataFormat
                    # below is unbound or left over from a previous iteration.
                    newdom = transform(dom)
                    custom_xslt = False

                # Registry names longer than 30 characters are truncated for display
                registry_name = registriesName[instanceResult['registry']]
                if len(registry_name) > 30:
                    registry_name = "{0}...".format(registry_name[:30])

                # Only the scheme and network location of the registry URL are shown
                url = urlparse(registriesURL[instanceResult['registry']])
                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'template_name': metadataFormat.template.title,
                        'registry_name': registry_name,
                        'registry_url': "{0}://{1}".format(
                            url.scheme, url.netloc),
                        'oai_pmh': True
                    })

                resultString += template.render(context)
        else:
            # Suggestion mode: only the first 20 results are scanned
            for instanceResult in instanceResults[:20]:
                # Split the keyword on non-word characters, then match each
                # word either alone or followed by more word characters.
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    XMLdata.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    # Suggestions are de-duplicated across results
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)
def dump_oai_metadata_format(self):
    """Restore the ``oai_metadata_format`` dump into an empty collection.

    Asserts that no metadata format exists beforehand and that at least one
    exists once the dump has been restored.
    """
    # assertEqual/assertGreater replace the deprecated assertEquals alias and
    # the bare assertTrue, and report the compared values on failure.
    self.assertEqual(len(OaiMetadataFormat.objects()), 0)
    self.restoreDump(
        join(DUMP_OAI_PMH_TEST_PATH, 'oai_metadata_format.bson'),
        'oai_metadata_format')
    self.assertGreater(len(OaiMetadataFormat.objects()), 0)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Reads ``keyword``, ``schemas[]`` and ``onlySuggestions`` from
    ``request.GET``. Each ``schemas[]`` value is a JSON object whose
    ``oai-pmh`` list is merged into the metadata format ids passed to
    ``OaiRecord.executeFullTextQuery``.

    * Suggestion mode: words matching the keyword are collected from the
      first 20 results into ``resultsByKeyword``.
    * Otherwise: each result is transformed with XSLT and collected in a
      list that is rendered once with the result template into
      ``resultString``.

    Side effect: stores a "Local" instance description in
    ``request.session['instancesExplore']``.

    NOTE(review): no return statement is visible in this section of the file.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and keep it in the session
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not used in the visible part of the function
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        # Flatten the metadata format ids carried by every selected schema
        mergedSchemas = []
        for schema in schemas:
            t = json.loads(schema)
            mergedSchemas += t['oai-pmh']
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        # NOTE(review): bare except - any failure above (including programming
        # errors) silently falls back to an empty, suggestions-only search.
        keyword = ''
        schemas = []
        onlySuggestions = True
        mergedSchemas = []

    instanceResults = OaiRecord.executeFullTextQuery(keyword, mergedSchemas)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT, used when a result has no custom list XSLT
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            # Retrieve schemas and registries once, to avoid fetching them for each result
            registriesName = {}
            schemasName = {}
            listRegistriesID = set([x['registry'] for x in instanceResults])
            for registryId in listRegistriesID:
                obj = OaiRegistry.objects(pk=registryId).get()
                registriesName[str(registryId)] = obj.name
            listSchemaId = set([x['metadataformat'] for x in instanceResults])
            for schemaId in listSchemaId:
                obj = OaiMetadataFormat.objects(pk=schemaId).get()
                # Despite its name, schemasName maps ids to the format objects
                schemasName[str(schemaId)] = obj

        listItems = []
        # Local aliases for the callables used inside the result loop
        xmltodictunparse = unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title': instanceResult['identifier'],
                    'content': xmltodictunparse(instanceResult['metadata']),
                    'id': str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    schema = schemasName[str(instanceResult['metadataformat'])]
                    if schema.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                schema.ResultXsltList.content.encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # Any failure: fall back to the default XSLT.
                    # NOTE(review): if the lookup itself failed, schema below
                    # still holds whatever it was bound to earlier.
                    newdom = transform(dom)
                    custom_xslt = False

                item = {
                    'id': str(instanceResult['_id']),
                    'xml': str(newdom),
                    'title': instanceResult['identifier'],
                    'custom_xslt': custom_xslt,
                    'schema_name': schema.metadataPrefix,
                    'registry_name': registriesName[instanceResult['registry']]
                }
                listItems.append(item)
            # NOTE(review): placement after the loop was reconstructed from a
            # source with no indentation; confirm against the original file.
            context = RequestContext(request, {'list_results': listItems})
        else:
            # Suggestion mode: only the first 20 results are scanned
            for instanceResult in instanceResults[:20]:
                # Split the keyword on non-word characters, then match each
                # word either alone or followed by more word characters.
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    unparse(instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    # Suggestions are de-duplicated across results
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)

        # All collected items are rendered in a single pass.
        # NOTE(review): nesting level reconstructed; context only exists when
        # results were found, so this is assumed to sit inside that branch.
        if not onlySuggestions:
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')
            resultString += template.render(context)
def get_results_by_instance_keyword(request):
    """Run a keyword search over OAI records and prepare results or suggestions.

    Reads ``keyword``, ``schemas[]``, ``userSchemas[]``, ``refinements[]`` and
    ``onlySuggestions`` from ``request.GET``, resolves the schema titles to
    template ids and then to metadata format ids (formats of deactivated
    registries are excluded), and calls ``OaiRecord.executeFullTextQuery``.

    * Suggestion mode: words matching the keyword are collected from the
      first 20 results into ``resultsByKeyword``.
    * Otherwise: each result is transformed with XSLT and rendered with the
      result template, the HTML being accumulated in ``resultString``.

    Side effect: stores a "Local" instance description in
    ``request.session['instancesExplore']``.

    NOTE(review): no return statement is visible in this section of the file.
    """
    print 'BEGIN def getResultsKeyword(request)'
    resultsByKeyword = []
    results = []
    resultString = ""

    # Describe the local instance and keep it in the session
    json_instances = []
    if 'HTTPS' in request.META['SERVER_PROTOCOL']:
        protocol = "https"
    else:
        protocol = "http"
    instance = Instance(name="Local", protocol=protocol, address=request.META['REMOTE_ADDR'], port=request.META['SERVER_PORT'], access_token="token", refresh_token="token")
    json_instances.append(instance.to_json())
    request.session['instancesExplore'] = json_instances
    # NOTE(review): sessionName is not used in the visible part of the function
    sessionName = "resultsExploreOaiPMh" + instance['name']

    try:
        keyword = request.GET['keyword']
        schemas = request.GET.getlist('schemas[]')
        userSchemas = request.GET.getlist('userSchemas[]')
        refinements = refinements_to_mongo(
            request.GET.getlist('refinements[]'))
        if 'onlySuggestions' in request.GET:
            onlySuggestions = json.loads(request.GET['onlySuggestions'])
        else:
            onlySuggestions = False
    except:
        # NOTE(review): bare except - any failure above (including programming
        # errors) silently falls back to an empty, suggestions-only search.
        keyword = ''
        schemas = []
        userSchemas = []
        refinements = {}
        onlySuggestions = True

    # We get all template versions for the given schemas
    # First, we take care of user-defined schemas
    templatesIDUser = Template.objects(title__in=userSchemas).distinct(
        field="id")
    templatesIDUser = [str(x) for x in templatesIDUser]
    # Take care of the rest, with versions
    templatesVersions = Template.objects(title__in=schemas).distinct(
        field="templateVersion")
    # We get all template ids, for all versions
    allTemplatesIDCommon = TemplateVersion.objects(
        pk__in=templatesVersions, isDeleted=False).distinct(field="versions")
    # We remove the deleted versions
    allTemplatesIDCommonRemoved = TemplateVersion.objects(
        pk__in=templatesVersions,
        isDeleted=False).distinct(field="deletedVersions")
    templatesIDCommon = list(
        set(allTemplatesIDCommon) - set(allTemplatesIDCommonRemoved))
    templatesID = templatesIDUser + templatesIDCommon
    # We retrieve deactivated registries so as not to get their metadata formats
    deactivatedRegistries = [
        str(x.id) for x in
        OaiRegistry.objects(isDeactivated=True).order_by('id')
    ]
    metadataFormatsID = OaiMetadataFormat.objects(
        template__in=templatesID,
        registry__not__in=deactivatedRegistries).distinct(field="id")

    instanceResults = OaiRecord.executeFullTextQuery(keyword,
                                                     metadataFormatsID,
                                                     refinements)
    if len(instanceResults) > 0:
        if not onlySuggestions:
            # Default XSLT, used when a result has no custom list XSLT
            xsltPath = os.path.join(settings.SITE_ROOT,
                                    'static/resources/xsl/xml2html.xsl')
            xslt = etree.parse(xsltPath)
            transform = etree.XSLT(xslt)
            template = loader.get_template(
                'oai_pmh/explore/explore_result_keyword.html')
            # Retrieve schemas and registries once, to avoid fetching them for each result
            registriesName = {}
            objMetadataFormats = {}
            listRegistriesID = set([x['registry'] for x in instanceResults])
            for registryId in listRegistriesID:
                obj = OaiRegistry.objects(pk=registryId).get()
                registriesName[str(registryId)] = obj.name
            listSchemaId = set([x['metadataformat'] for x in instanceResults])
            for schemaId in listSchemaId:
                obj = OaiMetadataFormat.objects(pk=schemaId).get()
                objMetadataFormats[str(schemaId)] = obj

        # NOTE(review): listItems is not used in the visible part of the function
        listItems = []
        # Local aliases for the callables used inside the result loop
        xmltodictunparse = xmltodict.unparse
        appendResult = results.append
        toXML = etree.XML
        parse = etree.parse
        XSLT = etree.XSLT
        if not onlySuggestions:
            for instanceResult in instanceResults:
                custom_xslt = False
                appendResult({
                    'title': instanceResult['identifier'],
                    'content': xmltodictunparse(instanceResult['metadata']),
                    'id': str(instanceResult['_id'])
                })
                dom = toXML(
                    str(
                        xmltodictunparse(
                            instanceResult['metadata']).encode('utf-8')))
                # Check if a custom list result XSLT has to be used
                try:
                    metadataFormat = objMetadataFormats[str(
                        instanceResult['metadataformat'])]
                    if metadataFormat.template.ResultXsltList:
                        listXslt = parse(
                            BytesIO(
                                metadataFormat.template.ResultXsltList.content.
                                encode('utf-8')))
                        listTransform = XSLT(listXslt)
                        newdom = listTransform(dom)
                        custom_xslt = True
                    else:
                        newdom = transform(dom)
                except Exception, e:
                    # Any failure: fall back to the default XSLT.
                    # NOTE(review): if the lookup itself failed, metadataFormat
                    # below is unbound or left over from a previous iteration.
                    newdom = transform(dom)
                    custom_xslt = False

                context = RequestContext(
                    request, {
                        'id': str(instanceResult['_id']),
                        'xml': str(newdom),
                        'title': instanceResult['identifier'],
                        'custom_xslt': custom_xslt,
                        'schema_name': metadataFormat.metadataPrefix,
                        'registry_name':
                        registriesName[instanceResult['registry']],
                        'oai_pmh': True
                    })

                resultString += template.render(context)
        else:
            # Suggestion mode: only the first 20 results are scanned
            for instanceResult in instanceResults[:20]:
                # Split the keyword on non-word characters, then match each
                # word either alone or followed by more word characters.
                wordList = re.sub("[^\w]", " ", keyword).split()
                wordList = [x + "|" + x + "\w+" for x in wordList]
                wordList = '|'.join(wordList)
                listWholeKeywords = re.findall(
                    "\\b(" + wordList + ")\\b",
                    xmltodict.unparse(
                        instanceResult['metadata']).encode('utf-8'),
                    flags=re.IGNORECASE)
                labels = list(set(listWholeKeywords))

                for label in labels:
                    label = label.lower()
                    result_json = {}
                    result_json['label'] = label
                    result_json['value'] = label
                    # Suggestions are de-duplicated across results
                    if not result_json in resultsByKeyword:
                        resultsByKeyword.append(result_json)