Exemplo n.º 1
0
def regexConfirm():
	'''
	Flask route handler: preview a regex search / replace against the raw
	datastream XML submitted from the edit form, without saving anything.

	Reads from request.form:
		raw_xml       - original datastream XML
		regex_search  - regex pattern (utf-8 encoded before use)
		regex_replace - replacement string (utf-8 encoded before use)

	Renders the confirmation template with the original and rewritten
	strings; return_package['string_match'] is set True when the
	substitution produced no change.
	'''

	# get PIDs (call retained from original; result unused in this handler --
	# presumably side-effect free, verify before removing)
	PIDs = jobs.getSelPIDs()

	# search / replace inputs from the submitted form
	orig_string = request.form['raw_xml']
	regex_search = request.form['regex_search'].encode('utf-8')
	regex_replace = request.form['regex_replace'].encode('utf-8')
	new_string = re.sub(regex_search, regex_replace, orig_string)

	return_package = {
		"orig_string":orig_string,
		"new_string":new_string,
		"regex_search":regex_search,
		"regex_replace":regex_replace
	}

	# flag when the substitution changed nothing
	if orig_string == new_string:
		return_package['string_match'] = True

	return render_template("editDSRegex_regexConfirm.html",return_package=return_package)
Exemplo n.º 2
0
def updateSolr(update_type):			


	# real or emulated solr events
	if update_type == "fullIndex":				
		index_handle = solrIndexer.delay('fullIndex','')

	if update_type == "timestamp":
		print "Updating by timestamp"	
		index_handle = solrIndexer.delay('timestampIndex','')

	if update_type == "userObjects":
		print "Updating by userObjects"	
		PIDs = jobs.getSelPIDs()
		for PID in PIDs:
			index_handle = solrIndexer.delay('modifyObject', PID)	

	# purge and reindex staging solr core from fedora (SLOW)
	if update_type == "purgeAndFullIndex":
		print "Purging solr core and reindexing all objects"
		# delete all from /fedobjs core
		if 'fedobjs' in solr_handle.base_url:
			solr_handle.delete_by_query('*:*',commit=False)
		# run full index	
		index_handle = solrIndexer.delay('fullIndex','')

	# purge production core, replicate from staging (FAST)
	if update_type == "replicateStagingToProduction":
		index_handle = solrIndexer.delay('replicateStagingToProduction','')


	# return logic
	if "APIcall" in request.values and request.values['APIcall'] == "True":

		# prepare package
		return_dict = {
			"solrIndexer":{
				"update_type":update_type,
				"timestamp":datetime.datetime.now().isoformat(),
				"job_ID":index_handle.id
			}
		}
		# return JSON
		print return_dict
		json_string = json.dumps(return_dict)
		resp = make_response(json_string)
		resp.headers['Content-Type'] = 'application/json'
		return resp		
	
	else:
		return render_template("updateSolr.html",update_type=update_type,APP_HOST=localConfig.APP_HOST)
Exemplo n.º 3
0
def index():

	# get PID to examine, if noted
	if request.args.get("PIDnum") != None:
		PIDnum = int(request.args.get("PIDnum"))		
	else:
		PIDnum = 0

	# get PIDs	
	PIDs = getSelPIDs()	
	print PIDs[PIDnum]

	obj_ohandle = fedora_handle.get_object(PIDs[PIDnum])		
	obj_ohandle = obj_ohandle.ds_list
	dsIDs = []
	for (name, loc) in obj_ohandle.items():
		dsIDs.extend([name])
	print dsIDs

	form = purgeDSForm()	
	return render_template("purgeDS.html",form=form,PID=PIDs[PIDnum],dsIDs=dsIDs,PIDnum=PIDnum)
Exemplo n.º 4
0
def viewManifests(PIDnum):	

	# get PIDs	
	PIDs = getSelPIDs()

	# GET CURRENT OBJECTS	
	PIDlet = genPIDlet(int(PIDnum))
	if PIDlet == False:
		return utilities.applicationError("PIDnum is out of range or invalid.  We are is displeased.")
	PIDlet['pURL'] = "/tasks/iiifManifest/viewManifests/"+str(int(PIDnum)-1)
	PIDlet['nURL'] = "/tasks/iiifManifest/viewManifests/"+str(int(PIDnum)+1)
	
	# check Redis for manifest
	r_response = redisHandles.r_iiif.get(PIDlet['cPID'])
	if r_response != None:
		print "Redis hit for manifest."
		json_return = r_response
	else:
		print "Redis miss for manifest."
		json_return = json.dumps({"status":"manifest for %s not found in redis" % PIDlet['cPID']})
	


	return render_template("iiifManifest_view.html",PIDnum=(int(PIDnum)+1),PIDlet=PIDlet, json_return=json.dumps( json.loads(json_return), indent=2), iiif_manifest_prefix=localConfig.IIIF_MANIFEST_PREFIX,APP_HOST=localConfig.APP_HOST )
Exemplo n.º 5
0
def MODSexport_export():
	'''
	Flask route handler: concatenate the MODS datastreams of all selected
	objects into a single <mods:modsCollection> document and return it as a
	file download.

	Every record is forced to carry its PID in <mods:extension><PID> --
	without it, records cannot be re-associated with their objects on
	reingest.

	Returns a Flask response with a Content-Disposition attachment header.
	'''

	# get username
	username = session['username']

	# register namespaces
	# NOTE(review): this URI is prefixed with 'mods:' -- looks like a typo
	# for 'http://www.loc.gov/mods/v3'; confirm before changing
	etree.register_namespace('mods', 'mods:http://www.loc.gov/mods/v3')

	# collect MODS records for selected objects
	PIDs = jobs.getSelPIDs()
	outfile_path = '/tmp/Ouroboros/%s_MODS_concat.xml' % (username)
	with open(outfile_path, 'w') as outfile:

		# write collection header
		outfile.write('<?xml version="1.0" encoding="UTF-8"?><mods:modsCollection xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:mods="http://www.loc.gov/mods/v3" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd">\n')

		for PID in PIDs:

			# get MODS datastream
			obj_ohandle = fedora_handle.get_object(PID)
			ds_handle = obj_ohandle.getDatastreamObject('MODS')

			# does a <PID> element already exist?
			PID_check = ds_handle.content.node.xpath('//mods:extension/PID',namespaces=ds_handle.content.node.nsmap)

			# if not, continue with checks
			if len(PID_check) == 0:

				# check for <mods:extension>, if not present add
				extension_check = ds_handle.content.node.xpath('//mods:extension',namespaces=ds_handle.content.node.nsmap)

				# if absent, splice extension + PID in via string replace
				if len(extension_check) == 0:
					MODS_content = unicode(ds_handle.content.serialize(),'utf-8')
					MODS_content = MODS_content.replace("</mods:mods>","<mods:extension><PID>%s</PID></mods:extension></mods:mods>" % PID)

				# <mods:extension> present, but no PID subelement: add it to
				# the node tree, then serialize AFTER the mutation so the new
				# element actually lands in the output (previously the
				# pre-mutation string was written, dropping the change)
				else:
					PID_elem = etree.SubElement(extension_check[0],"PID")
					PID_elem.text = PID
					MODS_content = unicode(ds_handle.content.serialize(),'utf-8')

			# <PID> exists: overwrite its text, then serialize AFTER the
			# mutation so the new value is included (same fix as above)
			else:
				PID_element = PID_check[0]
				PID_element.text = PID
				MODS_content = unicode(ds_handle.content.serialize(),'utf-8')

			# write record to file
			outfile.write(MODS_content.encode('utf-8'))

		# close MODS collection
		outfile.write('\n</mods:modsCollection>')

	# open file from tmp and return as download
	with open(outfile_path, 'r') as fhand:
		response = make_response(fhand.read())
	response.headers["Content-Disposition"] = "attachment; filename=MODS_export.xml"
	return response
Exemplo n.º 6
0
def updateSolr(update_type):	

	# real or emulated solr events
	if update_type == "fullIndex":

		if 'choice' not in request.form:
			return render_template('confirm.html',update_type=update_type)
		else:
			# fire only with confirmation
			if "choice" in request.form and request.form['choice'] == "confirm" and request.form['confirm_string'].lower() == 'confirm':		
				index_handle = solrIndexer.delay('fullIndex', None)
			else:
				print 'skipping fullIndex'
				return redirect('/tasks/updateSolr/select')


	if update_type == "timestamp":
		print "Updating by timestamp"	
		index_handle = solrIndexer.delay('timestampIndex', None)

	if update_type == "userObjects":
		print "Updating by userObjects"	
		PIDs = jobs.getSelPIDs()
		for PID in PIDs:
			index_handle = solrIndexer.delay('modifyObject', PID)	

	# purge and reindex fedobjs (SLOW)
	if update_type == "purgeAndFullIndex":

		if 'choice' not in request.form:
			return render_template('confirm.html',update_type=update_type)

		else:

			# fire only with confirmation
			if "choice" in request.form and request.form['choice'] == "confirm" and request.form['confirm_string'].lower() == 'confirm':

				print "Purging solr core and reindexing all objects"
				# delete all from /fedobjs core
				if 'fedobjs' in solr_handle.base_url:
					solr_handle.delete_by_query('*:*',commit=False)
				# run full index	
				index_handle = solrIndexer.delay('fullIndex', None)

			else:
				print 'skipping purge and index'
				return redirect('/tasks/updateSolr/select')

	
	# return logic
	if "APIcall" in request.values and request.values['APIcall'] == "True":

		# prepare package
		return_dict = {
			"solrIndexer":{
				"update_type":update_type,
				"timestamp":datetime.datetime.now().isoformat(),
				"job_ID":index_handle.id
			}
		}
		# return JSON
		print return_dict
		json_string = json.dumps(return_dict)
		resp = make_response(json_string)
		resp.headers['Content-Type'] = 'application/json'
		return resp		
	
	else:
		return render_template("updateSolr.html",update_type=update_type,APP_HOST=localConfig.APP_HOST)
Exemplo n.º 7
0
def index():
	'''
	Flask route handler: render the IIIF manifest task landing page.
	'''
	# fetch current selection (call retained for parity; result unused here)
	selected_pids = getSelPIDs()
	return render_template("iiifManifest.html")
Exemplo n.º 8
0
def index():
	'''
	Flask route handler: render the export-object task landing page.
	'''
	# fetch current selection (call retained for parity; result unused here)
	selected_pids = getSelPIDs()
	return render_template("exportObject.html")
Exemplo n.º 9
0
def objectRelated():

	from WSUDOR_Manager import forms

	'''
	Query to see what will show up in REPOX:
	SELECT rcItem.identifier AS pid FROM rcItem INNER JOIN rcRecord ON rcItem.itemKey = rcRecord.itemKey INNER JOIN rcMembership ON rcMembership.recordKey = rcRecord.recordKey INNER JOIN rcSet ON rcSet.setKey = rcMembership.setKey INNER JOIN rcFormat ON rcFormat.formatKey = rcRecord.formatKey WHERE rcFormat.mdPrefix = 'mods' AND rcSet.setSpec = 'set:wayne:collectionDPLAOAI';
	'''

	# get PIDs	
	PIDs = getSelPIDs()

	# shared_relationships (in this instance, the PID of collection objects these assert membership to)	
	shared_relationships = []

	# function for shared query between whole and chunked queries
	def risearchQuery(list_of_PIDs):
		# construct where statement for query
		where_statement = ""
		for PID in list_of_PIDs:
			if PID != None:				
				where_statement += "<fedora:{PID}> <http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet> $object . $object <http://www.openarchives.org/OAI/2.0/setSpec> $setSpec . $object <http://www.openarchives.org/OAI/2.0/setName> $setName .".format(PID=PID)
		query_statement = "select $object $setSpec $setName from <#ri> where {{ {where_statement} }}".format(where_statement=where_statement)		
		base_URL = "http://{FEDORA_USER}:{FEDORA_PASSWORD}@localhost/fedora/risearch".format(FEDORA_USER=FEDORA_USER,FEDORA_PASSWORD=FEDORA_PASSWORD)
		payload = {
			"lang" : "sparql",
			"query" : query_statement,
			"flush" : "false",
			"type" : "tuples",
			"format" : "JSON"
		}
		r = requests.post(base_URL, auth=HTTPBasicAuth(FEDORA_USER, FEDORA_PASSWORD), data=payload )
		risearch = json.loads(r.text)
		return risearch	

	# if more than 100 PIDs, chunk into sub-queries
	if len(PIDs) > 100:
		def grouper(iterable, chunksize, fillvalue=None):
			from itertools import izip_longest
			args = [iter(iterable)] * chunksize
			return izip_longest(*args, fillvalue=fillvalue)

		chunks =  grouper(PIDs,100)

		for chunk in chunks:

			# perform query
			risearch = risearchQuery(chunk)

			chunk_list = []			
			for each in risearch['results']:
				tup = (each['object'].split("/")[1],each['setSpec'], each['setName'])
				chunk_list.append(tup)
			try:
				curr_set = set.intersection(curr_set,set(chunk_list))
			except:
				curr_set = set(chunk_list)

		print curr_set
		shared_relationships = curr_set		

	else:		
		# perform query
		risearch = risearchQuery(PIDs)
		shared_relationships = [ (each['object'].split("/")[1],each['setSpec'], each['setName']) for each in risearch['results'] ]

	print shared_relationships

	# finally, find all currently available / defined sets	
	form = forms.OAI_sets()
	active_sets = utilities.returnOAISets('dropdown')
	total_sets = len(active_sets)

	return render_template("manageOAI_objectRelated.html",shared_relationships=shared_relationships,form=form,active_sets=active_sets,total_sets=total_sets)

	return render_template("manageOAI_objectRelated.html")
Exemplo n.º 10
0
def editRELS_shared():
	'''
	Will return only RDF statements shared (predicate AND object) by all PIDs	

	- Requires workaround for large queries...
		- Eulfedora (uses GET, too small)
		- POST requests 100+ break sparql
		- Solution: for scenarios with 100+ PIDs, break into smaller queries, then mix together in results

	'''
	# get PIDs	
	PIDs = jobs.getSelPIDs()

	# shared relationships	
	shared_relationships = []

	# shared function for whole or chunked query
	def risearchQuery(list_of_PIDs):
		# construct where statement for query
		where_statement = ""
		for PID in list_of_PIDs:
			if PID != None:				
				where_statement += "<fedora:%s> $predicate $object . " % (PID)
		query_statement = "select $predicate $object from <#ri> where {{ %s }}" % (where_statement)		

		# print query_statement
		
		base_URL = "http://%s:%s@localhost/fedora/risearch" % (FEDORA_USER, FEDORA_PASSWORD)
		payload = {
			"lang" : "sparql",
			"query" : query_statement,
			"flush" : "false",
			"type" : "tuples",
			"format" : "JSON"
		}
		r = requests.post(base_URL, auth=HTTPBasicAuth(FEDORA_USER, FEDORA_PASSWORD), data=payload )
		risearch = json.loads(r.text)
		return risearch

	# if more than 100 PIDs, chunk into sub-queries
	if len(PIDs) > 100:		

		def grouper(iterable, chunksize, fillvalue=None):
			from itertools import izip_longest
			args = [iter(iterable)] * chunksize
			return izip_longest(*args, fillvalue=fillvalue)

		chunks =  grouper(PIDs,100)

		for chunk in chunks:			

			# perform query
			risearch = risearchQuery(chunk)

			chunk_list = []			
			for each in risearch['results']:
				tup = (each['predicate'],each['object'])				
				chunk_list.append(tup)
			try:
				curr_set = set.intersection(curr_set,set(chunk_list))
			except:
				curr_set = set(chunk_list)

		print curr_set
		shared_relationships = curr_set
		

	else:
		# perform query
		risearch = risearchQuery(PIDs)
		shared_relationships = [ (each['predicate'], each['object']) for each in risearch['results'] ]


	return render_template('editRELS_shared.html',shared_relationships=shared_relationships)