Exemplo n.º 1
0
	def ingest(self, book_obj, page_num):

		# set book_obj to self
		self.book_obj = book_obj

		# using parent book, get datastreams from objMeta
		page_dict = self.book_obj.pages_from_objMeta[page_num]

		# new pid
		npid = "wayne:%s_Page_%s" % (self.book_obj.pid.split(":")[1], page_num)

		# creating new self	
		self.ohandle = fedora_handle.get_object(npid)
		if self.ohandle.exists:
			fedora_handle.purge_object(self.ohandle)
		self.ohandle = fedora_handle.get_object(npid, create=True)
		self.ohandle.save()

		# label
		self.ohandle.label = "%s - Page %s" % (self.book_obj.ohandle.label, page_num)

		# write POLICY datastream
		# NOTE: 'E' management type required, not 'R'
		print "Using policy:",self.book_obj.objMeta['policy']
		policy_suffix = self.book_obj.objMeta['policy'].split("info:fedora/")[1]
		policy_handle = eulfedora.models.DatastreamObject(self.ohandle, "POLICY", "POLICY", mimetype="text/xml", control_group="E")
		policy_handle.ds_location = "http://localhost/fedora/objects/%s/datastreams/POLICY_XML/content" % (policy_suffix)
		policy_handle.label = "POLICY"
		policy_handle.save()				

		# generic hash of target ids
		target_ids = {
			'IMAGE':'IMAGE_%d' % page_num,
			'HTML':'HTML_%d' % page_num,
			'ALTOXML':'ALTOXML_%d' % page_num
		}

		# for each file type in pages dict, pass page obj and process
		for ds in page_dict:

			if ds['ds_id'].startswith('IMAGE'):
				self.processImage(ds)
			if ds['ds_id'].startswith('HTML'):
				self.processHTML(ds)
			if ds['ds_id'].startswith('ALTOXML'):
				self.processALTOXML(ds)

		# write RDF relationships
		self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel", "info:fedora/CM:WSUebook_Page")
		self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#isConstituentOf", "info:fedora/%s" % self.book_obj.ohandle.pid)
		self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder", page_num)

		# save page object
		return self.ohandle.save()
Exemplo n.º 2
0
	def ingestMissingPage(self, book_obj, page_num, from_bag=True):

		# set book_obj to self
		self.book_obj = book_obj

		# new pid
		npid = "wayne:%s_Page_%s" % (self.book_obj.pid.split(":")[1], page_num)

		# creating new self	
		self.ohandle = fedora_handle.get_object(npid)
		if self.ohandle.exists:
			fedora_handle.purge_object(self.ohandle)
		self.ohandle = fedora_handle.get_object(npid, create=True)
		self.ohandle.save()

		# label
		self.ohandle.label = "%s - Page %s" % (self.book_obj.ohandle.label, page_num)

		# write POLICY datastream
		# NOTE: 'E' management type required, not 'R'
		print "Using policy:",self.book_obj.objMeta['policy']
		policy_suffix = self.book_obj.objMeta['policy'].split("info:fedora/")[1]
		policy_handle = eulfedora.models.DatastreamObject(self.ohandle, "POLICY", "POLICY", mimetype="text/xml", control_group="E")
		policy_handle.ds_location = "http://localhost/fedora/objects/%s/datastreams/POLICY_XML/content" % (policy_suffix)
		policy_handle.label = "POLICY"
		policy_handle.save()				

		print "Processing HTML placeholder"
		generic_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "HTML", "HTML", mimetype="text/html", control_group='M')
		generic_handle.label = "HTML"
		generic_handle.content = "<p>[Page %s Intentionally Left Blank]</p>" % (page_num)
		generic_handle.save()

		print "Processing IMAGE placeholder"
		# passes 'from_bag' param		
		self.processImage(None, exists=False, page_num=page_num, from_bag=from_bag)

		# write RDF relationships
		self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel", "info:fedora/CM:WSUebook_Page")
		self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#isConstituentOf", "info:fedora/%s" % self.book_obj.ohandle.pid)
		self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder", page_num)
		self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageExists", False)

		# create IMAGE, HTML, ALTOXML for missing page
		print "Processing ALTOXML placeholder"
		generic_handle = eulfedora.models.FileDatastreamObject(self.ohandle, 'ALTOXML', 'ALTOXML', mimetype="text/xml", control_group='M')
		generic_handle.label = 'ALTOXML'
		generic_handle.content = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><alto xmlns="http://www.loc.gov/standards/alto/ns-v2#" xmlns:xlink="http://www.w3.org/1999/xlink"    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/standards/alto/ns-v2# http://www.loc.gov/standards/alto/alto-v2.0.xsd">    <Description>        <MeasurementUnit>pixel</MeasurementUnit>        <OCRProcessing ID="IdOcr">            <ocrProcessingStep>                <processingSoftware>                    <softwareCreator>ABBYY</softwareCreator>                    <softwareName>ABBYY Recognition Server</softwareName>                    <softwareVersion>4.0</softwareVersion>                </processingSoftware>            </ocrProcessingStep>        </OCRProcessing>    </Description>    <Styles>        <ParagraphStyle ID="StyleId-FFFFFFFF-FFFF-FFFF-FFFF-FFFFFFFFFFFF-" ALIGN="Left" LEFT="0"            RIGHT="0" FIRSTLINE="0"/>    </Styles>    <Layout>        <Page ID="Page1" PHYSICAL_IMG_NR="1">            <PrintSpace HEIGHT="%s" WIDTH="%s" VPOS="0" HPOS="0"/>        </Page>    </Layout></alto>' % (self.faux_width, self.faux_height)
		generic_handle.save()

		# save page object
		return self.ohandle.save()
Exemplo n.º 3
0
def checksum_worker(job_package):
	form_data = job_package['form_data']
	print form_data

	# in confirmation present, change state
	if form_data['confirm_string'] == "CONFIRM":

		# grab target state
		target_state = form_data['target_state']

		# set state	
		print "Setting state to: %s" % (target_state)
		
		# get PID handle, set state, save()
		PID = job_package['PID']		
		obj_ohandle = fedora_handle.get_object(PID)		
		obj_ohandle = obj_ohandle.ds_list
		for (name, loc) in obj_ohandle.items():
			print name

		# getDatastreamObject('ACCESS').checksum_type
		# getDatastreamObject('ACCESS').checksum
		# take name, insert into .checksum and checksum_type
		# return datastream name and checksum results to page (which are then sorted by template)
		# not quiet sure what return does below
		return obj_ohandle

		# Enable Checksumming feature to be developed if Checksums are not enabled

	else:
		return "Confirmation not entered correctly, skipping."
Exemplo n.º 4
0
def addDS_worker(job_package):
	
	form_data = job_package['form_data']	
	print form_data
	
	PID = job_package['PID']		
	obj_ohandle = fedora_handle.get_object(PID)

	# initialized DS object
	newDS = eulfedora.models.DatastreamObject(obj_ohandle, form_data['dsID'], form_data['dsLabel'], control_group=form_data['controlGroup'])	

	# construct DS object
	if form_data['MIMEType'] != '':		
		newDS.mimetype = form_data['MIMEType']	
	if form_data['dsLocation'] != '':
		newDS.ds_location = form_data['dsLocation']	

	# content
	if 'upload_data' in job_package:
		with open(job_package['upload_data'],'r') as fhand:
			newDS.content = fhand.read()
	elif form_data['content'] != '':
		newDS.content = form_data['content']	

	# save constructed object
	return newDS.save()
	
	
Exemplo n.º 5
0
def editDSRegex_regex_worker(job_package):
	'''
	Run a regex search / replace over an object's MODS datastream and save it.

	job_package -- dict with 'PID' and 'form_data' holding 'regex_search' and
	               'regex_replace'

	The current MODS XML is fetched over HTTP from the Fedora REST API on
	localConfig.APP_HOST, re.sub() is applied, and the result is written back
	as the MODS datastream (control group "X", mimetype text/xml).

	Returns the result of save() on the datastream.
	'''

	PID = job_package['PID']
	target_object = fedora_handle.get_object(PID)

	# fetch the raw datastream content through the Fedora API
	mods_url = "http://{APP_HOST}/fedora/objects/{PID}/datastreams/MODS/content".format(PID=PID,APP_HOST=localConfig.APP_HOST)
	response = requests.get(mods_url)
	current_xml = response.text.encode("utf-8")

	# pattern and replacement, as utf-8 byte strings like the XML
	form_data = job_package['form_data']
	pattern = form_data['regex_search'].encode('utf-8')
	replacement = form_data['regex_replace'].encode('utf-8')
	updated_xml = re.sub(pattern, replacement, current_xml)

	# write back, the same way addDS does
	mods_ds = eulfedora.models.DatastreamObject(target_object, "MODS", "MODS", control_group="X")
	mods_ds.mimetype = "text/xml"
	mods_ds.content = updated_xml
	return mods_ds.save()
Exemplo n.º 6
0
def manageOAI_toggleSet_worker(self,harvest_status,object_uri,collectionPID):
	PID = object_uri.split("/")[1]

	################################################	
	# check PIDlock	
	lock_status = redisHandles.r_PIDlock.exists(PID)
	
	# if locked, divert
	if lock_status == True:
		time.sleep(.25)
		raise self.retry(countdown=3)
	else:
		redisHandles.r_PIDlock.set(PID,1)
	################################################

	isMemberOfOAISet_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet"
	obj_handle = fedora_handle.get_object(object_uri)

	# toggle collection OAI relatedd RELS-EXT relationships	
	if harvest_status == "False":
		print "%s was not part of set, enabling..." % (PID)		
		toggle_function = obj_handle.add_relationship
	if harvest_status == "True":
		print "%s was harvestable, deactivating..." % (PID)		
		toggle_function = obj_handle.purge_relationship
		
	# isMemberOfOAISet relationship		
	predicate_string = isMemberOfOAISet_predicate
	object_string = "info:fedora/%s" % (collectionPID)
	return toggle_function(predicate_string, object_string)
Exemplo n.º 7
0
def DCfromMODS_worker(job_package):

	PID = job_package['PID']
	ohandle = fedora_handle.get_object(PID)

	# retrieve MODS		
	MODS_handle = ohandle.getDatastreamObject('MODS')		
	XMLroot = etree.fromstring(MODS_handle.content.serialize())

	# 2) transform downloaded MODS to DC with LOC stylesheet
	print "XSLT Transforming: %s" % (PID)
	# Saxon transformation
	XSLhand = open('inc/xsl/MODS_to_DC.xsl','r')		
	xslt_tree = etree.parse(XSLhand)
	transform = etree.XSLT(xslt_tree)
	DC = transform(XMLroot)

	# 2.5) scrub duplicate, identical elements from DC
	DC = utilities.delDuplicateElements(DC)		

	# 3) save to DC datastream
	DS_handle = ohandle.getDatastreamObject("DC")
	DS_handle.content = str(DC)
	derive_results = DS_handle.save()
	print "DCfromMODS result:",derive_results
	return derive_results
Exemplo n.º 8
0
def augmentCore(PID):
	
	print "Checking",PID	
	
	# for all 'wayne' prefixes
	if PID.startswith("wayne:"):
		# get content type
		obj_ohandle = fedora_handle.get_object(PID)			
		obj_risearch = obj_ohandle.risearch
		obj_spo = obj_risearch.spo_search("info:fedora/%s" % (PID), "info:fedora/fedora-system:def/relations-external#hasContentModel")
		obj_objects = obj_spo.objects()
		for obj in obj_objects:
			
			# ebooks
			if str(obj) == "info:fedora/CM:WSUebook":	
				print "Firing ebook augment"		
				ebookText(PID)

			# hierarchicalfiles
			if str(obj) == "info:fedora/CM:Document":			
				print "Firing hierarchical augment"
				hierarchicalDocuments(PID)

	#######################################################
	# consider adding more advanced indexing here, e.g. 
	#######################################################

	else:
		print "Does not have 'wayne' prefix, skipping augmentCore()..."		
Exemplo n.º 9
0
    def __init__(self, pid=False, ds_id='PREMIS'):

        self.pid = pid
        self.ohandle = False
        self.premis_ds = False
        self.premis_tree = None

        # if pid provided, attempt to retrieve PREMIS
        if pid:
            self.ohandle = fedora_handle.get_object(pid)
            if ds_id in self.ohandle.ds_list:
                self.premis_ds = self.ohandle.getDatastreamObject('PREMIS')
                self.premis_root = self.premis_ds.content.node
                self.premis_tree = self.premis_root.getroottree()
            else:
                print "%s datastream not found, initializing blank PREMIS node" % ds_id

        # if no pre-exisintg PREMIS datastream, init new one
        if not self.premis_ds:
            ns = {
                "xsi": "http://www.w3.org/2001/XMLSchema-instance",
                "xsd": "http://www.w3.org/2001/XMLSchema",
                "premis": "info:lc/xmlns/premis-v2",
            }
            self.premis_root = etree.Element('premis', nsmap=ns)
            self.premis_tree = etree.ElementTree(self.premis_root)
Exemplo n.º 10
0
	def indexPageText(self):

		'''
		When copying objects between repositories, indexing of pages is skipped.
		This function can be run to repeat that process.
		'''

		for page in self.pages_from_rels:

			try:
				print "Working on page %d / %d" % (page, len(self.pages_from_rels))

				# index in Solr bookreader core
				data = {
					"literal.id" : self.objMeta['identifier']+"_OCR_HTML_"+str(page),
					"literal.ItemID" : self.objMeta['identifier'],
					"literal.page_num" : page,
					"fmap.content" : "OCR_text",
					"commit" : "false"
				}
				ds_handle = fedora_handle.get_object("%s_Page_%d" % (self.pid, page)).getDatastreamObject("HTML")
				files = {'file': ds_handle.content}
				r = requests.post("http://localhost/solr4/bookreader/update/extract", data=data, files=files)
			except:
				raise Exception("Could not index page %d" % page)

		# commit
		print solr_bookreader_handle.commit()
Exemplo n.º 11
0
def removeFromDPLA_worker(job_package):
	'''
	Remove an object from the DPLA OAI set.

	job_package -- dict with 'PID'

	Returns the result of purging the object's isMemberOfOAISet relationship to
	info:fedora/wayne:collectionDPLAOAI.
	'''

	oai_set_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isMemberOfOAISet"
	dpla_collection = "info:fedora/wayne:collectionDPLAOAI"

	# look up the object by PID
	target_object = fedora_handle.get_object(job_package['PID'])

	# purge (not add) the set membership
	return target_object.purge_relationship(oai_set_predicate, dpla_collection)
Exemplo n.º 12
0
	def constituents(self):

		'''
		Returns a list of digital objs that are isConstituentOf this object,
		in the order the risearch SPARQL query yields them
		'''

		# every subject pointing at this pid through isConstituentOf
		query = 'select $constituent WHERE {{ $constituent <info:fedora/fedora-system:def/relations-external#isConstituentOf> <info:fedora/%s> . }}' % (self.pid)
		rows = fedora_handle.risearch.sparql_query(query)

		# turn each result row into a digital obj
		found = []
		for row in rows:
			found.append(fedora_handle.get_object(row['constituent']))
		return found
Exemplo n.º 13
0
def manageOAI_genItemID_worker(job_package):
	
	# get PID
	PID = job_package['PID']		
	obj_ohandle = fedora_handle.get_object(PID)	

	# generate OAI identifier
	OAI_identifier = "oai:digital.library.wayne.edu:%s" % (PID)	
	
	print obj_ohandle.add_relationship("http://www.openarchives.org/OAI/2.0/itemID", OAI_identifier)
Exemplo n.º 14
0
	def pages_from_rels(self):

		'''
		Returns OrderedDict with pageOrder (as int) as key, digital obj as val.
		Pages are those that are isConstituentOf this object and carry a
		pageOrder relationship; the query sorts them ascending by pageOrder.
		'''

		# constituents with their pageOrder, sorted by the query itself
		query = 'select $page $pageOrder WHERE {{ $page <info:fedora/fedora-system:def/relations-external#isConstituentOf> <info:fedora/%s> .$page <http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/pageOrder> $pageOrder . }} ORDER BY ASC($pageOrder)' % (self.pid)
		rows = fedora_handle.risearch.sparql_query(query)

		# insertion order follows the query order
		pages = OrderedDict()
		for row in rows:
			pages[int(row['pageOrder'])] = fedora_handle.get_object(row['page'])
		return pages
Exemplo n.º 15
0
	def ingestBag(self):
		if self.object_type != "bag":
			raise Exception("WSUDOR_Object instance is not 'bag' type, aborting.")		

		# ingest Volume object
		try:
			self.ohandle = fedora_handle.get_object(self.objMeta['id'],create=True)
			self.ohandle.save()

			# set base properties of object
			self.ohandle.label = self.objMeta['label']

			# write POLICY datastream (NOTE: 'E' management type required, not 'R')
			print "Using policy:",self.objMeta['policy']
			policy_suffix = self.objMeta['policy'].split("info:fedora/")[1]
			policy_handle = eulfedora.models.DatastreamObject(self.ohandle,"POLICY", "POLICY", mimetype="text/xml", control_group="E")
			policy_handle.ds_location = "http://localhost/fedora/objects/{policy}/datastreams/POLICY_XML/content".format(policy=policy_suffix)
			policy_handle.label = "POLICY"
			policy_handle.save()

			# write objMeta as datastream
			objMeta_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "OBJMETA", "Ingest Bag Object Metadata", mimetype="application/json", control_group='M')
			objMeta_handle.label = "Ingest Bag Object Metadata"
			objMeta_handle.content = json.dumps(self.objMeta)
			objMeta_handle.save()

			# write explicit RELS-EXT relationships			
			for relationship in self.objMeta['object_relationships']:
				print "Writing relationship:",str(relationship['predicate']),str(relationship['object'])
				self.ohandle.add_relationship(str(relationship['predicate']),str(relationship['object']))
					
			# writes derived RELS-EXT
			self.ohandle.add_relationship("http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isRepresentedBy",self.objMeta['isRepresentedBy'])
			content_type_string = "info:fedora/CM:"+self.objMeta['content_type'].split("_")[1]
			self.ohandle.add_relationship("info:fedora/fedora-system:def/relations-external#hasContentModel",content_type_string)

			# write MODS datastream
			objMeta_handle = eulfedora.models.FileDatastreamObject(self.ohandle, "MODS", "MODS descriptive metadata", mimetype="text/xml", control_group='M')
			objMeta_handle.label = "MODS descriptive metadata"
			file_path = self.Bag.path + "/data/MODS.xml"
			objMeta_handle.content = open(file_path)
			objMeta_handle.save()			

			# save and commit object before finishIngest()
			final_save = self.ohandle.save()
			
			# finish generic ingest
			return self.finishIngest()


		# exception handling
		except Exception,e:
			print traceback.format_exc()
			print "Volume Ingest Error:",e
			return False
Exemplo n.º 16
0
def editRELS_purge_worker(job_package):

	PID = job_package['PID']		
	obj_ohandle = fedora_handle.get_object(PID)	

	form_data = job_package['form_data']
	predicate_string = form_data['predicate'].encode('utf-8').strip()
	object_string = form_data['object'].encode('utf-8').strip()

	print "Removing the following predicate / subject: %s /%s" % (predicate_string, object_string)
		
	return obj_ohandle.purge_relationship(predicate_string, object_string)
Exemplo n.º 17
0
def editRELS_modify_worker(job_package):
	'''
	Replace the object of one of an object's relationships.

	job_package -- dict with 'PID' and 'form_data' holding 'new_predicate',
	               'old_predicate', 'new_object' and 'old_object'

	All four form values are utf-8 encoded and stripped.
	Returns the result of modify_relationship(old predicate, old object, new object).
	'''

	target_object = fedora_handle.get_object(job_package['PID'])
	submitted = job_package['form_data']

	def cleaned(field):
		# form value as a stripped utf-8 byte string
		return submitted[field].encode('utf-8').strip()

	# NOTE(review): the new predicate is read but never handed to
	# modify_relationship, so only the object changes - confirm this is intended
	new_predicate = cleaned('new_predicate')
	old_predicate = cleaned('old_predicate')
	new_object = cleaned('new_object')
	old_object = cleaned('old_object')

	return target_object.modify_relationship(old_predicate, old_object, new_object)
Exemplo n.º 18
0
def editRELS_add_worker(job_package):
	'''
	Add one relationship to an object.

	job_package -- dict with 'PID' and 'form_data'; the predicate comes from
	               'predicate_literal' when the form has a "literal" key,
	               otherwise from 'predicate'; the object comes from 'obj'

	Form values are utf-8 encoded and stripped.
	Returns the result of add_relationship.
	'''
	target_object = fedora_handle.get_object(job_package['PID'])
	submitted = job_package['form_data']

	# which form field carries the predicate
	predicate_field = 'predicate_literal' if "literal" in submitted else 'predicate'
	predicate = submitted[predicate_field].encode('utf-8').strip()

	rel_object = submitted['obj'].encode('utf-8').strip()
	return target_object.add_relationship(predicate, rel_object)
Exemplo n.º 19
0
def index(PIDnum):
	'''
	Render the editDSMime page for the selected PID at position PIDnum.

	PIDnum -- position of the PID; converted with int()

	Returns utilities.applicationError(...) when jobs.genPIDlet gives False,
	otherwise the rendered editDSMime.html template with the object's ds_list.
	'''
	position = int(PIDnum)

	# gen PIDlet; False means the position is outside the selection
	PIDlet = jobs.genPIDlet(position)
	if PIDlet == False:
		return utilities.applicationError("PIDnum is out of range.")

	# previous / next links
	base_url = "/tasks/editDSMime/"
	PIDlet['pURL'] = base_url+str(position-1)
	PIDlet['nURL'] = base_url+str(position+1)

	# datastreams of the current PID
	PID = PIDlet['cPID']
	datastreams = fedora_handle.get_object(PID).ds_list

	return render_template("editDSMime.html", PIDlet=PIDlet, PIDnum=PIDnum, ds_list=datastreams, APP_HOST=localConfig.APP_HOST)
Exemplo n.º 20
0
	def _createVirtBook(self):

		'''
		Target Datastreams:
			- DC
				- text/xml
			MARCXML
				- text/xml
			RELS-EXT
				- application/rdf+xml
		'''

		print "generating virtual ScannedBook object"

		virtual_book_handle = fedora_handle.get_object(type=WSUDOR_ContentTypes.WSUDOR_Readux_VirtualBook)
		virtual_book_handle.create(self)
Exemplo n.º 21
0
def pruneSolr_worker(job_package, PID=False):

	if PID:
		# prune specific PID
		solr_handle.delete_by_key(PID)
		return "PRUNED"

	else:
		doc_id = job_package['doc_id']
		
		if not fedora_handle.get_object(doc_id).exists:
			print "Did not find object in Fedora, pruning from Solr..."
			solr_handle.delete_by_key(doc_id)
			return "PRUNED"
		else:
			return "IGNORED"
Exemplo n.º 22
0
def manageOAI_toggleSet(PID):	

	isOAIHarvestable_predicate = "http://digital.library.wayne.edu/fedora/objects/wayne:WSUDOR-Fedora-Relations/datastreams/RELATIONS/content/isOAIHarvestable"
	
	# determine collection status
	obj_ohandle = fedora_handle.get_object(PID)
	harvest_status_gen = obj_ohandle.risearch.get_objects(obj_ohandle.uriref, isOAIHarvestable_predicate)
	harvest_status = harvest_status_gen.next()	

	# get collection name
	dc_title_gen = obj_ohandle.risearch.get_objects(obj_ohandle.uriref,"dc:title")
	dc_title = dc_title_gen.next()

	# toggle collection OAI relatedd RELS-EXT relationships	
	if harvest_status == "False":
		print "Object was not harvestable, enabling..."
		print obj_ohandle.modify_relationship(isOAIHarvestable_predicate, "False", "True")	
		toggle_function = obj_ohandle.add_relationship
	if harvest_status == "True":
		print "Object was harvestable, deactivating..."
		print obj_ohandle.modify_relationship(isOAIHarvestable_predicate, "True", "False")	
		toggle_function = obj_ohandle.purge_relationship
		
	# setSpec relationship	
	predicate_string = "http://www.openarchives.org/OAI/2.0/setSpec"
	object_string = "set:%s" % (PID)
	print toggle_function(predicate_string, object_string)

	# setName relationship	
	predicate_string = "http://www.openarchives.org/OAI/2.0/setName"
	object_string = dc_title
	print toggle_function(predicate_string, object_string)


	# toggle relationships for child objects (runs as celery task)	
	# collection_objects = obj_ohandle.risearch.get_subjects("fedora-rels-ext:isMemberOfCollection",obj_ohandle.uriref)	
	# for object_uri in collection_objects:
	# 	manageOAI_toggleSet_worker.apply_async(
		# 	kwargs={
		# 		'harvest_status':harvest_status,
		# 		'object_uri':object_uri,
		# 		'PID':PID
		# 	},
		# 	queue=job_package['username']
		# )

	return redirect("/tasks/manageOAI/serverWide")	
Exemplo n.º 23
0
	def _createVirtVolume(self):
		'''
		Target Datastreams:
			- DC
				- text/xml
			- OCR
				- text/xml
			- PDF
				- application/pdf
			- RELS-EXT
				- applicaiton/rdf+xml
		'''

		print "generating virtual ScannedVolume object"

		virtual_volume_handle = fedora_handle.get_object(type=WSUDOR_ContentTypes.WSUDOR_Readux_VirtualVolume)
		virtual_volume_handle.create(self)
Exemplo n.º 24
0
def makeSymLink(PID, DS):
	'''
	Create (or reuse) a symlink to a datastream's file in Fedora's datastreamStore.

	PID -- object PID
	DS  -- datastream id

	The link lives under /var/www/wsuls/symLinks/ and is named with the md5 hex
	digest of "info:fedora/<PID>/<DS>/<DS>.0" plus an extension guessed from the
	datastream's mimetype (".sym" when no extension can be determined).

	Returns {'symlink': <path>} when the link exists or was created, or the
	string "Target not found.  Aborting." when the source file is missing.
	'''

	returnDict = {}

	filename = "info:fedora/"+PID+"/"+DS+"/"+DS+".0"

	# get hash folder: first two hex chars of the md5 of the unquoted name
	hashed_filename = hashlib.md5(urllib.unquote(filename))
	dataFolder = hashed_filename.hexdigest()[0:2]

	filename_quoted = urllib.quote_plus(filename)

	# peculiars for Fedora: underscores are stored percent-encoded as well
	filename_quoted = filename_quoted.replace('_','%5F')

	# symlink directory
	path_prefix = "/var/www/wsuls/symLinks/"

	# guess file extension; any failure while looking up the mimetype falls
	# back to the generic extension
	try:
		extension_guess = mimetypes.guess_extension( fedora_handle.get_object(PID).getDatastreamObject(DS).mimetype )
	except Exception:
		extension_guess = None
	# guess_extension gives None for unknown types; this used to be a no-op
	# comparison (==), which left None in place and broke the concatenation below
	if extension_guess is None:
		extension_guess = ".sym"

	# construct full symlink path
	file_path = path_prefix+hashed_filename.hexdigest()+extension_guess

	returnDict['symlink'] = file_path

	# exists
	if os.path.exists(file_path):
		return returnDict

	# create
	source_prefix = "/usr/local/fedora/data/datastreamStore/"
	source_path = source_prefix+dataFolder+"/"+filename_quoted

	if os.path.exists(source_path):
		os.symlink(source_path, file_path)
		return returnDict
	else:
		return "Target not found.  Aborting."
Exemplo n.º 25
0
def editDSMime_worker(job_package):
	form_data = job_package['form_data']
	print form_data		
		
	try:
		# get PID handle, set state, save()
		PID = job_package['PID']
		obj_ohandle = fedora_handle.get_object(PID)

		# update mime/type
		ds_handle = obj_ohandle.getDatastreamObject(form_data['DSID'].encode('utf-8'))
		ds_handle.mimetype = form_data['mimetype'].encode('utf-8')

		# save constructed object
		return ds_handle.save()

	except:
		return "Could not edit Datastream Mime-Type"
Exemplo n.º 26
0
def MODSimport_worker(job_package):	
	'''
	Receive job_package, which contains PID, update MODS
	'''	

	PID = job_package['PID']
	MODS = job_package['MODS']	
	print "Updating MODS for %s" % (PID)

	# open temp MODS file, read, delete
	fhand = open(MODS,'r')
	MODS_string = fhand.read()
	fhand.close()
	os.system("rm %s" % (MODS))

	obj_handle = fedora_handle.get_object(PID)
	ds_handle = obj_handle.getDatastreamObject("MODS")
	ds_handle.content = MODS_string
	return ds_handle.save()
Exemplo n.º 27
0
def objectState_worker(job_package):
	form_data = job_package['form_data']
	print form_data

	# in confirmation present, change state
	if form_data['confirm_string'] == "CONFIRM":

		# grab target state
		target_state = form_data['target_state']

		# set state	
		print "Setting state to: {target_state}".format(target_state=target_state)
		
		# get PID handle, set state, save()
		PID = job_package['PID']		
		obj_ohandle = fedora_handle.get_object(PID)		
		obj_ohandle.state = target_state
		return obj_ohandle.save()

	else:
		return "Confirmation not entered correctly, skipping."
Exemplo n.º 28
0
def index():

	# get PID to examine, if noted
	if request.args.get("PIDnum") != None:
		PIDnum = int(request.args.get("PIDnum"))		
	else:
		PIDnum = 0

	# get PIDs	
	PIDs = getSelPIDs()	
	print PIDs[PIDnum]

	obj_ohandle = fedora_handle.get_object(PIDs[PIDnum])		
	obj_ohandle = obj_ohandle.ds_list
	dsIDs = []
	for (name, loc) in obj_ohandle.items():
		dsIDs.extend([name])
	print dsIDs

	form = purgeDSForm()	
	return render_template("purgeDS.html",form=form,PID=PIDs[PIDnum],dsIDs=dsIDs,PIDnum=PIDnum)
Exemplo n.º 29
0
def DCfromMODS_single(PID):	

	ohandle = fedora_handle.get_object(PID)

	# retrieve MODS		
	MODS_handle = ohandle.getDatastreamObject('MODS')		
	XMLroot = etree.fromstring(MODS_handle.content.serialize())

	# 2) transform downloaded MODS to DC with LOC stylesheet
	print "XSLT Transforming: {PID}".format(PID=PID)
	# Saxon transformation
	XSLhand = open('inc/xsl/MODS_to_DC.xsl','r')		
	xslt_tree = etree.parse(XSLhand)
	transform = etree.XSLT(xslt_tree)
	DC = transform(XMLroot)		

	# 3) save to DC datastream
	DS_handle = ohandle.getDatastreamObject("DC")
	DS_handle.content = str(DC)
	derive_results = DS_handle.save()
	print "DCfromMODS result:",derive_results
	return derive_results
Exemplo n.º 30
0
def editRELS_regex_worker(job_package):		
	
	PID = job_package['PID']		
	obj_ohandle = fedora_handle.get_object(PID)	
	
	# Eulfedora
	###############################################################
	# obj_ohandle = fedora_handle.get_object(PIDs[PIDnum])	
	# try:
	# 	raw_xml = obj_ohandle.rels_ext.content.serialize()
	# except:
	# 	raw_xml = "COULD NOT PARSE"
	###############################################################

	# Raw Datastream via Fedora API
	###############################################################	
	raw_xml_URL = "http://localhost/fedora/objects/%s/datastreams/RELS-EXT/content" % (PID)
	raw_xml = requests.get(raw_xml_URL).text.encode("utf-8")
	###############################################################
	
	# get regex parameters
	form_data = job_package['form_data']	

	# search / replace	
	regex_search = form_data['regex_search'].encode('utf-8')
	regex_replace = form_data['regex_replace'].encode('utf-8')
	new_string = re.sub(regex_search,regex_replace,raw_xml)		

	# similar to addDS functionality	
	newDS = eulfedora.models.DatastreamObject(obj_ohandle, "RELS-EXT", "RELS-EXT", control_group="X")	

	# construct DS object	
	newDS.mimetype = "application/rdf+xml"
	# content		
	newDS.content = new_string	

	# save constructed object
	print newDS.save()