Example #1
def celeryTaskFactoryUnique(job_num,job_package):
	
	# reconstitute
	form_data = job_package['form_data']
	job_num = job_package['job_num']

	# get FOXML
	FOXMLs_serialized = genFOXML("retrieve", form_data['MODS_id'], form_data['xsl_trans_id'])

	# update job info
	redisHandles.r_job_handle.set("job_{job_num}_est_count".format(job_num=job_num),len(FOXMLs_serialized))

	# ingest in Fedora
	step = 1
	for FOXML in FOXMLs_serialized:		

		job_package['PID'] = "N/A"
		job_package['step'] = step		
		job_package['FOXML'] = FOXML

		# fire ingester
		result = actions.actions.taskWrapper.delay(job_package)

		task_id = result.id		
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_num)

		# bump step
		step += 1
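
For orientation, here is a minimal sketch of a call site for this factory. The dict keys mirror what the function reads above (job_num, form_data['MODS_id'], form_data['xsl_trans_id']), but the values and the surrounding job-creation machinery are placeholders, not taken from the source.

# hypothetical invocation; job_num would normally come from a job-tracking helper
job_package = {
	'job_num': 42,
	'form_data': {'MODS_id': 'mods_123', 'xsl_trans_id': 'xsl_456'},
}
celeryTaskFactoryUnique(42, job_package)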
Example #2
def celeryTaskFactoryBagIngest(job_num,job_package):
	
	# reconstitute
	job_num = job_package['job_num']	

	# update job info
	redisHandles.r_job_handle.set("job_{job_num}_est_count".format(job_num=job_num),1)

	# ingest in Fedora
	step = 1

	job_package['PID'] = "N/A"
	job_package['step'] = step		

	# fire ingester
	result = actions.actions.taskWrapper.delay(job_package)	

	task_id = result.id		
	print task_id
		
	# update incrementer for total assigned
	jobs.jobUpdateAssignedCount(job_num)

	# bump step
	step += 1
Example #3
def celeryTaskFactoryImportMODS(job_num,job_package):

	'''
	Problem: the MODS XML is too large to send through Redis. It needs to live in MySQL or a text file.
	Write to a temp file (see the worker-side sketch after this example). Case closed.
	'''
	
	# reconstitute
	form_data = job_package['form_data']
	job_num = job_package['job_num']

	# get mods:collection 
	if 'upload_data' in job_package:		
		MODS_collection = job_package['upload_data']
		job_package['upload_data'] = False #scrub data
	elif form_data['content'] != '':
		MODS_collection = form_data['content'] 
		form_data['content'] = False #scrub data

	# shunt each MODS record to list
	MODS_collection = unicode(MODS_collection, 'utf-8')
	XMLroot = etree.fromstring(MODS_collection.encode('utf-8'))	
	MODS_list = XMLroot.findall('{http://www.loc.gov/mods/v3}mods')	

	# update job info
	redisHandles.r_job_handle.set("job_{job_num}_est_count".format(job_num=job_num),len(MODS_list))

	# ingest in Fedora
	step = 1
	for MODS_elem in MODS_list:

		# read <mods:extension><PID>, pass this as PID
		PID_search = MODS_elem.findall("{http://www.loc.gov/mods/v3}extension/PID")
		if len(PID_search) == 0:
			print "Could not find PID, skipping"
			continue
		else:
			PID = PID_search[0].text

		# write MODS to temp file
		temp_filename = "/tmp/Ouroboros/"+str(uuid.uuid4())+".xml"
		fhand = open(temp_filename,'w')
		fhand.write(etree.tostring(MODS_elem))
		fhand.close()

		job_package['PID'] = PID
		job_package['step'] = step		
		job_package['MODS'] = temp_filename

		# fire ingester
		result = actions.actions.taskWrapper.delay(job_package)

		task_id = result.id		
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_num)

		# bump step
		step += 1
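
As the docstring notes, the MODS XML is handed off through a temp file rather than Redis, which implies a worker on the other end that reads job_package['MODS'] back from disk. A minimal sketch of that worker-side read, assuming the worker receives the same job_package (the actual worker is not shown in the source):

import os
from lxml import etree

def ingest_MODS_worker(job_package):
	# read the serialized MODS record back from the temp file written by the factory
	temp_filename = job_package['MODS']
	with open(temp_filename, 'r') as fhand:
		MODS_elem = etree.fromstring(fhand.read())
	# ... ingest MODS_elem for job_package['PID'] into Fedora here ...
	# clean up the temp file once consumed
	os.remove(temp_filename)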
Example #4
def pruneSolr_factory(job_package):

	# set new task_name, for the worker below
	job_package['custom_task_name'] = 'pruneSolr_worker'

	# get solr results obj
	solr_total = solr_handle.search(q='*:*', fl='id').total_results

	# set estimated tasks
	print "Antipcating",solr_total,"tasks...."	
	redisHandles.r_job_handle.set("job_%s_est_count" % (job_package['job_num']), solr_total)

	# iterate through solr objects
	# variables 
	start = 0
	rows = 100
	step = 1
	while start < solr_total:

		# perform search
		solr_result = solr_handle.search(q='*:*', fl='id', rows=rows, start=start)

		# iterate
		for doc in solr_result.documents:
			doc_id = doc['id']
			print "pruneSolr checking %s" % (doc_id)

			job_package['doc_id'] = doc_id
			
			# fire task via custom_loop_taskWrapper			
			result = actions.actions.custom_loop_taskWrapper.apply_async(kwargs={'job_package':job_package}, queue=job_package['username'])
			task_id = result.id

			# Set handle in Redis
			redisHandles.r_job_handle.set("%s" % (task_id), "FIRED,%s" % (doc_id))
				
			# update incrementer for total assigned
			jobs.jobUpdateAssignedCount(job_package['job_num'])

			# bump step
			step += 1

		# bump start
		start += rows
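
The job_<num>_est_count key written above, paired with the assigned counter that jobs.jobUpdateAssignedCount bumps, suggests a simple way to poll job progress out of Redis. A sketch under stated assumptions: the assigned counter's key name (job_<num>_assign_count) is a guess, since only the est_count key appears in this code.

def job_progress(job_num):
	# est_count is written by the factories above; the assigned-count key name is hypothetical
	est = int(redisHandles.r_job_handle.get("job_%s_est_count" % job_num) or 0)
	assigned = int(redisHandles.r_job_handle.get("job_%s_assign_count" % job_num) or 0)
	return assigned, est

assigned, est = job_progress(42)
print "assigned %s of %s estimated tasks" % (assigned, est)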
Example #5
def celeryTaskFactory(**kwargs):
	
	# create job_package
	job_package = kwargs['job_package']	

	# get username
	username = job_package['username']

	# get job_num
	job_num = kwargs['job_num']

	# get and iterate through user selectedPIDs			
	PIDlist = kwargs['PIDlist']	

	# task function for taskWrapper		
	job_package['task_name'] = kwargs['task_name']
	
	#set step counter
	step = 1		
		
	# iterate through PIDs 	
	for PID in PIDlist:
		time.sleep(.001)
					
		job_package['step'] = step	
		job_package['PID'] = PID

		# fire off async task via taskWrapper		
		result = taskWrapper.delay(job_package)		
		task_id = result.id

		# Set handle in Redis
		redisHandles.r_job_handle.set("{task_id}".format(task_id=task_id), "FIRED,{PID}".format(PID=PID))
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_num)

		# bump step
		step += 1		

	print "Finished assigning tasks"
Example #6
def obj_loop_taskFactory(**kwargs):

	# create job_package
	job_package = kwargs['job_package']

	# username
	username = job_package['username']

	# get job_num
	job_num = kwargs['job_num']

	# get and iterate through user selectedPIDs			
	PIDlist = kwargs['PIDlist']	

	# task function for obj_loop_taskWrapper		
	job_package['task_name'] = kwargs['task_name']
	
	#set step counter
	step = 1		
		
	# iterate through PIDs 	
	for PID in PIDlist:
		time.sleep(.001)
					
		job_package['step'] = step	
		job_package['PID'] = PID

		# fire off async task via obj_loop_taskWrapper		
		result = obj_loop_taskWrapper.apply_async(kwargs={'job_package':job_package,}, queue=username)
		task_id = result.id

		# Set handle in Redis
		redisHandles.r_job_handle.set("%s" % (task_id), "FIRED,%s" % (PID))
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_num)

		# bump step
		step += 1		

	print "Finished assigning tasks"
Example #7
def MODSimport_factory(job_package):

	print "FIRING MODSimport_factory"

	# get form data
	form_data = job_package['form_data']	

	# set new task_name, for the worker below
	job_package['custom_task_name'] = 'MODSimport_worker'	

	# get mods:collection 
	if 'upload_data' in job_package:
		with open(job_package['upload_data'], 'r') as fhand:		
			MODS_collection = fhand.read()
	elif form_data['content'] != '':
		MODS_collection = form_data['content'] 

	# shunt each MODS record to list
	MODS_collection = unicode(MODS_collection, 'utf-8')
	XMLroot = etree.fromstring(MODS_collection.encode('utf-8'))	
	MODS_list = XMLroot.findall('{http://www.loc.gov/mods/v3}mods')
	print MODS_list

	# update job info
	redisHandles.r_job_handle.set("job_%s_est_count" % (job_package['job_num']), len(MODS_list))

	# ingest in Fedora
	step = 1
	for MODS_elem in MODS_list:

		print "Loading %s / %s" % (step, len(MODS_list))

		# read <mods:extension><PID>, pass this as PID
		PID_search = MODS_elem.findall("{http://www.loc.gov/mods/v3}extension/PID")
		if len(PID_search) == 0:
			print "Could not find PID, skipping"
			# bump step
			step += 1
			continue
		else:
			PID = PID_search[0].text
			print "FOUND THE PID:",PID

		# write MODS to temp file
		temp_filename = "/tmp/Ouroboros/"+str(uuid.uuid4())+".xml"
		fhand = open(temp_filename,'w')
		fhand.write(etree.tostring(MODS_elem))
		fhand.close()

		job_package['PID'] = PID
		job_package['step'] = step		
		job_package['MODS'] = temp_filename
		
		# fire task via custom_loop_taskWrapper			
		result = actions.actions.custom_loop_taskWrapper.apply_async(kwargs={'job_package':job_package}, queue=job_package['username'])
		task_id = result.id

		# Set handle in Redis
		redisHandles.r_job_handle.set("%s" % (task_id), "FIRED,%s" % (PID))
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_package['job_num'])

		# bump step
		step += 1

	print "Finished firing MODS import workers"
Example #8
def bagIngest_factory(job_package):

	# get form data
	form_data = job_package['form_data']
	if "ingest_type" in form_data:
		ingest_type = form_data['ingest_type']
	else:
		return "No ingest type selected, aborting."

	# set new task_name, for the worker below
	job_package['custom_task_name'] = 'bagIngest_worker'

	
	# Single Ingest Type
	#################################################################
	if ingest_type == "single":

		payload_location = job_package['form_data']['payload_location']
		# create working directory in workspace
		bag_dir = payloadExtractor(payload_location,ingest_type)
		job_package['bag_dir'] = bag_dir

		# set estimated tasks
		print "Antipcating 1 tasks...."	
		redisHandles.r_job_handle.set("job_%s_est_count" % (job_package['job_num']), 1)

		step = 1

		result = actions.actions.custom_loop_taskWrapper.apply_async(kwargs={'job_package':job_package}, queue=job_package['username'])
		task_id = result.id

		# Set handle in Redis
		redisHandles.r_job_handle.set("%s" % (task_id), "FIRED,%s" % (bag_dir))
			
		# update incrementer for total assigned
		jobs.jobUpdateAssignedCount(job_package['job_num'])

		# bump step
		step += 1

		print "Finished firing ingest workers"


	# Multiple Ingest Type
	#################################################################
	if ingest_type == "multiple":

		# extract payload_location
		payload_location = job_package['form_data']['payload_location']

		# create working directory in workspace
		bag_dir = payloadExtractor(payload_location,ingest_type)
		if bag_dir == False:
			print "Aborting"
			return False
		print "Bag dir at this point:",bag_dir

		# all items inside bag_dir	
		bag_dirs_tuple = os.walk(bag_dir).next()

		# dirs
		if len(bag_dirs_tuple[1]) > 0:
			print "Directories detected, continuing"

		# archives
		if len(bag_dirs_tuple[2]) > 0:
			print "Archive files detected. Extracting and continuing."
			for archive in bag_dirs_tuple[2]:
				archive_filename = bag_dirs_tuple[0] + "/" + archive
				print archive_filename

				# extract to temp dir
				tar_handle = tarfile.open(archive_filename)
				tar_handle.extractall(path=bag_dirs_tuple[0])
				os.system("rm %s" % (archive_filename))

			# finally, rewalk
			bag_dirs_tuple = os.walk(bag_dir).next()

		# dirs
		bag_dirs = [ bag_dirs_tuple[0] + "/" + bag_name for bag_name in bag_dirs_tuple[1] ]
		print bag_dirs

		# set estimated tasks
		print "Antipcating",len(bag_dirs),"tasks...."	
		redisHandles.r_job_handle.set("job_%s_est_count" % (job_package['job_num']), len(bag_dirs))

		# iterate through bags
		step = 1
		for bag_dir in bag_dirs:
			print "Ingesting %s / %s" % (step, len(bag_dirs))
			job_package['bag_dir'] = bag_dir
			
			# fire task via custom_loop_taskWrapper			
			result = actions.actions.custom_loop_taskWrapper.apply_async(kwargs={'job_package':job_package}, queue=job_package['username'])
			task_id = result.id

			# Set handle in Redis
			redisHandles.r_job_handle.set("%s" % (task_id), "FIRED,%s" % (bag_dir))
				
			# update incrementer for total assigned
			jobs.jobUpdateAssignedCount(job_package['job_num'])

			# bump step
			step += 1

		print "Finished firing ingest workers"