Python read_json示例，invenio.websubmit_functions.Websubmit_Helpers_hgf.read_json Python示例

示例#1

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def insert_inst_into_980(curdir,uid):
	"""Add the institutes of a submission as collections to hgf_980__.

	The institutes are read from hgf_9201_. If that field is missing (or
	only holds an empty JSON sequence) the institutes of the submitting
	user are used instead; for the admin (uid 1) nothing is added in that
	case. If the user is not assigned to any institute an error email is
	sent to the site admin and nothing is written.

	For non-admin users one more check is done after the institutes were
	added: if 980 contains neither one of the user's groups nor one of the
	VDB collections, the user's own institutes are appended as well.

	curdir -- submission directory holding the field files
	uid    -- id of the submitting user
	"""
	user_groups = get_usergroups(uid)
	is_admin = str(uid) == "1"

	# delete file in case of empty sequence!
	# TODO: this should not happen and has to be fixed in hgfInstitutes.js
	if check_field_exists(curdir,"hgf_9201_") and read_file(curdir,"hgf_9201_") == "[]":
		remove_file(curdir,"hgf_9201_")

	if check_field_exists(curdir,"hgf_9201_"):
		institutes = read_json(curdir,"hgf_9201_")
	else:
		# make sure that we have at least one institute
		if is_admin:
			return # do not add institutes for admin
		institutes = extract_user_institutes("0",user_groups)
		if institutes == []:
			# this should not happen!
			email_txt = "%s is not assigned to any institute. This email was generated from Covert_hgf_fields and function insert_inst_into_980" %get_recordid(curdir)
			send_email(CFG_SITE_ADMIN_EMAIL, CFG_SITE_ADMIN_EMAIL, "ERROR: no institute assigned", email_txt,header="",html_header="")
			return

	list_980 = read_json(curdir,"hgf_980__")
	# only institutes that are not yet part of 980
	new_entries = [{"a":inst["0"]} for inst in institutes if {"a":inst["0"]} not in list_980]
	if new_entries == []:
		return
	list_980 += new_entries

	# check if users institute is in 980, if not take it from user_info
	# (no adding of institutes into 980 for admin)
	if not is_admin:
		# python sets need hashable members, so compare the str() form of the dicts
		entries_as_text = set([str(entry) for entry in list_980])
		matching_groups = entries_as_text & set(user_groups)
		matching_vdb = set(["{'a': 'VDB'}", "{'a': 'VDBRELEVANT'}","{'a': 'VDBINPRINT'}"]) & entries_as_text
		# prevent submitting vdb irrelevant stuff for another institute
		if not matching_groups and not matching_vdb:
			list_980 += extract_user_institutes("a",user_groups)
	write_json(curdir,"hgf_980__",list_980)

示例#2

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def handle_0247(curdir):
	"""Handle persistent identifiers in 0247_.

	Every identifier is stored as a dict with $2 set to the source
	("Patent", "URN" or "doi") and $a set to the value. Identifiers are
	collected from
	  - the already existing hgf_0247_ structure,
	  - the plain input fields hgf_0247_a2pat, hgf_0247_a2urn, hgf_773__a,
	  - the $a subfields of the hgf_773__ JSON array (only if there is no
	    plain hgf_773__a).
	Double entries are removed and the result is written to hgf_0247_.
	If no identifier was found nothing is written at all.

	Afterwards the first DOI of the list is written to hgf_773__a, but
	only if neither hgf_773__a nor a hgf_773__ entry with $a exists.

	Note: if we get new PIDs that should be handled we need to adapt
	this function!
	"""
	if check_field_exists(curdir,"hgf_0247_"):
		pid_list = read_json(curdir,"hgf_0247_")
	else:
		pid_list = []

	# plain input fields and the source name that goes into $2
	plain_fields = (
		("hgf_0247_a2pat","Patent"),
		("hgf_0247_a2urn","URN"),
		("hgf_773__a","doi"),  # DOI is stored in both 773__ and 0247_
	)
	for fieldname, source in plain_fields:
		if check_field_exists(curdir,fieldname):
			pid_list.append({"2":source,"a":read_file(curdir,fieldname)})

	# doi can also be stored in 773__ as json array (more than one DOI possible)
	if not check_field_exists(curdir,"hgf_773__a") and check_field_exists(curdir,"hgf_773__"):
		for ent in read_json(curdir,"hgf_773__"):
			if "a" in ent:
				pid_list.append({"2":"doi","a":ent["a"]})

	if not pid_list: return

	# remove double entries, keep the original order
	unique_pids = []
	for pid in pid_list:
		if pid not in unique_pids:
			unique_pids.append(pid)
	write_json(curdir,"hgf_0247_",unique_pids)

	# Insert DOI into 773__a only in case no 773__a or 773 json array exist.
	# The field files carry the "hgf_" prefix like everywhere else; without
	# the prefix these checks could never match.
	if check_field_exists(curdir,"hgf_773__a"): return # we have a 773__a

	if check_field_exists(curdir,"hgf_773__"):
		for ent in read_json(curdir,"hgf_773__"):
			if "a" in ent: return # we have a 773__a

	for ent in unique_pids:
		if ent.get("2") != "doi": continue
		# write DOI in 773__a if we do not yet have one.
		# in case of multiple DOIs the first one wins <--> we cannot
		# write the 773__ because we do not know if other 773__* fields
		# have been entered and to which of them the DOI belongs. TODO!
		write_file(curdir,"hgf_773__a",ent["a"])
		return # stop here, otherwise the last DOI would overwrite the first

	return

示例#3

0

显示文件

文件： Prefill_hgf_fields.py 项目： pazera/hgf-invenio

def handle_institutes_for_modify(curdir):
	"""Prefill hgf_9201_ from the 980 collections if it holds no institutes.

	In case of a non-vdb entry we do not have 9201_, but the institutes
	are stored in 980 as collections. Every 980 $a that is found by a
	search in the 'Institutes' collection is written to hgf_9201_ as $0.
	Nothing is done if hgf_9201_ already contains entries.
	"""
	collections = read_json(curdir,"hgf_980__")
	if check_field_exists(curdir,"hgf_9201_"):
		stored_institutes = read_json(curdir,"hgf_9201_")
	else:
		stored_institutes = []
	if stored_institutes != []:
		return

	institutes = []
	for collection in collections:
		if "a" not in collection.keys():
			continue
		hits = perform_request_search(p='id:"' + collection["a"] +'"' ,cc='Institutes')
		# make sure institute exists
		if len(hits) >= 1:
			institutes.append({"0":collection["a"]})
	write_json(curdir,"hgf_9201_",institutes)

示例#4

0

显示文件

文件： Create_hgf_record_json.py 项目： pazera/hgf-invenio

  	def add_jsondict(self,fieldname):
		"""Add field structure (json structure) to self.data.

		The content of the field file `fieldname` is read with read_json
		from self.curdir and stored in self.data under the field name
		with "hgf_" removed:
		  - a list is stored unchanged,
		  - a str is evaluated; a resulting list is stored unchanged, any
		    other result is wrapped into a one-element list,
		  - for any other type nothing is stored.
		"""
		marcfield = fieldname.replace("hgf_","")
		jsondict = {marcfield:read_json(self.curdir,fieldname)}
		# jsondict holds exactly one key (marcfield), so this loop runs once
		for key in jsondict.keys():
			#  TODO Check if this code is really obsolete
			# Should be solved by our new initial ordering of the fields.
			#
			#if key in self.data.keys():
				#datafield already exists
			#	if isinstance(jsondict[key],list):
			#		if len(jsondict[key]) != 1: # repeatable field
			#			self.data[key] = jsondict[key]

			#			return # jsondict has repeatable field, but we cannot add a single field to repeatable fields --> we delete the single input field and override it with json-structure
			#		else:  #non repeatable
			#			if len(self.data[key]) != 1: #we have repeatable non-json field-->override with json structure
			#				self.data[key] = jsondict[key]
			#			else:
			#				jsondict[key][0].update(self.data[key][0]) #merge non-json with json structure
			#				self.data[key] = jsondict[key][0] # add all fields to dictionary
			#else:
				# NOTE: the deeper indentation is left over from the disabled "else:" above
				if isinstance(jsondict[key],list): self.data[key] = jsondict[key] #value is already a list
				# NOTE(review): only str is checked here; presumably a unicode value
				# (Python 2) ends up in the final "else" and is dropped -- confirm
				elif isinstance(jsondict[key],str): #value is string
					# SECURITY: eval executes arbitrary expressions taken from the
					# field file; ast.literal_eval would be the safe choice if only
					# literals are expected here -- TODO confirm and replace
					dta =  eval(jsondict[key]) #TODO: eval is evil!
					if isinstance(dta,list): self.data[key] = dta #value is a list in a string
					else: self.data[key] = [dta] #value is just a string
				else: return

示例#5

0

显示文件

文件： Prefill_hgf_fields.py 项目： pazera/hgf-invenio

def prefill_gender(curdir):
	"""Prefill the gender input field hgf_1001_g from the 1001_ technical field.

	Normally radio buttons and checkboxes are prefilled by
	Create_modify_interface_hgf, but the gender is part of the hgf_1001_
	technical field, which by default is neither a checkbox nor a radio
	button. For every author dict that carries $g, the value is written
	to hgf_1001_g, $g is removed from the dict and hgf_1001_ is rewritten
	as a list holding just that dict.
	"""
	if not check_field_exists(curdir,"hgf_1001_"): return # no author
	authors = read_json(curdir,"hgf_1001_")

	for author in authors: # there can be only one first author --> len(authors) == 1
		# "in" instead of dict.has_key(), which no longer exists in Python 3
		if "g" not in author: continue
		write_file(curdir,"hgf_1001_g",author["g"])

		# delete gender information from technical field, the info is now in 1001_g
		del author["g"]
		write_json(curdir,"hgf_1001_",[author])

示例#6

0

显示文件

文件： Prefill_hgf_fields.py 项目： pazera/hgf-invenio

def prefill_245(curdir):
	"""Prefill the 245__ subfields as simple input fields.

	Every subfield found in the hgf_245__ structure is written to its own
	file hgf_245__<subfield>. Afterwards hgf_245__ itself is rewritten so
	that it only holds the publication forms: one dict per comma
	separated name in $h, with $x set to the position of the name within
	its entry.
	"""
	jsonlist = read_json(curdir,"hgf_245__")
	pubforms = []
	for jsondict in jsonlist:
		if "h" in jsondict:
			# split multiple pubforms; handle_245 joins them with ", ", so
			# strip the names to not carry the blank into the value
			for i,pubform in enumerate(jsondict["h"].split(",")):
				pubforms.append({"h":pubform.strip(),"x":i})
		for key in jsondict.keys(): #we have some Input fields
			filename = "hgf_245__" + key
			# encode jsondict[key] to utf-8 to handle utf-chars.
			# TODO Tomek, why does it seem that we need this only here and
			# not for any other field?
			write_file(curdir,filename,jsondict[key].encode('utf-8'))
	write_json(curdir,"hgf_245__",pubforms)

示例#7

0

显示文件

文件： Prefill_hgf_fields.py 项目： pazera/hgf-invenio

def handle_url(curdir):
	"""Prefill the URL input field hgf_8564_u with one external URL.

	If the hgf_8564_ structure exists, hgf_8564_u is removed and the first
	$u that does not contain CFG_SITE_URL is written to hgf_8564_u; the
	structure hgf_8564_ is removed in that case. Finally a hgf_8564_u that
	contains CFG_SITE_URL (internal file) is removed.
	"""
	if check_field_exists(curdir,"hgf_8564_"):
		remove_file(curdir,"hgf_8564_u")

		url_entries = read_json(curdir,"hgf_8564_")
		#only one URL can be submitted/modified. bibedit urls die ;)
		for entry in url_entries:
			# "in" instead of dict.has_key(), which no longer exists in Python 3
			if "u" not in entry: continue # no subfield u detected
			if CFG_SITE_URL in entry["u"]: continue # skip internal file
			write_file(curdir,"hgf_8564_u",entry["u"])
			remove_file(curdir,"hgf_8564_")
			return # write only one URL
	if check_field_exists(curdir,"hgf_8564_u"):
		text = read_file(curdir,"hgf_8564_u")
		if CFG_SITE_URL in text: remove_file(curdir,"hgf_8564_u") #skip internal file

示例#8

0

显示文件

文件： Create_hgf_collection.py 项目： pazera/hgf-invenio

def filter_980(curdir):
	"""Return the 980 entries that are not handled by the submission itself.

	Reads hgf_980__ and drops
	  - entries without $a ("foreign" collections),
	  - entries equal to {"a": <name>} for a doctype or technical
	    collection name (workflow collections),
	  - entries whose $a is found by a search in the 'Institutes'
	    collection.
	Returns the remaining entries as a list, or [] if hgf_980__ does not
	exist.
	"""
	if not check_field_exists(curdir,"hgf_980__"): return []
	coll_list = read_json(curdir,"hgf_980__")
	# first element of each doctype tuple is used as the collection name
	doctype_collections = [tup[0] for tup in get_docid_docname_alldoctypes()]
	filter_collections = doctype_collections + get_technical_collections()
	json_filter_collections = [{"a":collection} for collection in filter_collections]
	old_collections = []
	for coll in coll_list:
		# "in" instead of dict.has_key(), which no longer exists in Python 3
		if "a" not in coll: continue #drop all "foreign" collections
		if coll in json_filter_collections: continue #drop workflow collections
		if len(perform_request_search(p='id:"' + coll["a"] +'"' ,cc='Institutes')) >0: continue #drop institutes
		old_collections.append(coll)
	return old_collections

示例#9

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def handle_980(curdir):
	"""Normalize the 980 collection list and write it back to hgf_980__.

	The entry of the original doctype is moved to the first position,
	double entries are removed, {"c":"DELETED"} is appended if the file
	hgf_delete exists, and the list is passed through set_restriction
	before it is written.
	"""
	list_980 = read_json(curdir,"hgf_980__")
	doctype = read_file(curdir,"doctype")

	# move original doctype to be first entry in 980 list, needed by
	# invenio (more likely a bug)
	doctype_position = list_980.index({"a":doctype})
	doctype_entry = list_980.pop(doctype_position)
	list_980.insert(0, doctype_entry)

	# remove double entries, keep the order
	unique_entries = []
	for entry in list_980:
		if entry not in unique_entries:
			unique_entries.append(entry)

	# user wants to delete this record
	if check_field_exists(curdir,"hgf_delete"):
		unique_entries.append({"c":"DELETED"})
	# set UNRESTRICTED if 980 collection appears in CFG_PUBLIC_COLLECTIONS
	unique_entries = set_restriction(unique_entries)
	write_json(curdir,"hgf_980__",unique_entries)

示例#10

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def handle_1001(curdir):
	"""Add the gender to the 1001_ technical field.

	1001_ contains a list of a single dict with the name of the first
	author. The gender from hgf_1001_g is applied to that one (we use
	gender only for phd-like entries) as subfield $g.

	NOTE: for further processing the newly written technical field
	must not contain a real JSON structure, but again only this list
	of a single hash.
	"""
	# no gender set
	if not check_field_exists(curdir,"hgf_1001_g"):
		return
	# no first author to attach the gender to
	if not check_field_exists(curdir,"hgf_1001_"):
		return

	authors = read_json(curdir,"hgf_1001_")
	# 100 contains only one person
	authors[0]["g"] = read_file(curdir,"hgf_1001_g")
	write_json(curdir,"hgf_1001_",authors)

示例#11

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def add_FFT(curdir):
	"""Write the FFT structure (hgf_FFT__) for the submitted fulltext.

	!!!move_files_to_storage, move files to done have to be deleted from websubmit function!!!
	If this function is used, the function stamp_uploaded_files should
	not be used in the websubmit anymore.

	The file named in hgf_file is expected in <curdir>/files/hgf_file/.
	Nothing is written if no file was submitted, if the file does not
	exist, or if there is no "rn" file. If the file is empty an email is
	sent to the submitter (SuE), or to the site admin if no submitter
	email exists, and nothing is written either.

	The FFT structure holds the file path ($a), the report number ($n)
	and a firerole restriction ($r) that allows the group 'STAFF' plus
	one group per institute of hgf_9201_.

	Returns None in every case.
	"""
	if not check_field_exists(curdir,"hgf_file"): return None # no file submitted
	fulltext_filename = read_file(curdir,"hgf_file")
	fulltext_path = os.path.join(curdir,"files","hgf_file",fulltext_filename)
	if not os.path.exists(fulltext_path): return None # no file named in hgf_file in files directory. something is wrong..
	if os.path.getsize(fulltext_path) == 0: #check file size
		#send email
		#first get the url record link
		if not check_field_exists(curdir,"SN"): return None # no recid-->something is wrong..
		recid = get_recordid(curdir)
		rec_url = CFG_SITE_URL + "/record/" + recid
		#create email
		email_txt = 'Dear Sir or Madam, \n\nAn empty file has been submitted for the record: %s\n\nProbably it was caused, because the file has been deleted from its directory before final submission into %s !!!\nIt is possible, that the record itself is not available, when this email was sent, but it should be processed within minutes. Once this is finished you may add the fulltext by accessing %s and using "modify record" link \n\n' %(rec_url,CFG_SITE_NAME,rec_url)
		email_subject = 'File submission incomplete!!!'
		#email check
		if check_field_exists(curdir,"SuE"): email_to = read_file(curdir,"SuE") # get email from submitter
		else: email_to = CFG_SITE_ADMIN_EMAIL # in case somehow no email of submitter exists, send email to admin

		send_email(CFG_SITE_ADMIN_EMAIL, email_to, email_subject, email_txt,copy_to_admin=CFG_WEBSUBMIT_COPY_MAILS_TO_ADMIN,header="",html_header="")
		return None #cancel file submission (the submitter has already been informed via email), the original submission will be processed.

	inst_dict_list = read_json(curdir,"hgf_9201_") #read in institutes
	# staff is always allowed. For every institute add its id followed by
	# the external auth info, as this builds the actual group name we need
	# to allow here.
	# TODO: multiple authentifications
	allowed_groups = ["'STAFF'"]
	for inst in inst_dict_list:
		allowed_groups.append("'" + inst["0"] + ' ['+CFG_EXTERNAL_AUTH_DEFAULT+']' + "'")
	restriction = "firerole: allow groups " + ",".join(allowed_groups)

	if not check_field_exists(curdir,"rn"): return
	rn = read_file(curdir,"rn")

	#fill subfields for FFT; the path was already built above
	fft_dict = {
		"a": fulltext_path,
		"n": rn,
		"r": restriction,
	}
	write_json(curdir,"hgf_FFT__",fft_dict)

示例#12

0

显示文件

文件： Prefill_hgf_fields.py 项目： pazera/hgf-invenio

def prefill_0247(curdir):
	"""Prefill the URN and Patent input fields from 0247_.

	The identifiers are taken from the hgf_0247_ JSON structure and from
	the plain subfield files hgf_0247_2 / hgf_0247_a. A value whose $2 is
	"Patent" is written to hgf_0247_a2pat, a value whose $2 is "URN" is
	written to hgf_0247_a2urn; other sources are ignored.
	"""
	if check_field_exists(curdir,"hgf_0247_"): #json structure
		for jsondict in read_json(curdir,"hgf_0247_"):
			# "in" instead of dict.has_key(), which no longer exists in Python 3
			if "2" not in jsondict: continue
			if jsondict["2"] == "Patent":
				write_file(curdir,"hgf_0247_a2pat",jsondict["a"])
			elif jsondict["2"] == "URN":
				write_file(curdir,"hgf_0247_a2urn",jsondict["a"])

	# plain subfields: both the source ($2) and the value ($a) are needed
	if check_field_exists(curdir,"hgf_0247_2") and check_field_exists(curdir,"hgf_0247_a"):
		subfield_2 = read_file(curdir,"hgf_0247_2")
		subfield_a = read_file(curdir,"hgf_0247_a")
		if subfield_2 == "Patent":
			write_file(curdir,"hgf_0247_a2pat",subfield_a)
		elif subfield_2 == "URN":
			write_file(curdir,"hgf_0247_a2urn",subfield_a)

示例#13

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def handle_245(curdir):
	"""Assemble the structured 245__ field and write it to hgf_245__.

	245__a: title and 245__f: date --> text input fields
	245__h: publication form --> autosuggest

	Title is special: we have non-structured input fields by default
	where $f (date) needs a special handling, plus we have a structured
	input field from the possible token input of media type (AC's
	request) => we have to assemble the structured field from its
	parts, and then re-store it as structure to a file, then the
	following workflow can transparently handle it as if it was passed
	by a structure in the first place.

	The written structure is a single dict with $h (all publication
	forms joined by ", "), $a and $f; subfields without a value are
	left out.
	"""
	date,title = "",""

	# Get unstructured stuff
	if check_field_exists(curdir,"hgf_245__a"): title = read_file(curdir,"hgf_245__a")
	if check_field_exists(curdir,"hgf_245__f"): date = read_file(curdir,"hgf_245__f")

	# Try to get what we have already in the structure as such
	entries = []
	if check_field_exists(curdir,"hgf_245__"):
		entries = read_json(curdir,"hgf_245__")
	# This function itself writes a single dict; accept that form as well
	# so that a second run does not fail on its own output.
	if isinstance(entries,dict):
		entries = [entries]

	# in case of multiple publication forms (???, should be non repeatable,
	# but just in case: create separated comma string)
	pubforms = [entry["h"] for entry in entries if 'h' in entry]
	pubstring = ", ".join(pubforms)

	jsondict = {}
	if pubstring != "": jsondict["h"] = pubstring
	# Add unstructured fields, if they exist
	if title != "": jsondict["a"] = title
	if date != "": jsondict["f"] = date
	# Write the full structured file
	write_json(curdir,"hgf_245__",jsondict)

示例#14

0

显示文件

文件： Convert_hgf_fields.py 项目： pazera/hgf-invenio

def insert_3367(curdir):
	"""Get doctype from authorities, create 3367_ and set our doctypes into 980.

	The list of document type dicts is built from the authority info of
	the submission's doctype (plus the report/journal/book/proc doctypes
	added by the add_*doctype helpers). If there is no authority info the
	list only holds {"m": doctype}. The $m of every list entry is added
	as collection to hgf_980__ unless it is already there, and the list
	itself is written to hgf_3367_.
	"""
	doctype = read_file(curdir,"doctype")
	access = read_file(curdir,"access") #submission id
	# Check if we have a refinement of the doctype. Usually we have
	# this only for talks which could be "Invited" or whatever. If so,
	# add it to 3367_$x. Usually, we do not have refinements.
	subtype = ''
	if check_field_exists(curdir,"hgf_3367_x"):
		subtype = read_file(curdir,"hgf_3367_x")

	doctype_dict = get_pubtype_info(doctype)
	if doctype_dict == {}:
		doctype_dict_list = [{"m":doctype}]   #no authority
	else:
		# Run over the dictionary and build up a list of all document types.
		# Note that not all document types have to be hgf-types, they may as
		# well stem from other vocabularies (DINI/DRIVER...)
		doctype_dict_list = handle_list_of_doctype_dict(doctype_dict,access,doctype,subtype)
		doctype_dict_list = add_reportdoctype(curdir,doctype, doctype_dict_list) #add intrep doctype
		doctype_dict_list = add_journaldoctype(curdir,doctype, doctype_dict_list) #add journal doctype
		doctype_dict_list = add_bookdoctype(curdir,doctype, doctype_dict_list) #add book doctype
		doctype_dict_list = add_procdoctype(curdir,doctype, doctype_dict_list) #add proc doctype

	if check_field_exists(curdir,"hgf_980__"):
		list_980 = read_json(curdir,"hgf_980__")
	else:
		list_980 = []

	# Only add our own doctypes to 980 (ie collections and not DINI/DRIVER)
	for doctype_entry in doctype_dict_list:
		try:
			collection = {"a":doctype_entry["m"]}
		except (KeyError, TypeError):
			# entry without $m (or not a dict at all): nothing to add
			continue
		if collection not in list_980:
			list_980.append(collection)
	write_json(curdir,"hgf_980__",list_980)
	write_json(curdir,"hgf_3367_",doctype_dict_list)