import json
import os
import re
import subprocess
import time

import requests

import admin


def get_global_ids(record_type):
    '''Writes the full list of IDs for a top-level record type to a text file.'''
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) + '?all_ids=true',
                           headers=values[1]).json()
    #.json() returns a list of integers, which must be converted to a string
    #before it can be written to the output file
    output.write(str(get_ids))

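#The `admin` helper module is not included in this file. Based on how it is
#called throughout, a minimal sketch of what admin.login() is assumed to
#return is below (a hypothetical stand-in, not the real helper): the backend
#URL and a headers dict, i.e. the (values[0], values[1]) pair used everywhere.
def _sketch_login(base_url, username, password):
    '''Hypothetical sketch of admin.login(): authenticates against the
    ArchivesSpace backend and returns (base_url, session_headers).'''
    session = requests.post(base_url + '/users/' + username + '/login',
                            params={'password': password}).json()['session']
    return base_url, {'X-ArchivesSpace-Session': session}
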
def delete_do_instance():
    '''Deletes digital object instances from the archival objects listed in a CSV.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        archival_object_uri = row[0]
        digital_object_uri = row[1]
        try:
            archival_object_json = requests.get(values[0] + archival_object_uri, headers=values[1]).json()
            #iterate over a copy of the instance list so removals don't skip items
            instance_list = list(archival_object_json['instances'])
            #an empty digital object URI in the CSV means "remove every digital
            #object instance"; otherwise only the matching instance is removed
            if digital_object_uri == '':
                for instance in instance_list:
                    if instance['instance_type'] == 'digital_object':
                        archival_object_json['instances'].remove(instance)
            else:
                for instance in instance_list:
                    if instance.get('digital_object') == {'ref': digital_object_uri}:
                        archival_object_json['instances'].remove(instance)
            #post the updated record once per archival object so the lock_version
            #stays valid even when more than one instance is removed
            archival_object_data = json.dumps(archival_object_json)
            archival_object_update = requests.post(values[0] + archival_object_uri, headers=values[1],
                                                   data=archival_object_data).json()
            admin.writetxt(txtfile, archival_object_update)
            print(archival_object_update)
        except Exception:
            txtfile.write('error, could not update ' + str(archival_object_uri))
            continue

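#Example input for delete_do_instance(), assuming the two-column CSV layout
#the function reads (URIs here are hypothetical):
#
#   /repositories/2/archival_objects/5678,/repositories/2/digital_objects/123
#   /repositories/2/archival_objects/5679,
#
#A row with an empty second column removes all digital object instances from
#that archival object; otherwise only the named digital object is removed.
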
def etv_ead():
    '''Exports, transforms, and validates EAD files: downloads each finding aid,
    runs it through the Yale Best Practices XSLT, then validates the output.'''
    values = admin.login()
    inputfile = admin.opentxtin()
    outputfile = admin.opentxt()
    dirpath = admin.setdirectory()
    print('Downloading EAD files to directory')
    for ead_uri in inputfile:
        #strip the trailing newline before building the request URL
        ead_uri = ead_uri.rstrip()
        print('Retrieving ' + ead_uri)
        get_ead = requests.get(values[0] + ead_uri + '.xml?include_unpublished=true',
                               headers=values[1], stream=True).text
        #finds URIs with 2-digit repo IDs
        if re.search(r'[0-9]', ead_uri[15]):
            outfile = admin.openxml(dirpath, ead_uri[39:])
        #others - assumes a 1-digit repo ID. How many institutions will have
        #more than 99 repositories?
        else:
            outfile = admin.openxml(dirpath, ead_uri[38:])
        outfile.write(str(get_ead).rstrip())
    '''the subprocess call cannot take the EAD from AS directly as input. First
    need to save each file, and then run the transformation over it'''
    print('Done!')
    print('Transforming EAD files to Yale Best Practices guidelines')
    filelist = os.listdir(dirpath)
    os.makedirs(dirpath + '/outfiles')
    for file in filelist:
        #finds all the EAD files in the working directory
        if file[-3:] == 'xml':
            #haven't changed the hard coding of the command or the xsl file yet
            subprocess.run([
                "java", "-cp", "/usr/local/Cellar/saxon/9.8.0.4/libexec/saxon9he.jar",
                "net.sf.saxon.Transform",
                "-s:" + dirpath + '/' + file,
                "-xsl:" + dirpath + "/transformations/yale.aspace_v112_to_yalebpgs.xsl",
                "-o:" + dirpath + '/outfiles/' + file[:-4] + "_out.xml"
            ])
    '''next we need to validate each output file against the EAD 2002 schema
    and the local Yale schematron'''
    print('Done!')
    print('Validating transformations against EAD 2002 and Schematron schemas')
    newfilelist = os.listdir(dirpath + '/outfiles')
    for outfile in newfilelist:
        #schematron validation
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            "-s", dirpath + "/transformations/yale.aspace.ead2002.sch",
            dirpath + '/outfiles/' + outfile
        ], stdout=outputfile, stderr=subprocess.PIPE, encoding='utf-8')
        #EAD 2002 schema validation
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            dirpath + '/outfiles/' + outfile
        ], stdout=outputfile, stderr=subprocess.PIPE, encoding='utf-8')
    print('All Done! Check outfile for validation report')

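#etv_ead() expects one EAD export URI per line in the input text file, e.g.
#(a hypothetical resource in repository 12):
#
#   /repositories/12/resource_descriptions/4241
#
#The [38:]/[39:] slices above pull the trailing ID out of exactly this URI
#pattern, so other formats will produce odd filenames.
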
def get_enumerations():
    '''Dumps all controlled value lists (enumerations) to a text file.'''
    values = admin.login()
    output = admin.opentxt()
    enumerations = requests.get(values[0] + '/config/enumerations?all_ids=true', headers=values[1]).json()
    json.dump(enumerations, output, indent=4, separators=(',', ':'))
    output.close()

def export_ead(repo_id, resource):
    '''Exports a single EAD file, including unpublished components.'''
    values = admin.login()
    output = admin.opentxt()
    get_ead = requests.get(values[0] + '/repositories/' + str(repo_id) + '/resource_descriptions/'
                           + str(resource) + '.xml?include_unpublished=true',
                           headers=values[1], stream=True).text
    output.write(str(get_ead))
    output.close()

def get_rids():
    '''Maps values from a text file back to resource IDs via a CSV lookup,
    writing EAD export URIs for repository 12.'''
    csvfile = admin.opencsv()
    txtinput = admin.opentxtin()
    txtoutput = admin.opentxt()
    new_dict = {}
    for row in csvfile:
        new_dict[row[0]] = row[1]
    for line in txtinput:
        line = line.rstrip()
        #takes the first key whose value matches the line; assumes values are unique
        resource_id = [k for k, v in new_dict.items() if line == v][0]
        txtoutput.write('/repositories/12/resource_descriptions/' + resource_id + '\n')

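#Example of the reverse lookup get_rids() performs (hypothetical data):
#
#   new_dict = {'4241': 'RU 1', '4242': 'RU 2'}   #resource ID -> value from the CSV
#   line = 'RU 2'
#   [k for k, v in new_dict.items() if line == v][0]   # -> '4242'
#
#If the same value appears under two IDs, only the first match is written.
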
def delete_records():
    '''Deletes the records listed in a CSV of URIs. The DELETE endpoint takes
    no request body, so the record does not need to be fetched first.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        try:
            delete = requests.delete(values[0] + record_uri, headers=values[1]).json()
            admin.writetxt(txtfile, delete)
            print(delete)
        except Exception:
            txtfile.write('error, could not delete ' + str(record_uri))
            continue

def get_global_json(record_type):
    '''Downloads the full JSON for every record of a top-level record type.'''
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) + '?all_ids=true', headers=values[1]).json()
    x = 0
    for i in get_ids:
        x = x + 1
        record = requests.get(values[0] + '/' + str(record_type) + '/' + str(i), headers=values[1]).json()
        json.dump(record, output, indent=4, separators=(',', ':'))
        print('Dumping ' + str(x) + ' of ' + str(len(get_ids)))
    output.close()

def create_singlepart_notes():
    '''Adds a singlepart note to each record listed in a CSV.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        note_text = row[1]
        note_type = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_note = {'jsonmodel_type': 'note_singlepart', 'content': [note_text], 'type': note_type}
        record_json['notes'].append(new_note)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        admin.writetxt(txtfile, record_update)
        print(record_update)

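#Example CSV row for create_singlepart_notes() (hypothetical URI; 'abstract'
#is one of the ArchivesSpace singlepart note types):
#
#   /repositories/2/resources/100,This collection is stored offsite.,abstract
#
#which appends: {'jsonmodel_type': 'note_singlepart',
#                'content': ['This collection is stored offsite.'],
#                'type': 'abstract'}
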
def delete_notes():
    '''Deletes notes with matching persistent IDs from the resources listed in a CSV.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        resource_uri = row[0]
        persistent_id = row[1]
        resource_json = requests.get(values[0] + resource_uri, headers=values[1]).json()
        #drop the matching note entirely; clearing it in place would leave an
        #empty dict behind in the notes list
        resource_json['notes'] = [note for note in resource_json['notes']
                                  if note.get('persistent_id') != persistent_id]
        resource_data = json.dumps(resource_json)
        resource_update = requests.post(values[0] + resource_uri, headers=values[1], data=resource_data).json()
        admin.writetxt(txtfile, resource_update)
        print(resource_update)

def create_multipart_notes():
    '''Adds a published multipart note to each record listed in a CSV.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        note_text = row[1]
        note_type = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_note = {'jsonmodel_type': 'note_multipart',
                    'subnotes': [{'content': note_text, 'jsonmodel_type': 'note_text', 'publish': True}],
                    'type': note_type, 'publish': True}
        try:
            record_json['notes'].append(new_note)
        #records without a notes list (or with notes stored under another key)
        #raise a KeyError here
        except KeyError:
            print('note did not append')
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        admin.writetxt(txtfile, record_update)
        print(record_update)

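#Example CSV row for create_multipart_notes() (hypothetical URI;
#'accessrestrict' is one of the ArchivesSpace multipart note types). Unlike
#singlepart notes, the text lives in a note_text subnote rather than in a
#top-level 'content' list:
#
#   /repositories/2/archival_objects/200,Restricted until 2030.,accessrestrict
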
def update_subrecord_components(subrecord, *field):
    '''Updates one or more fields in the first instance of a subrecord (dates,
    extents, etc.) for each record listed in a CSV with a 'uri' column.'''
    starttime = time.time()
    values = admin.login()
    csvfile = admin.opencsvdict()
    txtout = admin.opentxt()
    x = 0
    y = 0
    for row in csvfile:
        x = x + 1
        record_uri = row['uri']
        try:
            record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
            for f in field:
                for key, value in row.items():
                    if f == key:
                        if key == 'repository':
                            record_json[subrecord][0][key] = {'ref': value}
                        else:
                            #this needs the position because it doesn't update every one, just the first.
                            #all the more reason we need IDs for this stuff.
                            record_json[subrecord][0][key] = value
            record_data = json.dumps(record_json)
            record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
            print(record_update)
            if 'status' in record_update.keys():
                y = y + 1
            if 'error' in record_update.keys():
                txtout.write('error: could not update ' + str(record_uri) + '\n')
                txtout.write('log: ' + str(record_update.get('error')) + '\n')
        except Exception:
            txtout.write('could not locate object ' + str(record_uri) + '\n')
            continue
    elapsedtime = time.time() - starttime
    m, s = divmod(elapsedtime, 60)
    h, m = divmod(m, 60)
    txtout.write('Total time elapsed: ')
    txtout.write('%d:%02d:%02d' % (h, m, s))
    txtout.write('\n' + 'Total update attempts: ' + str(x) + '\n')
    #add count of successful updates to log file
    txtout.write('Records updated successfully: ' + str(y) + '\n')
    txtout.close()

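#Example call: update the begin and end dates on the first date subrecord of
#each record. Assumes a CSV with the header row 'uri,begin,end', since the
#column names must match the field arguments:
#
#   update_subrecord_components('dates', 'begin', 'end')
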
def replace_note_by_id():
    '''Replaces a note's content in ArchivesSpace using a persistent ID.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        resource_uri = row[0]
        persistent_id = row[1]
        note_text = row[2]
        resource_json = requests.get(values[0] + resource_uri, headers=values[1]).json()
        for note in resource_json['notes']:
            if note['jsonmodel_type'] == 'note_multipart':
                if note['persistent_id'] == persistent_id:
                    #only replaces the first subnote's content
                    note['subnotes'][0]['content'] = note_text
            elif note['jsonmodel_type'] == 'note_singlepart':
                if note['persistent_id'] == persistent_id:
                    note['content'] = [note_text]
        resource_data = json.dumps(resource_json)
        resource_update = requests.post(values[0] + resource_uri, headers=values[1], data=resource_data).json()
        admin.writetxt(txtfile, resource_update)
        print(resource_update)

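#Example CSV row for replace_note_by_id() (hypothetical URI and persistent ID;
#a note's persistent_id is visible in its JSON representation):
#
#   /repositories/2/resources/100,8c6ac84f6543c2d2bd0f9a8fcf2a42f8,New note text here.
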
def create_rights_restrictions():
    '''Adds a machine-actionable rights restriction to notes matched by persistent ID.'''
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        persistent_id = row[1]
        begin = row[2]
        end = row[3]
        local_type = row[4]
        note_type = row[5]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_restriction = {'begin': begin, 'end': end, 'local_access_restriction_type': [local_type],
                           'restriction_note_type': note_type, 'jsonmodel_type': 'rights_restriction'}
        for note in record_json['notes']:
            #.get() avoids a KeyError on note types without a persistent_id
            if note.get('persistent_id') == persistent_id:
                note['rights_restriction'] = new_restriction
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        admin.writetxt(txtfile, record_update)
        print(record_update)

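#Example CSV row for create_rights_restrictions() (hypothetical values; the
#local restriction type must be a value from the local_access_restriction_type
#enumeration in the target ArchivesSpace instance):
#
#   /repositories/2/archival_objects/300,8c6ac84f6543c2d2bd0f9a8fcf2a42f8,2020-01-01,2030-01-01,RestrictedSpecColl,accessrestrict
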
def update_subrecord_component(subrecord, component):
    '''Updates a single field in every instance of a subrecord for each record
    listed in a CSV.'''
    starttime = time.time()
    values = admin.login()
    csvfile = admin.opencsv()
    txtout = admin.opentxt()
    x = 0
    y = 0
    for row in csvfile:
        x = x + 1
        resource_uri = row[0]
        updated_text = row[1]
        try:
            resource_json = requests.get(values[0] + resource_uri, headers=values[1]).json()
            #this doesn't need the position because it will update every
            #subrecord instance, not just the first...careful!
            for subrec in resource_json[subrecord]:
                subrec[component] = updated_text
            resource_data = json.dumps(resource_json)
            resource_update = requests.post(values[0] + resource_uri, headers=values[1], data=resource_data).json()
            print(resource_update)
            if 'status' in resource_update.keys():
                y = y + 1
            if 'error' in resource_update.keys():
                txtout.write('error: could not update ' + str(resource_uri) + '\n')
                txtout.write('log: ' + str(resource_update.get('error')) + '\n')
        except Exception:
            txtout.write('could not locate object ' + str(resource_uri) + '\n')
            continue
    elapsedtime = time.time() - starttime
    m, s = divmod(elapsedtime, 60)
    h, m = divmod(m, 60)
    txtout.write('Total time elapsed: ')
    txtout.write('%d:%02d:%02d' % (h, m, s))
    txtout.write('\n' + 'Total update attempts: ' + str(x) + '\n')
    #add count of successful updates to log file
    txtout.write('Records updated successfully: ' + str(y) + '\n')
    txtout.close()

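#Example call: set the same expression on every date subrecord of each record
#in a two-column CSV of uri,new_value rows:
#
#   update_subrecord_component('dates', 'expression')
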