from copy import deepcopy

import structlog  # assumption: a structured logger matching the keyword-style calls below
from asnake.client import ASnakeClient
from tqdm import tqdm

logger = structlog.get_logger()


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()
    # map each canonical extent_type to the variant spellings it should replace
    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }
    res_records = client.get('repositories/2/resources',
                             params={'all_ids': True}).json()
    found_records = set()
    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index]['extent_type'] = key
                        break
            # only POST if an extent_type actually changed
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record, response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record, response=response)
        except KeyError:
            # resource has no extents; skip it (the original used a bare except)
            pass
    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
                    types.append(child_type)
                    unknown_count += 1
        for indicator in indicators:
            try:
                del instance["sub_container"][indicator]
            except Exception as e:
                print("There was an error when deleting the unknown indicator: {}".format(e))
                print(instance)
        for child_type in types:
            try:
                del instance["sub_container"][child_type]
            except Exception as e:
                print("There was an error when deleting the unknown child/grandchild type: {}".format(e))
                print(instance)
        if indicators and types:
            update_ao = client.post(node.uri, json=ao_json).json()
            print(update_ao)
    else:
        indicators = []
        types = []
        for key, value in instance.items():
            if "indicator_" in key:
                if "unknown container" == value:
                    child_type = "type_" + str(key[-1])
                    indicators.append(key)
                    types.append(child_type)
                    unknown_count += 1
        for indicator in indicators:
            try:
                del instance[indicator]
            except Exception as e:
                # inferred: the source fragment cuts off after this except clause;
                # the sibling branch above reports the error the same way
                print("There was an error when deleting the unknown indicator: {}".format(e))
import csv

from asnake.client import ASnakeClient

# print instructions
print(
    'This script takes viafCorporateResults.csv and posts the organizations as corporate_entities to ArchivesSpace.'
)
input('Press Enter to continue...')

# This is where we connect to ArchivesSpace.
client = ASnakeClient()
client.authorize()  # login, using default values

targetFile = 'viafCorporateResults.csv'
reader = csv.DictReader(open(targetFile))  # a distinct name so the csv module isn't shadowed
orgList = []
for row in reader:
    orgRecord = {}
    # changed this since ASpace doesn't come with 'viaf' as an option for source out of the box.
    source = 'naf' if row.get('lc') is not None else 'local'
    orgRecord['names'] = [{
        'primary_name': row['result'],
        'sort_name': row['result'],
        'source': source,
        'authority_id': row['lc']
    }]
    post = client.post('/agents/corporate_entities', json=orgRecord).json()
    print(post, '\n')

print("Check out your instance of ArchivesSpace to see what's new.")
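# For reference, viafCorporateResults.csv needs at least "result" and "lc"
# columns (inferred from the row lookups above); a hypothetical example row:
#
#   result,lc
#   Acme Corporation,n2001012345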
# assumes os, sys, json, defaultPath, and SubmissionInformationPackage are
# defined/imported at module level in the full script
def main(ID, path=None, accession=None):
    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath + " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID + " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")
    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()
        logging.setup_logging(stream=sys.stdout, level='INFO')
        call = ("repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", "
                "\"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}")
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " + accession)
        else:
            accessionObject = json.loads(accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject["id_1"]
                if accession != accessionID:
                    raise Exception("ERROR: Could not find exact accession with ID: " + accession)
            if "content_description" not in accessionObject.keys():
                raise Exception("ERROR: no content description in " + accessionID +
                                " accession, " + accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " + accessionID +
                                " accession, " + accessionObject["uri"])
            else:
                resource = client.get(accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]
                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")
                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject["condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject["general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info["Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog("/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx")
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject["disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)
        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)
        updateAccession = client.post(accessionObject["uri"], json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)
    return SIP
import json, csv, runtime
from asnake.client import ASnakeClient

# print instructions
print(
    'This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.'
)
input('Press Enter to connect to ArchivesSpace and post those barcodes...')

# This is where we connect to ArchivesSpace. See authenticate.py
client = ASnakeClient()
client.authorize()

# open csv and generate dict
reader = csv.DictReader(open('barcodes.csv'))

# GET each top_container listed in the csv, set its real barcode, and POST it back
print('The following barcodes have been updated in ArchivesSpace:')
for row in reader:
    uri = row['uri']
    container = client.get(uri).json()
    container['barcode'] = row['real']
    post = client.post(uri, json=container).json()
    print(post)
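# For reference, barcodes.csv needs at least "uri" and "real" columns
# (inferred from the row lookups above); a hypothetical example row:
#
#   uri,real
#   /repositories/2/top_containers/123,31234000123456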
    title = str(row['Title'])
    identifier = str(row['Identifier'])
    urlRaw = str(row['url'])
    # Have to clean the URL
    url = urlRaw.replace("/api/items", "/items/show")
    # print(title+identifier+url)
    # file_version = {
    #     "jsonmodel_type":"digital_object",
    #     "file_uri":url,
    #     "is_representative":False,
    #     "caption":title+" ["+url+"]",
    #     "use_statement":"Image-Service",
    #     "publish":True}
    data = {
        "jsonmodel_type": "digital_object",
        "file_versions": [{
            "jsonmodel_type": "file_version",
            "file_uri": url,
            "is_representative": False,
            "caption": title + " [" + url + "]",
            "use_statement": "Image-Service",
            "publish": True}],
        "digital_object_id": identifier,
        "title": title}
    # pass the dict with json= so the client serializes it, rather than
    # sending a pre-dumped string as a positional argument
    r = client.post('repositories/3/digital_objects', json=data)
    print(json.dumps(data))
idList = []
for aID in accessions:
    entry = client.get("repositories/2/accessions/" + str(aID)).json()
    if entry["accession_date"].split("-")[0] == "2019":  # note: the year is hardcoded here
        idList.append(int(entry["id_1"]))
newID = max(idList) + 1
# zero-pad the new ID to three digits
if len(str(newID)) == 1:
    newID = "00" + str(newID)
elif len(str(newID)) == 2:
    newID = "0" + str(newID)
print("Creating new accession " + year + "-" + str(newID) + "...")
newAccession["id_0"] = year
newAccession["id_1"] = str(newID)
accessionID = year + "-" + str(newID)
updateAccession = client.post("repositories/2/accessions", json=newAccession)
if updateAccession.status_code == 200:
    print("\tSuccessfully updated accession " + newAccession["id_0"] + "-" + newAccession["id_1"])
else:
    print("\tERROR " + str(updateAccession.status_code) + "! Failed to update accession: " +
          newAccession["id_0"] + "-" + newAccession["id_1"])

print("Waiting for new accession to be indexed...")
time.sleep(120)

print("Ingesting records at " + path)
arrangementSwitch = False
arrangementsPath = "/media/Masters/Archives/arrangements"
arrangements = os.path.join(arrangementsPath, args.ID.upper())
if os.path.isdir(arrangements):
    arrangementSwitch = True
# Remove normalized end date
try:
    if date['end']:
        print('normalized end date exists; will be removed')
        date.pop('end')
        update = True
except KeyError:
    print('no normalized end date')
print()
print('Dates will be updated to:')
print(date)
print()

if update:
    update = client.post(collectJson['uri'], json=collectJson)
    print(update.status_code)

# Print all dates
num = collectJson['uri'].replace('/repositories/2/resources/', '')
collection = repo.resources(num)
for date in collection.dates:
    date_type = ''
    date_expression = ''
    date_begin = ''
    date_end = ''
    try:
        date_type = date.date_type
        date_expression = date.expression
        date_begin = date.begin
        # inferred completion: the fragment cuts off mid-try; the empty-string
        # defaults above suggest the end date is read the same way
        date_end = date.end
    except AttributeError:
        pass
import json
import time

from asnake.client import ASnakeClient

startTime = time.time()

client = ASnakeClient()
client.authorize()

records = json.load(open('all_AOs.json'))
for record in records:
    post = client.post('/repositories/3/archival_objects', json=record).json()
    print(post)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))
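# For reference, each record in all_AOs.json must already be a complete
# archival_object JSONModel dict; a minimal hypothetical example (an archival
# object needs at least a title or date, a level, and a resource link):
#
#   {
#     "jsonmodel_type": "archival_object",
#     "title": "Correspondence",
#     "level": "file",
#     "resource": {"ref": "/repositories/3/resources/1"}
#   }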
from asnake.client import ASnakeClient
from secrets import *

# The credential-prompt and client-setup lines were partly redacted ("******")
# in the source; this reconstruction assumes the usual pattern of prompting for
# a username and password and passing them to ASnakeClient along with a baseurl
# from the secrets module.
as_username = input("ArchivesSpace username: ")
as_password = input("ArchivesSpace password: ")
client = ASnakeClient(baseurl=as_api, username=as_username, password=as_password)
client.authorize()

repos = client.get("repositories").json()
print("Publishing Digital Objects...", end='', flush=True)
for repo in repos:
    digital_object = {}
    dig_objs_per_repo = []
    repo_digital_objects = client.get(repo["uri"] + "/digital_objects?all_ids=true").json()
    for dig_obj_id in repo_digital_objects:
        object_request = repo["uri"] + "/digital_objects/" + str(dig_obj_id) + "/publish"
        try:
            client.post(object_request)
        except Exception as e:
            print("Error found when requesting id: " + str(e) + "\n" + object_request)
        # digital_object[dig_obj_id] = client.get(repo["uri"] + "/digital_objects/" + str(dig_obj_id)).json()
        # dig_objs_per_repo.append(digital_object)
    # repo_dig_objects[repo['name']] = dig_objs_per_repo
print("Done")
# print(json_data)
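# Note: "secrets" above is a local settings module, not the stdlib secrets
# library. A minimal sketch of what it presumably defines, based on the names
# these scripts pull from it (values are placeholders):
#
#   as_api = 'https://aspace.example.edu/api'  # ArchivesSpace backend URL
#   as_un = 'admin'                            # ArchivesSpace username
#   as_pw = 'password'                         # ArchivesSpace password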
})

# Can't use get_paged because this endpoint returns raw Solr
results = client.get(endpoint, params={
    'aq': advanced_query
}).json()["response"]["docs"]

# populate top_containers with the ids of each top_container in search results
top_containers = []
for value in gen_dict_extract('id', results):
    top_containers.append(value)

# GET each top_container listed in top_containers and add to records
records = []
for top_container in top_containers:
    output = client.get(top_container).json()
    records.append(output)

# have user enter container profile id
profile_id = input(
    'Enter container profile ID (I am going to enter 9. You can select another value, as long as that ID is in your instance of ArchivesSpace.): '
)

# Add container profile to records and post
print('The following records have been updated in ArchivesSpace:')
for record in records:
    record['container_profile'] = {'ref': '/container_profiles/' + profile_id}
    uri = record['uri']
    post = client.post(uri, json=record).json()
    print(post)
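# gen_dict_extract is not defined in this fragment; a common implementation
# (an assumption, matching how it is called above) recursively yields every
# value stored under a given key anywhere in a nested dict/list structure:
def gen_dict_extract(key, var):
    if hasattr(var, 'items'):
        for k, v in var.items():
            if k == key:
                yield v
            if isinstance(v, dict):
                for result in gen_dict_extract(key, v):
                    yield result
            elif isinstance(v, list):
                for d in v:
                    for result in gen_dict_extract(key, d):
                        yield result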
from openpyxl import load_workbook
from secrets import *
from asnake.aspace import ASpace
from asnake.client import ASnakeClient

aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw)
client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()

resource_id = input("Enter ASpace URI: ")
excel_filepath = input("Enter full filepath for spreadsheet: ")
wb = load_workbook(excel_filepath)
sheet = wb.active
for row in sheet.iter_rows(min_row=2, values_only=True):
    archival_object = client.get(row[0]).json()
    print("Converting: {} > {} ... ".format(
        archival_object["instances"][0]["sub_container"]["indicator_2"], row[5]),
        end='', flush=True)
    archival_object["instances"][0]["sub_container"]["indicator_2"] = str(row[5])
    update_ao = client.post(row[0], json=archival_object)
    print("Done. Response: {}".format(update_ao.json()))
# Note: if this script is re-run to create new digital objects, or updated to
# include additional rows (e.g. after changing the digital object IDs, or if
# you add another row that references a preceding archival object), then only
# the most-recently created digital objects will be linked and the
# previously-created digital objects will be orphaned records.

# Parse csv and update ArchivesSpace.
for row in csv_dict:
    file_uri = row['fileuri']
    title = row['title']
    digital_object_id = row['objectid']
    ref_ID = row['refID']

    # Construct new digital object from csv
    doRecord = {'title': title, 'digital_object_id': digital_object_id, 'publish': False}
    doRecord['file_versions'] = [{'file_uri': file_uri, 'publish': False, 'file_format_name': 'jpeg'}]
    doPost = client.post('/repositories/2/digital_objects', json=doRecord).json()
    print(doPost)

    # Store uri of newly posted digital objects because we'll need it
    uri = doPost['uri']

    # Find AOs based on refIDs supplied in csv
    AOQuery = json.dumps({
        "query": {
            "jsonmodel_type": "boolean_query",
            "op": "AND",
            "subqueries": [
                {
                    "jsonmodel_type": "field_query",
                    "field": "primary_type",
                    "value": "archival_object",
                    "literal": True
                },
                # (the query continues past the end of this fragment)
                      repository_processing_note)
                raw_input = input("Proceed anyway? y/n?")
                if raw_input == "n":
                    break
                else:
                    pass
        except:
            pass

    # Test for unpublished nodes; only proceed with publish and export if there are none
    if not has_unpublished_nodes():
        # If the finding aid status is already set to publish, just export the EAD
        if "published" in publish_status:
            # Set publish to 'true' for all levels, components, notes, etc.
            # Same as clicking "publish all" in the staff UI
            resource_publish_all = aspace_client.post(resource_uri + '/publish')
            print(eadID + ' | resource and all children set to published')
            # Pause for 10 seconds so the publish action takes effect...maybe?
            print(eadID + " | Pausing for 10 seconds to index publish action...")
            time.sleep(10.0)
            print(eadID + " | Exporting EAD file...")
            ead = aspace_client.get(id_uri_string + '.xml' + export_options).text
            f = io.open(destination + eadID + '.xml', mode='w', encoding='utf-8')
            f.write(ead)
            f.close()
            print(eadID + '.xml' + ' | ' + resource_id + ' | ' + aspace_id_short + ' | ' +
print("Logged into: " + repo['name']) print("Getting list of resources...") resources_list = aspace_client.get( "repositories/2/resources?all_ids=true").json() resources_sorted = sorted(resources_list, reverse=True) for resource in resources_sorted: try: resource_json = aspace_client.get("repositories/2/resources/" + str(resource)).json() #print (resource_json) resource_uri = resource_json['uri'] print("updating: " + resource_uri) resource_update = aspace_client.post(resource_json['uri'], json=resource_json) response = resource_update.json() logger.info('update_resource', action='updating-resource', data={ 'resource_uri': resource_uri, 'response': response }) print(response['status']) except: print("ERROR") pass print("All Done with Resources...") print("Getting list of archival_objects...") ao_list = aspace_client.get(
import json, time, runtime
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print("The following container profiles have been added to ArchivesSpace:")
jsonfile = json.load(open("containerProfiles.json"))
for container_profile in jsonfile:
    post = client.post("/container_profiles", json=container_profile).json()
    print(post)

print("You've just completed your first API POST. Congratulations!")
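# For reference, each entry in containerProfiles.json should be a
# container_profile JSONModel dict; a hypothetical minimal example
# (field names follow the ArchivesSpace container_profile schema,
# values are illustrative):
#
#   {
#     "jsonmodel_type": "container_profile",
#     "name": "Letter-size document box",
#     "dimension_units": "inches",
#     "extent_dimension": "width",
#     "depth": "15",
#     "height": "10",
#     "width": "5"
#   }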
        except:
            properDates = ''
        viafid = response['viafID']
    except:
        label = ''
        viafid = ''

    if viafid != '':
        links = json.loads(
            requests.get('http://viaf.org/viaf/' + viafid + '/justlinks.json').text)
        viafid = 'http://viaf.org/viaf/' + viafid

        toPost = {
            "lock_version": lockVersion,
            "names": [{
                "primary_name": properPrimary.strip(),
                "rest_of_name": properSecondary.strip(),
                "dates": properDates.strip(),
                "sort_name": properName,
                "authorized": True,
                "is_display_name": True,
                "source": "viaf",
                "rules": "dacs",
                "name_order": "inverted",
                "jsonmodel_type": "name_person",
                "authority_id": viafid
            }]
        }
        post = client.post(uri, json=toPost).json()
        print(post)
          str(len(children_of_fake_wrapper_component_dict)) +
          " Direct Children of Wrapper Component")
    row.append(str(len(children_of_fake_wrapper_component_dict)) + " children of Wrapper AO")
    print("Reposting children as direct children of resource...")

    # Capture any error responses and save to output CSV report
    success_ao_keys_list = []
    fail_ao_keys_list = []
    for key, value in children_of_fake_wrapper_component_dict.items():
        # Repost AOs as direct children of the Resource (to flatten the tree) based on dict info
        update_resource = aspace_client.post(resource_object.uri + '/accept_children',
                                             params={
                                                 'children[]': key,
                                                 'position': value
                                             }).json()
        try:
            if update_resource['status'] == "Updated":
                print("Status: " + update_resource['status'])
                success_ao_keys_list.append(key)
            else:
                print("NOT UPDATED")
                fail_ao_keys_list.append(key)
        except:
            print("ERROR REPOSTING AO")
            fail_ao_keys_list.append(key)
    if len(fail_ao_keys_list) != 0:
        row.append("ERROR Posting AOs: " + str(fail_ao_keys_list))
        print('Digital object already exists.')
    else:
        doPost['digital_object_id'] = doid
        doPost['title'] = 'Web crawl of ' + crawl['original']
        doPost['dates'] = [{
            'expression': crawl['timestamp'],
            'date_type': 'single',
            'label': 'creation'
        }]
        doPost['file_versions'] = [{
            'file_uri': crawl['filename'],
            'checksum': crawl['digest'],
            'checksum_method': 'sha-1'
        }]
    if doPost != {}:
        post = client.post('/repositories/2/digital_objects', json=doPost).json()
        print(post)
        doItem = {}
        doItem['digital_object'] = {'ref': post['uri']}
        doItem['instance_type'] = 'digital_object'
        newInstances.append(doItem)

aoGet = client.get(uri).json()
existingInstances = aoGet['instances']
existingInstances = existingInstances + newInstances
aoGet['instances'] = existingInstances
aoUpdate = client.post(uri, json=aoGet).json()
print('The following archival objects have been updated in ArchivesSpace:')
print(aoUpdate)

# TO DO LATER
# Parse dates for ArchivesSpace record, push to AOs above
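# For reference, each "crawl" record used above is expected to provide at
# least these keys (inferred from the lookups); a hypothetical example:
#
#   crawl = {
#       'original': 'https://www.example.edu/',       # seed URL that was crawled
#       'timestamp': '2019-06-01',                    # capture date expression
#       'filename': 'example-crawl.warc.gz',          # stored file URI
#       'digest': 'da39a3ee5e6b4b0d3255bfef95601890afd80709'  # sha-1 checksum
#   }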