Example #1
import re

from asnake.client import ASnakeClient
# (The relation/wrapper helpers used below -- JSONModelRelation,
#  ResourceRelation, AgentRelation, UserRelation, wrap_json_object,
#  IndexedSet, ASnakeBadReturnCode -- are imported from asnake's own
#  modules in the original source; their imports are omitted in this excerpt.)


class ASpace():
    # this happens when you call ASpace()
    def __init__(self, **config):
        # Connect to ASpace using .archivessnake.yml
        self.client = ASnakeClient(**config)
        self.client.authorize()
        # search (not match): the /version endpoint returns e.g. "ArchivesSpace (v2.8.1)"
        m = re.search(r'\(v?(.+?)\)', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.'''
        if not attr.startswith('_'):
            return JSONModelRelation("/{}".format(attr), params={"all_ids": True}, client = self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        return ResourceRelation({}, self.client)


    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    @property
    def users(self):
        '''returns a UserRelation.'''
        return UserRelation("/users", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

Note: while the route will return differently depending on how many records are returned,
this method deliberately flattens that out - it will _always_ return a generator, even if only
one record is found.'''
        params = {"eid": external_id}
        if record_types: params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            return []
        elif res.status_code == 300: # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client) for uri in IndexedSet(res.json()))
        elif res.status_code == 200: # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode("by-external-id call returned '{}'".format(res.status_code))

    def from_uri(self, uri):
        '''returns a JSONModelObject representing the URI passed in'''
        return wrap_json_object(self.client.get(uri).json(), self.client)
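# A minimal usage sketch for the class above (the external ID value is
# illustrative):
#
#   aspace = ASpace()  # reads connection details from .archivessnake.yml
#   print(aspace.version)
#   for match in aspace.by_external_id('legacy-0001'):
#       print(match.uri)
#   obj = aspace.from_uri('/repositories/2/resources/1')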
Example #2
from copy import deepcopy

from asnake.client import ASnakeClient
from tqdm import tqdm
import asnake.logging as logging

# (logger name is an assumption; the original excerpt configures it elsewhere)
logger = logging.get_logger("extent_updates")


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set([])

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index][
                            'extent_type'] = key
                        break
                    else:
                        pass
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
            else:
                pass
        except KeyError:
            # resource record has no extents; nothing to change
            pass

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
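# (Not part of the original excerpt: standard entry point, assuming the
# script is run directly.)
if __name__ == '__main__':
    main()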
Example #3
def test_authorize():
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    toke = client.authorize()
    assert isinstance(toke, str)
    assert len(toke) == 64
    assert set(toke) <= set('0123456789abcdef')
    assert client.session.headers['X-ArchivesSpace-Session'] == toke
    # Try to get admin user info, should only work if we're authed as admin
    assert client.get('users/1').status_code == 200
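# The default config mentioned above is read from ~/.archivessnake.yml;
# a minimal example (values are placeholders, not documented defaults):
#
#   baseurl: http://localhost:8089
#   username: admin
#   password: admin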
Example #4
from asnake.client import ASnakeClient
from secrets import *

# as_repo = input("Enter ArchivesSpace repository #: ")
client = ASnakeClient(baseurl=as_api, username=as_username, password=as_password)
client.authorize()

# Dublin Core XML
# do_dc = client.get("/repositories/2/digital_objects/dublin_core/2679.xml")
# # print(do.content)
# with open("do_dublincore.xml", "wb") as file:
#     file.write(do_dc.content)
#     file.close()

# Dublin Core FMT
do_dc_fmt = client.get("/repositories/2/digital_objects/dublin_core/2679.fmt/metadata")
print(do_dc_fmt.content)
# with open("do_dc_fmt.json", "wb") as file:
#     file.write(do_dc_fmt.content)
#     file.close()

# METS XML
# mets_xml = client.get("/repositories/2/digital_objects/mets/2679.xml", params={"dmd": "PKG410P"})
# # print(mets_xml.content)
# with open("do_mets.xml", "wb") as file:
#     file.write(mets_xml.content)
#     file.close()
#
# # METS FMT
mets_fmt = client.get('/repositories/2/digital_objects/mets/2697.fmt/metadata')
print(mets_fmt.content)
Example #5
from asnake.aspace import ASpace
from asnake.client import ASnakeClient
import asnake.logging as logging
logging.setup_logging(level='DEBUG',
                      filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

rl_repo = aspace.repositories(2)

#input is output of SQL query above
input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_resources_csv = input("Path to CSV Output: ")


#Test if more than one direct child of Resource Object
#Why? Don't want to assign all children to Resource if there are other sibling Components of the fake wrapper component
def only_one_direct_child_of_resource_test(resource_object):
    print("Checking for multiple top-level AOs...")
    resource_object = rl_repo.resources(row[0])
Example #6
#!/usr/bin/python3
#~/anaconda3/bin/python
from asnake.client import ASnakeClient
import asnake.logging as logging

logging.setup_logging(filename="date_update.log", filemode="a")
logger = logging.get_logger("date_updating")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[backendURL]",
                             username="******",
                             password="******")
aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
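        # (The excerpt ends inside the try block; the completion below is a
        # sketch only. The status check mirrors the pattern used in the
        # other examples on this page.)
        if resource_update.status_code == 200:
            logger.info("updated", record=resource_uri)
        else:
            logger.error("update failed", record=resource_uri,
                         status=resource_update.status_code)
    except Exception as e:
        logger.error("error updating resource", record=resource, error=str(e))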
Example #7
import io
import csv
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

aspace = ASpace(baseurl="[ASPACE BACKEND URL]",
                      username="******",
                      password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE BACKEND URL]",
                      username="******",
                      password="******")

aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

destination = 'C:/users/nh48/desktop/as_exports_temp/'

input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_records_csv = input("Path to CSV Output: ")


#If Resource finding aid status = published, export the EAD for the resource, save to folder
def if_published_export_EAD(resource_uri):
    resource_json = aspace_client.get(resource_uri).json()
    published_status = resource_json['finding_aid_status']
    id_uri_string = resource_json['uri'].replace("resources","resource_descriptions")
    #set EAD export options: number components and include DAOs
    export_options = '?include_daos=true&numbered_cs=true&include_unpublished=false'
Example #8
import datetime
import json

from asnake.client import ASnakeClient
from tqdm import tqdm


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()

    data_list = []

    print('Compiling resource records from API...')

    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        try:
            extents = res_record['extents']
            for x in extents:
                if x['extent_type'] == 'megabytes':
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except (KeyError, ValueError):
            # record lacks extents or id_0, or an extent number isn't numeric
            pass

    linear_ms = 0
    linear_ua = 0
    gb_ms = 0
    gb_ua = 0
    cubic_ms = 0
    cubic_ua = 0

    print('Analyzing extents in resource data...')

    for entry in data_list:
        try:
            if entry['id'].startswith(
                    'MS') and entry['units'] in catalog['linear']:
                linear_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['linear']:
                linear_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['gb']:
                gb_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['gb']:
                gb_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['cubic']:
                cubic_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['cubic']:
                cubic_ua += float(entry['amount'])
            else:
                pass
        except:
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            if (exception.lower()).strip() == 'continue':
                pass
            elif (exception.lower()).strip() == 'stop':
                quit()

    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }

    print('Generating report as JSON...')

    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
Example #9
#read existing exported collection data
collectionData = []
collectionFile = open(os.path.join(staticData, "collections.csv"), "r", encoding='utf-8')
for line in csv.reader(collectionFile, delimiter="|"):
    collectionData.append(line)
collectionFile.close()

#read existing exported subject data
subjectData = []
subjectFile = open(os.path.join(staticData, "subjects.csv"), "r", encoding='utf-8')
for line in csv.reader(subjectFile, delimiter="|"):
    subjectData.append(line)
subjectFile.close()

print ("\tQuerying ArchivesSpace...")
modifiedList = client.get("repositories/2/resources?all_ids=true&modified_since=" + str(startTime)).json()
if len(modifiedList) > 0:
    print ("\tFound " + str(len(modifiedList)) + " new records!")
    print ("\tArchivesSpace URIs: " + str(modifiedList))
else:
    print ("\tFound no new records.")
for colID in modifiedList:
    collection = client.get("repositories/2/resources/" + str(colID)).json()
    if collection["publish"] != True: 
        print ("\t\tSkipping " + collection["title"] + " because it is unpublished")
    else:
        print ("\t\tExporting " + collection["title"] + " " + "(" + collection["id_0"] + ")")
    
        checkDACS = {}
        try:
            normalName = collection["finding_aid_title"]
Example #10
primary_types = '/(resource|archival_object|accession|digital_object)/'
results_file = 'term_audit_results.csv'

# Repo list can either be a command line argument or prompted
if len(sys.argv) == 2:
    repos = sys.argv[1]
elif len(sys.argv) < 2:
    repos = input('Enter repository number (e.g., 1): ')
else:
    sys.exit('Run script again with valid repo number(s)')

if repos:
    repos = re.split(r'\D+', repos)
    repos = list(filter(None, repos))
else:
    repos = client.get('repositories').json()

# Get list of search terms from CSV file
with open('search_terms.csv', 'r', newline='') as term_file:
    reader = csv.DictReader(term_file)
    search_terms = list(reader)

# Loop through ASpace repositories
for repo in repos:
    headers = []
    rows = []

    if isinstance(repo, str):  # For prompted or arg value repo lists
        repo_no = repo
        response = client.get(f'repositories/{repo_no}')
Example #11
from openpyxl import load_workbook
from secrets import *
from asnake.aspace import ASpace
from asnake.client import ASnakeClient

aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw)
client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()

resource_id = input("Enter ASpace URI: ")
excel_filepath = input("Enter full filepath for spreadsheet: ")
wb = load_workbook(excel_filepath)
sheet = wb.active
for row in sheet.iter_rows(min_row=2, values_only=True):
    archival_object = client.get(row[0]).json()
    print("Converting: {} > {} ... ".format(
        archival_object["instances"][0]["sub_container"]["indicator_2"],
        row[5]),
          end='',
          flush=True)
    archival_object["instances"][0]["sub_container"]["indicator_2"] = str(
        row[5])
    update_ao = client.post(row[0], json=archival_object)
    print("Done. Response: {}".format(update_ao.json()))
Example #12
        for key, value in instance["sub_container"].items():
            if "indicator_" in key:
                if "unknown container" == value:
                    print(archival_object)
                    top_container = client.get(
                        instance["sub_container"]["top_container"]
                        ["ref"]).json()
                    write_csv("a", archival_object["uri"],
                              archival_object["title"],
                              archival_object["dates"][0]["expression"],
                              "Box {}".format(top_container["indicator"]),
                              instance["sub_container"]["type_2"], value)
                    cont_count += 1
        return cont_count


client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()
ua97_090_uri = "/repositories/5/resources/5071"

write_csv("w", "URI", "Title", "Date", "Box Number", "Child Type",
          "Child Indicator")
resource_info = client.get(ua97_090_uri).json()
res_tree = client.get(resource_info["tree"]["ref"]).json()
if "children" in res_tree.keys():
    print(resource_info["title"])
    unknowns = check_children(res_tree["children"], 0)
    print("Total unknown containers = {}".format(str(unknowns)))
    print("\n")
    print("-" * 100)
Example #13
            ]
        }
    })

    # it can take some time for the posted DOs to be indexed, so...
    showed_up_yet = None
    while not showed_up_yet:
        aoSearch = list(client.get_paged('search', params={"filter": AOQuery}))
        if any(aoSearch):
            showed_up_yet = True
        else:
            print("DOs not present in search yet, waiting a second for the indexer to catch up")
            sleep(1)
    linked_ao_uri = aoSearch[0]['uri']
    # Get and store archival objects from above search
    aoRecord = client.get(linked_ao_uri).json()
    # Find existing instances and create new ones from new digital objects
    existing_instance = aoRecord['instances'][0]
    new_instance = {"instance_type": "digital_object", "digital_object": {"ref": uri}}

    # Merge old and new instances
    aoRecord['instances'] = [existing_instance, new_instance]
    # Post updated archival objects
    aoPost = client.post(linked_ao_uri, json=aoRecord).json()
    print(aoPost)
    # Save select information to new csv file
    f.writerow([title, digital_object_id, uri, linked_ao_uri])
Example #14
advanced_query = json.dumps({
    "filter_term": {
        "field": "collection_uri_u_sstr",
        "value": "/repositories/2/resources/" + resource_id,
        "jsonmodel_type":"field_query"}
})
results = list(client.get_paged(endpoint, params={'aq': advanced_query}))

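# (gen_dict_extract is defined earlier in the original script and not shown
# in this excerpt; the helper below is a typical implementation of that
# recursive key-extraction generator, included here as an assumption.)
def gen_dict_extract(key, var):
    if hasattr(var, 'items'):
        for k, v in var.items():
            if k == key:
                yield v
            else:
                yield from gen_dict_extract(key, v)
    elif isinstance(var, list):
        for item in var:
            yield from gen_dict_extract(key, item)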
# populate top_containers with the ids of each top_container in search results
top_containers = []
for value in gen_dict_extract('id', results):
    top_containers.append(value)

# GET each top_container listed in top_containers and add to records
records = []
for top_container in top_containers:
    output = client.get(top_container).json()
    records.append(output)

# have user enter container profile id
profile_id = input('Enter container profile ID (I am going to enter 9. You can select another value, as long that ID is in your instance of ArchivesSpace.): ')

# Add container profile to records and post
print ('The following records have been updated in ArchivesSpace:')
for record in records:
    record['container_profile'] = {'ref': '/container_profiles/' + profile_id}
    jsonLine = record
    uri = record['uri']
    post = client.post(uri, json=jsonLine).json()
    print(post)
Example #15
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

#BaseURL should point to backend (e.g. https://archivesspace.duke.edu/api or https://localhost:8089)
aspace = ASpace(baseurl="[baseurl]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[baseurl]",
                             username="******",
                             password="******")
aspace_client.authorize()

#set target repo by id
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

# Prompt for input, a comma separated list of EADID values (e.g. johndoepapers, janedoepapers, johnandjanedoepapers)
eadids = input("List of EADIDs:  ")
# Split comma separated list
eadids_list = eadids.split(",")

destination = 'C:/users/nh48/desktop/as_exports_temp/'

#set EAD export options: number components and include DAOs
export_options = '?include_daos=true&numbered_cs=true&include_unpublished=false'


#Check if any unpublished nodes in the resource tree and if so, do not publish and export
def has_unpublished_nodes():
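    # (The excerpt ends at this def; the body below is an illustrative
    # sketch only. It assumes the tree JSON fetched elsewhere in the script
    # via aspace_client.get(resource_json["tree"]["ref"]).json(), in which
    # each node carries "publish" and "children" keys; `tree_node` is a
    # hypothetical module-level variable holding that JSON.)
    def node_unpublished(node):
        if not node.get("publish", True):
            return True
        return any(node_unpublished(child) for child in node.get("children", []))
    return node_unpublished(tree_node)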
Example #16
                newAccession["access_restrictions"] = True
                newAccession["restrictions_apply"] = True
            elif accessionKey == "use_restrictions_note":
                newAccession["use_restrictions"] = True
                newAccession["restrictions_apply"] = True
newAccession["accession_date"] = str(datetime.today().strftime('%Y-%m-%d'))
year = newAccession["accession_date"].split("-")[0]

#login to ASpace
client = ASnakeClient()
client.authorize()
logging.setup_logging(stream=sys.stdout, level='INFO')

# Get related resources
call = "repositories/2/search?type[]=resource&page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + str(args.ID) + "\", \"jsonmodel_type\":\"field_query\"}}"
resourceResponse = client.get(call).json()
if len(resourceResponse["results"]) < 1:
    raise Exception("ERROR: Could not find resource with ID: " + str(args.ID))
else:
    newAccession["related_resources"] = [{"ref": resourceResponse["results"][0]["uri"]}]

# get accession id
print ("Getting correct accession ID...")
yearBegin = datetime.strptime(year + "-01-01 00:00", "%Y-%m-%d %H:%M")
yearBeginPosix = str(time.mktime(yearBegin.timetuple())).split(".")[0]
yearCall = "repositories/2/accessions?all_ids=true&modified_since=" + yearBeginPosix
accessions = client.get(yearCall).json()

idList = []
for aID in accessions:
    entry = client.get("repositories/2/accessions/" + str(aID)).json()
Example #17
#!/usr/bin/env python

from asnake.client import ASnakeClient
import pandas as pd
import datetime
from tqdm import tqdm

client = ASnakeClient(baseurl='XXX', username='******', password='******')
client.authorize()

accession_records = client.get('repositories/2/accessions',
                               params={
                                   'all_ids': True
                               }).json()

unit_column = []
extent_column = []
collection_no_column = []
created_column = []

start = datetime.datetime.strptime('2017-07-01', '%Y-%m-%d')
end = datetime.datetime.strptime('2018-07-31', '%Y-%m-%d')

for record in tqdm(accession_records):
    accession_uri = client.get('repositories/2/accessions/' +
                               str(record)).json()
    create_date = accession_uri['create_time'][0:10]
    date_parsed = datetime.datetime.strptime(create_date, '%Y-%m-%d')
    if start <= date_parsed <= end:
        coll_num = accession_uri['id_0']
        extents = accession_uri['extents']
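        # (The excerpt ends here; a hedged sketch of the likely
        # continuation, filling the report columns declared above.)
        for extent in extents:
            unit_column.append(extent['extent_type'])
            extent_column.append(extent['number'])
            collection_no_column.append(coll_num)
            created_column.append(create_date)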
Example #18
# have user enter resource id
resource_id = input('Enter resource ID (in this case, you should enter 1): ')

# search for top_containers linked to entered resource id
endpoint = '/repositories/2/top_containers/search'
advanced_query = json.dumps({
    "filter_term": {
        "field": "collection_uri_u_sstr",
        "value": "/repositories/2/resources/" + resource_id,
        "jsonmodel_type": "field_query"
    }
})
# Can't use get_paged because this endpoint returns raw Solr
results = client.get(endpoint, params={
    'aq': advanced_query
}).json()["response"]["docs"]

# populate top_containers with the ids of each top_container in search results
top_containers = []
for value in gen_dict_extract('id', results):
    top_containers.append(value)

# GET each top_container listed in top_containers and add to records
records = []
for top_container in top_containers:
    output = client.get(top_container).json()
    records.append(output)

# have user enter container profile id
profile_id = input(
Example #19
                                        parentList = []
                                        itemData = arclight.json()
                                        for parent in itemData["response"][
                                                "document"]["parent_ssm"][1:]:
                                            parentList.append(
                                                parent.split("_")[1])
                                        parents = "|".join(parentList)
                                    else:
                                        #for new objects not yet indexed in ArcLight
                                        if tree is None:
                                            from asnake.client import ASnakeClient
                                            client = ASnakeClient()
                                            client.authorize()

                                            ref = client.get(
                                                "repositories/2/find_by_id/archival_objects?ref_id[]="
                                                + refID).json()
                                            item = client.get(
                                                ref["archival_objects"][0]
                                                ["ref"]).json()
                                            resource = client.get(
                                                item["resource"]
                                                ["ref"]).json()
                                            tree = client.get(resource["tree"]
                                                              ["ref"]).json()
                                        else:
                                            ref = client.get(
                                                "repositories/2/find_by_id/archival_objects?ref_id[]="
                                                + refID).json()

                                        objURI = ref["archival_objects"][0][
Example #20
from asnake.client import ASnakeClient
from secrets import *

as_username = input("ArchivesSpace username: ")
as_password = input("ArchivesSpace password: ")
# as_api comes from the local secrets module imported above
client = ASnakeClient(baseurl=as_api, username=as_username, password=as_password)
client.authorize()
repos = client.get("repositories").json()
print("Publishing Digital Objects...", end='', flush=True)
for repo in repos:
    digital_object = {}
    dig_objs_per_repo = []
    repo_digital_objects = client.get(repo["uri"] + "/digital_objects?all_ids=true").json()
    for dig_obj_id in repo_digital_objects:
        object_request = repo["uri"] + "/digital_objects/" + str(dig_obj_id) + "/publish"
        try:
            client.post(object_request)
        except Exception as e:
            print("Error found when requesting id: " + str(e) + "\n" + object_request)
    #     digital_object[dig_obj_id] = client.get(repo["uri"] + "/digital_objects/" + str(dig_obj_id)).json()
    #     dig_objs_per_repo.append(digital_object)
    # repo_dig_objects[repo['name']] = dig_objs_per_repo
print("Done")
# print(json_data)
Example #21
import json, csv, runtime
from asnake.client import ASnakeClient
# print instructions
print(
    'This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.'
)
input('Press Enter to connect to ArchivesSpace and post those barcodes...')

# This is where we connect to ArchivesSpace.  See authenticate.py
client = ASnakeClient()
client.authorize()

# open csv and generate dict
reader = csv.DictReader(open('barcodes.csv'))
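# barcodes.csv is expected to have (at least) these columns, inferred from
# the loop below; the sample row is illustrative:
#
#   uri,real
#   /repositories/2/top_containers/123,31234000123456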

# POST an updated barcode for each top_container row in the CSV
print('The following barcodes have been updated in ArchivesSpace:')
for row in reader:
    uri = row['uri']
    container = client.get(uri).json()
    container['barcode'] = row['real']
    post = client.post(uri, json=container).json()
    print(post)
Example #22
#Set time interval here (to get accessions created in last 24 hours)
current_time_minus_day = current_time - timedelta(hours=24)

#Convert time to ISO format for comparing to create dates in ASpace
current_time_minus_day = current_time_minus_day.isoformat()

print("Getting all Accessions created since: " + str(current_time_minus_day))

#ASNAKE
#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ArchivesSpace backend API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set Target Repository
repo = aspace_client.get("repositories/2").json()
print(repo['name'])

accessions_list = aspace_client.get(
    "repositories/2/accessions?all_ids=true").json()
#Sort accessions by ASpace ID (e.g. repositories/2/accessions/1234)
accessions_sorted = sorted(accessions_list)

#Just get the last 20 created accessions in ASpace (based on IDs, not create time)
#assuming we won't create more than 20 accessions in time interval between cron jobs
#get last 20 accessions in list (most recent accession will be last in list)
last_20_accessions = accessions_sorted[-20:]

print("Examining last 20 accessions created in ASpace...")

for accession in last_20_accessions:
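    # (The excerpt ends at the loop header; this body is a sketch only,
    # comparing each accession's create_time against the threshold computed
    # above. create_time is an ISO-style timestamp string, so a plain string
    # comparison is assumed to be close enough here.)
    accession_json = aspace_client.get(
        "repositories/2/accessions/" + str(accession)).json()
    if accession_json["create_time"] >= str(current_time_minus_day):
        print("New accession: " + accession_json["uri"])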
Example #23
def main(ID, path=None, accession=None):

    if path == None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession == None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession == None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP
Example #24
import csv
import json
import logging
import re
import traceback

from asnake.client import ASnakeClient


class ASTemps():
    def __init__(self):
        self.client = ASnakeClient()
        self.auth = self.client.authorize()
        self.all_schemas = self.get_schemas()
        #a list of all enumerations
        #COULD ALSO DO /config/enumerations/names/:enum_name
        self.all_enums = self.get_dynamic_enums()
        #gets the list of schema names
        self.schema_list = [key for key in self.all_schemas.keys()]
        #gets the type list
        self.type_list = list(
            set([
                k for value in self.all_schemas.values()
                for k, v in value.items()
            ]))
        self.jsonmodel_pattern = re.compile(
            r'(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')

    def get_schemas(self):
        schemas = self.client.get('/schemas').json()
        return (schemas)

    def get_schema(self, schema):
        schema = self.client.get('/schemas/' + schema).json()
        return (schema)

    def get_dynamic_enums(self):
        enums = self.client.get('/config/enumerations').json()
        return (enums)

    def parse_jsonmodel(self, obj_value):
        #reg ex to capture all jsonmodel references in schema
        #jsonmodel = re.compile('(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
        logging.debug('starting jsonmodel')
        parsed_json = None  # default when obj_value doesn't match the pattern
        if self.jsonmodel_pattern.match(obj_value):
            logging.debug('match with ' + str(obj_value))
            #gets the name of the schema
            stripped_string = obj_value[obj_value.find("(") +
                                        1:obj_value.find(")")][1:]
            if stripped_string != 'repository':
                logging.debug('Getting schema for: ' + stripped_string)
                jsonmodel_schema = self.all_schemas[stripped_string]
                #wondering if this is where the problem is??? I know this works in some cases
                if 'uri' in obj_value:
                    logging.debug('uri in obj_value')
                    parsed_json = {'ref': jsonmodel_schema['uri']}
                    logging.debug(str(parsed_json))
                #LOL this also gets digital objects
                if 'object' in obj_value:
                    if 'digital_object' not in obj_value:
                        logging.debug('object in obj_value')
                        #workaround for testing - infinite recursion - but only fixes part of it...
                        if stripped_string == 'note_outline_level':
                            parsed_json = None
                        else:
                            #THIS IS BROKEN!!!! INFINITE RECURSION
                            logging.debug("obj_value " + str(obj_value))
                            logging.debug('running parse_schema on ' +
                                          str(obj_value))
                            parsed_json = self.parse_schema(
                                stripped_string, jsonmodel_schema)
            #saves lots of memory, likely will not change.
            if stripped_string == 'repository':
                parsed_json = {'ref': '/repositories/:repo_id'}
        return parsed_json

    #still more to do with the other ref properties
    def parse_refs(self, schema_name, obj_name, obj_value):
        logging.debug('starting parse_refs on ' + str(schema_name) + ' ' +
                      str(obj_name))
        #go through the properties of the refs
        if 'properties' in obj_value:
            logging.debug('properties in ' + str(obj_value))
            if 'ref' in obj_value['properties']:
                logging.debug('ref in properties')
                if type(obj_value['properties']['ref']['type']) is list:
                    logging.debug('Type of ref is list')
                    logging.debug("obj_value['properties']['ref']['type']: " +
                                  str(obj_value['properties']['ref']['type']))
                    ref_list = []
                    for ref in obj_value['properties']['ref']['type']:
                        logging.debug('Looping through ref list')
                        logging.debug(obj_value['properties']['ref']['type'])
                        logging.debug(ref['type'])
                        #FIX THIS
                        parsed_ref = self.parse_jsonmodel(ref['type'])
                        logging.debug('parsed ref ' + str(parsed_ref))
                        ref_list.append(parsed_ref)
                    logging.debug('ref_list: ' + str(ref_list))
                    return ref_list
                else:
                    logging.debug('Type of ref is not list')
                    if self.jsonmodel_pattern.match(
                            obj_value['properties']['ref']['type']):
                        logging.debug(
                            'RE match ' +
                            str(obj_value['properties']['ref']['type']))
                        logging.debug('calling parse_jsonmodel')
                        parsed_ref = self.parse_jsonmodel(
                            obj_value['properties']['ref']['type'])
                        return parsed_ref
        else:
            logging.debug('properties not in ' + str(obj_name) +
                          'value dictionary')
            logging.debug(str(obj_value['ref']['type']))
            if self.jsonmodel_pattern.match(obj_value['ref']['type']):
                logging.debug(
                    str(obj_value['ref']['type']) +
                    ' matches jsonmodel pattern')
                logging.debug('Calling parse_jsonmodel on ' +
                              str(obj_value['ref']['type']))
                parsed_ref = self.parse_jsonmodel(obj_value['ref']['type'])
                return parsed_ref

    def parse_enums(self, enum_name):
        enum_list = []
        for enum in self.all_enums:
            if enum['name'] == enum_name:
                for ev in enum['enumeration_values']:
                    enum_list.append(ev['value'])
        return enum_list

    def parse_schema(self, schema_name, schema_def):
        try:
            logging.debug("Working on schema: " + str(schema_name))
            template_dict = {}
            #Fixes infinite recursion for now
            exclusions = [
                'collection_management', 'rights_statement',
                'rights_statement_act', 'note_rights_statement',
                'note_rights_statement_act', 'children', 'deaccessions',
                '_inherited', 'rights_statements', 'external_id'
            ]
            for prop_name, prop_value in schema_def['properties'].items():
                logging.debug("Working on prop: " + str(prop_name))
                if schema_name in exclusions:
                    print(schema_name + ' in exclusion list')
                    continue
                elif prop_name in exclusions:
                    print(str(prop_name) + ' in exclusion list')
                    continue
                #If there is more than one type it will be stored in a list.
                elif type(prop_value['type']) is list:
                    '''
                    INTEGER/STRING

                    This is always (and only? )the lock version. Don't need to do anything
                    with it, but will keep in the check in case the schema changes.

                    '''
                    #WHAT WOULD HAPPEN IF I JUST SKIPPED ALTOGETHER - NOTHING STILL F****D!!
                    # if prop_value['type'] == ['integer', 'string']:
                    #     if prop_name == 'lock_version':
                    #         logging.debug(schema_name, prop_name, prop_value)
                    #         continue
                    #     if prop_name != 'lock_version':
                    #         template_dict[prop_name] = None
                    '''
                    What is this doing???

                    '''
                    if 'query' in prop_value['type'][0]:
                        continue
                        #logging.debug(schema_name, prop_name, prop_value)
                    '''
                    What is this doing???

                    '''
                    if type(prop_value['type'][0]) is dict:
                        continue
                        #if 'agent' in prop_value['type'][0]['type']:
                        #logging.debug(schema_name, prop_name, prop_value)
                #If there is only one type it won't be in a list.
                else:
                    '''
                    JSONMODEL TYPES

                    Can be either an object or URI. Refers to another schema or a reference
                    to another object. i.e. date subrecords, location URIs

                    '''
                    if self.jsonmodel_pattern.match(prop_value['type']):
                        logging.debug('Regex match, ' +
                                      str(prop_value['type']))
                        #Don't add read-only fields to the template. Might want to change this
                        #in the case of URIs or IDs...but don't worry about it for now.
                        if 'readonly' in prop_value:
                            logging.debug('Property value is readonly')
                            if 'subtype' in prop_value:
                                logging.debug('Subtype in property value')
                                if prop_value['subtype'] == 'ref':
                                    logging.debug(
                                        'Subtype of ' + str(prop_name) +
                                        ' is ref, calling parse_jsonmodel on ' +
                                        str(prop_value['type']))
                                    template_dict[
                                        prop_name] = self.parse_jsonmodel(
                                            prop_value['type'])
                        else:
                            logging.debug(
                                'readonly not in property value dict, calling parse_jsonmodel on '
                                + str(prop_value['type']))
                            template_dict[prop_name] = self.parse_jsonmodel(
                                prop_value['type'])
                    elif prop_value['type'] == 'array':
                        logging.debug('Prop value type is array')
                        #this will always be the case I think? Check
                        if 'items' in prop_value:
                            #no need to have readonly fields in template???
                            #if there is more than one type
                            if type(prop_value['items']['type']) is list:
                                logging.debug('Type of array items is list')
                                template_dict[prop_name] = []
                                #this might always be object??? check and see
                                for prop_type in prop_value['items']['type']:
                                    if self.jsonmodel_pattern.match(
                                            prop_type['type']):
                                        parsed_json = self.parse_jsonmodel(
                                            prop_type['type'])
                                        template_dict[prop_name].append(
                                            parsed_json)
                                    if prop_type['type'] is 'object':
                                        logging.debug(schema_name, prop_name,
                                                      prop_value)
                                #If there is only one type...
                            else:
                                logging.debug('Type of array items is object')
                                if prop_value['items']['type'] is 'object':
                                    if 'subtype' in prop_value['items']:
                                        #these usually have properties
                                        if 'properties' in prop_value['items']:
                                            template_dict[
                                                prop_name] = self.parse_refs(
                                                    schema_name, prop_name,
                                                    prop_value)
                                    else:
                                        if 'properties' in prop_value['items']:
                                            logging.debug(
                                                schema_name, schema_name,
                                                prop_name, prop_value)
                                if prop_value['items']['type'] == 'string':
                                    if 'enum' in prop_value['items']:
                                        template_dict[prop_name] = prop_value[
                                            'items']['enum']
                                #if it matches the object pattern
                                if self.jsonmodel_pattern.match(
                                        prop_value['items']['type']):
                                    logging.debug(prop_name)
                                    logging.debug(
                                        str(prop_value['items']['type']))
                                    parsed_json = self.parse_jsonmodel(
                                        prop_value['items']['type'])
                                    template_dict[prop_name] = [parsed_json]
                    #Changing this from 'is' to '==' causes infinite recursion. Interestingly changing it above causes many
                    #fields to be removed from the templates - 2 other instances of is/== 'object'
                    elif prop_value['type'] == 'object':
                        logging.debug('Prop value type is object')
                        if 'properties' in prop_value:
                            if 'subtype' in prop_value:
                                logging.debug(
                                    'subtype in prop value, calling parse_refs on '
                                    + str(schema_name) + ' ' + str(prop_name))
                                #these are all refs I think
                                template_dict[prop_name] = self.parse_refs(
                                    schema_name, prop_name, prop_value)
                            else:
                                logging.debug('subtype not in prop_value: ')
                                logging.debug(schema_name, prop_name,
                                              prop_value)
                    elif prop_value['type'] == 'string':
                        logging.debug('Prop value is string')
                        #enums are always strings
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            if 'enum' in prop_value:
                                template_dict[prop_name] = prop_value['enum']
                            # elif keeps the else branch from clobbering a
                            # static enum list with None
                            elif 'dynamic_enum' in prop_value:
                                template_dict[prop_name] = self.parse_enums(
                                    prop_value['dynamic_enum'])
                            else:
                                template_dict[prop_name] = None

                    elif prop_value['type'] in [
                            'integer', 'boolean', 'date', 'date-time', 'number'
                    ]:
                        logging.debug(
                            'Prop value is type int, bool, date, date-time, number'
                        )
                        #make sure this is correct, as in not missing something that should be there
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            template_dict[prop_name] = None
                    else:
                        logging.debug('Value not of a recognized type')
        except KeyError:
            logging.debug('KeyError: ' + schema_name + ' ' + prop_name)
        except Exception as exc:
            logging.debug('Error: ' + schema_name + ' ' + prop_name)
            logging.debug(traceback.format_exc())
        finally:
            template_dict['jsonmodel_type'] = schema_name
        return template_dict

    #QUESTION - SHOULD I CREATE LITTLE FUNCTIONS FOR EACH TYPE - i.e if whatever is 'object',
    #then do function stuff...might help with the nesting

    #want to go through each schema and create a sample dictionary template
    #need to be able to handle just one schema
    def parse_schemas(self, schemas):
        template_dict = {}
        for schema_name, schema_def in schemas.items():
            #check for a parent - but one that isn't "abstract" because those fields are the same
            #WHAT TO DO WITH THIS????
            # if 'parent' in schema_def:
            #     pass
            temp = self.parse_schema(schema_name, schema_def)
            template_dict[schema_name] = temp
        return template_dict

    def create_csv_template(self, jsontemplatedict):
        '''
        Goal is to create the JSON templates, and then convert those to CSV file that can
        be used to create either full finding aids/top level records, or to update subrecords
        in bulk
        '''
        fileob = open(jsontemplatedict['jsonmodel_type'] + '.csv',
                      'a',
                      encoding='utf-8',
                      newline='')
        csvout = csv.writer(fileob)
        subfield_list = []
        for key, value in jsontemplatedict.items():
            if type(value) is list:
                #should I just check the first one instead of looping through all?
                if type(value[0]) is dict:
                    for item in value:
                        for k in item.keys():
                            subfield_list.append(
                                jsontemplatedict['jsonmodel_type'] + '_' +
                                key + '_' + k)
                #only two options for lists, correct?
                if type(value[0]) is not dict:
                    #this means that it's just a list of enums probably - right?? No other list formats
                    #do I need the check now that I removed the loop?
                    check = jsontemplatedict['jsonmodel_type'] + '_' + key
                    if check not in subfield_list:
                        subfield_list.append(
                            jsontemplatedict['jsonmodel_type'] + '_' + key)
            else:
                subfield_list.append(jsontemplatedict['jsonmodel_type'] + '_' +
                                     key)
        csvout.writerow(subfield_list)
        fileob.close()
        return subfield_list

    #Wrapper loop to create all templates
    def create_csv_templates(self, jsontemplates):
        for template_key, template_value in jsontemplates.items():
            self.create_csv_template(template_value)

    def download_templates(self, jsontemplates):
        for template_key, template_value in jsontemplates.items():
            outfile = open(str(template_key) + '.json', 'w', encoding='utf-8')
            json.dump(template_value, outfile, sort_keys=True, indent=4)
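# A hedged usage sketch for the class above (not part of the original
# excerpt): build a template for every schema and write the outputs.
#
#   templates = ASTemps()
#   all_templates = templates.parse_schemas(templates.all_schemas)
#   templates.download_templates(all_templates)
#   templates.create_csv_templates(all_templates)

Example #25
resource_ids = [  # (list truncated in this excerpt; earlier entries not shown)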
                "/repositories/4/resources/1001", "/repositories/4/resources/4048", "/repositories/2/resources/633",
                "/repositories/2/resources/723", "/repositories/2/resources/748", "/repositories/2/resources/414"]
# "/repositories/5/resources/5071" - UA collection - Steve to check with Kat

for resource_id in resource_ids:
    unknown_count = 0
    uri_breakup = resource_id.split("/")
    res_id = uri_breakup[4]
    repo_id = uri_breakup[2]
    try:
        rl_repo = aspace.repositories(repo_id)
        resource_record = rl_repo.resources(res_id).tree
        resource_tree = resource_record.walk
        print(rl_repo.resources(res_id).json()["title"])
        for node in resource_tree:
            ao_json = client.get(node.uri).json()
            for instance in ao_json["instances"]:
                if "sub_container" in instance.keys():
                    indicators = []
                    types = []
                    for key, value in instance["sub_container"].items():
                        if "indicator_" in key:
                            if "unknown container" == value:
                                child_type = "type_" + str(key[-1])
                                indicators.append(key)
                                types.append(child_type)
                                unknown_count += 1
                    for indicator in indicators:
                        try:
                            del instance["sub_container"][indicator]
                        except Exception as e:
Example #26
#read existing exported collection data
collectionData = []
#collectionFile = open(os.path.join(staticData, "collections.csv"), "r", encoding='utf-8')
#for line in csv.reader(collectionFile, delimiter="|"):
#    collectionData.append(line)
#collectionFile.close()

#read existing exported subject data
subjectData = []
#subjectFile = open(os.path.join(staticData, "subjects.csv"), "r", encoding='utf-8')
#for line in csv.reader(subjectFile, delimiter="|"):
#    subjectData.append(line)
#subjectFile.close()

print ("\tQuerying ArchivesSpace...")
modifiedList = client.get("repositories/3/resources?all_ids=true&modified_since=" + str(startTime)).json()
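# The same call in requests-style params form (a sketch, not from the source):
# modifiedList = client.get("repositories/3/resources",
#                           params={"all_ids": True, "modified_since": startTime}).json()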
if len(modifiedList) > 0:
    print("\tFound " + str(len(modifiedList)) + " new records!")
    print("\tArchivesSpace ids: " + str(modifiedList))
else:
    print("\tFound no new records.")
for colID in modifiedList:
    collection = client.get("repositories/3/resources/" + str(colID)).json()
    if not collection["publish"]:
        print("\t\tSkipping " + collection["title"] + " because it is unpublished")
    else:
        print("\t\tExporting " + collection["title"] + " (" + collection["id_0"] + ")")
    
        try:
            normalName = collection["finding_aid_title"]
        except KeyError:
Example #27
0
class ArchivesSpaceClient(object):
    """Client to get and receive data from ArchivesSpace."""
    def __init__(self, baseurl, username, password, repo_id):
        self.client = ASnakeClient(baseurl=baseurl,
                                   username=username,
                                   password=password)
        self.repo_id = repo_id
        if not self.client.authorize():
            raise ArchivesSpaceClientError(
                "Couldn't authenticate user credentials for ArchivesSpace")
        self.TYPE_LIST = {
            "family": ["agent_family", "agents/families"],
            "organization":
            ["agent_corporate_entity", "agents/corporate_entities"],
            "person": ["agent_person", "agents/people"],
            "component": [
                "archival_object",
                "repositories/{repo_id}/archival_objects".format(
                    repo_id=self.repo_id)
            ],
            "accession": [
                "accession", "repositories/{repo_id}/accessions".format(
                    repo_id=self.repo_id)
            ],
            "digital object": [
                "digital_objects",
                "repositories/{repo_id}/digital_objects".format(
                    repo_id=self.repo_id)
            ]
        }

    def send_request(self, method, url, data=None, **kwargs):
        """Base method for sending requests to ArchivesSpace."""
        r = getattr(self.client, method)(url, data=json.dumps(data), **kwargs)
        if r.status_code == 200:
            return r.json()
        else:
            if r.json()["error"].get("id_0"):
                """Account for indexing delays by bumping up to the next accession number."""
                id_1 = int(data["id_1"])
                id_1 += 1
                data["id_1"] = str(id_1).zfill(3)
                return self.create(data, "accession")
            raise ArchivesSpaceClientError(
                "Error sending {} request to {}: {}".format(
                    method, url,
                    r.json()["error"]))

    def retrieve(self, url, **kwargs):
        return self.send_request("get", url, **kwargs)

    def create(self, data, type, **kwargs):
        return self.send_request("post", self.TYPE_LIST[type][1], data,
                                 **kwargs)

    def update(self, uri, data, **kwargs):
        return self.send_request("post", uri, data, **kwargs)
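    # In the ArchivesSpace API, updates are full-record POSTs back to the
    # record's own URI, so the `data` passed to update() should be the
    # complete modified JSON document, not a partial diff.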

    def get_or_create(self, type, field, value, last_updated, consumer_data):
        """
        Attempts to find and return an object in ArchivesSpace.
        If the object is not found, creates and returns a new object.
        """
        model_type = self.TYPE_LIST[type][0]
        endpoint = self.TYPE_LIST[type][1]
        query = json.dumps({
            "query": {
                "field": field,
                "value": value,
                "jsonmodel_type": "field_query"
            }
        })
        try:
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": model_type,
                                    "aq": query
                                }).json()
            if len(r["results"]) == 0:
                r = self.client.get(endpoint,
                                    params={
                                        "all_ids": True,
                                        "modified_since": last_updated - 120
                                    }).json()
                for ref in r:
                    r = self.client.get("{}/{}".format(endpoint, ref)).json()
                    if r[field] == str(value):
                        return r["uri"]
                return self.create(consumer_data, type).get("uri")
            return r["results"][0]["uri"]
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error finding or creating object in ArchivesSpace: {}".format(
                    e))

    def next_accession_number(self):
        """
        Finds the next available accession number by searching for accession
        numbers with the current year, and then incrementing.

        Assumes that accession numbers are stored in the format YYYY-NNN,
        where YYYY is the current year and NNN is a zero-padded integer.
        """
        current_year = str(date.today().year)
        try:
            query = json.dumps({
                "query": {
                    "field": "four_part_id",
                    "value": current_year,
                    "jsonmodel_type": "field_query"
                }
            })
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": "accession",
                                    "sort": "identifier desc",
                                    "aq": query
                                }).json()
            number = "1"
            if r.get("total_hits") >= 1:
                if r["results"][0]["identifier"].split("-")[0] == current_year:
                    id_1 = int(r["results"][0]["identifier"].split("-")[1])
                    id_1 += 1
                    number = str(id_1).zfill(3)
            return ":".join([current_year, number.zfill(3)])
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error retrieving next accession number from ArchivesSpace: {}"
                .format(e))
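
A minimal usage sketch for the client above (not from the source): the base URL, credentials, repository id, the "title" search field, and the agent JSON are illustrative assumptions; only get_or_create's signature comes from the class itself.

import time

# Assumed local instance and placeholder credentials.
client = ArchivesSpaceClient(baseurl="http://localhost:8089",
                             username="admin", password="admin",
                             repo_id=2)
# Search for a person agent by title; fall back to creating it from consumer_data.
uri = client.get_or_create(
    "person", "title", "Doe, Jane",
    last_updated=int(time.time()),
    consumer_data={"jsonmodel_type": "agent_person",
                   "names": [{"jsonmodel_type": "name_person",
                              "primary_name": "Doe",
                              "rest_of_name": "Jane",
                              "sort_name": "Doe, Jane",
                              "name_order": "inverted",
                              "source": "local"}]})
print(uri)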
Example #28
0
            "literal": True
        }, {
            "jsonmodel_type": "field_query",
            "field": "source",
            "value": "viaf",
            "literal": True
        }]
    }
})
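# get_paged follows the paginated /search response for you, yielding each
# result dict in turn; wrapping it in list() materializes every page.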
ASoutput = list(client.get_paged("/search", params={"filter": query}))
print('Found ' + str(len(ASoutput)) + ' agents.')

# grab uri out of agent
for person in ASoutput:
    uri = person['uri']
    personRecord = client.get(uri).json()
    lockVersion = str(personRecord['lock_version'])
    primary_name = personRecord['names'][0]['primary_name']
    try:
        secondary_name = personRecord['names'][0]['rest_of_name']
    except KeyError:
        secondary_name = ''
    try:
        dates = personRecord['names'][0]['dates']
    except KeyError:
        dates = ''
    searchName = primary_name + ', ' + secondary_name + ', ' + dates
    nameEdited = quote(searchName.strip())
    url = viafURL + nameEdited + '%22+and+local.sources+%3D+%22lc%22&sortKeys=holdingscount&maximumRecords=1&httpAccept=application/rdf+json'
    # first, treat the response as text, since we get an XML response (with JSON embedded inside)
    response = requests.get(url).text
Example #29
0
    # add the handlers to the logger
    logger.addHandler(fh)
    logger.addHandler(ch)

    config = configparser.ConfigParser()
    config.read('settings.ini')
    args.config = config

    try:
        client = ASnakeClient(
            baseurl=config['aspace_credentials']['api_host'],
            username=config['aspace_credentials']['username'],
            password=config['aspace_credentials']['password'])
    except KeyError as e:
        logger.error('settings.ini does not exist or is invalid')
        raise e

    # Simple sanity check to make sure the client is set up
    try:
        resp = client.get('/')
        if not resp.ok:
            resp.raise_for_status()
    except Exception:
        logger.error('Unable to contact ArchivesSpace instance at %s' %
                     config['aspace_credentials']['api_host'])
        raise APIContactError(
            'Unable to contact ArchivesSpace instance at %s' %
            config['aspace_credentials']['api_host'])

    main_menu()
Example #30
0
import json

from asnake.client import ASnakeClient


# Recursive generator yielding every value stored under `key` anywhere in a
# nested dict/list structure. The imports, function header, and key-match base
# case are reconstructed; only the list-recursion branch survives in the source.
def findKey(d, key):
    if key in d:
        yield d[key]
    for k in d:
        if isinstance(d[k], list):
            for i in d[k]:
                for j in findKey(i, key):
                    yield j


repository = input('Enter Repository ID: ')
resourceID = input('Enter resource ID: ')

client = ASnakeClient()
client.authorize()

endpoint = '/repositories/' + repository + '/resources/' + resourceID + '/tree'

output = client.get(endpoint).json()

archivalObjects = []
for value in findKey(output, 'record_uri'):
    if 'archival_objects' in value:
        archivalObjects.append(value)

records = []
for archivalObject in archivalObjects:
    output = client.get(archivalObject).json()
    records.append(output)

with open('archivalObjects.json', 'w') as f:
    json.dump(records, f)
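
As a quick check of the traversal (a sketch; the tree shape is assumed from the /tree endpoint, which nests child nodes in a "children" list):

# Assumed shape of a /tree response that findKey() walks:
sample_tree = {
    "record_uri": "/repositories/2/resources/1",
    "children": [
        {"record_uri": "/repositories/2/archival_objects/10", "children": []},
    ],
}
assert list(findKey(sample_tree, "record_uri")) == [
    "/repositories/2/resources/1",
    "/repositories/2/archival_objects/10",
]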