Example #1
class ASpace():
    # this happens when you call ASpace()
    def __init__(self, **config):
        # Connect to ASpace using .archivessnake.yml
        self.client = ASnakeClient(**config)
        self.client.authorize()
        m = re.match(r'\(v?(.+)\)', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.'''
        if not attr.startswith('_'):
            return JSONModelRelation("/{}".format(attr), params={"all_ids": True}, client = self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        return ResourceRelation({}, self.client)


    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    @property
    def users(self):
        '''returns a UserRelation.'''
        return UserRelation("/users", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

Note: while the route will return differently depending on how many records are returned,
this method deliberately flattens that out - it will _always_ return a generator, even if only
one record is found.'''
        params = {"eid": external_id}
        if record_types: params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            return  # nothing found; the generator simply yields nothing
        elif res.status_code == 300: # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client) for uri in IndexedSet(res.json()))
        elif res.status_code == 200: # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode("by-external-id call returned '{}'".format(res.status_code))

    def from_uri(self, uri):
        '''returns a JSONModelObject representing the URI passed in'''
        return wrap_json_object(self.client.get(uri).json(), self.client)
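A minimal usage sketch for the class above (assumes a valid .archivessnake.yml, or explicit keyword credentials, which ASnakeClient picks up on construction):

from asnake.aspace import ASpace

aspace = ASpace()                    # connects and authorizes immediately
print(aspace.version)                # parsed from the /version endpoint
for repo in aspace.repositories:     # __getattr__ builds a JSONModelRelation
    print(repo.name)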
Example #2

def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set([])

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index][
                            'extent_type'] = key
                        break
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
        except KeyError:
            # record has no extents; skip it
            pass

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
Example #3
def test_authorize():
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    toke = client.authorize()
    assert isinstance(toke, str)
    assert len(toke) == 64
    assert set(toke) <= set('0123456789abcdef')
    assert client.session.headers['X-ArchivesSpace-Session'] == toke
    # Try to get admin user info, should only work if we're authed as admin
    assert client.get('users/1').status_code == 200
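The test leans on ASnake's default configuration lookup; an equivalent explicit setup (the baseurl and credentials here are hypothetical local defaults) would be:

client = ASnakeClient(baseurl="http://localhost:8089",
                      username="admin", password="admin")
client.authorize()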
Example #4
import csv, json

from asnake.client import ASnakeClient
client = ASnakeClient()
client.authorize()


def startCSV(CSV):
    '''Creates the CSV with field names and writes header'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'w', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writeheader()


def addCSV(CSV, lock, ind, uri, coll_id, ser_id):
    '''Opens CSV, appends row'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'a', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writerow({
            'lock_version': lock,
            'indicator': ind,
            'uri': uri,
            'collection_identifier': coll_id,
            'series_identifier': ser_id
        })

Example #5

from asnake.aspace import ASpace
from asnake.client import ASnakeClient

import asnake.logging as logging
logging.setup_logging(level='DEBUG',
                      filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

rl_repo = aspace.repositories(2)

#input is output of SQL query above
input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_resources_csv = input("Path to CSV Output: ")


#Test if more than one direct child of Resource Object
#Why? Don't want to assign all children to Resource if there are other sibling Components of the fake wrapper component
def only_one_direct_child_of_resource_test(resource_object):
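    # The body is truncated in the source. A hypothetical sketch, using the
    # (real, though deprecated) resource tree endpoint, might read:
    tree = aspace_client.get(resource_object["uri"] + "/tree").json()
    return len(tree["children"]) == 1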
Example #6
class ArchivesSpaceClient(object):
    """Client to get and receive data from ArchivesSpace."""
    def __init__(self, baseurl, username, password, repo_id):
        self.client = ASnakeClient(baseurl=baseurl,
                                   username=username,
                                   password=password)
        self.repo_id = repo_id
        if not self.client.authorize():
            raise ArchivesSpaceClientError(
                "Couldn't authenticate user credentials for ArchivesSpace")
        self.TYPE_LIST = {
            "family": ["agent_family", "agents/families"],
            "organization":
            ["agent_corporate_entity", "agents/corporate_entities"],
            "person": ["agent_person", "agents/people"],
            "component": [
                "archival_object",
                "repositories/{repo_id}/archival_objects".format(
                    repo_id=self.repo_id)
            ],
            "accession": [
                "accession", "repositories/{repo_id}/accessions".format(
                    repo_id=self.repo_id)
            ],
            "digital object": [
                "digital_objects",
                "repositories/{repo_id}/digital_objects".format(
                    repo_id=self.repo_id)
            ]
        }

    def send_request(self, method, url, data=None, **kwargs):
        """Base method for sending requests to ArchivesSpace."""
        r = getattr(self.client, method)(url, data=json.dumps(data), **kwargs)
        if r.status_code == 200:
            return r.json()
        else:
            if r.json()["error"].get("id_0"):
                """Account for indexing delays by bumping up to the next accession number."""
                id_1 = int(data["id_1"])
                id_1 += 1
                data["id_1"] = str(id_1).zfill(3)
                return self.create(data, "accession")
            raise ArchivesSpaceClientError(
                "Error sending {} request to {}: {}".format(
                    method, url,
                    r.json()["error"]))

    def retrieve(self, url, **kwargs):
        return self.send_request("get", url, **kwargs)

    def create(self, data, type, **kwargs):
        return self.send_request("post", self.TYPE_LIST[type][1], data,
                                 **kwargs)

    def update(self, uri, data, **kwargs):
        return self.send_request("post", uri, data, **kwargs)

    def get_or_create(self, type, field, value, last_updated, consumer_data):
        """
        Attempts to find and return an object in ArchivesSpace.
        If the object is not found, creates and returns a new object.
        """
        model_type = self.TYPE_LIST[type][0]
        endpoint = self.TYPE_LIST[type][1]
        query = json.dumps({
            "query": {
                "field": field,
                "value": value,
                "jsonmodel_type": "field_query"
            }
        })
        try:
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": model_type,
                                    "aq": query
                                }).json()
            if len(r["results"]) == 0:
                r = self.client.get(endpoint,
                                    params={
                                        "all_ids": True,
                                        "modified_since": last_updated - 120
                                    }).json()
                for ref in r:
                    r = self.client.get("{}/{}".format(endpoint, ref)).json()
                    if r[field] == str(value):
                        return r["uri"]
                return self.create(consumer_data, type).get("uri")
            return r["results"][0]["uri"]
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error finding or creating object in ArchivesSpace: {}".format(
                    e))

    def next_accession_number(self):
        """
        Finds the next available accession number by searching for accession
        numbers with the current year, and then incrementing.

        Assumes that accession numbers are in the format YYYY NNN, where YYYY
        is the current year and NNN is a zero-padded integer.
        """
        current_year = str(date.today().year)
        try:
            query = json.dumps({
                "query": {
                    "field": "four_part_id",
                    "value": current_year,
                    "jsonmodel_type": "field_query"
                }
            })
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": "accession",
                                    "sort": "identifier desc",
                                    "aq": query
                                }).json()
            number = "1"
            if r.get("total_hits", 0) >= 1:
                if r["results"][0]["identifier"].split("-")[0] == current_year:
                    id_1 = int(r["results"][0]["identifier"].split("-")[1])
                    id_1 += 1
                    number = str(id_1).zfill(3)
            return ":".join([current_year, number.zfill(3)])
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error retrieving next accession number from ArchivesSpace: {}"
                .format(e))
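A usage sketch for this client (server URL and credentials are hypothetical):

client = ArchivesSpaceClient(baseurl="http://localhost:8089",
                             username="admin", password="admin", repo_id=2)
print(client.next_accession_number())     # e.g. "2024:002"
accession = client.retrieve("repositories/2/accessions/1")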
Example #7

    total = len(filelist)
    count = 0
    for f in filelist:
        f = f.replace('\n', '')
        count += 1
        makeRow(getAo(f),f)
        print('Row added! - ' + str(count) + "/" + str(total))

# enter aspace login info
config = configparser.ConfigParser()
config.read('local_settings.cfg')
baseurl = config.get('ArchivesSpace', 'baseURL')
user = input('ArchivesSpace username: ')
pw = getpass.getpass('ArchivesSpace password: ')  # assumes getpass is imported
print('\a')  # terminal bell once credentials are entered

Example #8
import json, time
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print ("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print ("The following container profiles have been added to ArchivesSpace:")
with open("containerProfiles.json") as jsonfile:
    container_profiles = json.load(jsonfile)
for container_profile in container_profiles:
    post = client.post("/container_profiles", json=container_profile).json()
    print (post)

print ("You've just completed your first API POST.  Congratulations!")
Example #9

def main(ID, path=None, accession=None):

    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP
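A hypothetical invocation (the collection and accession identifiers are illustrative only):

sip = main("ua950.012", accession="UA-2021-001")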
Example #10
def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()

    data_list = []

    print('Compiling resource records from API...')

    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        try:
            extents = res_record['extents']
            for x in extents:
                if x['extent_type'] == 'megabytes':
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except KeyError:
            pass  # record has no extents; skip it

    linear_ms = 0
    linear_ua = 0
    gb_ms = 0
    gb_ua = 0
    cubic_ms = 0
    cubic_ua = 0

    print('Analyzing extents in resource data...')

    for entry in data_list:
        try:
            if entry['id'].startswith(
                    'MS') and entry['units'] in catalog['linear']:
                linear_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['linear']:
                linear_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['gb']:
                gb_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['gb']:
                gb_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['cubic']:
                cubic_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['cubic']:
                cubic_ua += float(entry['amount'])
        except ValueError:  # extent amount was not a pure number
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            if (exception.lower()).strip() == 'continue':
                pass
            elif (exception.lower()).strip() == 'stop':
                quit()

    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }

    print('Generating report as JSON...')

    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
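A minimal entry point for the script above, assuming the module-level imports (ASnakeClient, tqdm, json, datetime) that the snippet takes for granted:

if __name__ == '__main__':
    main()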
Example #11

def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):

    client = ASnakeClient()
    client.authorize()
    
    collection = []
    page = 1

    outDir = "/media/SPE/uploads"
    
    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
        outDesc = os.path.join(outDir, "desc_" + refID + ".json")
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
        outDesc = os.path.join(outDir, "desc_" + colID.replace(".", "-") + ".json")
    if filter:
        url = url + "&" + filter
    
    print (descriptionURL + "?format=json")
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print (r.status_code)
    with open(outDesc, 'w', encoding='utf-8', newline='') as f:
        json.dump(r.json()["response"], f, ensure_ascii=True, indent=4)
        

    def getPage(page, collection, url):

        r = requests.get(url + "&page=" + str(page), verify=False)
        print (r.status_code)
        for item in r.json()["response"]["docs"]:

            obj = {}
            obj["title"] = item["title_tesim"][0]
            obj["date"] = item["date_created_tesim"][0]
            #print (item)
            ref_id = item["archivesspace_record_tesim"][0]
            obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
            obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]
            
            record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
            ao = client.get(record["archival_objects"][0]["ref"]).json()
            print (ao["ref_id"])
            dateNormal = ao["dates"][0]["begin"]
            if "end" in ao["dates"][0].keys():
                dateNormal = dateNormal + "/" + ao["dates"][0]["end"]
            if "undated" in ao["dates"][0]["expression"].lower():
                obj["date_normal"] = "9999"
            else:
                obj["date_normal"] = dateNormal
            
            if date:
                if not obj["date"].lower() == "undated":
                    if obj["date"].lower().startswith("ca."):
                        objDate = obj["date"].split(" ")[1]
                    else:
                        if "-" in obj["date"]:
                            objDate = obj["date"].split("-")[0]
                        else:
                            objDate = obj["date"].split(" ")[0]
                    print (objDate)
                    try:
                        if "-" in date:
                            if int(objDate) >= int(date.split("-")[0]) and int(objDate) <= int(date.split("-")[1]):
                                collection.append(obj)
                        else:
                            if int(objDate) < int(date):
                                collection.append(obj)
                    except:
                        print ("Date Error: " + objDate)
            else:
                collection.append(obj)
        if r.json()["response"]["pages"]["last_page?"] == False:
            getPage(page + 1, collection, url)

    getPage(page, collection, url)
        
        
    #print (collection)
    sortedTitle = sorted(collection, key = lambda i: i['title'].split(" ")[0])
    sortedCollection = sorted(sortedTitle, key = lambda i: i['date_normal'].split(" ")[0])
    print (len(sortedCollection))

    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
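A hypothetical call, selecting items from one collection and keeping only those dated before 1950 (the collection number is illustrative):

buildSelections("apap015", date="1950")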
Example #12

class ASTemps():
    def __init__(self):
        self.client = ASnakeClient()
        self.auth = self.client.authorize()
        self.all_schemas = self.get_schemas()
        #a list of all enumerations
        #COULD ALSO DO /config/enumerations/names/:enum_name
        self.all_enums = self.get_dynamic_enums()
        #gets the list of schema names
        self.schema_list = [key for key in self.all_schemas.keys()]
        #gets the type list
        self.type_list = list(
            set([
                k for value in self.all_schemas.values()
                for k, v in value.items()
            ]))
        self.jsonmodel_pattern = re.compile(
            r'(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')

    def get_schemas(self):
        schemas = self.client.get('/schemas').json()
        return (schemas)

    def get_schema(self, schema):
        schema = self.client.get('/schemas/' + schema).json()
        return (schema)

    def get_dynamic_enums(self):
        enums = self.client.get('/config/enumerations').json()
        return (enums)

    def parse_jsonmodel(self, obj_value):
        #reg ex to capture all jsonmodel references in schema
        #jsonmodel = re.compile('(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
        logging.debug('starting jsonmodel')
        parsed_json = None  # default if nothing below matches
        if self.jsonmodel_pattern.match(obj_value):
            logging.debug('match with ' + str(obj_value))
            #gets the name of the schema
            stripped_string = obj_value[obj_value.find("(") +
                                        1:obj_value.find(")")][1:]
            if stripped_string != 'repository':
                logging.debug('Getting schema for: ' + stripped_string)
                jsonmodel_schema = self.all_schemas[stripped_string]
                #wondering if this is where the problem is??? I know this works in some cases
                if 'uri' in obj_value:
                    logging.debug('uri in obj_value')
                    parsed_json = {'ref': jsonmodel_schema['uri']}
                    logging.debug(str(parsed_json))
                #LOL this also gets digital objects
                if 'object' in obj_value:
                    if 'digital_object' not in obj_value:
                        logging.debug('object in obj_value')
                        #workaround for testing - infinite recursion - but only fixes part of it...
                        if stripped_string == 'note_outline_level':
                            parsed_json = None
                        else:
                            #THIS IS BROKEN!!!! INFINITE RECURSION
                            logging.debug("obj_value " + str(obj_value))
                            logging.debug('running parse_schema on ' +
                                          str(obj_value))
                            parsed_json = self.parse_schema(
                                stripped_string, jsonmodel_schema)
            #saves lots of memory, likely will not change.
            if stripped_string == 'repository':
                parsed_json = {'ref': '/repositories/:repo_id'}
        return parsed_json

    #still more to do with the other ref properties
    def parse_refs(self, schema_name, obj_name, obj_value):
        logging.debug('starting parse_refs on ' + str(schema_name) + ' ' +
                      str(obj_name))
        #go through the properties of the refs
        if 'properties' in obj_value:
            logging.debug('properties in ' + str(obj_value))
            if 'ref' in obj_value['properties']:
                logging.debug('ref in properties')
                if type(obj_value['properties']['ref']['type']) is list:
                    logging.debug('Type of ref is list')
                    logging.debug("obj_value['properties']['ref']['type']: " +
                                  str(obj_value['properties']['ref']['type']))
                    ref_list = []
                    for ref in obj_value['properties']['ref']['type']:
                        logging.debug('Looping through ref list')
                        logging.debug(obj_value['properties']['ref']['type'])
                        logging.debug(ref['type'])
                        #FIX THIS
                        parsed_ref = self.parse_jsonmodel(ref['type'])
                        logging.debug('parsed ref ' + str(parsed_ref))
                        ref_list.append(parsed_ref)
                    logging.debug('ref_list: ' + str(ref_list))
                    return ref_list
                else:
                    logging.debug('Type of ref is not list')
                    if self.jsonmodel_pattern.match(
                            obj_value['properties']['ref']['type']):
                        logging.debug(
                            'RE match ' +
                            str(obj_value['properties']['ref']['type']))
                        logging.debug('calling parse_jsonmodel')
                        parsed_ref = self.parse_jsonmodel(
                            obj_value['properties']['ref']['type'])
                        return parsed_ref
        else:
            logging.debug('properties not in ' + str(obj_name) +
                          'value dictionary')
            logging.debug(str(obj_value['ref']['type']))
            if self.jsonmodel_pattern.match(obj_value['ref']['type']):
                logging.debug(
                    str(obj_value['ref']['type']) +
                    ' matches jsonmodel pattern')
                logging.debug('Calling parse_jsonmodel on ' +
                              str(obj_value['ref']['type']))
                parsed_ref = self.parse_jsonmodel(obj_value['ref']['type'])
                return parsed_ref

    def parse_enums(self, enum_name):
        enum_list = []
        for enum in self.all_enums:
            if enum['name'] == enum_name:
                for ev in enum['enumeration_values']:
                    enum_list.append(ev['value'])
        return enum_list

    def parse_schema(self, schema_name, schema_def):
        try:
            logging.debug("Working on schema: " + str(schema_name))
            template_dict = {}
            #Fixes infinite recursion for now
            exclusions = [
                'collection_management', 'rights_statement',
                'rights_statement_act', 'note_rights_statement',
                'note_rights_statement_act', 'children', 'deaccessions',
                '_inherited', 'rights_statements', 'external_id'
            ]
            for prop_name, prop_value in schema_def['properties'].items():
                logging.debug("Working on prop: " + str(prop_name))
                if schema_name in exclusions:
                    print(schema_name + ' in exclusion list')
                    continue
                elif prop_name in exclusions:
                    print(str(prop_name) + ' in exclusion list')
                    continue
                #If there is more than one type it will be stored in a list.
                elif type(prop_value['type']) is list:
                    '''
                    INTEGER/STRING

                    This is always (and only?) the lock version. Don't need to do anything
                    with it, but will keep in the check in case the schema changes.

                    '''
                    #WHAT WOULD HAPPEN IF I JUST SKIPPED ALTOGETHER - NOTHING, STILL BROKEN!!
                    # if prop_value['type'] == ['integer', 'string']:
                    #     if prop_name == 'lock_version':
                    #         logging.debug(schema_name, prop_name, prop_value)
                    #         continue
                    #     if prop_name != 'lock_version':
                    #         template_dict[prop_name] = None
                    '''
                    What is this doing???

                    '''
                    if 'query' in prop_value['type'][0]:
                        continue
                        #logging.debug(schema_name, prop_name, prop_value)
                    '''
                    What is this doing???

                    '''
                    if type(prop_value['type'][0]) is dict:
                        continue
                        #if 'agent' in prop_value['type'][0]['type']:
                        #logging.debug(schema_name, prop_name, prop_value)
                #If there is only one type it won't be in a list.
                else:
                    '''
                    JSONMODEL TYPES

                    Can be either an object or URI. Refers to another schema or a reference
                    to another object. i.e. date subrecords, location URIs

                    '''
                    if self.jsonmodel_pattern.match(prop_value['type']):
                        logging.debug('Regex match, ' +
                                      str(prop_value['type']))
                        #Don't add read-only fields to the template. Might want to change this
                        #in the case of URIs or IDs...but don't worry about it for now.
                        if 'readonly' in prop_value:
                            logging.debug('Property value is readonly')
                            if 'subtype' in prop_value:
                                logging.debug('Subtype in property value')
                                if prop_value['subtype'] == 'ref':
                                    logging.debug(
                                        'Subtype of ' + str(prop_name) +
                                        ' is ref, calling parse_jsonmodel on ' +
                                        str(prop_value['type']))
                                    template_dict[
                                        prop_name] = self.parse_jsonmodel(
                                            prop_value['type'])
                        else:
                            logging.debug(
                                'readonly not in property value dict, calling parse_jsonmodel on '
                                + str(prop_value['type']))
                            template_dict[prop_name] = self.parse_jsonmodel(
                                prop_value['type'])
                    elif prop_value['type'] == 'array':
                        logging.debug('Prop value type is array')
                        #this will always be the case I think? Check
                        if 'items' in prop_value:
                            #no need to have readonly fields in template???
                            #if there is more than one type
                            if type(prop_value['items']['type']) is list:
                                logging.debug('Type of array items is list')
                                template_dict[prop_name] = []
                                #this might always be object??? check and see
                                for prop_type in prop_value['items']['type']:
                                    if self.jsonmodel_pattern.match(
                                            prop_type['type']):
                                        parsed_json = self.parse_jsonmodel(
                                            prop_type['type'])
                                        template_dict[prop_name].append(
                                            parsed_json)
                                    if prop_type['type'] is 'object':
                                        logging.debug(schema_name, prop_name,
                                                      prop_value)
                                #If there is only one type...
                            else:
                                logging.debug('Type of array items is object')
                                if prop_value['items']['type'] is 'object':
                                    if 'subtype' in prop_value['items']:
                                        #these usually have properties
                                        if 'properties' in prop_value['items']:
                                            template_dict[
                                                prop_name] = self.parse_refs(
                                                    schema_name, prop_name,
                                                    prop_value)
                                    else:
                                        if 'properties' in prop_value['items']:
                                            logging.debug(
                                                schema_name, schema_name,
                                                prop_name, prop_value)
                                if prop_value['items']['type'] == 'string':
                                    if 'enum' in prop_value['items']:
                                        template_dict[prop_name] = prop_value[
                                            'items']['enum']
                                #if it matches the object pattern
                                if self.jsonmodel_pattern.match(
                                        prop_value['items']['type']):
                                    logging.debug(prop_name)
                                    logging.debug(
                                        str(prop_value['items']['type']))
                                    parsed_json = self.parse_jsonmodel(
                                        prop_value['items']['type'])
                                    template_dict[prop_name] = [parsed_json]
                    #Changing this from 'is' to '==' causes infinite recursion. Interestingly changing it above causes many
                    #fields to be removed from the templates - 2 other instances of is/== 'object'
                    elif prop_value['type'] == 'object':
                        logging.debug('Prop value type is object')
                        if 'properties' in prop_value:
                            if 'subtype' in prop_value:
                                logging.debug(
                                    'subtype in prop value, calling parse_refs on '
                                    + str(schema_name) + ' ' + str(prop_name))
                                #these are all refs I think
                                template_dict[prop_name] = self.parse_refs(
                                    schema_name, prop_name, prop_value)
                            else:
                                logging.debug('subtype not in prop_value: ')
                                logging.debug(schema_name, prop_name,
                                              prop_value)
                    elif prop_value['type'] == 'string':
                        logging.debug('Prop value is string')
                        #enums are always strings
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            if 'enum' in prop_value:
                                template_dict[prop_name] = prop_value['enum']
                            elif 'dynamic_enum' in prop_value:
                                template_dict[prop_name] = self.parse_enums(
                                    prop_value['dynamic_enum'])
                            else:
                                template_dict[prop_name] = None

                    elif prop_value['type'] in [
                            'integer', 'boolean', 'date', 'date-time', 'number'
                    ]:
                        logging.debug(
                            'Prop value is type int, bool, date, date-time, number'
                        )
                        #make sure this is correct, as in not missing something that should be there
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            template_dict[prop_name] = None
                    else:
                        logging.debug('Value not of a recognized type')
        except KeyError:
            logging.debug('KeyError: ' + schema_name + ' ' + prop_name)
        except Exception as exc:
            logging.debug('Error: ' + schema_name + ' ' + prop_name)
            logging.debug(traceback.format_exc())
        finally:
            template_dict['jsonmodel_type'] = schema_name
        return template_dict

    #QUESTION - SHOULD I CREATE LITTLE FUNCTIONS FOR EACH TYPE - i.e if whatever is 'object',
    #then do function stuff...might help with the nesting

    #want to go through each schema and create a sample dictionary template
    #need to be able to handle just one schema
    def parse_schemas(self, schemas):
        template_dict = {}
        for schema_name, schema_def in schemas.items():
            #check for a parent - but one that isn't "abstract" because those fields are the same
            #WHAT TO DO WITH THIS????
            # if 'parent' in schema_def:
            #     pass
            temp = self.parse_schema(schema_name, schema_def)
            template_dict[schema_name] = temp
        return template_dict

    def create_csv_template(self, jsontemplatedict):
        '''
        Goal is to create the JSON templates, and then convert those to CSV file that can
        be used to create either full finding aids/top level records, or to update subrecords
        in bulk
        '''
        fileob = open(jsontemplatedict['jsonmodel_type'] + '.csv',
                      'a',
                      encoding='utf-8',
                      newline='')
        csvout = csv.writer(fileob)
        subfield_list = []
        for key, value in jsontemplatedict.items():
            if type(value) is list:
                #should I just check the first one instead of looping through all?
                if type(value[0]) is dict:
                    for item in value:
                        for k in item.keys():
                            subfield_list.append(
                                jsontemplatedict['jsonmodel_type'] + '_' +
                                key + '_' + k)
                #only two options for lists, correct?
                if type(value[0]) is not dict:
                    #this means that it's just a list of enums probably - right?? No other list formats
                    #do I need the check now that I removed the loop?
                    check = jsontemplatedict['jsonmodel_type'] + '_' + key
                    if check not in subfield_list:
                        subfield_list.append(
                            jsontemplatedict['jsonmodel_type'] + '_' + key)
            else:
                subfield_list.append(jsontemplatedict['jsonmodel_type'] + '_' +
                                     key)
        csvout.writerow(subfield_list)
        fileob.close()
        return subfield_list

    #Wrapper loop to create all templates
    def create_csv_templates(self, jsontemplates):
        for template_key, template_value in jsontemplates.items():
            self.create_csv_template(template_value)

    def download_templates(self, jsontemplates):
        for template_key, template_value in jsontemplates.items():
            outfile = open(str(template_key) + '.json', 'w', encoding='utf-8')
            json.dump(template_value, outfile, sort_keys=True, indent=4)
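A usage sketch tying the methods together (writes one JSON template and one CSV header row per schema in the working directory):

templates = ASTemps()
parsed = templates.parse_schemas(templates.all_schemas)
templates.download_templates(parsed)
templates.create_csv_templates(parsed)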
Example #13
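This excerpt begins mid-function: names such as client, repo_id, resource_id, resource, combined_id, id_combined_regex, source_path and export_eads are defined earlier in the original script, and as_api, as_un and as_pw hold the API URL and login. Hypothetical placeholders, for reading the snippet in isolation:

as_api = "http://localhost:8089"
as_un, as_pw = "admin", "admin"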
            combined_aspace_id_clean = id_combined_regex.sub('', combined_id)
            if resource.json()["publish"] is True:
                if resource.status_code == 200:
                    export_ead = client.get(
                        "repositories/{}/resource_descriptions/{}.xml".format(
                            repo_id, resource_id),
                        params={
                            "include_unpublished": False,
                            "include_daos": True,
                            "numbered_cs": True,
                            "print_pdf": False,
                            "ead3": False
                        })
                    filepath = str(Path(source_path,
                                        combined_aspace_id_clean)) + ".xml"
                    with open(filepath, "wb") as local_file:
                        local_file.write(export_ead.content)
                    print("Exported: {}".format(combined_id))
                else:
                    print(
                        "\nThe following errors were found when exporting {}:\n{}: {}\n"
                        .format(combined_id, resource, resource.text))
        print("-" * 100)


sourcepath = input("Enter folder path for exported EADs: ")
asp_client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
asp_client.authorize()
export_eads(asp_client, sourcepath)
Example #14
def get_aspace_log(defaults):
    """
    Gets a user's ArchiveSpace credentials.
    There are 3 components to it, the setup code, correct_creds while loop, and the window_asplog_active while loop. It
    uses ASnake.client to authenticate and stay connected to ArchivesSpace. Documentation for ASnake can be found here:
    https://archivesspace-labs.github.io/ArchivesSnake/html/index.html
    Args:
        defaults (UserSetting class): contains the data from defaults.json file, all data the user has specified as default
    Returns:
        close_program (bool): if a user exits the popup, this will return true and end run_gui()
        connect_client (ASnake.client object): the ArchivesSpace ASnake client for accessing and connecting to the API
        repositories (dict): maps repository display names to their numeric IDs
    """
    connect_client = None
    repositories = {}
    save_button_asp = " Save and Continue "
    window_asplog_active = True
    correct_creds = False
    close_program = False
    while correct_creds is False:
        asplog_col1 = [
            [psg.Text("ArchivesSpace username:"******"Roboto", 11))],
            [psg.Text("ArchivesSpace password:"******"Roboto", 11))],
            [psg.Text("ArchivesSpace API URL:", font=("Roboto", 11))]
        ]
        asplog_col2 = [[psg.InputText(focus=True, key="_ASPACE_UNAME_")],
                       [
                           psg.InputText(password_char='*',
                                         key="_ASPACE_PWORD_")
                       ],
                       [psg.InputText(defaults["as_api"], key="_ASPACE_API_")]]
        layout_asplog = [[
            psg.Column(asplog_col1, key="_ASPLOG_COL1_", visible=True),
            psg.Column(asplog_col2, key="_ASPLOG_COL2_", visible=True)
        ],
                         [
                             psg.Button(save_button_asp,
                                        bind_return_key=True,
                                        key="_SAVE_CLOSE_LOGIN_")
                         ]]
        window_login = psg.Window("ArchivesSpace Login Credentials",
                                  layout_asplog)
        while window_asplog_active is True:
            event_log, values_log = window_login.Read()
            if event_log == "_SAVE_CLOSE_LOGIN_":
                try:
                    connect_client = ASnakeClient(
                        baseurl=values_log["_ASPACE_API_"],
                        username=values_log["_ASPACE_UNAME_"],
                        password=values_log["_ASPACE_PWORD_"])
                    connect_client.authorize()
                    defaults["as_api"] = values_log["_ASPACE_API_"]
                    repo_results = connect_client.get('/repositories')
                    repo_results_dec = json.loads(
                        repo_results.content.decode())
                    for result in repo_results_dec:
                        uri_components = result["uri"].split("/")
                        repositories[result["name"]] = int(uri_components[-1])
                    window_asplog_active = False
                    correct_creds = True
                except Exception as e:
                    error_message = ""
                    if ":" in str(e):
                        error_divided = str(e).split(":")
                        for line in error_divided:
                            error_message += line + "\n"
                    else:
                        error_message = str(e)
                    psg.Popup(
                        "Your username and/or password were entered incorrectly. Please try again.\n\n"
                        + error_message)
            if event_log is None or event_log == 'Cancel':
                window_login.close()
                window_asplog_active = False
                correct_creds = True
                close_program = True
                break
        window_login.close()
    return close_program, connect_client, repositories
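A usage sketch (assumes a defaults mapping like the one loaded from defaults.json):

close_program, connect_client, repositories = get_aspace_log(
    {"as_api": "http://localhost:8089"})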
Example #15
class ASpace():

    # this happens when you call ASpace()
    def __init__(self, **config):
        # Repository will default to 2 if not provided
        self.repository = config.pop('repository', '2')

        # Connect to ASpace using .archivessnake.yml
        self.__client = ASnakeClient(**config)
        self.__client.authorize()
        m = re.match(r'\(v?(.+)\)', self.__client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    # this automatically sets attributes to ASpace(), so you can ASpace().resources, etc.
    def __getattr__(self, attr):
        if not attr.startswith('_'):
            # This sets plural attributes, like resources and archival_objects
            # Not sure if this is safe
            if attr.lower().endswith("s"):
                shortCalls = [
                    "repositories", "locations", "subjects", "users",
                    "vocabularies", "location_profiles", "container_profiles"
                ]
                #for calls without repositories in them
                if attr in shortCalls:
                    return jsonmodel_muliple_object(
                        self.__client.get("/" + str(attr),
                                          params={
                                              "all_ids": True
                                          }).json(), self.__client,
                        self.repository, attr)
                else:
                    return jsonmodel_muliple_object(
                        self.__client.get(
                            "/repositories/" + str(self.repository) + "/" +
                            str(attr),
                            params={
                                "all_ids": True
                            }).json(), self.__client, self.repository, attr)

    def resources(self):
        '''return all resources from every repo'''
        repo_uris = [
            r['uri'] for r in self.__client.get('repositories').json()
        ]
        for resource in chain(*[
                paged_result('{}/resources'.format(uri), self.__client)
                for uri in repo_uris
        ]):
            yield resource

    # not sure if there's a way to pass a variable to implement this with __getattr__
    def resource(self, id):
        return jsonmodel_single_object(
            self.__client.get("repositories/" + self.repository +
                              "/resources/" + str(id)).json(), self.__client)

    #this doesn't work yet
    def resourceID(self, id):
        result = self.__client.get(
            "/repositories/" + self.repository +
            "/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\""
            + str(id) + "\", \"jsonmodel_type\":\"field_query\"}}").json()
        resourceURI = result["results"][0]["uri"]
        return jsonmodel_single_object(
            self.__client.get(resourceURI).json(), self.__client)

    def archival_object(self, id):
        if isinstance(id, str):
            if len(id) == 32:
                # a 32-character string is a ref_id
                refList = self.__client.get(
                refList = self.__client.get(
                    "repositories/" + self.repository +
                    "/find_by_id/archival_objects?page=1&ref_id[]=" +
                    str(id)).json()
                return jsonmodel_single_object(
                    self.__client.get(
                        refList["archival_objects"][0]["ref"]).json(),
                    self.__client)
        # otherwise it's a numeric id
        return jsonmodel_single_object(
            self.__client.get("repositories/" + self.repository +
                              "/archival_objects/" + str(id)).json(),
            self.__client)

    def agents(self, type, id=None):
        if id is None:
            return jsonmodel_muliple_object(
                self.__client.get("/agents/" + str(type) +
                                  "?all_ids=true").json(), self.__client,
                self.repository, type)
        else:
            return jsonmodel_single_object(
                self.__client.get("/agents/" + str(type) + "/" +
                                  str(id)).json(), self.__client)
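A usage sketch for this wrapper (baseurl, credentials, and ids are hypothetical):

aspace = ASpace(baseurl="http://localhost:8089",
                username="admin", password="admin", repository="2")
resource = aspace.resource(1)       # one resource, by numeric id
ao = aspace.archival_object(407)    # a 32-character string is treated as a ref_id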