Example #1
 def __init__(self, baseurl, username, password, repo_id):
     self.client = ASnakeClient(baseurl=baseurl,
                                username=username,
                                password=password)
     self.repo_id = repo_id
     if not self.client.authorize():
         raise ArchivesSpaceClientError(
             "Couldn't authenticate user credentials for ArchivesSpace")
     self.TYPE_LIST = {
         "family": ["agent_family", "agents/families"],
         "organization":
         ["agent_corporate_entity", "agents/corporate_entities"],
         "person": ["agent_person", "agents/people"],
         "component": [
             "archival_object",
             "repositories/{repo_id}/archival_objects".format(
                 repo_id=self.repo_id)
         ],
         "accession": [
             "accession", "repositories/{repo_id}/accessions".format(
                 repo_id=self.repo_id)
         ],
         "digital object": [
             "digital_objects",
             "repositories/{repo_id}/digital_objects".format(
                 repo_id=self.repo_id)
         ]
     }
Example #2
 def __init__(self, **config):
     # Connect to ASpace using .archivessnake.yml
     self.client = ASnakeClient(**config)
     self.client.authorize()
     m = re.match(r'\(v?(.+\))', self.client.get('version').text)
     if m:
         self.version = m[1]
     else:
         self.version = 'unknown version'
Example #3
def test_authorize():
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    toke = client.authorize()
    assert isinstance(toke, str)
    assert len(toke) == 64
    assert set(toke) <= set('0123456789abcdef')
    assert client.session.headers['X-ArchivesSpace-Session'] == toke
    # Try to get admin user info, should only work if we're authed as admin
    assert client.get('users/1').status_code == 200
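For contrast with the explicit-credentials examples elsewhere on this page, here is a minimal sketch of the default-config route the test above relies on: with no arguments, ASnakeClient reads connection details from a .archivessnake.yml file. The YAML values below are illustrative, not from the original.

# ~/.archivessnake.yml
#   baseurl: http://localhost:8089
#   username: admin
#   password: admin
from asnake.client import ASnakeClient

client = ASnakeClient()       # picks up the YAML config
token = client.authorize()    # returns the 64-character hex session token
print(token)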
Example #4
class ASpace():
    # this happens when you call ASpace()
    def __init__(self, **config):
        # Connect to ASpace using .archivessnake.yml
        self.client = ASnakeClient(**config)
        self.client.authorize()
        m = re.match(r'\(v?(.+\))', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.'''
        if not attr.startswith('_'):
            return JSONModelRelation("/{}".format(attr), params={"all_ids": True}, client = self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        return ResourceRelation({}, self.client)


    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    @property
    def users(self):
        '''returns a UserRelation.'''
        return UserRelation("/users", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

Note: while the route will return differently depending on how many records are returned,
this method deliberately flattens that out - it will _always_ return a generator, even if only
one record is found.'''
        params = {"eid": external_id}
        if record_types: params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            return []
        elif res.status_code == 300: # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client) for uri in IndexedSet(res.json()))
        elif res.status_code == 200: # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode("by-external-id call returned '{}'".format(res.status_code))

    def from_uri(self, uri):
        '''returns a JSONModelObject representing the URI passed in'''
        return wrap_json_object(self.client.get(uri).json(), self.client)
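A short usage sketch of the ASpace abstraction above, assuming default config and borrowing the repository and resource ids that appear in later examples (2 and 189); attribute access on the returned objects maps to fields of the underlying JSON.

from asnake.aspace import ASpace

aspace = ASpace()                      # connects via .archivessnake.yml
repo = aspace.repositories(2)          # one repository record
resource = repo.resources(189)         # one resource record, fetched lazily
print(resource.title, resource.uri)
for obj in aspace.by_external_id('some-external-id'):  # always a generator
    print(obj.uri)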
Example #5
    def __init__(self, **config):
        # Repository will default to 2 if not provided
        self.repository = config.pop('repository', '2')

        # Connect to ASpace using .archivessnake.yml
        self.__client = ASnakeClient(**config)
        self.__client.authorize()
        m = re.match(r'\(v?(.+\))', self.__client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'
Example #6
 def __init__(self):
     self.client = ASnakeClient()
     self.auth = self.client.authorize()
     self.all_schemas = self.get_schemas()
     #a list of all enumerations
     #COULD ALSO DO /config/enumerations/names/:enum_name
     self.all_enums = self.get_dynamic_enums()
     #gets the list of schema names
     self.schema_list = [key for key in self.all_schemas.keys()]
     #gets the type list
     self.type_list = list(
         set([
             k for value in self.all_schemas.values()
             for k, v in value.items()
         ]))
     self.jsonmodel_pattern = re.compile(
         r'(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
Example #7
from copy import deepcopy
from tqdm import tqdm
import asnake.logging as logging

logger = logging.get_logger('extent_update')  # logger name is illustrative

def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set([])

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index][
                            'extent_type'] = key
                        break
                    else:
                        pass
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
            else:
                pass
        except:
            pass

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
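Stripped of the extent-specific logic, the loop above is the standard read-modify-write pattern for the ArchivesSpace API: fetch all ids, GET each record, edit a deep copy, and POST back to the same URI only when something changed. A condensed sketch (the endpoint is a placeholder):

from copy import deepcopy
from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()
ids = client.get('repositories/2/resources', params={'all_ids': True}).json()
for rec_id in ids:
    uri = 'repositories/2/resources/{0}'.format(rec_id)
    record = client.get(uri).json()
    updated = deepcopy(record)
    # ...edit `updated` here...
    if updated != record:              # only POST when something changed
        client.post(uri, json=updated)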
Example #8
import json, time, runtime
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print ("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print ("The following container profiles have been added to ArchivesSpace:")
jsonfile = open("containerProfiles.json")
jsonfile = json.load(jsonfile)
for container_profile in jsonfile:
    post = client.post("/container_profiles", json=container_profile).json()
    print (post)

print ("You've just completed your first API POST.  Congratulations!")
Example #9
#!/usr/bin/python3
#~/anaconda3/bin/python
from asnake.client import ASnakeClient
import asnake.logging as logging

logging.setup_logging(filename="date_update.log", filemode="a")
logger = logging.get_logger("date_updating")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[backendURL]",
                             username="******",
                             password="******")
aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
Example #10
from secrets import *
from asnake.aspace import ASpace
from asnake.client import ASnakeClient


aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw)
client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()
resource_ids = ["/repositories/4/resources/4103", "/repositories/4/resources/4064", "/repositories/4/resources/2798",
                "/repositories/4/resources/1001", "/repositories/4/resources/4048", "/repositories/2/resources/633",
                "/repositories/2/resources/723", "/repositories/2/resources/748", "/repositories/2/resources/414"]
# "/repositories/5/resources/5071" - UA collection - Steve to check with Kat

for resource_id in resource_ids:
    unknown_count = 0
    uri_breakup = resource_id.split("/")
    res_id = uri_breakup[4]
    repo_id = uri_breakup[2]
    try:
        rl_repo = aspace.repositories(repo_id)
        resource_record = rl_repo.resources(res_id).tree
        resource_tree = resource_record.walk
        print(rl_repo.resources(res_id).json()["title"])
        for node in resource_tree:
            ao_json = client.get(node.uri).json()
            for instance in ao_json["instances"]:
                if "sub_container" in instance.keys():
                    indicators = []
                    types = []
                    for key, value in instance["sub_container"].items():
                        if "indicator_" in key:
Example #11
import json, csv, runtime
from asnake.client import ASnakeClient

# print instructions
print(
    'This script takes viafCorporateResults.csv and posts the organizations as corporate_entities to ArchivesSpace.'
)
input('Press Enter to continue...')

# This is where we connect to ArchivesSpace.
client = ASnakeClient()
client.authorize()  # login, using default values

targetFile = 'viafCorporateResults.csv'

csv = csv.DictReader(open(targetFile))

orgList = []
for row in csv:
    orgRecord = {}
    # changed this since ASpace doesn't come with 'viaf' as an option for source out of the box.
    source = 'naf' if row.get('lc') is not None else 'local'
    orgRecord['names'] = [{
        'primary_name': row['result'],
        'sort_name': row['result'],
        'source': source,
        'authority_id': row['lc']
    }]

    post = client.post('/agents/corporate_entities', json=orgRecord).json()
    print(post, '\n')
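viafCorporateResults.csv is assumed to have a result column holding the VIAF-preferred name and an lc column holding an LC authority id where one was found, e.g. (values illustrative):

result,lc
Example University. Library,n00000000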
Example #12
#Then, checks to see if Resources have finding_aid_status = 'published' in ASpace
#If so, exports EADs to specified location using EADID as filename


import io
import csv
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

aspace = ASpace(baseurl="[ASPACE BACKEND URL]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE BACKEND URL]",
                             username="******",
                             password="******")

aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

destination = 'C:/users/nh48/desktop/as_exports_temp/'

input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_records_csv = input("Path to CSV Output: ")


#If Resource finding aid status = published, export the EAD for the resource, save to folder
def if_published_export_EAD(resource_uri):
Example #13
import requests, csv, json, time
from urllib.parse import quote
from asnake.client import ASnakeClient

viafURL = 'http://viaf.org/viaf/search?query=local.personalNames+%3D+%22'

# # print instructions
print(
    'This script queries existing person agent records in ArchivesSpace with the source of "viaf" and updates them with the proper/updated name form from VIAF (if one exists) and appends the VIAF URI to the existing records.  Please note: This is a PROOF OF CONCEPT script, and should not be used in production settings without thinking this through!'
)
input('Press Enter to continue...')

# This is where we connect to ArchivesSpace.  See authenticate.py
client = ASnakeClient()
client.authorize()  # login, using default values

# search AS for person agents with source "viaf"
query = json.dumps({
    "query": {
        "jsonmodel_type":
        "boolean_query",
        "op":
        "AND",
        "subqueries": [{
            "jsonmodel_type": "field_query",
            "field": "primary_type",
            "value": "agent_person",
            "literal": True
        }, {
            "jsonmodel_type": "field_query",
            "field": "source",
Example #14
class ArchivesSpaceClient(object):
    """Client to get and receive data from ArchivesSpace."""
    def __init__(self, baseurl, username, password, repo_id):
        self.client = ASnakeClient(baseurl=baseurl,
                                   username=username,
                                   password=password)
        self.repo_id = repo_id
        if not self.client.authorize():
            raise ArchivesSpaceClientError(
                "Couldn't authenticate user credentials for ArchivesSpace")
        self.TYPE_LIST = {
            "family": ["agent_family", "agents/families"],
            "organization":
            ["agent_corporate_entity", "agents/corporate_entities"],
            "person": ["agent_person", "agents/people"],
            "component": [
                "archival_object",
                "repositories/{repo_id}/archival_objects".format(
                    repo_id=self.repo_id)
            ],
            "accession": [
                "accession", "repositories/{repo_id}/accessions".format(
                    repo_id=self.repo_id)
            ],
            "digital object": [
                "digital_objects",
                "repositories/{repo_id}/digital_objects".format(
                    repo_id=self.repo_id)
            ]
        }

    def send_request(self, method, url, data=None, **kwargs):
        """Base method for sending requests to ArchivesSpace."""
        r = getattr(self.client, method)(url, data=json.dumps(data), **kwargs)
        if r.status_code == 200:
            return r.json()
        else:
            if r.json()["error"].get("id_0"):
                """Account for indexing delays by bumping up to the next accession number."""
                id_1 = int(data["id_1"])
                id_1 += 1
                data["id_1"] = str(id_1).zfill(3)
                return self.create(data, "accession")
            raise ArchivesSpaceClientError(
                "Error sending {} request to {}: {}".format(
                    method, url,
                    r.json()["error"]))

    def retrieve(self, url, **kwargs):
        return self.send_request("get", url, **kwargs)

    def create(self, data, type, **kwargs):
        return self.send_request("post", self.TYPE_LIST[type][1], data,
                                 **kwargs)

    def update(self, uri, data, **kwargs):
        return self.send_request("post", uri, data, **kwargs)

    def get_or_create(self, type, field, value, last_updated, consumer_data):
        """
        Attempts to find and return an object in ArchivesSpace.
        If the object is not found, creates and returns a new object.
        """
        model_type = self.TYPE_LIST[type][0]
        endpoint = self.TYPE_LIST[type][1]
        query = json.dumps({
            "query": {
                "field": field,
                "value": value,
                "jsonmodel_type": "field_query"
            }
        })
        try:
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": model_type,
                                    "aq": query
                                }).json()
            if len(r["results"]) == 0:
                r = self.client.get(endpoint,
                                    params={
                                        "all_ids": True,
                                        "modified_since": last_updated - 120
                                    }).json()
                for ref in r:
                    r = self.client.get("{}/{}".format(endpoint, ref)).json()
                    if r[field] == str(value):
                        return r["uri"]
                return self.create(consumer_data, type).get("uri")
            return r["results"][0]["uri"]
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error finding or creating object in ArchivesSpace: {}".format(
                    e))

    def next_accession_number(self):
        """
        Finds the next available accession number by searching for accession
        numbers with the current year, and then incrementing.

        Assumes that accession numbers are in the format YYYY NNN, where YYYY
        is the current year and NNN is a zero-padded integer.
        """
        current_year = str(date.today().year)
        try:
            query = json.dumps({
                "query": {
                    "field": "four_part_id",
                    "value": current_year,
                    "jsonmodel_type": "field_query"
                }
            })
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": "accession",
                                    "sort": "identifier desc",
                                    "aq": query
                                }).json()
            number = "1"
            if r.get("total_hits") >= 1:
                if r["results"][0]["identifier"].split("-")[0] == current_year:
                    id_1 = int(r["results"][0]["identifier"].split("-")[1])
                    id_1 += 1
                    number = str(id_1).zfill(3)
            return ":".join([current_year, number.zfill(3)])
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error retrieving next accession number from ArchivesSpace: {}"
                .format(e))
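A hedged usage sketch of the wrapper class above; the baseurl, credentials, and accession fields are placeholders, and from datetime import date is assumed as in the class itself.

client = ArchivesSpaceClient('http://localhost:8089', 'admin', 'admin', repo_id=2)
number = client.next_accession_number()      # e.g. '2024:001'
accession = client.create({
    'jsonmodel_type': 'accession',
    'title': 'Example accession',
    'id_0': number.split(':')[0],
    'id_1': number.split(':')[1],
    'accession_date': str(date.today())
}, 'accession')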
Example #15
from datetime import datetime
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

import asnake.logging as logging
logging.setup_logging(level='DEBUG',
                      filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

rl_repo = aspace.repositories(2)

#input is output of SQL query above
input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_resources_csv = input("Path to CSV Output: ")


#Test if more than one direct child of Resource Object
Example #16
import re
from asnake.aspace import ASpace
from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()

aspace = ASpace()
repo = aspace.repositories(2)

collection = repo.resources(189)
for date in collection.dates:
    date_type = ''
    date_expression = ''
    date_begin = ''
    date_end = ''

    try:
        date_type = date.date_type
        date_expression = date.expression
        date_begin = date.begin
        date_end = date.end
    except KeyError:
        pass

    if re.match(r'^\d{4}$', date_expression) and date_type == 'inclusive':
        print('====================')
        print(collection.id_0 + '\t' + collection.title + '\t' +
              collection.uri)
        print(date_expression + '\t' + date_type + '\t' + date_begin + '\t' +
              date_end)
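An equivalent guard that avoids the try/except by reading the record's JSON directly (JSONModelObjects expose the raw record via .json()):

for d in collection.json().get('dates', []):
    expression = d.get('expression', '')
    if re.match(r'^\d{4}$', expression) and d.get('date_type') == 'inclusive':
        print(expression, d.get('begin', ''), d.get('end', ''))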
Example #17
                                        + colID.replace(".", "-") + "aspace_" +
                                        refID + "?format=json",
                                        verify=False)
                                    if arclight.status_code == 200:
                                        parentList = []
                                        itemData = arclight.json()
                                        for parent in itemData["response"][
                                                "document"]["parent_ssm"][1:]:
                                            parentList.append(
                                                parent.split("_")[1])
                                        parents = "|".join(parentList)
                                    else:
                                        #for new objects not yet indexed in ArcLight
                                        if tree is None:
                                            from asnake.client import ASnakeClient
                                            client = ASnakeClient()
                                            client.authorize()

                                            ref = client.get(
                                                "repositories/2/find_by_id/archival_objects?ref_id[]="
                                                + refID).json()
                                            item = client.get(
                                                ref["archival_objects"][0]
                                                ["ref"]).json()
                                            resource = client.get(
                                                item["resource"]
                                                ["ref"]).json()
                                            tree = client.get(resource["tree"]
                                                              ["ref"]).json()
                                        else:
                                            ref = client.get(
Example #18
            prefix = '...'

        if position[1] + char_length > len(text):
            end = len(text)
            postfix = ''
        else:
            end = position[1] + char_length
            postfix = '...'

        context.append(prefix + text[start:end] + postfix)
    context = ' | '.join(context)

    return term_count, context


client = ASnakeClient()

primary_types = '/(resource|archival_object|accession|digital_object)/'
results_file = 'term_audit_results.csv'

# Repo list can either be a command line argument or prompted
if len(sys.argv) == 2:
    repos = sys.argv[1]
elif len(sys.argv) < 2:
    repos = input('Enter repository number (e.g., 1): ')
else:
    sys.exit('Run script again with valid repo number(s)')

if repos:
    repos = re.split(r'\D+', repos)
    repos = list(filter(None, repos))
Example #19
#!/usr/bin/env python

from asnake.client import ASnakeClient
import pandas as pd
import datetime
from tqdm import tqdm

client = ASnakeClient(baseurl='XXX', username='******', password='******')
client.authorize()

accession_records = client.get('repositories/2/accessions',
                               params={
                                   'all_ids': True
                               }).json()

unit_column = []
extent_column = []
collection_no_column = []
created_column = []

start = datetime.datetime.strptime('2017-07-01', '%Y-%m-%d')
end = datetime.datetime.strptime('2018-07-31', '%Y-%m-%d')

for record in tqdm(accession_records):
    accession_uri = client.get('repositories/2/accessions/' +
                               str(record)).json()
    create_date = accession_uri['create_time'][0:10]
    date_parsed = datetime.datetime.strptime(create_date, '%Y-%m-%d')
    if start <= date_parsed <= end:
        coll_num = accession_uri['id_0']
        extents = accession_uri['extents']
Example #20
import sys
import dacs
import time
import csv
import shutil

import requests
import json
from asnake.client import ASnakeClient
import asnake.logging as logging


print ("\tConnecting to ArchivesSpace")

client = ASnakeClient(baseurl="http://localhost:8092",
                      username="******",
                      password="******")
client.authorize()

logging.setup_logging(stream=sys.stdout, level='INFO')

with open('items_output.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            line_count += 1

  
        title = str(row['Title'])
        identifier = str(row['Identifier'])
Example #21
    total = len(filelist)
    count = 0
    for f in filelist:
        f = f.replace('\n', '')
        count += 1
        makeRow(getAo(f),f)
        print('Row added! - ' + str(count) + "/" + str(total))

# enter aspace login info
config = configparser.ConfigParser()
config.read('local_settings.cfg')
baseurl = config.get('ArchivesSpace', 'baseURL')
user = input('ArchivesSpace username: ')
password = input('ArchivesSpace password: ')

Example #22
 def default_client(cls):
     '''return existing ASnakeClient or create, store, and return a new ASnakeClient'''
     if not cls.__default_client:
         from asnake.client import ASnakeClient
         cls.__default_client = ASnakeClient()
     return cls.__default_client
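The deferred import and class-level cache mean every caller shares one lazily created client; the in-method import likely dodges a circular import or defers configuration until first use. A self-contained sketch of the same pattern, with a hypothetical holder class:

class ClientHolder:
    __default_client = None

    @classmethod
    def default_client(cls):
        '''return existing ASnakeClient or create, store, and return a new one'''
        if not cls.__default_client:
            from asnake.client import ASnakeClient   # deferred import
            cls.__default_client = ASnakeClient()
        return cls.__default_client

assert ClientHolder.default_client() is ClientHolder.default_client()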
Example #23
from asnake.client import ASnakeClient
import re
import logging
from secrets import *

id_field_regex = re.compile(r"(^id_+\d)")
logging.basicConfig(filename="unpublish.log", level=logging.INFO)
as_username = input("Enter your ArchivesSpace username: ")
as_password = input("Enter your ArchivesSpace password: ")
client = ASnakeClient(baseurl=as_api, username=as_username, password=as_password)
if client.authorize():
    repos = client.get("repositories").json()
    for repo in repos:
        print(repo["name"])
        repo_id = repo["uri"].split("/")[2]
        resources = client.get("repositories/{}/resources".format(repo_id), params={"all_ids": True}).json()
        for resource_id in resources:
            resource = client.get("repositories/{}/resources/{}".format(repo_id, resource_id)).json()
            combined_id = ""
            for field, value in resource.items():
                id_match = id_field_regex.match(field)
                if id_match:
                    combined_id += value + "-"
            combined_id = combined_id[:-1]
            if "[CLOSED]" in combined_id:
                logging.info("Unpublishing {} from {}".format(combined_id, repo["name"]))
                print(combined_id)
                all_uris = client.get("repositories/{}/resources/{}/ordered_records".format(repo_id,
Example #24
import sys
import os
import dacs
import time
import csv
import shutil
from git import Repo
from datetime import datetime
from subprocess import Popen, PIPE, STDOUT
import asnake.logging as logging
from asnake.client import ASnakeClient
#from asnake.aspace import ASpace

print (str(datetime.now()) + " Exporting Records from ArchivesSpace")

print ("\tConnecting to ArchivesSpace")
client = ASnakeClient()
client.authorize()
logging.setup_logging(stream=sys.stdout, level='INFO')

#repo = ASpace().repositories(2)

__location__ = os.path.dirname(os.path.realpath(__file__))

lastExportTime = time.time()
try:
    timePath = os.path.join(__location__, "lastExport.txt")
    with open(timePath, 'r') as timeFile:
        startTime = int(timeFile.read().replace('\n', ''))
        timeFile.close()
except:
    startTime = 0
Example #25
import os, json, requests
from asnake.client import ASnakeClient

def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):

    client = ASnakeClient()
    client.authorize()
    
    collection = []
    page = 1

    outDir = "/media/SPE/uploads"
    
    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
        outDesc = os.path.join(outDir, "desc_" + refID + ".json")
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
        outDesc = os.path.join(outDir, "desc_" + colID.replace(".", "-") + ".json")
    if filter:
        url = url + "&" + filter
    
    print (descriptionURL + "?format=json")
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print (r.status_code)
    with open(outDesc, 'w', encoding='utf-8', newline='') as f:
        json.dump(r.json()["response"], f, ensure_ascii=True, indent=4)
        

    def getPage(page, collection, url):

        r = requests.get(url + "&page=" + str(page), verify=False)
        print (r.status_code)
        for item in r.json()["response"]["docs"]:

            obj = {}
            obj["title"] = item["title_tesim"][0]
            obj["date"] = item["date_created_tesim"][0]
            #print (item)
            ref_id = item["archivesspace_record_tesim"][0]
            obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
            obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]
            
            record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
            ao = client.get(record["archival_objects"][0]["ref"]).json()
            print (ao["ref_id"])
            dateNormal = ao["dates"][0]["begin"]
            if "end" in ao["dates"][0].keys():
                dateNormal = dateNormal + "/" + ao["dates"][0]["end"]
            if "undated" in ao["dates"][0]["expression"].lower():
                obj["date_normal"] = "9999"
            else:
                obj["date_normal"] = dateNormal
            
            if date:
                if not obj["date"].lower() == "undated":
                    if obj["date"].lower().startswith("ca."):
                        objDate = obj["date"].split(" ")[1]
                    else:
                        if "-" in obj["date"]:
                            objDate = obj["date"].split("-")[0]
                        else:
                            objDate = obj["date"].split(" ")[0]
                    print (objDate)
                    try:
                        if "-" in date:
                            if int(objDate) >= int(date.split("-")[0]) and int(objDate) <= int(date.split("-")[1]):
                                collection.append(obj)
                        else:
                            if int(objDate) < int(date):
                                collection.append(obj)
                    except:
                        print ("Date Error: " + objDate)
            else:
                collection.append(obj)
        if r.json()["response"]["pages"]["last_page?"] == False:
            getPage(page + 1, collection, url)

    getPage(page, collection, url)
        
        
    #print (collection)
    sortedTitle = sorted(collection, key = lambda i: i['title'].split(" ")[0])
    sortedCollection = sorted(sortedTitle, key = lambda i: i['date_normal'].split(" ")[0])
    print (len(sortedCollection))

    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
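The find_by_id lookup in getPage can also be written with a params dict, letting requests handle the URL encoding (the ref id below is a placeholder):

record = client.get('repositories/2/find_by_id/archival_objects',
                    params={'ref_id[]': 'a1b2c3d4e5'}).json()
ao = client.get(record['archival_objects'][0]['ref']).json()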
Example #26
#!/usr/bin/env python

from asnake.client import ASnakeClient
from tqdm import tqdm
import pandas as pd
import datetime
import re

client = ASnakeClient(baseurl='XXX',
                      username='******',
                      password='******')
client.authorize()


def pattern_matcher(x):
    """Match a resource title that ends with a comma."""
    pattern_match = re.compile(r'^.*\>$|^\<.*$|^.*\<.*$|\>')
    result = pattern_match.match(x)
    return result


def data_framer(rec_ids, rec_index, rec_titles):
    """Create a DataFrame from generated lists."""
    rec_df = pd.DataFrame()
    rec_df['Resource_no'] = rec_index
    rec_df['Identifier'] = rec_ids
    rec_df['Collection_Title'] = rec_titles
    indexed_rec_df = rec_df.set_index(['Identifier'])
    return indexed_rec_df

Example #27
import json, csv, runtime
from asnake.client import ASnakeClient
# print instructions
print(
    'This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.'
)
input('Press Enter to connect to ArchivesSpace and post those barcodes...')

# This is where we connect to ArchivesSpace.  See authenticate.py
client = ASnakeClient()
client.authorize()

# open csv and generate dict
reader = csv.DictReader(open('barcodes.csv'))

# GET each top_container listed in top_containers and add to records
print('The following barcodes have been updated in ArchivesSpace:')
for row in reader:
    uri = row['uri']
    container = client.get(uri).json()
    container['barcode'] = row['real']
    post = client.post(uri, json=container).json()
    print(post)
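barcodes.csv is assumed to pair each top container's uri with its real barcode, e.g. (values illustrative):

uri,real
/repositories/2/top_containers/123,31234000123456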
Example #28
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    # add the handlers to the logger
    logger.addHandler(fh)
    logger.addHandler(ch)

    config = configparser.ConfigParser()
    config.read('settings.ini')
    args.config = config

    try:
        client = ASnakeClient(
            baseurl=config['aspace_credentials']['api_host'],
            username=config['aspace_credentials']['username'],
            password=config['aspace_credentials']['password'])
    except KeyError as e:
        logger.error('settings.ini does not exist or is invalid')
        raise e

    # Simple sanity check to make sure client is setup
    try:
        resp = client.get('/')
        if not resp.ok:
            resp.raise_for_status()
    except:
        logger.error('Unable to contact ArchivesSpace instance at %s' %
                     config['aspace_credentials']['api_host'])
        raise APIContactError(
            'Unable to contact ArchivesSpace instance at %s' %
Example #29
def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()

    data_list = []

    print('Compiling resource records from API...')

    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        try:
            extents = res_record['extents']
            for x in extents:
                if x['extent_type'] == 'megabytes':
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except:
            pass

    linear_ms = 0
    linear_ua = 0
    gb_ms = 0
    gb_ua = 0
    cubic_ms = 0
    cubic_ua = 0

    print('Analyzing extents in resource data...')

    for entry in data_list:
        try:
            if entry['id'].startswith(
                    'MS') and entry['units'] in catalog['linear']:
                linear_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['linear']:
                linear_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['gb']:
                gb_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['gb']:
                gb_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['cubic']:
                cubic_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['cubic']:
                cubic_ua += float(entry['amount'])
            else:
                pass
        except:
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            if (exception.lower()).strip() == 'continue':
                pass
            elif (exception.lower()).strip() == 'stop':
                quit()

    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }

    print('Generating report as JSON...')

    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
Example #30
import csv, json

from asnake.client import ASnakeClient
client = ASnakeClient()
client.authorize()


def startCSV(CSV):
    '''Creates the CSV with field names and writes header'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'w', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writeheader()


def addCSV(CSV, lock, ind, uri, coll_id, ser_id):
    '''Opens CSV, appends row'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'a', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writerow({
            'lock_version': lock,
            'indicator': ind,
            'uri': uri,
            'collection_identifier': coll_id,
            'series_identifier': ser_id
        })
Example #31
import os, sys, json

def main(ID, path=None, accession=None):

    if path == None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession == None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession == None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP