Example #1
    def __init__(self, **config):
        global log

        if 'config_file' in config:
            self.config = conf.ASnakeConfig(config['config_file'])
        else:
            self.config = conf.ASnakeConfig()

        self.config.update(config)

        # Only a subset of logging config can be supported in config
        # For more complex setups (configuring output format, say),
        # configure logs in Python code prior to loading
        #
        # Properties supported are:
        #    filename, filemode, level, and default_config
        # Default config can be any of the default configurations exposed in logging
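        #
        # For example (a sketch; 'ASnakeClient' is a stand-in for this
        # class, whose name is not shown in this excerpt):
        #     ASnakeClient(logging_config={'filename': 'asnake.log',
        #                                  'filemode': 'a',
        #                                  'level': 'INFO'})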
        if not log:
            if not logging.already_configured and 'logging_config' in self.config:
                if 'default_config' in self.config['logging_config']:
                    default_logging_config = logging.configurations.get(
                        self.config['logging_config']['default_config'])
                    del self.config['logging_config']['default_config']
                else:
                    default_logging_config = None

                logging.setup_logging(config=default_logging_config,
                                      **self.config['logging_config'])

            log = logging.get_logger(__name__)

        if not hasattr(self, 'session'): self.session = Session()
        self.session.headers.update({
            'Accept': 'application/json',
            'User-Agent': 'ArchivesSnake/0.1'
        })
        log.debug("client created")
#     archival_object ao
#     LEFT JOIN
#         resource ON ao.root_record_id = resource.id
# WHERE
#     ao.title LIKE '%Container List%'

import csv
import io
import time
from datetime import datetime
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

import asnake.logging as logging
logging.setup_logging(level='DEBUG',
                      filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]",
                username="******",
                password="******")

# Log into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
# Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])
#!/usr/bin/env python

import re
from tqdm import tqdm
import pandas as pd
import datetime
import asnake.logging as logging
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
logging.setup_logging(filename='comma_end_logfile_funct_' + str(today_date) +
                      '.log')
logger = logging.get_logger('comma_end_changes_log')
from asnake.client import ASnakeClient

client = ASnakeClient(baseurl='xxx', username='******', password='******')
client.authorize()


def pattern_matcher(x):
    """Match a resource title that ends with a comma."""
    pattern_match = re.compile(r'^.*,$')
    result = pattern_match.match(x)
    return result
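
# A quick illustration of pattern_matcher (hypothetical titles):
#     pattern_matcher('Correspondence,')  # returns a match object
#     pattern_matcher('Correspondence')   # returns None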


def extract_resources(y):
    """Look for ArchivesSpace resources that match pattern_matcher, then save them in a list and generate a CSV report."""
    if y == 'resources':
        obj_type = 'resource_records'
        all_records = client.get('repositories/2/resources',
                                 params={'all_ids': True}).json()
Example #4

def dictify_sheet(sheet):
    rows = iter(sheet)
    rowmap = [cell.value.strip() for cell in next(rows) if cell.value]

    for row in rows:
        out = {}
        for idx, header in enumerate(rowmap):
            out[header] = cell_value(row[idx], header)
        yield out
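
# Sketch of how dictify_sheet is consumed (assumes an openpyxl-style sheet
# whose first row holds column headers; cell_value is defined elsewhere in
# this script). Each yielded dict maps header text to a converted cell value:
#     for record in dictify_sheet(workbook.active):
#         print(record['Container Record ID'], record['Barcode'])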


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('update_containers')

    aspace = ASpace()

    log.info('start_ingest')

    for row in dictify_sheet(next(iter(args.excel))):
        container = None  # defined up front so the error log below has a value
        try:
            container = aspace.repositories(args.repo_id).top_containers(
                row['Container Record ID']).json()
            container['barcode'] = row['Barcode']
        except (AttributeError, RuntimeError) as e:
            log.error('FAILED update_container',
                      response=container,
                      data=row,
                      error=str(e))
#!/usr/bin/python3
#~/anaconda3/bin/python
from asnake.client import ASnakeClient
import asnake.logging as logging

logging.setup_logging(filename="date_update.log", filemode="a")
logger = logging.get_logger("date_updating")

# Log into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[backendURL]",
                             username="******",
                             password="******")
aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
    except Exception as e:
        # minimal handler (assumption) so the loop continues past failures
        print("Error updating resource " + str(resource) + ": " + str(e))
Example #6
#!/usr/bin/env python
import json, glob, datetime, re, os
import asnake.logging as logging

# set up logging. mutter profanity.

logname = ('logs/uploading_updated_resources_' +
           datetime.datetime.now().strftime('%Y-%m-%d-T-%H-%M') + '.log')

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger('upload_updated_resources')

# add ASnake Client
from asnake.client import ASnakeClient
# validate ASnake client
client = ASnakeClient()
client.authorize()


def upload_updated_resources(file_directory, file_prefix, repo_num):
    '''Change into the user-supplied directory, glob its JSON files, and derive each resource number by stripping the supplied filename prefix before posting the file back to ArchivesSpace.'''
    filename_strip = '.*' + file_prefix
    os.chdir(file_directory)
    resources = glob.glob('*.json')
    for file in resources:
        res_num = file[:-len('.json')]  # strip the suffix (rstrip removes characters, not a suffix)
        res_num = re.sub(filename_strip, '', res_num)
        with open(file) as f:
            resource = json.load(f)
        response = client.post('repositories/' + repo_num + '/resources/' +
                               res_num,
                               json=resource)
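
# Example invocation (hypothetical directory, prefix, and repository number):
#     upload_updated_resources('/path/to/updated_json', 'resource_', '2')
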
def main(ID, path=None, accession=None):

    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if ID.lower() != resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
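        # For instance, an ingest spanning 1990-2001 would yield (illustrative
        # values): {"jsonmodel_type": "date", "date_type": "inclusive",
        # "label": "creation", "begin": "1990", "end": "2001"}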
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP
import os
import sys
import time
import csv
import shutil
from git import Repo
from datetime import datetime
from subprocess import Popen, PIPE, STDOUT
import asnake.logging as logging
from asnake.client import ASnakeClient
#from asnake.aspace import ASpace

print(str(datetime.now()) + " Exporting Records from ArchivesSpace")

print("\tConnecting to ArchivesSpace")
client = ASnakeClient()
client.authorize()
logging.setup_logging(stream=sys.stdout, level='INFO')

#repo = ASpace().repositories(2)

__location__ = os.path.dirname(os.path.realpath(__file__))

lastExportTime = time.time()
try:
    timePath = os.path.join(__location__, "lastExport.txt")
    with open(timePath, 'r') as timeFile:
        startTime = int(timeFile.read().replace('\n', ''))
except (OSError, ValueError):
    startTime = 0
humanTime = datetime.utcfromtimestamp(startTime).strftime('%Y-%m-%d %H:%M:%S')
print ("\tChecking for collections updated since " + humanTime)
from tqdm import tqdm
import datetime
from copy import deepcopy
from asnake.client import ASnakeClient
import asnake.logging as logging
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
logging.setup_logging(filename='extent_type_changer_' + str(today_date) +
                      '.log')
logger = logging.get_logger('extent_type_changes_log')


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }
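    # Each human-entered extent_type on the right is normalized to the
    # controlled value on the left, e.g. 'Linear Feet', 'linear ft.', and
    # 'Linear Foot' all become 'linear_feet'.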

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set()

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
        except KeyError:
            # minimal handler (assumption): skip records with no extents
            continue
Example #10
#!/usr/bin/env python3

import csv
import configparser
import json
import requests
import time

from asnake.client import ASnakeClient
import asnake.logging as logging
from asnake.aspace import ASpace
from configparser import ConfigParser, ExtendedInterpolation

logging.setup_logging(filename='logging.txt', level='INFO', filemode='a')
logger = logging.get_logger()

config = configparser.ConfigParser()
config.read('local_settings.cfg')
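
# local_settings.cfg is expected to supply the keys read below
# (illustrative values):
#
#     [ArchivesSpace]
#     baseURL = https://aspace.example.edu/api
#     user = admin
#     password = secret
#     repository = 2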

aspace = ASpace(baseurl=config['ArchivesSpace']['baseURL'],
                username=config['ArchivesSpace']['user'],
                password=config['ArchivesSpace']['password'])
repo = aspace.repositories(config['ArchivesSpace']['repository'])


def get_collection():
    """Returns a collection corresponding to an ID provided by user input"""
    try:
        identifier = input('Resource ID: ')
        return repo.resources(int(identifier))
    except Exception as e:
        # minimal handler (assumption): report the failure; returns None
        print('Error retrieving resource: {0}'.format(e))
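
# Example use (interactive): get_collection() prompts for a Resource ID and
# returns the matching resource record from the configured repository.
#     collection = get_collection()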