def __init__(self, **config):
    '''Configure the client, set up logging once, and prepare the HTTP session.

    All keyword arguments are merged into ``self.config``. When ``config_file``
    is one of them, its value is passed to ``conf.ASnakeConfig``; otherwise the
    configuration object is created with no arguments.

    Logging is only touched while the module-level ``log`` is falsy. In that
    case, if the module imported here as ``logging`` reports that it is not
    already configured and the configuration contains ``logging_config``,
    ``logging.setup_logging`` is called with the entries of that mapping as
    keyword arguments. A ``default_config`` entry is treated specially: it is
    looked up in ``logging.configurations``, passed as ``config=``, and deleted
    from the mapping. ``log`` is then rebound to a logger named after this
    module.

    A ``Session`` is created only if the instance has no ``session`` attribute
    yet; its headers are then updated with the JSON ``Accept`` header and the
    client ``User-Agent``.
    '''
    global log

    self.config = (conf.ASnakeConfig(config['config_file'])
                   if 'config_file' in config else conf.ASnakeConfig())
    self.config.update(config)

    # Only a subset of logging config can be supported in config.
    # For more complex setups (configuring output format, say),
    # configure logs in Python code prior to loading.
    #
    # Properties supported are:
    #    filename, filemode, level, and default_config
    # Default config can be any of the default configurations exposed in logging.
    if not log:
        if not logging.already_configured and 'logging_config' in self.config:
            preset = None
            if 'default_config' in self.config['logging_config']:
                preset_name = self.config['logging_config']['default_config']
                preset = logging.configurations.get(preset_name)
                # Removed so the key is not forwarded as a keyword argument below.
                del self.config['logging_config']['default_config']

            logging.setup_logging(config=preset,
                                  **self.config['logging_config'])

        log = logging.get_logger(__name__)

    if not hasattr(self, 'session'):
        self.session = Session()

    default_headers = {
        'Accept': 'application/json',
        'User-Agent': 'ArchivesSnake/0.1',
    }
    self.session.headers.update(default_headers)
    log.debug("client created")
return { k: (json.dumps(v) if k in ('ao_ids', 'component_ids') else v) for k, v in row.items() } def chain_aos(for_aos): for row in for_aos: yield from row['ao_ids'] if __name__ == '__main__': args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('map_box_numbers') log.info('start') aspace = ASpace() log.info('aspace_connect') # note: fields match up to fields in MySQL query plus additional field for in_fields = [ 'container_id', 'barcode', 'component_ids', 'ao_ids', 'shared' ] out_fields = ( *in_fields[0:2], 'proposed_box_number', *in_fields[2:], )
#!/usr/bin/env python import json, glob, datetime, re, os import asnake.logging as logging # set up logging. mutter profanity. logname = 'logs/uploading_updated_resources_' + datetime.datetime.now( ).strftime('%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger('upload_updated_resources') # add ASnake Client from asnake.client import ASnakeClient # validate ASnake client client = ASnakeClient() client.authorize() def upload_updated_resources(file_directory, file_prefix, repo_num): '''This moves to, then scans the entire directory which the user has supplied and globs JSON files. It gets the resource number from using the prefix which the person supplied.''' filename_strip = '.*' + file_prefix os.chdir(file_directory) resources = glob.glob('*.json') for file in resources: res_num = file.rstrip('.json') res_num = re.sub(filename_strip, '', res_num) resource = json.load(open(file)) response = client.post('repositories/' + repo_num + '/resources/' + res_num,
from tqdm import tqdm import datetime from copy import deepcopy from asnake.client import ASnakeClient import asnake.logging as logging today_date = datetime.datetime.today().strftime('%Y-%m-%d') logging.setup_logging(filename='extent_type_changer_' + str(today_date) + '.log') logger = logging.get_logger('extent_type_changes_log') def main(): client = ASnakeClient(baseurl='XXXX', username='******', password='******') client.authorize() changes = { 'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'], 'cubic_feet': ['Cubic Feet'], 'gigabytes': ['Gigabytes'] } res_records = (client.get('repositories/2/resources', params={'all_ids': True})).json() found_records = set([]) for record in tqdm(res_records): rec_uri = 'repositories/2/resources/{0}'.format(record) res_record = client.get(rec_uri).json() updated_record = deepcopy(res_record) try: extents = res_record['extents']
return instance def populate_skiplists(log_entries): for entry in log_entries: if entry['event'] in {'create_container', 'skip_container'}: temp_id2id[entry['temp_id']] = entry['id'] if entry['event'] in {'update_ao', 'skip_ao'}: ao_processed.add(entry.get('ao_id', entry.get( 'id', None))) # the 'id' key was used in early versions of the script if __name__ == '__main__': args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('import_container_data') log.info('start_ingest') aspace = ASpace() # Global variables referenced from local functions temp_id2id = {} ao_processed = set() failures = set() if args.skip_via_log: with open(expanduser(args.skip_via_log)) as f: populate_skiplists(map(json.loads, f)) ao_sheet, container_sheet = args.excel # containers for c_row in dictify_sheet(container_sheet):
#!/usr/bin/env python import re from tqdm import tqdm import pandas as pd import datetime import asnake.logging as logging today_date = datetime.datetime.today().strftime('%Y-%m-%d') logging.setup_logging(filename='comma_end_logfile_funct_' + str(today_date) + '.log') logger = logging.get_logger('comma_end_changes_log') from asnake.client import ASnakeClient client = ASnakeClient(baseurl='xxx', username='******', password='******') client.authorize() def pattern_matcher(x): """Match a resource title that ends with a comma.""" pattern_match = re.compile(r'^.*\,$') result = pattern_match.match(x) return result def extract_resources(y): """Look for ArchivesSpace resources that match pattern_matcher, then save them in a list and generate a CSV report.""" if y == 'resources': obj_type = 'resource_records' all_records = client.get('repositories/2/resources', params={ 'all_ids': True
#!/usr/bin/env python import glob, json, datetime # Setting up the log import asnake.logging as logging logname = 'logs/new_subject_upload_' + datetime.datetime.now().strftime( '%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger("upload-new-subjects") # Bring in the client to work at a very basic level. from asnake.client import ASnakeClient # Create and authorize the client client = ASnakeClient() client.authorize() # actually run the upload. Simply sets up the log, gathers the JSON, and then uploads each. This is a simple post because it's creating new ones and doesn't need any kind of number. def upload_json_as_new_subjects(file_dir, batch): logger.info("upload_start", batch_name=batch) subjects = glob.glob( file_dir + "/" + "*.json" ) # globs all the .json objects in the directory where the files are located. for file in subjects: subject = json.load(open(file)) response = client.post('subjects', json=subject).json()
help='path to print log to') ap.add_argument('--green_containers', help="Excel file with container barcodes of interest") def top_container_barcodes(excel_filename): xl = load_workbook(excel_filename) rows = iter(xl.worksheets[0]) next(rows) # skip header return ",".join(f"'{row[0].value}'" for row in rows) if __name__ == '__main__': args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('green_barcodes_cid2bc_and_components') log.info('start') conn = pymysql.connect(host=args.host, user=args.user, database=args.database, cursorclass=pymysql.cursors.DictCursor, password=getpass( "Please enter MySQL password for {}: ".format( args.user))) log.info('mysql_connect') with open('green_cid2bc_and_components.csv', 'w') as gc2bac_report, conn: db = conn.cursor()
log.info('ao_update_failed', ao=res.json(), status_code=res.status_code) failures[barcode].append(ao_info) else: log.info('create_tc_failed', tc=res.json(), status_code=res.status_code) for ao_info in ao_infos: failures[barcode].append(ao_info) if __name__ == "__main__": args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('barcodes_report') log.info('start') aspace = ASpace() log.info('aspace_connect') bc_csv_fields = [ 'original_barcode', 'original_container_id', 'location_id', 'new_barcode', 'new_container_id', 'box_number', 'component_id', 'ao_id' ] loc_csv_fields = ['location_barcode', 'location_id'] # Barcodes expected to be in first column of single-worksheet excel
#!/usr/bin/env python import glob, json, datetime # Setting up the log import asnake.logging as logging logname = 'logs/new_subject_upload_' + datetime.datetime.now().strftime( '%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger('upload-new-subjects') # Bring in the client to work at a very basic level. from asnake.client import ASnakeClient # Create and authorize the client client = ASnakeClient() client.authorize() def upload_json_as_new_subjects(file_dir, batch): '''Actually run the upload. Simply sets up the log, gathers the JSON, and then uploads each. This is a simple post because it's creating new ones and doesn't need any kind of number.''' logger.info("upload_start", batch_name=batch) subjects = glob.glob( file_dir + '/' + '*.json' ) # globs all the .json objects in the directory where the files are located. for file in subjects: subject = json.load(open(file)) response = client.post('subjects', json=subject).json() response['title'] = subject['title']
help="Spreadsheet of location attrs") ap.add_argument('--host', default='localhost', help="host of ASpace database") ap.add_argument('--user', default='pobocks', help='MySQL user to run as when connecting to ASpace database') ap.add_argument('--database', default='tuftschivesspace', help="Name of MySQL database") ap.add_argument('--logfile', default='create_locations.log', help='path to print log to') if __name__ == "__main__": args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('create_locations') log.info('start') aspace = ASpace() log.info('aspace_connect') log.info('process_spreadsheet') rows = args.spreadsheet.worksheets[0].values headers = dict(enumerate(first(rows))) JSONS = [] conn = pymysql.connect(host=args.host, user=args.user, database=args.database, cursorclass=pymysql.cursors.DictCursor,
#!/usr/bin/env python import json, csv, datetime import asnake.logging as logging # set up logging. mutter profanity. logname = 'logs/deleting_subjects_' + datetime.datetime.now().strftime( '%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger('delete_subjects') # add ASnake Client from asnake.client import ASnakeClient # validate ASnake client client = ASnakeClient() client.authorize() # expects a CSV file with column subject_id of subjects to be deleted def delete_subjects(data): '''This opens the CSV file, reads the subject_id column, deletes subject, and logs response. Records target ID in case it's not found or other error.''' with open(data, newline='') as data: reader = csv.DictReader(data) for row in reader: sub_id = str(row['subject_id']) #typing just in case response = client.delete('subjects/' + sub_id).json() logger.info('delete', target=sub_id,
# Sample data: # location_uri,id # /locations/6423,6631 # /locations/4025,24592|23842|23232 # Admin rights seem to be needed to run these updates. # See README.md for information on preparing the data. from asnake.client import ASnakeClient client = ASnakeClient() client.authorize() logname = 'logs/update_top_containers_' + datetime.datetime.now().strftime( '%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger('batch-update-top-containers') def post_batch_updates(csvName, batch, repo_num): '''Starts logger batch, opens CSV and reads lines. Creates an integer-based list of IDs from column 'id'. Posts updates to ASpace, collects response and writes it out to log along with info about which resources were updated (since this is not part of ASpace response). Closes logfile.''' logger.info('updates', batch_name=batch) with open(csvName, newline='') as data: reader = csv.DictReader(data) for row in reader: id_group = [ ] # ASpace is really particular that it get a list of integers and no dang strings. for id in row['id'].split("|"): id_group.append(int(id)) location = row['location_uri'] response = client.post('repositories/' + repo_num + '/top_containers/batch/location',
) ap.add_argument('--host', default='localhost', help="host of ASpace database") ap.add_argument('--user', default='pobocks', help='MySQL user to run as when connecting to ASpace database') ap.add_argument('--database', default='tuftschivesspace', help="Name of MySQL database") ap.add_argument('--logfile', default='dupe_report.log', help='path to print log to') if __name__ == '__main__': args = ap.parse_args() setup_logging(filename=args.logfile) log = get_logger('report_duplicates') log.info('start') aspace = ASpace() log.info('aspace_connect') log.info('end') conn = pymysql.connect(host=args.host, user=args.user, database=args.database, cursorclass=pymysql.cursors.DictCursor, password=getpass( "Please enter MySQL password for {}: ".format( args.user)))
#/usr/bin/python3 #~/anaconda3/bin/python from asnake.client import ASnakeClient import asnake.logging as logging logging.setup_logging(filename="date_update.log", filemode="a") logger = logging.get_logger("date_updating") #Log Into ASpace and set repo to RL aspace_client = ASnakeClient(baseurl="[backendURL]", username="******", password="******") aspace_client.authorize() repo = aspace_client.get("repositories/2").json() print("Logged into: " + repo['name']) print("Getting list of resources...") resources_list = aspace_client.get( "repositories/2/resources?all_ids=true").json() resources_sorted = sorted(resources_list, reverse=True) for resource in resources_sorted: try: resource_json = aspace_client.get("repositories/2/resources/" + str(resource)).json() #print (resource_json) resource_uri = resource_json['uri'] print("updating: " + resource_uri) resource_update = aspace_client.post(resource_json['uri'], json=resource_json)
from asnake.aspace import ASpace import asnake.logging as logging import argparse import datetime import csv logger = logging.get_logger('upload_accessions') RELATOR_DICT = { 'artist': 'art', 'author': 'aut', 'donor': 'dnr', 'editor': 'edt', 'publisher': 'pbl', 'translator': 'trl' } DATE = datetime.date.today() DATE = DATE.__str__() def make_ex_doc(accession): eds = [] if len(accession['external_documents1_title']) >= 1: ed_dict = {'jsonmodel_type': 'external_document', 'location': '', 'publish': bool, 'title': ''} ed_dict['title'] = accession['external_documents1_title']
def dictify_sheet(sheet):
    '''Yield one dict per data row of ``sheet``, keyed by header text.

    The first row of ``sheet`` is taken as the header row. Header cells with
    a falsy ``value`` are ignored; the remaining header values are stripped
    of surrounding whitespace and used as keys. Each later row produces a
    dict mapping every kept header to ``cell_value(cell, header)``, where
    ``cell`` is the cell in the *same column* as that header.

    An empty ``sheet`` (no header row) yields nothing.

    :param sheet: iterable of rows; each row is an indexable sequence of
        objects exposing a ``value`` attribute.
    :returns: generator of ``{header: converted cell value}`` dicts.
    '''
    rows = iter(sheet)

    # A bare next() here would leak StopIteration out of the generator,
    # which Python turns into RuntimeError; treat "no header" as "no rows".
    header_row = next(rows, None)
    if header_row is None:
        return

    # Remember each header's real column index so that a blank header cell
    # in the middle of the row does not shift the headers that follow it.
    columns = [(idx, cell.value.strip())
               for idx, cell in enumerate(header_row)
               if cell.value]

    for row in rows:
        yield {header: cell_value(row[idx], header) for idx, header in columns}


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('update_containers')
    aspace = ASpace()
    log.info('start_ingest')
    for row in dictify_sheet(next(iter(args.excel))):
        try:
            container = aspace.repositories(args.repo_id).top_containers(
                row['Container Record ID']).json()
            container['barcode'] = row['Barcode']
        except (AttributeError, RuntimeError) as e:
            # NOTE(review): if the lookup above raised before `container`
            # was assigned, `container` is unbound on the first row and
            # holds the previous row's record afterwards - confirm intent.
            log.error('FAILED update_container',
                      response=container,
                      data=row,
                      exc_info=e)
#!/usr/bin/env python import re, json, csv, requests, glob, datetime, os import asnake.logging as logging # expects a 2-column CSV in which the first column has the resource ID and the second has the subject ID. Multiple subject IDs should be pipe-separated, e.g. "24|133|1313|234" or just 24. These subjects should only be _new_ subjects you're adding. logname = 'logs/resource_processing_' + datetime.datetime.now().strftime( '%Y-%m-%d-T-%H-%M') + '.log' logfile = open(logname, 'w') logging.setup_logging(stream=logfile) logger = logging.get_logger("process-CSV-to-resources") # add ASnake Client from asnake.client import ASnakeClient client = ASnakeClient() client.authorize() # WRITE OUT THE ORIGINAL TO ANOTHER DIRECTORY AS A BACKUP. This is a place where one could take a param. Remember to either commit your backups each time or move them entirely because otherwise you'll end up with them being undone when you run it! def quick_backup(resource_id, resource): original = "backups/original-" + resource_id + ".json" with open(original, "w") as backup: json.dump(resource, backup, indent=4) # This function is what we do if the subject array is empty. Simply builds the array and fills subjects. def no_original_subjects(resource, new_subjects): subjects = [] for subject in new_subjects:
from requests import Session from urllib.parse import urljoin, quote from numbers import Number from collections.abc import Sequence, Mapping import json import asnake.configurator as conf import asnake.logging as logging log = logging.get_logger(__name__) class ASnakeAuthError(Exception): pass class ASnakeWeirdReturnError(Exception): pass def listlike_seq(seq): '''Determine if a thing is a list-like (sequence of values) sequence that's not string-like.''' return isinstance(seq, Sequence) and not isinstance(seq, ( str, bytes, Mapping, )) def http_meth_factory(meth):
#!/usr/bin/env python3 import csv import configparser import json import requests import time from asnake.client import ASnakeClient import asnake.logging as logging from asnake.aspace import ASpace from configparser import ConfigParser, ExtendedInterpolation logging.setup_logging(filename='logging.txt', level='INFO', filemode='a') logger = logging.get_logger() config = configparser.ConfigParser() config.read('local_settings.cfg') aspace = ASpace(baseurl=config['ArchivesSpace']['baseURL'], username=config['ArchivesSpace']['user'], password=config['ArchivesSpace']['password']) repo = aspace.repositories(config['ArchivesSpace']['repository']) def get_collection(): """Returns a collection corresponding to an ID provided by user input""" try: identifier = input('Resource ID: ') return repo.resources(int(identifier)) except Exception as e: