def __call__(self, fcrepo, args):
    """Create a new pcdm.Collection in the repository.

    With ``args.notransactions`` the collection is created directly;
    otherwise the create/update pair runs inside a single Transaction.
    If ``args.batch`` names a YAML batch config, its ``COLLECTION`` key
    is rewritten with the new collection's URI.

    :param fcrepo: repository client/connection
    :param args: parsed command-line arguments (``notransactions``,
        ``name``, ``batch``)
    :raises FailureException: if the repository reports an error
    """
    def build_collection():
        # shared by both branches: construct and persist the collection
        collection = pcdm.Collection()
        collection.title = args.name
        collection.create_object(fcrepo)
        collection.update_object(fcrepo)
        return collection

    if args.notransactions:
        try:
            collection = build_collection()
        except RESTAPIException as e:
            logger.error(f'Error in collection creation: {e}')
            # chain the cause so the original REST error is not lost
            raise FailureException() from e
    else:
        with Transaction(fcrepo) as txn:
            try:
                collection = build_collection()
                txn.commit()
            except RESTAPIException as e:
                logger.error(f'Error in collection creation: {e}')
                raise FailureException() from e

    if args.batch is not None:
        # record the new collection URI in the batch config file
        with open(args.batch, 'r') as batchconfig:
            batch = yaml.safe_load(batchconfig)
            batch['COLLECTION'] = str(collection.uri)
        with open(args.batch, 'w') as batchconfig:
            yaml.dump(batch, batchconfig, default_flow_style=False)
def __call__(self, repo: Repository, args: Namespace) -> None:
    """Create stub Item resources for each row of a CSV file.

    Reads rows from ``args.source_file``, creates an Item with a single
    File per row (inside its own transaction), rewrites the binary
    column with the new file URI, and writes each row to the output CSV
    (``args.output_file`` or stdout).

    :param repo: repository to create the items in
    :param args: parsed command-line arguments
    :raises FailureException: if a transaction rollback fails
    """
    csv_file = csv.DictReader(args.source_file)
    if csv_file.fieldnames is None:
        # log the source file, not the DictReader object's repr
        logger.error(f'No fields found in {args.source_file}. Exiting.')
        sys.exit(1)
    if args.output_file is not None:
        output_file = open(args.output_file, 'w')
    else:
        output_file = sys.stdout
    try:
        csv_writer = csv.DictWriter(output_file, fieldnames=csv_file.fieldnames)
        write_csv_header(csv_file, args, csv_writer)
        for row in csv_file:
            identifier = row[args.identifier_column]
            source = get_source(row[args.binary_column])
            if not source:
                # nothing to load: pass the row through unchanged
                logger.warning(f'No source found for {identifier}; skipping')
                csv_writer.writerow(row)
                continue
            item = Item(identifier=identifier, title=f'Stub for {identifier}')
            file = File()
            file.source = source
            item.add_file(file)
            if args.member_of is not None:
                item.member_of = URIRef(args.member_of)
            if args.access is not None:
                # access class applies to both the item and its file
                item.rdf_type.append(args.access)
                file.rdf_type.append(args.access)
            try:
                with Transaction(repo) as txn:
                    try:
                        item.create(repo, container_path=args.container_path)
                        item.update(repo)
                        # update the CSV with the new URI
                        row[args.binary_column] = file.uri
                        csv_writer.writerow(row)
                        txn.commit()
                    except (RESTAPIException, FileNotFoundError) as e:
                        # if anything fails during item creation or committing the transaction
                        # attempt to rollback the current transaction
                        # failures here will be caught by the main loop's exception handler
                        # and should trigger a system exit
                        logger.error(f'{item.identifier} not created: {e}')
                        txn.rollback()
            except KeyboardInterrupt:
                # NOTE(review): txn is only bound after Transaction(repo) is
                # entered; an interrupt raised before that would cause a
                # NameError here — confirm Transaction setup cannot block
                logger.warning("Load interrupted")
                txn.rollback()
                raise
            except RESTAPIException as e:
                raise FailureException(f'Transaction rollback failed: {e}') from e
    finally:
        # always release the output file, even if the load aborts;
        # never close stdout
        if output_file is not sys.stdout:
            output_file.close()
def process(self, method, use_transaction=True, traverse=None):
    """Apply *method* to every resource returned by ``self.get_resources``.

    With ``use_transaction=True`` all calls happen inside a single
    Transaction: the first failure rolls the whole transaction back and
    the method returns False. Without a transaction, failures are logged
    and processing continues with the next resource.

    :param method: callable taking ``(resource, graph)``
    :param use_transaction: wrap all processing in one Transaction
    :param traverse: optional list of RDF predicates to follow when
        collecting resources
    :return: True on success, False if the transaction was rolled back
    :raises FailureException: if the rollback itself fails
    """
    self.use_transaction = use_transaction
    if traverse is not None:
        predicate_list = ', '.join(p.n3() for p in traverse)
        logger.info(
            f"{method.__name__} will traverse the following predicates: {predicate_list}"
        )
    if use_transaction:
        # set up a temporary ItemLog that will be copied to the real item log upon completion of the transaction
        self.completed_buffer = ItemLog(NamedTemporaryFile().name, ['uri', 'title', 'timestamp'], 'uri', header=False)
        with Transaction(self.repository, keep_alive=90) as transaction:
            for resource, graph in self.get_resources(traverse=traverse):
                try:
                    method(resource, graph)
                except RESTAPIException as e:
                    logger.error(
                        f'{method.__name__} failed for {resource}: {e}: {e.response.text}'
                    )
                    # if anything fails while processing of the list of uris, attempt to
                    # rollback the transaction. Failures here will be caught by the main
                    # loop's exception handler and should trigger a system exit
                    try:
                        transaction.rollback()
                        logger.warning('Transaction rolled back.')
                        return False
                    except RESTAPIException:
                        logger.error(
                            'Unable to roll back transaction, aborting')
                        raise FailureException()
            transaction.commit()
            # only publish the buffered completion log once the commit succeeded
            if self.completed and self.completed.filename:
                shutil.copyfile(self.completed_buffer.filename, self.completed.filename)
            return True
    else:
        # non-transactional mode: log failures and keep going
        for resource, graph in self.get_resources(traverse=traverse):
            try:
                method(resource, graph)
            except RESTAPIException as e:
                logger.error(
                    f'{method.__name__} failed for {resource}: {e}: {e.response.text}'
                )
                logger.warning(
                    f'Continuing {method.__name__} with next item')
        return True
def extract(fcrepo, uri):
    """Create text-block annotations for the page at *uri*.

    Fetches the Page, creates and updates one annotation per text block
    inside a single Transaction, and commits.

    :param fcrepo: repository client/connection
    :param uri: URI of the page resource to annotate
    :return: True on success, False if the transaction was rolled back
    """
    with Transaction(fcrepo) as txn:
        try:
            logger.info(f"Getting {uri} from repository")
            page = Page.from_repository(fcrepo, uri)
            logger.info(f"Creating annotations for page {page.title}")
            for annotation in page.textblocks():
                annotation.create(fcrepo)
                annotation.update(fcrepo)
            txn.commit()
            return True
        except (RESTAPIException, DataReadException) as e:
            # if anything fails during item creation or committing the transaction
            # attempt to rollback the current transaction
            # failures here will be caught by the main loop's exception handler
            # and should trigger a system exit
            logger.error(f"OCR extraction failed: {e}")
            txn.rollback()
            logger.warning('Transaction rolled back. Continuing load.')
            # explicit False (was an implicit None) so callers get a
            # consistent boolean result
            return False
def load_item(fcrepo, batch_item, args, extra=None):
    """Load a single batch item into the repository.

    Reads the item's data, then creates it either inside a keep-alive
    Transaction (``args.use_transactions``) or directly. REST/file
    errors are logged and the load continues; KeyboardInterrupt is
    re-raised so the caller can stop the batch.

    :param fcrepo: repository client/connection
    :param batch_item: batch entry providing ``read_data()``
    :param args: parsed command-line arguments
    :param extra: optional extra data passed through to the loader
    :return: True on success, False if creation failed
    """
    # read data for item
    logger.info('Reading item data')
    item = batch_item.read_data()
    if args.use_transactions:
        # open transaction; keep_alive pings so long loads don't expire it
        with Transaction(fcrepo, keep_alive=90) as txn:
            try:
                # create item and its components
                load_item_internal(fcrepo, item, args, extra)
                # commit transaction
                txn.commit()
                logger.info('Performing post-creation actions')
                item.post_creation_hook()
                return True
            except (RESTAPIException, FileNotFoundError) as e:
                # if anything fails during item creation or committing the transaction
                # attempt to rollback the current transaction
                # failures here will be caught by the main loop's exception handler
                # and should trigger a system exit
                logger.error("Item creation failed: {0}".format(e))
                txn.rollback()
                logger.warning('Transaction rolled back. Continuing load.')
                return False
            except KeyboardInterrupt:
                logger.error("Load interrupted")
                # bare raise preserves the original traceback
                raise
    else:
        try:
            load_item_internal(fcrepo, item, args, extra)
            return True
        except (RESTAPIException, FileNotFoundError) as e:
            logger.error("Item creation failed: {0}".format(e))
            logger.warning('Continuing load.')
            return False
        except KeyboardInterrupt:
            logger.error("Load interrupted")
            raise
def __call__(self, fcrepo, args):
    """Create a new pcdm.Collection (deprecated "mkcol" command).

    Warns that the command is deprecated, then creates the collection
    either directly (``args.notransactions``) or inside a Transaction.
    If ``args.batch`` names a YAML batch config, its ``COLLECTION`` key
    is rewritten with the new collection's URI.

    :param fcrepo: repository client/connection
    :param args: parsed command-line arguments (``notransactions``,
        ``name``, ``batch``)
    :raises FailureException: if the repository reports an error
    """
    logger.warning(
        'The "mkcol" command is deprecated and will be removed in a future release.'
    )
    logger.warning(
        f'Use: plastron create --container "{fcrepo.relpath}" --collection "{args.name}"'
    )

    def build_collection():
        # shared by both branches: construct and persist the collection
        collection = pcdm.Collection()
        collection.title = args.name
        collection.create(fcrepo, recursive=False)
        collection.update(fcrepo, recursive=False)
        return collection

    if args.notransactions:
        try:
            collection = build_collection()
        except RESTAPIException as e:
            logger.error(f'Error in collection creation: {e}')
            # chain the cause so the original REST error is not lost
            raise FailureException() from e
    else:
        with Transaction(fcrepo) as txn:
            try:
                collection = build_collection()
                txn.commit()
            except RESTAPIException as e:
                logger.error(f'Error in collection creation: {e}')
                raise FailureException() from e

    if args.batch is not None:
        # record the new collection URI in the batch config file
        with open(args.batch, 'r') as batchconfig:
            batch = yaml.safe_load(batchconfig)
            batch['COLLECTION'] = str(collection.uri)
        with open(args.batch, 'w') as batchconfig:
            yaml.dump(batch, batchconfig, default_flow_style=False)
def update_repo(self, args, job, repo, metadata, row, repo_changeset, created_uris, updated_uris):
    """
    Updates the repository with the given RepoChangeSet

    :param args: the arguments from the command-line
    :param job: The ImportJob
    :param repo: the repository configuration
    :param metadata: A plastron.jobs.MetadataRows object representing the CSV file being imported
    :param row: A single plastron.jobs.Row object representing the row being imported
    :param repo_changeset: The RepoChangeSet object describing the changes to make to the repository.
    :param created_uris: Accumulator storing a list of created URIS. This variable is MODIFIED by this method.
    :param updated_uris: Accumulator storing a list of updated URIS. This variable is MODIFIED by this method.
    """
    item = repo_changeset.item
    if not item.created:
        # if an item is new, don't construct a SPARQL Update query
        # instead, just create and update normally
        # create new item in the repo
        logger.debug('Creating a new item')
        # add the access class
        if job.access is not None:
            item.rdf_type.append(URIRef(job.access))
        # add the collection membership
        if job.member_of is not None:
            item.member_of = URIRef(job.member_of)
        if row.has_files:
            # CREATE_PAGES defaults to true unless the row overrides it
            create_pages = bool(strtobool(row.get('CREATE_PAGES', 'True')))
            logger.debug('Adding pages and files to new item')
            self.add_files(
                item,
                build_file_groups(row['FILES']),
                base_location=job.binaries_location,
                access=job.access,
                create_pages=create_pages
            )
        if args.extract_text_types is not None:
            annotate_from_files(item, args.extract_text_types.split(','))
        logger.debug(f"Creating resources in container: {job.container}")
        try:
            # create and update happen atomically inside one transaction
            with Transaction(repo) as txn:
                item.create(repo, container_path=job.container)
                item.update(repo)
                txn.commit()
        except Exception as e:
            raise FailureException(f'Creating item failed: {e}') from e
        job.complete(item, row.line_reference, ImportedItemStatus.CREATED)
        metadata.created += 1
        created_uris.append(item.uri)
    elif repo_changeset:
        # construct the SPARQL Update query if there are any deletions or insertions
        # then do a PATCH update of an existing item
        logger.info(f'Sending update for {item}')
        sparql_update = repo_changeset.build_sparql_update(repo)
        logger.debug(sparql_update)
        try:
            item.patch(repo, sparql_update)
        except RESTAPIException as e:
            raise FailureException(f'Updating item failed: {e}') from e
        job.complete(item, row.line_reference, ImportedItemStatus.MODIFIED)
        metadata.updated += 1
        updated_uris.append(item.uri)
    else:
        # no changes: record the item as unchanged
        job.complete(item, row.line_reference, ImportedItemStatus.UNCHANGED)
        metadata.unchanged += 1
        logger.info(f'No changes found for "{item}" ({row.uri}); skipping')
        # NOTE(review): both unchanged and skipped counters are incremented
        # for the same row — confirm this double-count is intentional
        metadata.skipped += 1