示例#1
0
    def __call__(self, fcrepo, args):
        """
        Create a pcdm.Collection titled ``args.name`` and optionally record
        its URI in a YAML batch configuration file.

        The collection is created inside a Transaction unless
        ``args.notransactions`` is set. When ``args.batch`` is not None, that
        file is loaded as YAML, its ``COLLECTION`` key is set to the new
        collection's URI, and the file is written back.

        :param fcrepo: repository handle passed to Transaction and to the
            collection's create_object/update_object calls
        :param args: parsed arguments; ``name``, ``notransactions`` and
            ``batch`` are read
        :raises FailureException: when a RESTAPIException occurs while the
            collection is being created
        """
        if not args.notransactions:
            # transactional path: the commit is part of the guarded section
            with Transaction(fcrepo) as transaction:
                try:
                    collection = pcdm.Collection()
                    collection.title = args.name
                    collection.create_object(fcrepo)
                    collection.update_object(fcrepo)
                    transaction.commit()
                except RESTAPIException as err:
                    logger.error(f'Error in collection creation: {err}')
                    raise FailureException()
        else:
            try:
                collection = pcdm.Collection()
                collection.title = args.name
                collection.create_object(fcrepo)
                collection.update_object(fcrepo)
            except RESTAPIException as err:
                logger.error(f'Error in collection creation: {err}')
                raise FailureException()

        if args.batch is None:
            return

        # read the existing batch config, then rewrite it with the new URI
        with open(args.batch, 'r') as config_in:
            batch = yaml.safe_load(config_in)
        batch['COLLECTION'] = str(collection.uri)
        with open(args.batch, 'w') as config_out:
            yaml.dump(batch, config_out, default_flow_style=False)
示例#2
0
文件: stub.py 项目: umd-lib/plastron
    def __call__(self, repo: Repository, args: Namespace) -> None:
        """
        Create a stub Item (with a single File) for each row of a CSV file and
        write the rows back out with the binary column replaced by the new
        file URI.

        Rows for which ``get_source`` finds no source are written out
        unchanged and skipped. Each item is created inside its own
        Transaction; if creation fails with a RESTAPIException or
        FileNotFoundError the transaction is rolled back and processing
        continues with the next row.

        :param repo: repository the items are created in
        :param args: parsed arguments; reads ``source_file``, ``output_file``,
            ``identifier_column``, ``binary_column``, ``member_of``,
            ``access`` and ``container_path``
        :raises FailureException: when a RESTAPIException escapes the
            transaction block (e.g. the rollback itself fails)
        :raises KeyboardInterrupt: re-raised after rolling back the current
            transaction
        """
        csv_file = csv.DictReader(args.source_file)
        if csv_file.fieldnames is None:
            logger.error(f'No fields found in {csv_file}. Exiting.')
            sys.exit(1)

        if args.output_file is not None:
            # newline='' is what the csv module requires for files handed to
            # a writer; without it, platforms that translate newlines emit an
            # extra carriage return per row
            output_file = open(args.output_file, 'w', newline='')
        else:
            output_file = sys.stdout

        # everything that writes to output_file lives inside this try so the
        # file is closed even when an exception aborts the load
        try:
            csv_writer = csv.DictWriter(output_file, fieldnames=csv_file.fieldnames)

            write_csv_header(csv_file, args, csv_writer)

            for row in csv_file:
                identifier = row[args.identifier_column]
                source = get_source(row[args.binary_column])
                if not source:
                    logger.warning(f'No source found for {identifier}; skipping')
                    csv_writer.writerow(row)
                    continue

                item = Item(identifier=identifier, title=f'Stub for {identifier}')
                file = File()
                file.source = source
                item.add_file(file)
                if args.member_of is not None:
                    item.member_of = URIRef(args.member_of)
                if args.access is not None:
                    item.rdf_type.append(args.access)
                    file.rdf_type.append(args.access)
                try:
                    with Transaction(repo) as txn:
                        try:
                            item.create(repo, container_path=args.container_path)
                            item.update(repo)
                            # update the CSV with the new URI
                            row[args.binary_column] = file.uri
                            csv_writer.writerow(row)
                            txn.commit()
                        except (RESTAPIException, FileNotFoundError) as e:
                            # if anything fails during item creation or committing the transaction
                            # attempt to rollback the current transaction
                            # failures here will be caught by the main loop's exception handler
                            # and should trigger a system exit
                            logger.error(f'{item.identifier} not created: {e}')
                            txn.rollback()
                        except KeyboardInterrupt:
                            logger.warning("Load interrupted")
                            txn.rollback()
                            raise

                except RESTAPIException as e:
                    raise FailureException(f'Transaction rollback failed: {e}') from e
        finally:
            # never close stdout; only close a file this method opened
            if output_file is not sys.stdout:
                output_file.close()
示例#3
0
文件: util.py 项目: umd-lib/plastron
    def process(self, method, use_transaction=True, traverse=None):
        """
        Call ``method(resource, graph)`` for every pair yielded by
        ``self.get_resources(traverse=traverse)``.

        Without a transaction, a RESTAPIException from ``method`` is logged
        and processing continues with the next resource. With a transaction,
        the first RESTAPIException triggers a rollback and ends processing.

        :param method: callable invoked with each (resource, graph) pair
        :param use_transaction: whether to wrap all calls in one Transaction;
            also stored on ``self.use_transaction``
        :param traverse: optional predicates, forwarded to get_resources and
            listed in an info log message
        :return: True when processing ran to the end (and, with a
            transaction, was committed); False when the transaction was
            rolled back after a failure
        :raises FailureException: when the rollback itself raises a
            RESTAPIException
        """
        self.use_transaction = use_transaction
        if traverse is not None:
            predicate_list = ', '.join(p.n3() for p in traverse)
            logger.info(
                f"{method.__name__} will traverse the following predicates: {predicate_list}"
            )

        if not use_transaction:
            # best-effort mode: log each failure and keep going
            for resource, graph in self.get_resources(traverse=traverse):
                try:
                    method(resource, graph)
                except RESTAPIException as err:
                    logger.error(
                        f'{method.__name__} failed for {resource}: {err}: {err.response.text}'
                    )
                    logger.warning(
                        f'Continuing {method.__name__} with next item')
            return True

        # temporary ItemLog whose file is copied over the real item log
        # once the transaction has been committed
        # NOTE(review): the NamedTemporaryFile object itself is not retained,
        # only its name is handed to ItemLog
        buffer_fields = ['uri', 'title', 'timestamp']
        self.completed_buffer = ItemLog(NamedTemporaryFile().name,
                                        buffer_fields,
                                        'uri',
                                        header=False)
        with Transaction(self.repository, keep_alive=90) as txn:
            for resource, graph in self.get_resources(traverse=traverse):
                try:
                    method(resource, graph)
                except RESTAPIException as err:
                    logger.error(
                        f'{method.__name__} failed for {resource}: {err}: {err.response.text}'
                    )
                    # any failure aborts the whole run: try to roll back;
                    # if even that fails, escalate to a FailureException
                    try:
                        txn.rollback()
                    except RESTAPIException:
                        logger.error(
                            'Unable to roll back transaction, aborting')
                        raise FailureException()
                    else:
                        logger.warning('Transaction rolled back.')
                        return False
            txn.commit()
            if self.completed and self.completed.filename:
                shutil.copyfile(self.completed_buffer.filename,
                                self.completed.filename)
            return True
示例#4
0
def extract(fcrepo, uri):
    """
    Load the page at ``uri`` and create an annotation in the repository for
    each of its text blocks, all within one transaction.

    :param fcrepo: repository handle used for the transaction and for every
        create/update call
    :param uri: URI handed to Page.from_repository
    :return: True once the transaction has been committed; when a
        RESTAPIException or DataReadException is caught the transaction is
        rolled back and the function ends without an explicit return value
    """
    with Transaction(fcrepo) as transaction:
        try:
            logger.info(f"Getting {uri} from repository")
            page = Page.from_repository(fcrepo, uri)
            logger.info(f"Creating annotations for page {page.title}")
            for textblock in page.textblocks():
                textblock.create(fcrepo)
                textblock.update(fcrepo)

            transaction.commit()
            return True

        except (RESTAPIException, DataReadException) as err:
            # creation or commit failed: undo whatever was done so far.
            # An error raised by the rollback itself is not handled here
            # and propagates to the caller.
            logger.error(f"OCR extraction failed: {err}")
            transaction.rollback()
            logger.warning('Transaction rolled back. Continuing load.')
示例#5
0
def load_item(fcrepo, batch_item, args, extra=None):
    """
    Read one batch item's data and load it into the repository.

    With ``args.use_transactions`` set, the load runs inside a Transaction
    that is committed on success and rolled back when a RESTAPIException or
    FileNotFoundError is caught. Otherwise the load runs directly and such
    errors are only logged.

    :param fcrepo: repository handle
    :param batch_item: object whose ``read_data()`` yields the item to load
    :param args: parsed arguments; ``use_transactions`` is read here and the
        whole object is forwarded to load_item_internal
    :param extra: forwarded unchanged to load_item_internal
    :return: True on success, None when a handled error occurred
    :raises KeyboardInterrupt: re-raised after being logged
    """
    logger.info('Reading item data')
    item = batch_item.read_data()

    if not args.use_transactions:
        # direct load: nothing to roll back on failure
        try:
            load_item_internal(fcrepo, item, args, extra)
            return True
        except (RESTAPIException, FileNotFoundError) as err:
            logger.error(f"Item creation failed: {err}")
            logger.warning('Continuing load.')
        except KeyboardInterrupt:
            logger.error("Load interrupted")
            raise
        return None

    with Transaction(fcrepo, keep_alive=90) as transaction:
        try:
            # create item and its components, then make them permanent
            load_item_internal(fcrepo, item, args, extra)
            transaction.commit()

            logger.info('Performing post-creation actions')
            item.post_creation_hook()
            return True

        except (RESTAPIException, FileNotFoundError) as err:
            # creation or commit failed: undo the transaction.
            # An error raised by the rollback itself is not handled here
            # and propagates to the caller.
            logger.error(f"Item creation failed: {err}")
            transaction.rollback()
            logger.warning('Transaction rolled back. Continuing load.')

        except KeyboardInterrupt:
            logger.error("Load interrupted")
            raise
示例#6
0
    def __call__(self, fcrepo, args):
        """
        Deprecated "mkcol" command: create a pcdm.Collection titled
        ``args.name`` and optionally record its URI in a YAML batch file.

        Two deprecation warnings are logged first. The collection is then
        created (non-recursively) inside a Transaction unless
        ``args.notransactions`` is set. When ``args.batch`` is not None, that
        file is loaded as YAML, its ``COLLECTION`` key is set to the new
        collection's URI, and the file is written back.

        :param fcrepo: repository handle; its ``relpath`` appears in the
            deprecation hint
        :param args: parsed arguments; ``name``, ``notransactions`` and
            ``batch`` are read
        :raises FailureException: when a RESTAPIException occurs while the
            collection is being created
        """
        deprecation_notice = 'The "mkcol" command is deprecated and will be removed in a future release.'
        logger.warning(deprecation_notice)
        replacement_hint = f'Use: plastron create --container "{fcrepo.relpath}" --collection "{args.name}"'
        logger.warning(replacement_hint)

        if not args.notransactions:
            # transactional path: the commit is part of the guarded section
            with Transaction(fcrepo) as transaction:
                try:
                    collection = pcdm.Collection()
                    collection.title = args.name
                    collection.create(fcrepo, recursive=False)
                    collection.update(fcrepo, recursive=False)
                    transaction.commit()
                except RESTAPIException as err:
                    logger.error(f'Error in collection creation: {err}')
                    raise FailureException()
        else:
            try:
                collection = pcdm.Collection()
                collection.title = args.name
                collection.create(fcrepo, recursive=False)
                collection.update(fcrepo, recursive=False)
            except RESTAPIException as err:
                logger.error(f'Error in collection creation: {err}')
                raise FailureException()

        if args.batch is None:
            return

        # read the existing batch config, then rewrite it with the new URI
        with open(args.batch, 'r') as config_in:
            batch = yaml.safe_load(config_in)
        batch['COLLECTION'] = str(collection.uri)
        with open(args.batch, 'w') as config_out:
            yaml.dump(batch, config_out, default_flow_style=False)
示例#7
0
    def update_repo(self, args, job, repo, metadata, row, repo_changeset, created_uris, updated_uris):
        """
        Apply a single row's RepoChangeSet to the repository and record the
        outcome on the job and the metadata counters.

        Three outcomes are possible: an item that is not yet created is
        created inside a Transaction; an already-created item with a
        non-empty changeset is patched with a SPARQL Update; an
        already-created item with an empty changeset is marked unchanged.

        :param args: the arguments from the command-line
        :param job: the ImportJob
        :param repo: the repository configuration
        :param metadata: a plastron.jobs.MetadataRows object representing the
                          CSV file being imported; its counters are updated
        :param row: a single plastron.jobs.Row object representing the row
                     being imported
        :param repo_changeset: the RepoChangeSet object describing the
                                 changes to make to the repository
        :param created_uris: accumulator list of created URIs; MODIFIED by
                              this method
        :param updated_uris: accumulator list of updated URIs; MODIFIED by
                              this method
        :raises FailureException: when creating or patching the item fails
        """
        item = repo_changeset.item

        if item.created:
            if not repo_changeset:
                # no deletions or insertions: just record the row as unchanged
                job.complete(item, row.line_reference, ImportedItemStatus.UNCHANGED)
                metadata.unchanged += 1
                logger.info(f'No changes found for "{item}" ({row.uri}); skipping')
                metadata.skipped += 1
                return

            # existing item with changes: build a SPARQL Update and PATCH it
            logger.info(f'Sending update for {item}')
            sparql_update = repo_changeset.build_sparql_update(repo)
            logger.debug(sparql_update)
            try:
                item.patch(repo, sparql_update)
            except RESTAPIException as err:
                raise FailureException(f'Updating item failed: {err}') from err

            job.complete(item, row.line_reference, ImportedItemStatus.MODIFIED)
            metadata.updated += 1
            updated_uris.append(item.uri)
            return

        # new item: no SPARQL Update is needed, create and update it normally
        logger.debug('Creating a new item')
        if job.access is not None:
            # add the access class
            item.rdf_type.append(URIRef(job.access))
        if job.member_of is not None:
            # add the collection membership
            item.member_of = URIRef(job.member_of)

        if row.has_files:
            # the CREATE_PAGES column defaults to 'True' when absent
            should_create_pages = bool(strtobool(row.get('CREATE_PAGES', 'True')))
            logger.debug('Adding pages and files to new item')
            self.add_files(
                item,
                build_file_groups(row['FILES']),
                base_location=job.binaries_location,
                access=job.access,
                create_pages=should_create_pages
            )

        if args.extract_text_types is not None:
            text_types = args.extract_text_types.split(',')
            annotate_from_files(item, text_types)

        logger.debug(f"Creating resources in container: {job.container}")

        try:
            with Transaction(repo) as transaction:
                item.create(repo, container_path=job.container)
                item.update(repo)
                transaction.commit()
        except Exception as err:
            raise FailureException(f'Creating item failed: {err}') from err

        job.complete(item, row.line_reference, ImportedItemStatus.CREATED)
        metadata.created += 1
        created_uris.append(item.uri)