Exemplo n.º 1
0
    def transformed_to_dl_compound(
            self, sample_cataloged_collection, monkeypatch):
        """Transform the cataloged sample packages with transformers mocked.

        Every package located under the sample collection root is passed to
        a DigitalLibraryCompound package factory while the ``transform``
        methods of Transformers, CopyFile and ConvertJp2Standard are replaced
        by stand-ins that record into a single Mock.

        Args:
            sample_cataloged_collection: Pair of (root, files); only the
                root is used here.
            monkeypatch: pytest monkeypatch fixture.

        Returns:
            Tuple of the output path handed to ``transform`` and the Mock
            that recorded the transformer calls.

        """
        root, _files = sample_cataloged_collection
        factory = packager.PackageFactory(noneas.CatalogedNonEAS())
        digital_library_format = packager.PackageFactory(
            digital_library_compound.DigitalLibraryCompound())
        output_path = os.path.join('some', 'folder')
        transform = Mock()

        def mock_transform(_, source, destination):
            # Re-issue the call with keywords so that it is recorded on the
            # Mock with keyword arguments.
            transform(source=source, destination=destination)

        with monkeypatch.context() as mp:
            # All patches go through ``mp`` so every one of them is undone
            # when the context exits, not only the first.
            mp.setattr(
                packager.transformations.Transformers,
                "transform", mock_transform
            )
            mp.setattr(
                uiucprescon.packager.transformations.CopyFile,
                "transform",
                transform
            )
            mp.setattr(
                uiucprescon.packager.transformations.ConvertJp2Standard,
                "transform",
                transform
            )

            for package in list(factory.locate_packages(root)):
                digital_library_format.transform(package, output_path)

        return output_path, transform
Exemplo n.º 2
0
    def transformed_to_dl_compound(
            self, sample_collection, monkeypatch
    ):
        """Transform the archival sample packages with all file work mocked.

        The transformer ``transform`` methods, ``shutil.copyfile``,
        ``shutil.copy`` and ``pykdu_compress.kdu_compress_cli2`` are replaced
        by mocks for the duration of the transformation.

        Args:
            sample_collection: Pair of (root, files); only the root is used.
            monkeypatch: pytest monkeypatch fixture.

        Returns:
            Tuple of the output path handed to ``transform`` and the Mock
            installed as the transformers' ``transform``.

        """
        import shutil
        import pykdu_compress

        root, files = sample_collection
        source_factory = packager.PackageFactory(noneas.ArchivalNonEAS())
        compound_factory = packager.PackageFactory(
            digital_library_compound.DigitalLibraryCompound())

        output_path = os.path.join('some', 'folder')
        kdu_compress_cli2 = Mock()
        transform = Mock()

        # (target, attribute name, replacement), applied in this order
        replacements = (
            (packager.transformations.Transformers, "transform", transform),
            (
                uiucprescon.packager.transformations.CopyFile,
                "transform",
                transform,
            ),
            (
                uiucprescon.packager.transformations.ConvertJp2Standard,
                "transform",
                transform,
            ),
            (shutil, "copyfile", Mock()),
            (shutil, "copy", Mock()),
            (pykdu_compress, "kdu_compress_cli2", kdu_compress_cli2),
        )
        with monkeypatch.context() as mp:
            for target, attribute, stand_in in replacements:
                mp.setattr(target, attribute, stand_in)
            for located in list(source_factory.locate_packages(root)):
                compound_factory.transform(located, output_path)
        return output_path, transform
Exemplo n.º 3
0
    def test_transform_to_hathi(self,
                                sample_collection_path,
                                expected_source,
                                expected_destination,
                                monkeypatch):
        """Check an EAS package is transformed to the expected HathiJp2 path.

        ``Transformers.transform`` and ``pathlib.Path.mkdir`` (as seen from
        the hathi_jp2_package module) are mocked, so only the recorded
        source/destination arguments are verified.
        """
        source_factory = packager.PackageFactory(eas.Eas())
        located_packages = source_factory.locate_packages(
            sample_collection_path
        )
        hathi_factory = packager.PackageFactory(HathiJp2())
        output = "out"

        # The spec limits the mock to the attributes of a plain function
        transform = Mock(spec=lambda source, destination: None)

        monkeypatch.setattr(
            packager.transformations.Transformers, "transform", transform
        )
        monkeypatch.setattr(
            packager.packages.hathi_jp2_package.pathlib.Path, "mkdir", Mock()
        )

        for located in located_packages:
            hathi_factory.transform(located, output)
            assert transform.called is True

        expected_call = call(
            source=expected_source, destination=expected_destination
        )
        transform.assert_has_calls([expected_call])
Exemplo n.º 4
0
def test_capture_one_tiff_to_hathi_tiff(capture_one_fixture):
    """Transform a Capture One batch into HathiTiff packages on disk.

    The sample batch is expected to hold two packages, which should end up
    as numbered tif files under ``000001`` and ``000002`` in the destination.
    """
    source = os.path.join(capture_one_fixture, CAPTURE_ONE_BATCH_NAME)
    dest = os.path.join(capture_one_fixture, DESTINATION_NAME)

    source_factory = packager.PackageFactory(CaptureOnePackage(delimiter="_"))

    # Find all Capture One organized packages; the sample batch has two
    located = list(source_factory.locate_packages(path=source))
    assert len(located) == 2

    hathi_factory = packager.PackageFactory(packager.packages.HathiTiff())
    for located_package in located:
        hathi_factory.transform(located_package, dest=dest)

    # Files expected under the destination, keyed by package folder
    expected_files = {
        "000001": ["00000001.tif", "00000002.tif", "00000003.tif"],
        "000002": ["00000001.tif", "00000002.tif"],
    }
    for folder_name, file_names in expected_files.items():
        for file_name in file_names:
            assert os.path.exists(os.path.join(dest, folder_name, file_name))
Exemplo n.º 5
0
def test_read_only_transform(capture_one_sample_package):
    """Transforming into HathiLimitedView must raise NotImplementedError."""
    source_factory = packager.PackageFactory(
        packager.packages.CaptureOnePackage())
    located = source_factory.locate_packages(capture_one_sample_package)

    limited_view_factory = packager.PackageFactory(
        packager.packages.HathiLimitedView())

    # NOTE(review): the result of locate_packages() is passed as-is rather
    # than one package at a time - confirm this is intended.
    with pytest.raises(NotImplementedError):
        limited_view_factory.transform(located, dest=".")
    def discover_task_metadata(self,
                               initial_results: List[Any],
                               additional_data: Dict[str, Any],
                               **user_args: str
                               ) -> List[Dict[str, Any]]:
        """Look at user settings and discover the data needed to build tasks.

        Args:
            initial_results: Not used by this implementation.
            additional_data: Not used by this implementation.
            **user_args: User settings; "Input" is searched for Capture One
                packages (dash delimiter) and "Output" is passed along as
                the destination.

        Returns:
            One dictionary per located package, with the keys "package",
            "output" and "source_path".

        """
        source_input = user_args["Input"]
        dest = user_args["Output"]

        capture_one_factory = packager.PackageFactory(
            packager.packages.CaptureOnePackage(delimiter="-"))

        jobs: List[Dict[str, Union[str, Package]]] = [
            {
                "package": located,
                "output": dest,
                "source_path": source_input
            }
            for located in capture_one_factory.locate_packages(source_input)
        ]
        return jobs
def package_objects(source_path, source_package_type):
    """Locate the sample packages for a named package type.

    Args:
        source_path: Directory that contains the sample package folders.
        source_package_type: Name of a class in ``packager.packages``. The
            same string is the key into ``sample_packages``, whose first
            element is the sample folder name.

    Returns:
        List of the packages located in the sample folder.

    Raises:
        AttributeError: If ``packager.packages`` has no such attribute.
    """
    from operator import attrgetter

    # Resolve the class by attribute lookup instead of eval() so the string
    # can never be executed as arbitrary code. attrgetter also accepts
    # dotted names, like the original expression did.
    source_pkg = attrgetter(source_package_type)(packager.packages)
    source = os.path.join(source_path, sample_packages[source_package_type][0])

    packages_factory = packager.PackageFactory(source_pkg())
    return list(packages_factory.locate_packages(path=source))
Exemplo n.º 8
0
 def test_cataloged_collection_transform(
         self,
         eas_collection,
         source_file, package_type, expected_out,
         monkeypatch
 ):
     """Transform each located EAS package with the transformer mocked.

     NOTE(review): ``expected_out`` is never referenced and nothing is
     asserted, so this only checks that transform() does not raise.
     """
     def _transform_signature(source, destination):
         return None

     transform = Mock(spec=_transform_signature)
     monkeypatch.setattr(
         packager.transformations.Transformers, "transform", transform
     )
     output_dir = "output"
     eas_factory = packager.PackageFactory(eas.Eas())
     for located in eas_factory.locate_packages(eas_collection(source_file)):
         destination_factory = packager.PackageFactory(package_type())
         destination_factory.transform(located, output_dir)
Exemplo n.º 9
0
def test_capture_one_dashes(capture_one_batch_with_dashes):
    """The first package found with a dash delimiter holds every file."""
    batch_dir, source_files = capture_one_batch_with_dashes

    dash_delimited = packager.packages.CaptureOnePackage(delimiter="-")
    factory = packager.PackageFactory(dash_delimited)

    first_package = next(factory.locate_packages(batch_dir))
    assert len(first_package) == len(source_files)
Exemplo n.º 10
0
def test_capture_one_underscore(capture_one_batch_with_underscores):
    """The first package found with the default delimiter holds every file."""
    batch_dir, source_files = capture_one_batch_with_underscores

    default_delimited = packager.packages.CaptureOnePackage()
    factory = packager.PackageFactory(default_delimited)

    first_package = next(factory.locate_packages(batch_dir))
    assert len(first_package) == len(source_files)
Exemplo n.º 11
0
def capture_one_session_w_ds_store(capture_one_sample_package):
    """Locate Capture One packages after adding an empty ``.DS_Store`` file.

    Returns:
        List of the packages located in the sample package directory.
    """
    source_dir = os.path.join(capture_one_sample_package)

    # Create an empty .DS_Store file alongside the sample files
    ds_store_path = os.path.join(source_dir, ".DS_Store")
    with open(ds_store_path, "w"):
        pass

    factory = packager.PackageFactory(packager.packages.CaptureOnePackage())

    # find all Capture One organized packages
    return list(factory.locate_packages(path=source_dir))
def package_objects(source_path, package_type):
    """Locate the sample packages for a named package type.

    Args:
        source_path: Directory that contains the sample package folders.
        package_type: Name of a class in ``packager.packages``. The same
            string is the key into ``sample_packages``, whose first element
            is the sample folder name.

    Returns:
        List of the packages located in the sample folder.

    Raises:
        AttributeError: If ``packager.packages`` has no such attribute.
    """
    from operator import attrgetter

    # Resolve the class by attribute lookup instead of eval() so the string
    # can never be executed as arbitrary code. attrgetter also accepts
    # dotted names, like the original expression did.
    pkg_factory_type = attrgetter(package_type)(packager.packages)

    source = os.path.join(source_path, sample_packages[package_type][0])

    packages_factory = packager.PackageFactory(pkg_factory_type())

    # find all packages of the requested type in the sample folder
    return list(packages_factory.locate_packages(path=source))
 def __init__(
         self,
         src: packager.package.collection.Package,
         dst: str
 ) -> None:
     """Store the source package and destination and set up the packager.

     Args:
         src: Package to be used as the source.
         dst: Destination value, stored unchanged on ``self.dst``.
     """
     super().__init__()
     self.src = src
     self.dst = dst

     # Output is always produced as a Digital Library Compound package
     output_format = packager.packages.DigitalLibraryCompound()
     self.output_packager = packager.PackageFactory(output_format)
Exemplo n.º 14
0
def test_transform_into_hathi(capture_one_batch_with_dashes, tmpdir):
    """Transform a dash-delimited Capture One batch into a HathiTiff package.

    The output folder should contain ``99423682912205899`` holding twenty
    tif files named with zero-padded 8-digit numbers starting at 0.
    """
    batch_dir, source_files = capture_one_batch_with_dashes
    capture_one_factory = packager.PackageFactory(
        packager.packages.CaptureOnePackage(delimiter="-")
    )
    located_packages = capture_one_factory.locate_packages(batch_dir)

    hathi_factory = packager.PackageFactory(packager.packages.HathiTiff())
    output = tmpdir / "output"
    output.ensure_dir()
    for located in located_packages:
        hathi_factory.transform(located, dest=output.strpath)

    assert (output / "99423682912205899").exists()

    for page_number in range(20):
        expected_file = f"{str(page_number).zfill(8)}.tif"
        assert (output / "99423682912205899" / expected_file).exists()
    output.remove()
    def work(self) -> bool:
        """Locate the Capture One packages under the root and store them.

        Returns:
            Always True.
        """
        self.log("Locating packages in {}".format(self._root))

        capture_one_factory = packager.PackageFactory(
            packager.packages.CaptureOnePackage())

        # Materialize the located packages before handing them over
        self.set_results(list(capture_one_factory.locate_packages(self._root)))
        return True
Exemplo n.º 16
0
    def test_cataloged_collection_transform(
            self,
            cataloged_collection,
            source_file, package_type, expected_out,
            monkeypatch
    ):
        """Check the destination path used when transforming a package.

        A HathiJp2 destination is verified on the mocked
        ``Transformers.transform``; a DigitalLibraryCompound destination is
        verified on the mock shared by ``CopyFile.transform`` and
        ``ConvertJp2Standard.transform``. Any other package type fails.
        """
        def _two_arg_signature(source, destination):
            return None

        def _three_arg_signature(source, destination, logger):
            return None

        transform = Mock(spec=_two_arg_signature)
        monkeypatch.setattr(
            uiucprescon.packager.transformations.Transformers,
            "transform",
            transform
        )

        transform2 = Mock(spec=_three_arg_signature)
        for transformer in (
                uiucprescon.packager.transformations.CopyFile,
                uiucprescon.packager.transformations.ConvertJp2Standard,
        ):
            monkeypatch.setattr(transformer, "transform", transform2)

        output_dir = "output"
        source_factory = packager.PackageFactory(noneas.CatalogedNonEAS())
        for located in source_factory.locate_packages(
                cataloged_collection(source_file)
        ):
            destination_factory = packager.PackageFactory(package_type())
            destination_factory.transform(located, output_dir)

        if package_type == hathi_jp2_package.HathiJp2:
            expected_path = os.path.join(output_dir, expected_out)
            transform.assert_any_call(ANY, expected_path)

        elif package_type == digital_library_compound.DigitalLibraryCompound:
            expected_path = os.path.join(output_dir, expected_out)
            transform2.assert_any_call(ANY, expected_path, ANY)
        else:
            assert False, f"testing '{package_type}' not supported"
Exemplo n.º 17
0
def test_capture_one_tiff_package_size(capture_one_fixture):
    """The sample Capture One batch should contain exactly two packages."""
    source = os.path.join(capture_one_fixture, CAPTURE_ONE_BATCH_NAME)

    factory = packager.PackageFactory(packager.packages.CaptureOnePackage())

    # find all Capture One organized packages
    located = list(factory.locate_packages(path=source))

    # There should be 2 packages in this sample batch
    assert len(located) == 2
Exemplo n.º 18
0
    def work(self):
        """Convert the existing package into the configured package format.

        The packager's logger is set to INFO and used with
        ``self.log_config`` for the duration of the conversion.

        Returns:
            Always True.
        """
        my_logger = logging.getLogger(packager.__name__)
        my_logger.setLevel(logging.INFO)
        with self.log_config(my_logger):
            message = (
                f"Converting {self.packaging_id} from {self.source_path} "
                f"to a {self.package_format} package at "
                f"{self.new_package_root}"
            )
            self.log(message)

            # Look up the output package type by its configured format name
            output_format = \
                PackageConverter.package_formats[self.package_format]
            package_factory = packager.PackageFactory(output_format)
            package_factory.transform(
                self.existing_package, dest=self.new_package_root
            )
        return True
Exemplo n.º 19
0
    def work(self) -> bool:
        """Convert the existing package into a HathiTiff package.

        The packager's logger is set to INFO and used with
        ``self.log_config`` for the duration of the conversion.

        Returns:
            Always True.
        """
        my_logger = logging.getLogger(packager.__name__)
        my_logger.setLevel(logging.INFO)
        with self.log_config(my_logger):
            message = (
                f"Converting {self.packaging_id} from "
                f"{self.source_path} to a Hathi Trust Tiff "
                f"package at {self.new_package_root}"
            )
            self.log(message)

            hathi_tiff_factory = packager.PackageFactory(
                packager.packages.HathiTiff())
            hathi_tiff_factory.transform(
                self.existing_package, dest=self.new_package_root
            )
        return True
Exemplo n.º 20
0
def test_convert(hathi_limited_view_sample_packages, monkeypatch):
    """Convert sample packages into a Digital Library Compound package.

    The ``pykdu_compress`` command line wrappers are replaced by stand-ins
    that only create the output file. After the conversion the temporary
    directory must hold exactly one entry, and each package located in it
    must match the ID of the sample at the same index and have at least one
    access and one preservation file.
    """
    import pathlib

    def kdu_compress_cli2(infile: str,
                          outfile: str,
                          in_args=None,
                          out_args=None):
        # Stand-in: create an empty output file instead of compressing
        pathlib.Path(outfile).touch()

    def kdu_expand_cli(infile: str, outfile: str, in_args=None, out_args=None):
        # Stand-in: create an empty output file instead of expanding
        pathlib.Path(outfile).touch()

    monkeypatch.setattr(pykdu_compress, "kdu_compress_cli2", kdu_compress_cli2)
    monkeypatch.setattr(pykdu_compress, "kdu_expand_cli", kdu_expand_cli)

    digital_library_compound_builder = packager.PackageFactory(
        packager.packages.DigitalLibraryCompound())

    with tempfile.TemporaryDirectory() as tmp_dir:
        for package in hathi_limited_view_sample_packages:
            try:
                digital_library_compound_builder.transform(package,
                                                           dest=tmp_dir)

            except errors.ZipFileException as e:
                # Dump diagnostics about the zip file, then let the test fail

                print(f"{e.src_zip_file} had a problem", file=sys.stderr)

                if len(e.problem_files) > 0:
                    print(f"Problems with {','.join(e.problem_files)}",
                          file=sys.stderr)

                # Use a context manager so the archive handle is closed
                # before the exception is re-raised.
                with zipfile.ZipFile(e.src_zip_file) as problem_file:
                    print(problem_file.namelist(), file=sys.stderr)
                raise
        assert len(list(os.scandir(tmp_dir))) == 1

        for i, new_package in enumerate(
                digital_library_compound_builder.locate_packages(tmp_dir)):
            # Located packages are compared to the samples by position
            assert new_package.metadata[Metadata.ID] == \
                   hathi_limited_view_sample_packages[i].metadata[Metadata.ID]

            sample_item = new_package.items[0]
            access = sample_item.instantiations[InstantiationTypes.ACCESS]
            access_files = list(access.get_files())
            assert len(access_files) > 0

            pres = sample_item.instantiations[InstantiationTypes.PRESERVATION]
            pres_files = list(pres.get_files())
            assert len(pres_files) > 0
            assert new_package.metadata[Metadata.PATH] == tmp_dir
Exemplo n.º 21
0
    def test_capture_one_collection_transform(
            self,
            capture_one_collection,
            source_file, package_type, expected_out,
            monkeypatch
    ):
        """Check the destination path used for a Capture One package.

        ``Transformers.transform`` is mocked and must have been called at
        least once with ``expected_out`` joined onto the output directory.
        """
        def _transform_signature(source, destination):
            return None

        transform = Mock(spec=_transform_signature)
        monkeypatch.setattr(
            uiucprescon.packager.transformations.Transformers,
            "transform",
            transform
        )
        output_dir = "output"

        source_factory = packager.PackageFactory(
            packager.packages.CaptureOnePackage(delimiter='-')
        )
        collection_path = capture_one_collection(source_file)
        for located in source_factory.locate_packages(collection_path):
            destination_factory = packager.PackageFactory(package_type())
            destination_factory.transform(located, output_dir)

        expected_path = os.path.join(output_dir, expected_out)
        transform.assert_any_call(ANY, expected_path)
    def discover_task_metadata(
            self,
            initial_results: List[Any],
            additional_data, **user_args: str
    ) -> List[dict]:
        """Build one task description per HathiLimitedView package found.

        Args:
            initial_results: Not used by this implementation.
            additional_data: Not used by this implementation.
            **user_args: User settings; "Input" is searched for packages
                and "Output" is passed along as the destination.

        Returns:
            One dictionary per located package with the keys "package" and
            "destination".

        """
        limited_view_factory = packager.PackageFactory(
            packager.packages.HathiLimitedView())

        tasks = []
        for located in limited_view_factory.locate_packages(
                user_args['Input']):
            tasks.append({
                "package": located,
                "destination": user_args['Output']
            })
        return tasks
Exemplo n.º 23
0
    def work(self):
        """Convert the existing package into a Digital Library Compound one.

        The packager's logger is set to INFO and used with
        ``self.log_config`` for the duration of the conversion.

        Returns:
            Always True.
        """
        my_logger = logging.getLogger(packager.__name__)
        my_logger.setLevel(logging.INFO)
        with self.log_config(my_logger):
            # The message names the format actually produced below; it used
            # to say "Hathi Trust Tiff", which did not match the factory.
            self.log(
                f"Converting {self.packaging_id} from {self.source_path} "
                f"to a Digital Library Compound package at "
                f"{self.new_package_root}")

            package_factory = packager.PackageFactory(
                packager.packages.DigitalLibraryCompound())

            package_factory.transform(self.existing_package,
                                      dest=self.new_package_root)
        return True
    def work(self) -> bool:
        """Transform the package into a HathiTiff package and record it.

        The stored result is a dictionary with the "bib_id" and the
        "location" (the bib id joined onto the destination).

        Returns:
            Always True.
        """
        hathi_tiff_factory = packager.PackageFactory(
            packager.packages.HathiTiff())
        hathi_tiff_factory.transform(self._package, self._destination)

        self.log("Transformed CaptureOne package {} to a HathiTiff package "
                 "in {}".format(self._bib_id, self._destination))

        results = {
            "bib_id": self._bib_id,
            "location": os.path.join(self._destination, self._bib_id),
        }
        self.set_results(results)
        return True
def hathi_tiff_package_w_sidecar_text(hathi_tiff_sample_package):
    """Add empty sidecar text files to the samples and locate the packages.

    Three ``.txt`` files are created in ``000001`` and two in ``000002``,
    numbered from 1.

    Returns:
        List of the HathiTiff packages located in the sample directory.
    """
    # (package folder, number of text files to create)
    sidecar_counts = (("000001", 3), ("000002", 2))
    for folder_name, text_file_count in sidecar_counts:
        package_path = os.path.join(hathi_tiff_sample_package, folder_name)
        for index in range(text_file_count):
            text_file = os.path.join(package_path, f"0000000{index + 1}.txt")
            with open(text_file, "w"):
                pass

    hathi_tiff_factory = packager.PackageFactory(
        packager.packages.HathiTiff())
    return list(hathi_tiff_factory.locate_packages(hathi_tiff_sample_package))
Exemplo n.º 26
0
    def archival_full_name_transformed_to_ht_trust(
            self, sample_collection_longer, monkeypatch):
        """Transform the archival sample packages to HathiJp2, mocked.

        Args:
            sample_collection_longer: Pair of (root, files); only the root
                is used here.
            monkeypatch: pytest monkeypatch fixture.

        Returns:
            Tuple of the output path handed to ``transform`` and the Mock
            that recorded the transformer calls.

        """
        transform = Mock()

        def record_transform(_, source, destination):
            # Record the call on the Mock using keyword arguments
            transform(source=source, destination=destination)

        root, files = sample_collection_longer
        output_path = os.path.join('some', 'folder')
        source_factory = packager.PackageFactory(noneas.ArchivalNonEAS())
        hathi_jp2_factory = packager.PackageFactory(
            hathi_jp2_package.HathiJp2()
        )

        with monkeypatch.context() as mp:
            mp.setattr(
                packager.transformations.Transformers,
                "transform",
                record_transform
            )
            for located in source_factory.locate_packages(root):
                hathi_jp2_factory.transform(located, output_path)

        return output_path, transform
Exemplo n.º 27
0
    def transformed_to_ht_trust(
            self, sample_cataloged_collection, monkeypatch, tmpdir
    ):
        """Transform the cataloged sample packages to HathiJp2, mocked.

        Args:
            sample_cataloged_collection: Pair of (root, files); only the
                root is used here.
            monkeypatch: pytest monkeypatch fixture.
            tmpdir: Not referenced in the body.

        Returns:
            Tuple of the output path handed to ``transform`` and the Mock
            that recorded the transformer calls.

        """
        root, files = sample_cataloged_collection
        output_path = os.path.join('some', 'folder')
        source_factory = packager.PackageFactory(noneas.CatalogedNonEAS())
        hathi_jp2_factory = packager.PackageFactory(
            hathi_jp2_package.HathiJp2())

        transform = Mock()

        def record_transform(_, source, destination):
            # Record the call on the Mock using keyword arguments
            transform(source=source, destination=destination)

        with monkeypatch.context() as mp:
            mp.setattr(
                packager.transformations.Transformers,
                "transform",
                record_transform
            )
            for located in list(source_factory.locate_packages(root)):
                hathi_jp2_factory.transform(located, output_path)

        return output_path, transform
    def discover_task_metadata(self, initial_results: List[Any],
                               additional_data, **user_args) -> List[dict]:
        """Build one task description per HathiLimitedView package found.

        Args:
            initial_results: Not used by this implementation.
            additional_data: Not used by this implementation.
            **user_args: User settings; "Input" is searched for packages
                and "Output" is passed along as the destination.

        Returns:
            One dictionary per located package with the keys "package" and
            "destination".

        """
        limited_view_factory = packager.PackageFactory(
            packager.packages.HathiLimitedView())

        return [
            {
                "package": located,
                "destination": user_args['Output']
            }
            for located in limited_view_factory.locate_packages(
                user_args['Input'])
        ]
Exemplo n.º 29
0
    def discover_task_metadata(self, initial_results: List[Any],
                               additional_data, **user_args) -> List[dict]:
        """Build one job description per Capture One package found.

        Args:
            initial_results: Not used by this implementation.
            additional_data: Not used by this implementation.
            **user_args: User settings; "Input" is searched for packages
                and "Output" is passed along as the destination.

        Returns:
            One dictionary per located package with the keys "package",
            "output" and "source_path".

        """
        source_input = user_args["Input"]
        dest = user_args["Output"]

        capture_one_factory = packager.PackageFactory(
            packager.packages.CaptureOnePackage())

        return [
            {
                "package": located,
                "output": dest,
                "source_path": source_input
            }
            for located in capture_one_factory.locate_packages(source_input)
        ]
Exemplo n.º 30
0
def test_capture_one_tiff_package_plus(capture_one_fixture_plus):
    """A batch using '+' as the delimiter should contain two packages."""
    plus_delimited = packager.packages.CaptureOnePackage(delimiter='+')
    factory = packager.PackageFactory(plus_delimited)

    # find all Capture One organized packages
    located = list(factory.locate_packages(path=capture_one_fixture_plus))

    # There should be 2 packages in this sample batch
    assert len(located) == 2