示例#1
0
    def test_add_summary(self):
        # The HTML markup in the summary is stripped, leaving the text
        # padded with spaces in subfield $a of the 520 field.
        summarized_work = self._work(with_license_pool=True)
        summarized_work.summary_text = "<p>Summary</p>"
        marc_record = Record()
        Annotator.add_summary(marc_record, summarized_work)
        self._check_field(marc_record, "520", {"a": b" Summary "})
示例#2
0
    def test_leader(self):
        # A work with no cached MARC record gets record status "n".
        sample_work = self._work(with_license_pool=True)
        assert "00000nam  2200000   4500" == Annotator.leader(sample_work)

        # If there's already a marc record cached, the record status changes
        # to "c".
        sample_work.marc_record = "cached"
        assert "00000cam  2200000   4500" == Annotator.leader(sample_work)
示例#3
0
 def test_add_audience(self):
     # Each known audience maps to a 385 field whose source ($2) is
     # "tlctarget".
     for audience_value, expected_term in Annotator.AUDIENCE_TERMS.items():
         audience_work = self._work(audience=audience_value)
         marc_record = Record()
         Annotator.add_audience(marc_record, audience_work)
         self._check_field(
             marc_record,
             "385",
             {"a": expected_term, "2": "tlctarget"},
         )
示例#4
0
    def test_create_record_roundtrip(self):
        # Build a MARC record for a work whose title and author contain
        # non-ASCII characters, then reload the work from the database
        # and confirm the regenerated record serializes identically.
        #
        # (A MARC record carries a creation timestamp, so the records
        # being compared must be created close enough together to match.)

        annotator = Annotator()

        # Creates a new record and saves it to the database.
        special_work = self._work(
            title="Little Mimi\u2019s First Counting Lesson",
            authors=["Lagerlo\xf6f, Selma Ottiliana Lovisa,"],
            with_license_pool=True,
        )
        first_record = MARCExporter.create_record(special_work, annotator)
        second_record = MARCExporter.create_record(special_work, annotator)
        assert first_record.as_marc() == second_record.as_marc()

        # Loads the existing record back out of the DB via a fresh session.
        session = Session(self.connection)
        reloaded_work = get_one(session, Work, id=special_work.id)
        reloaded_record = MARCExporter.create_record(reloaded_work, annotator)
        assert first_record.as_marc() == reloaded_record.as_marc()
示例#5
0
    def test_add_simplified_genres(self):
        # Each genre on the work becomes a 650 field sourced from
        # "Library Simplified".
        genre_work = self._work(with_license_pool=True)
        fantasy_genre, _ = Genre.lookup(self._db, "Fantasy", autocreate=True)
        romance_genre, _ = Genre.lookup(self._db, "Romance", autocreate=True)
        genre_work.genres = [fantasy_genre, romance_genre]

        marc_record = Record()
        Annotator.add_simplified_genres(marc_record, genre_work)

        # Sort by subfield $a so the order is deterministic.
        fantasy_field, romance_field = sorted(
            marc_record.get_fields("650"),
            key=lambda field: field.get_subfields("a")[0],
        )
        for genre_field, genre_name in (
            (fantasy_field, "Fantasy"),
            (romance_field, "Romance"),
        ):
            assert ["0", "7"] == genre_field.indicators
            assert genre_name == genre_field.get_subfields("a")[0]
            assert "Library Simplified" == genre_field.get_subfields("2")[0]
示例#6
0
    def test_add_control_fields(self):
        """Verify the MARC control fields (001, 005, 006, 007, 008)
        derived from an identifier, license pool, and edition.
        """
        # This edition has one format and was published before 1900.
        edition, pool = self._edition(with_license_pool=True)
        identifier = pool.identifier
        edition.issued = datetime_utc(956, 1, 1)

        now = utc_now()
        record = Record()

        Annotator.add_control_fields(record, identifier, pool, edition)
        # 001: the control number is the identifier's URN.
        self._check_control_field(record, "001", identifier.urn)
        # 005: latest-transaction timestamp; begins with today's date.
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        # 006/007: fixed codes describing an electronic resource.
        self._check_control_field(record, "006", "m        d        ")
        self._check_control_field(record, "007", "cr cn ---anuuu")
        # 008: today's date, the zero-padded publication year (0956),
        # and the edition's language code.
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s0956    xxu                 eng  ")

        # This French edition has two formats and was published in 2018.
        edition2, pool2 = self._edition(with_license_pool=True)
        identifier2 = pool2.identifier
        edition2.issued = datetime_utc(2018, 2, 3)
        edition2.language = "fre"
        # Register a second delivery mechanism so the pool has two formats.
        LicensePoolDeliveryMechanism.set(
            pool2.data_source,
            identifier2,
            Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.IN_COPYRIGHT,
        )

        record = Record()
        Annotator.add_control_fields(record, identifier2, pool2, edition2)
        self._check_control_field(record, "001", identifier2.urn)
        assert now.strftime("%Y%m%d") in record.get_fields("005")[0].value()
        self._check_control_field(record, "006", "m        d        ")
        # 007 differs from the single-format case above: "m" where the
        # one-format record had "a".
        self._check_control_field(record, "007", "cr cn ---mnuuu")
        # 008 now carries the 2018 publication year and "fre" language.
        self._check_control_field(
            record, "008",
            now.strftime("%y%m%d") + "s2018    xxu                 fre  ")
示例#7
0
    def test_add_formats(self):
        # Give the pool two delivery mechanisms: a DRM-free EPUB and an
        # Adobe-DRM PDF. Each format should produce its own 538 field.
        edition, license_pool = self._edition(with_license_pool=True)
        epub_no_drm, _ = DeliveryMechanism.lookup(
            self._db, Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM)
        license_pool.delivery_mechanisms[0].delivery_mechanism = epub_no_drm
        LicensePoolDeliveryMechanism.set(
            license_pool.data_source,
            license_pool.identifier,
            Representation.PDF_MEDIA_TYPE,
            DeliveryMechanism.ADOBE_DRM,
            RightsStatus.IN_COPYRIGHT,
        )

        marc_record = Record()
        Annotator.add_formats(marc_record, license_pool)
        format_fields = marc_record.get_fields("538")
        assert 2 == len(format_fields)

        # Sort by subfield $a so the order is deterministic.
        pdf_field, epub_field = sorted(
            format_fields, key=lambda field: field.get_subfields("a")[0])
        assert "Adobe PDF eBook" == pdf_field.get_subfields("a")[0]
        assert [" ", " "] == pdf_field.indicators
        assert "EPUB eBook" == epub_field.get_subfields("a")[0]
        assert [" ", " "] == epub_field.indicators
示例#8
0
    def test_add_publisher(self):
        # A publisher produces a 264 field with a placeholder place,
        # the publisher name, and the publication year.
        published_edition = self._edition()
        published_edition.publisher = self._str
        published_edition.issued = datetime_utc(1894, 4, 5)

        marc_record = Record()
        Annotator.add_publisher(marc_record, published_edition)
        self._check_field(
            marc_record,
            "264",
            {
                "a": "[Place of publication not identified]",
                "b": published_edition.publisher,
                "c": "1894",
            },
            [" ", "1"],
        )

        # If there's no publisher, the field is left out.
        published_edition.publisher = None
        marc_record = Record()
        Annotator.add_publisher(marc_record, published_edition)
        assert [] == marc_record.get_fields("264")
示例#9
0
    def test_add_title(self):
        titled_edition = self._edition()
        titled_edition.title = "The Good Soldier"
        titled_edition.sort_title = "Good Soldier, The"
        titled_edition.subtitle = "A Tale of Passion"

        # Title, subtitle, and author all land in a single 245 field.
        marc_record = Record()
        Annotator.add_title(marc_record, titled_edition)
        [title_field] = marc_record.get_fields("245")
        self._check_field(
            marc_record,
            "245",
            {
                "a": titled_edition.title,
                "b": titled_edition.subtitle,
                "c": titled_edition.author,
            },
            ["0", "4"],
        )

        # With no subtitle and no author, subfields $b and $c are omitted.
        titled_edition.subtitle = None
        titled_edition.author = None

        marc_record = Record()
        Annotator.add_title(marc_record, titled_edition)
        [title_field] = marc_record.get_fields("245")
        self._check_field(
            marc_record,
            "245",
            {"a": titled_edition.title},
            ["0", "4"],
        )
        assert [] == title_field.get_subfields("b")
        assert [] == title_field.get_subfields("c")
示例#10
0
    def test_add_contributors(self):
        primary_name = "a"
        secondary_name = "b"
        translator_name = "c"

        # Edition with one author gets a 100 field and no 700 fields.
        solo_edition = self._edition(authors=[primary_name])
        solo_edition.sort_author = "sorted"

        marc_record = Record()
        Annotator.add_contributors(marc_record, solo_edition)
        assert [] == marc_record.get_fields("700")
        self._check_field(
            marc_record, "100", {"a": solo_edition.sort_author}, ["1", " "])

        # Two authors plus a translator: three 700 fields, no 100 field.
        multi_edition = self._edition(authors=[primary_name, secondary_name])
        multi_edition.add_contributor(translator_name,
                                      Contributor.TRANSLATOR_ROLE)

        marc_record = Record()
        Annotator.add_contributors(marc_record, multi_edition)
        assert [] == marc_record.get_fields("100")
        contributor_fields = marc_record.get_fields("700")
        for contributor_field in contributor_fields:
            assert ["1", " "] == contributor_field.indicators

        # Sort by subfield $a ("a" < "b" < "c") for a stable order.
        primary_field, secondary_field, translator_field = sorted(
            contributor_fields,
            key=lambda field: field.get_subfields("a")[0])
        assert primary_name == primary_field.get_subfields("a")[0]
        assert (Contributor.PRIMARY_AUTHOR_ROLE
                == primary_field.get_subfields("e")[0])
        assert secondary_name == secondary_field.get_subfields("a")[0]
        assert Contributor.AUTHOR_ROLE == secondary_field.get_subfields("e")[0]
        assert translator_name == translator_field.get_subfields("a")[0]
        assert (Contributor.TRANSLATOR_ROLE
                == translator_field.get_subfields("e")[0])
示例#11
0
    def test_add_isbn(self):
        # A plain ISBN identifier goes straight into an 020 field.
        isbn_identifier = self._identifier(identifier_type=Identifier.ISBN)
        marc_record = Record()
        Annotator.add_isbn(marc_record, isbn_identifier)
        self._check_field(
            marc_record, "020", {"a": isbn_identifier.identifier})

        # A non-ISBN identifier with an equivalent ISBN still produces
        # an 020 field, carrying the equivalent's value.
        linked_identifier = self._identifier()
        oclc_source = DataSource.lookup(self._db, DataSource.OCLC)
        linked_identifier.equivalent_to(oclc_source, isbn_identifier, 1)
        marc_record = Record()
        Annotator.add_isbn(marc_record, linked_identifier)
        self._check_field(
            marc_record, "020", {"a": isbn_identifier.identifier})

        # If there is no ISBN at all, the field is left out.
        plain_identifier = self._identifier()
        marc_record = Record()
        Annotator.add_isbn(marc_record, plain_identifier)
        assert [] == marc_record.get_fields("020")
示例#12
0
    def test_add_series(self):
        series_edition = self._edition()
        series_edition.series = self._str
        series_edition.series_position = 5

        # Series name and position appear in 490 subfields $a and $v.
        marc_record = Record()
        Annotator.add_series(marc_record, series_edition)
        self._check_field(
            marc_record,
            "490",
            {
                "a": series_edition.series,
                "v": str(series_edition.series_position),
            },
            ["0", " "],
        )

        # If there's no series position, the same field is used without
        # the v subfield.
        series_edition.series_position = None
        marc_record = Record()
        Annotator.add_series(marc_record, series_edition)
        self._check_field(
            marc_record,
            "490",
            {"a": series_edition.series},
            ["0", " "],
        )
        [series_field] = marc_record.get_fields("490")
        assert [] == series_field.get_subfields("v")

        # If there's no series, the field is left out.
        series_edition.series = None
        marc_record = Record()
        Annotator.add_series(marc_record, series_edition)
        assert [] == marc_record.get_fields("490")
示例#13
0
    def test_create_record(self):
        """MARCExporter.create_record caches the generated record on the
        work and reuses the cache on later calls; the distributor (264)
        field is excluded from the cache and always rebuilt from the
        license pool's current data source, and force_create=True
        regenerates the whole record.
        """
        work = self._work(
            with_license_pool=True,
            title="old title",
            authors=["old author"],
            data_source_name=DataSource.OVERDRIVE,
        )
        annotator = Annotator()

        # The record isn't cached yet, so a new record is created and cached.
        assert None == work.marc_record
        record = MARCExporter.create_record(work, annotator)
        [title_field] = record.get_fields("245")
        assert "old title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, old" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.OVERDRIVE == distributor_field.get_subfields("b")[0]
        cached = work.marc_record
        assert "old title" in cached
        assert "author, old" in cached
        # The distributor isn't part of the cached record.
        assert DataSource.OVERDRIVE not in cached

        # Change the work's metadata and data source behind the cache's back.
        work.presentation_edition.title = "new title"
        work.presentation_edition.sort_author = "author, new"
        new_data_source = DataSource.lookup(self._db, DataSource.BIBLIOTHECA)
        work.license_pools[0].data_source = new_data_source

        # Now that the record is cached, creating a record will
        # use the cache. Distributor will be updated since it's
        # not part of the cached record.
        record = MARCExporter.create_record(work, annotator)
        [title_field] = record.get_fields("245")
        assert "old title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, old" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
            "b")[0]

        # But we can force an update to the cached record.
        record = MARCExporter.create_record(work, annotator, force_create=True)
        [title_field] = record.get_fields("245")
        assert "new title" == title_field.get_subfields("a")[0]
        [author_field] = record.get_fields("100")
        assert "author, new" == author_field.get_subfields("a")[0]
        [distributor_field] = record.get_fields("264")
        assert DataSource.BIBLIOTHECA == distributor_field.get_subfields(
            "b")[0]
        cached = work.marc_record
        assert "old title" not in cached
        assert "author, old" not in cached
        assert "new title" in cached
        assert "author, new" in cached

        # If we pass in an integration, it's passed along to the annotator.
        integration = self._integration()

        class MockAnnotator(Annotator):
            # Captures the integration argument for inspection below.
            integration = None

            def annotate_work_record(self, work, pool, edition, identifier,
                                     record, integration):
                self.integration = integration

        annotator = MockAnnotator()
        record = MARCExporter.create_record(work,
                                            annotator,
                                            integration=integration)
        assert integration == annotator.integration
示例#14
0
 def test_add_ebooks_subject(self):
     # A fixed genre heading is always written to a 655 field.
     marc_record = Record()
     Annotator.add_ebooks_subject(marc_record)
     self._check_field(
         marc_record, "655", {"a": "Electronic books."}, [" ", "0"])
示例#15
0
    def test_records(self):
        """Exercise MARCExporter.records end to end: mirroring the
        generated MARC file to storage, tracking each run with a
        CachedMARCFile, upload batching, WorkList (lane-less) support,
        start_time handling in the mirror URL, and the case where the
        search engine returns no works.
        """
        integration = self._integration()
        now = utc_now()
        exporter = MARCExporter.from_config(self._default_library)
        annotator = Annotator()
        lane = self._lane("Test Lane", genres=["Mystery"])
        w1 = self._work(genre="Mystery", with_open_access_download=True)
        w2 = self._work(genre="Mystery", with_open_access_download=True)

        # Index both works so the mock search engine can find them.
        search_engine = MockExternalSearchIndex()
        search_engine.bulk_update([w1, w2])

        # If there's a storage protocol but not corresponding storage integration,
        # it raises an exception.
        pytest.raises(Exception, exporter.records, lane, annotator)

        # If there is a storage integration, the output file is mirrored.
        mirror_integration = self._external_integration(
            ExternalIntegration.S3,
            ExternalIntegration.STORAGE_GOAL,
            username="******",
            password="******",
        )

        mirror = MockS3Uploader()

        # Batch sizes of 1 force multiple queries and multiple uploads.
        exporter.records(
            lane,
            annotator,
            mirror_integration,
            mirror=mirror,
            query_batch_size=1,
            upload_batch_size=1,
            search_engine=search_engine,
        )

        # The file was mirrored and a CachedMARCFile was created to track the mirrored file.
        assert 1 == len(mirror.uploaded)
        [cache] = self._db.query(CachedMARCFile).all()
        assert self._default_library == cache.library
        assert lane == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        # The mirror URL embeds library short name, fetch time, and lane name.
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(cache.representation.fetched_at)),
            quote(lane.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert None == cache.start_time
        assert cache.end_time > now

        # The content was uploaded in two parts.
        assert 2 == len(mirror.content[0])
        complete_file = b"".join(mirror.content[0])
        records = list(MARCReader(complete_file))
        assert 2 == len(records)

        # Both works' titles appear, one per MARC record.
        title_fields = [record.get_fields("245") for record in records]
        titles = [fields[0].get_subfields("a")[0] for fields in title_fields]
        assert set([w1.title, w2.title]) == set(titles)

        # The per-work MARC records were cached as a side effect.
        assert w1.title in w1.marc_record
        assert w2.title in w2.marc_record

        self._db.delete(cache)

        # It also works with a WorkList instead of a Lane, in which case
        # there will be no lane in the CachedMARCFile.
        worklist = WorkList()
        worklist.initialize(self._default_library, display_name="All Books")

        mirror = MockS3Uploader()
        exporter.records(
            worklist,
            annotator,
            mirror_integration,
            mirror=mirror,
            query_batch_size=1,
            upload_batch_size=1,
            search_engine=search_engine,
        )

        assert 1 == len(mirror.uploaded)
        [cache] = self._db.query(CachedMARCFile).all()
        assert self._default_library == cache.library
        assert None == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        # The WorkList's display name replaces the lane name in the URL.
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(cache.representation.fetched_at)),
            quote(worklist.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert None == cache.start_time
        assert cache.end_time > now

        assert 2 == len(mirror.content[0])
        complete_file = b"".join(mirror.content[0])
        records = list(MARCReader(complete_file))
        assert 2 == len(records)

        self._db.delete(cache)

        # If a start time is set, it's used in the mirror url.
        #
        # (Our mock search engine returns everthing in its 'index',
        # so this doesn't test that the start time is actually used to
        # find works -- that's in the search index tests and the
        # tests of MARCExporterFacets.)
        start_time = now - datetime.timedelta(days=3)

        mirror = MockS3Uploader()
        exporter.records(
            lane,
            annotator,
            mirror_integration,
            start_time=start_time,
            mirror=mirror,
            query_batch_size=2,
            upload_batch_size=2,
            search_engine=search_engine,
        )
        [cache] = self._db.query(CachedMARCFile).all()

        assert self._default_library == cache.library
        assert lane == cache.lane
        assert mirror.uploaded[0] == cache.representation
        assert None == cache.representation.content
        # The URL now carries "<start_time>-<fetched_at>" as its middle segment.
        assert ("https://test-marc-bucket.s3.amazonaws.com/%s/%s-%s/%s.mrc" % (
            self._default_library.short_name,
            quote(str(start_time)),
            quote(str(cache.representation.fetched_at)),
            quote(lane.display_name),
        ) == mirror.uploaded[0].mirror_url)
        assert start_time == cache.start_time
        assert cache.end_time > now
        self._db.delete(cache)

        # If the search engine returns no contents for the lane,
        # nothing will be mirrored, but a CachedMARCFile is still
        # created to track that we checked for updates.
        empty_search_engine = MockExternalSearchIndex()

        mirror = MockS3Uploader()
        exporter.records(
            lane,
            annotator,
            mirror_integration,
            mirror=mirror,
            search_engine=empty_search_engine,
        )

        assert [] == mirror.content[0]
        [cache] = self._db.query(CachedMARCFile).all()
        assert cache.representation == mirror.uploaded[0]
        assert self._default_library == cache.library
        assert lane == cache.lane
        assert None == cache.representation.content
        assert None == cache.start_time
        assert cache.end_time > now

        self._db.delete(cache)
0
 def test_add_distributor(self):
     # The license pool's data source name is recorded as the
     # distributor in subfield $b of a 264 field.
     edition, license_pool = self._edition(with_license_pool=True)
     marc_record = Record()
     Annotator.add_distributor(marc_record, license_pool)
     self._check_field(
         marc_record, "264", {"b": license_pool.data_source.name},
         [" ", "2"])
示例#17
0
    def test_add_physical_description(self):
        """Check the physical-description fields (300, 336, 337, 338,
        347, 380) generated for a book edition and for an audio edition.
        """
        book = self._edition()
        book.medium = Edition.BOOK_MEDIUM
        audio = self._edition()
        audio.medium = Edition.AUDIO_MEDIUM

        # A book is described as a single online text resource.
        record = Record()
        Annotator.add_physical_description(record, book)
        self._check_field(record, "300", {"a": "1 online resource"})
        self._check_field(
            record,
            "336",
            {
                "a": "text",
                "b": "txt",
                "2": "rdacontent",
            },
        )
        self._check_field(
            record,
            "337",
            {
                "a": "computer",
                "b": "c",
                "2": "rdamedia",
            },
        )
        self._check_field(
            record,
            "338",
            {
                "a": "online resource",
                "b": "cr",
                "2": "rdacarrier",
            },
        )
        self._check_field(
            record,
            "347",
            {
                "a": "text file",
                "2": "rda",
            },
        )
        self._check_field(
            record,
            "380",
            {
                "a": "eBook",
                "2": "tlcgt",
            },
        )

        # An audiobook is a digital sound file with spoken-word content.
        record = Record()
        Annotator.add_physical_description(record, audio)
        self._check_field(
            record,
            "300",
            {
                "a": "1 sound file",
                "b": "digital",
            },
        )
        self._check_field(
            record,
            "336",
            {
                "a": "spoken word",
                "b": "spw",
                "2": "rdacontent",
            },
        )
        self._check_field(
            record,
            "337",
            {
                "a": "computer",
                "b": "c",
                "2": "rdamedia",
            },
        )
        self._check_field(
            record,
            "338",
            {
                "a": "online resource",
                "b": "cr",
                "2": "rdacarrier",
            },
        )
        self._check_field(
            record,
            "347",
            {
                "a": "audio file",
                "2": "rda",
            },
        )
        # Unlike a book, an audio edition gets no 380 field.
        assert [] == record.get_fields("380")
示例#18
0
 def test_add_system_details(self):
     # A fixed mode-of-access note is always written to field 538.
     marc_record = Record()
     Annotator.add_system_details(marc_record)
     self._check_field(
         marc_record, "538", {"a": "Mode of access: World Wide Web."})
示例#19
0
 def test_add_marc_organization_code(self):
     # The organization code is stored verbatim in control field 003.
     marc_record = Record()
     Annotator.add_marc_organization_code(marc_record, "US-MaBoDPL")
     self._check_control_field(marc_record, "003", "US-MaBoDPL")