示例#1
0
    def test_rights_status_open_access_link_no_rights_uses_data_source_default(
            self):
        """An open-access link without explicit rights takes its rights
        status from the default of the data source.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "abcd")

        # A CirculationData holding a single open-access download link;
        # applying it creates an open-access LicensePoolDeliveryMechanism.
        gutenberg_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        gutenberg_data = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            links=[gutenberg_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = gutenberg_data.license_pool(
            self._db, self._default_collection
        )
        pool.open_access = False

        # Applying the data flips the pool to open-access and leaves it
        # with exactly one delivery mechanism.
        gutenberg_data.apply(self._db, pool.collection, policy)
        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())

        # That mechanism carries the data source's default rights status.
        eq_(
            RightsStatus.PUBLIC_DOMAIN_USA,
            pool.delivery_mechanisms[0].rights_status.uri,
        )

        # A commercial source such as Overdrive may also offer a link
        # with rel="open access". Unless we know the link really is
        # open-access, it gets a RightsStatus of IN_COPYRIGHT.
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        overdrive_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        overdrive_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            links=[overdrive_link],
        )

        pool, unused = overdrive_data.license_pool(
            self._db, self._default_collection
        )
        pool.open_access = False
        overdrive_data.apply(self._db, pool.collection, policy)
        eq_(
            RightsStatus.IN_COPYRIGHT,
            pool.delivery_mechanisms[0].rights_status.uri,
        )

        # The pool itself stays non-open-access.
        eq_(False, pool.open_access)
示例#2
0
    def test_apply_identifier_equivalency(self):
        """Metadata.apply() records an equivalency only for an identifier
        that differs from the primary identifier.
        """
        # The primary identifier is described twice -- once as the real
        # Identifier and once as matching IdentifierData -- next to one
        # unrelated IdentifierData.
        edition, pool = self._edition(with_license_pool=True)
        primary_identifier = edition.primary_identifier
        same_as_primary = IdentifierData(
            type=primary_identifier.type,
            identifier=primary_identifier.identifier,
        )
        unrelated = IdentifierData(type=u"abc", identifier=u"def")

        metadata = Metadata(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
            identifiers=[same_as_primary, unrelated],
        )

        # Construction adds the primary identifier to the identifier list.
        eq_(3, len(metadata.identifiers))
        assert primary_identifier in metadata.identifiers

        metadata.apply(edition)

        # Only the unrelated identifier became an equivalency; neither the
        # primary identifier nor the IdentifierData mirroring it did.
        eq_(1, len(primary_identifier.equivalencies))
        [only_equivalency] = primary_identifier.equivalencies
        eq_(only_equivalency.output.type, u"abc")
        eq_(only_equivalency.output.identifier, u"def")
    def test_rights_status_open_access_link_no_rights_uses_data_source_default(self):
        """An open-access link without explicit rights takes its rights
        status from the default of the data source.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        gutenberg_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        gutenberg_data = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            links=[gutenberg_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = gutenberg_data.license_pool(self._db)
        gutenberg_data.apply(pool, policy)
        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))

        # The single delivery mechanism carries the data source's
        # default rights status.
        eq_(
            RightsStatus.PUBLIC_DOMAIN_USA,
            pool.delivery_mechanisms[0].rights_status.uri,
        )

        # A commercial source such as Overdrive may also offer a link
        # with rel="open access". Unless we know the link really is
        # open-access, it gets a RightsStatus of IN_COPYRIGHT.
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        overdrive_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        overdrive_data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            links=[overdrive_link],
        )

        pool, unused = overdrive_data.license_pool(self._db)
        overdrive_data.apply(pool, policy)
        eq_(
            RightsStatus.IN_COPYRIGHT,
            pool.delivery_mechanisms[0].rights_status.uri,
        )

        # As a consequence the work is treated as non-open-access.
        eq_(False, pool.open_access)
示例#4
0
 def extract_identifier(cls, identifier_tag):
     """Build an IdentifierData from a <dcterms:identifier> tag.

     The tag's text is lower-cased and handed to
     Identifier.type_and_identifier_for_urn(). Returns None if a
     ValueError is raised along the way.
     """
     try:
         urn = identifier_tag.text.lower()
         id_type, id_value = Identifier.type_and_identifier_for_urn(urn)
         result = IdentifierData(id_type, id_value)
     except ValueError:
         result = None
     return result
示例#5
0
    def test_rights_status_default_rights_from_data_source(self):
        """With no rights given anywhere, the delivery mechanism gets the
        default rights status of the CirculationData's data source.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        drm_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=gutenberg_id,
            links=[drm_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = data.license_pool(self._db)
        data.apply(pool, policy)

        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))

        # The OA content server's default rights status was used.
        eq_(
            RightsStatus.GENERIC_OPEN_ACCESS,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#6
0
    def test_rights_status_default_rights_from_data_source(self):
        """With no rights given anywhere, the delivery mechanism gets the
        default rights status of the CirculationData's data source.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        drm_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=gutenberg_id,
            links=[drm_link],
        )
        policy = ReplacementPolicy(formats=True)

        # A freshly obtained pool is not open-access.
        pool, unused = data.license_pool(
            self._db, self._default_collection
        )
        eq_(False, pool.open_access)

        data.apply(self._db, pool.collection, policy)

        # Receiving a link that came from the OA content server made the
        # pool open-access.
        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())

        # The OA content server's default rights status was used.
        eq_(
            RightsStatus.GENERIC_OPEN_ACCESS,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#7
0
    def test_rights_status_default_rights_passed_in(self):
        """A default_rights_uri handed to CirculationData ends up as the
        rights status of the delivery mechanism.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "abcd")
        drm_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        data = CirculationData(
            data_source=DataSource.OA_CONTENT_SERVER,
            primary_identifier=gutenberg_id,
            default_rights_uri=RightsStatus.CC_BY,
            links=[drm_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = data.license_pool(
            self._db, self._default_collection
        )
        data.apply(self._db, pool.collection, policy)

        eq_(True, pool.open_access)
        eq_(1, pool.delivery_mechanisms.count())

        # The explicitly supplied default was used.
        eq_(
            RightsStatus.CC_BY,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#8
0
    def test_links_filtered(self):
        """Metadata drops the open-access download link and keeps the
        image, thumbnail and description links.
        """
        download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
        plain_image = LinkData(rel=Hyperlink.IMAGE, href="http://example.com/")
        description = LinkData(rel=Hyperlink.DESCRIPTION, content="foo")
        thumbnail = LinkData(
            rel=Hyperlink.THUMBNAIL_IMAGE,
            href="http://thumbnail.com/",
            media_type=Representation.JPEG_MEDIA_TYPE,
        )
        image_with_thumbnail = LinkData(
            rel=Hyperlink.IMAGE,
            href="http://example.com/",
            thumbnail=thumbnail,
            media_type=Representation.JPEG_MEDIA_TYPE,
        )
        all_links = [
            download, plain_image, description, thumbnail,
            image_with_thumbnail,
        ]

        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        metadata = Metadata(
            data_source=DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            links=all_links,
        )

        # Order the surviving links by rel before comparing.
        kept = sorted(metadata.links, key=lambda link: link.rel)

        expected = [plain_image, image_with_thumbnail, thumbnail, description]
        eq_(expected, kept)
示例#9
0
    def test_rights_status_open_access_link_with_rights(self):
        """An open-access link carrying its own rights_uri passes that
        rights status on to the delivery mechanism.
        """
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        licensed_link = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.CC_BY_ND,
        )
        data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            links=[licensed_link],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = data.license_pool(self._db)
        data.apply(pool, policy)

        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        eq_(
            RightsStatus.CC_BY_ND,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#10
0
    def test_links_filtered(self):
        """CirculationData keeps only the open-access download link out
        of the links passed in.
        """
        download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
        plain_image = LinkData(rel=Hyperlink.IMAGE, href="http://example.com/")
        description = LinkData(rel=Hyperlink.DESCRIPTION, content="foo")
        thumbnail = LinkData(
            rel=Hyperlink.THUMBNAIL_IMAGE,
            href="http://thumbnail.com/",
            media_type=Representation.JPEG_MEDIA_TYPE,
        )
        image_with_thumbnail = LinkData(
            rel=Hyperlink.IMAGE,
            href="http://example.com/",
            thumbnail=thumbnail,
            media_type=Representation.JPEG_MEDIA_TYPE,
        )
        all_links = [
            download, plain_image, description, thumbnail,
            image_with_thumbnail,
        ]

        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        data = CirculationData(
            DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            links=all_links,
        )

        # Order the surviving links by rel before comparing.
        kept = sorted(data.links, key=lambda link: link.rel)

        eq_([download], kept)
示例#11
0
    def test_success(self):
        """Identifiers are linked through a shared permanent work ID only
        when the medium matches as well.
        """
        shared_pwid = 'pwid1'

        # A print book with the shared permanent work ID.
        print_edition = self._edition()
        print_edition.medium = Edition.BOOK_MEDIUM
        print_edition.permanent_work_id = shared_pwid

        # An audiobook with the very same permanent work ID.
        audio_edition = self._edition()
        audio_edition.medium = Edition.AUDIO_MEDIUM
        audio_edition.permanent_work_id = shared_pwid

        # Metadata describing a second print book, again with the same
        # permanent work ID.
        second_identifier = self._identifier()
        second_identifier_data = IdentifierData(
            type=second_identifier.type,
            identifier=second_identifier.identifier,
        )
        metadata = Metadata(
            DataSource.GUTENBERG,
            primary_identifier=second_identifier_data,
            medium=Edition.BOOK_MEDIUM,
        )
        metadata.permanent_work_id = shared_pwid

        # Exercise the method under test.
        metadata.associate_with_identifiers_based_on_permanent_work_id(
            self._db
        )

        # The second print book's identifier now points at the first
        # print book's identifier -- and not at the audiobook's.
        linked = [
            equivalency.output
            for equivalency in second_identifier.equivalencies
        ]
        eq_([print_edition.primary_identifier], linked)
示例#12
0
    def test_rights_status_commercial_link_with_rights(self):
        """A DRM-encrypted link and format marked IN_COPYRIGHT yield a
        non-open-access pool whose mechanism is IN_COPYRIGHT.
        """
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        drm_link = LinkData(
            rel=Hyperlink.DRM_ENCRYPTED_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        drm_format = FormatData(
            content_type=drm_link.media_type,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
            link=drm_link,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            links=[drm_link],
            formats=[drm_format],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = data.license_pool(self._db)
        data.apply(pool, policy)

        eq_(False, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))
        eq_(
            RightsStatus.IN_COPYRIGHT,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#13
0
    def test_circulationdata_can_be_deepcopied(self):
        """A fully populated CirculationData survives copy.deepcopy()."""
        # Guards against storing something uncopyable (e.g. self.log)
        # on the CirculationData.

        subject = SubjectData(Subject.TAG, "subject")
        contributor = ContributorData()
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
        epub_format = FormatData(
            Representation.EPUB_MEDIA_TYPE, DeliveryMechanism.NO_DRM
        )
        default_rights = RightsStatus.GENERIC_OPEN_ACCESS

        original = CirculationData(
            DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            links=[download],
            licenses_owned=5,
            licenses_available=5,
            licenses_reserved=None,
            patrons_in_hold_queue=None,
            formats=[epub_format],
            default_rights_uri=default_rights,
        )

        duplicate = deepcopy(original)

        # Reaching this point without an exception is the real check.
        assert duplicate is not None
示例#14
0
    def test_rights_status_open_access_link_no_rights(self):
        """An open-access link with no rights information results in a
        GENERIC_OPEN_ACCESS delivery mechanism.
        """
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "abcd")
        download = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            media_type=Representation.EPUB_MEDIA_TYPE,
            href=self._url,
        )
        data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            links=[download],
        )
        policy = ReplacementPolicy(formats=True)

        pool, unused = data.license_pool(self._db)
        data.apply(pool, policy)

        eq_(True, pool.open_access)
        eq_(1, len(pool.delivery_mechanisms))

        # There is an open-access link but no other rights info, so the
        # rights status is generic open access.
        eq_(
            RightsStatus.GENERIC_OPEN_ACCESS,
            pool.delivery_mechanisms[0].rights_status.uri,
        )
示例#15
0
    def test_license_pool_sets_default_license_values(self):
        """A LicensePool can be created without any licensing numbers.

        Nothing is known about how many copies were licensed, yet the
        pool is still created so that format information can be stored.
        """
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "1")
        pdf_with_drm = FormatData(
            content_type=Representation.PDF_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )
        data = CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            formats=[pdf_with_drm],
        )

        pool, created = data.license_pool(self._db)
        eq_(True, created)

        # The conservative starting assumption: no licenses are owned,
        # and every other count is zero too.
        eq_(0, pool.licenses_owned)
        eq_(0, pool.licenses_available)
        eq_(0, pool.licenses_reserved)
        eq_(0, pool.patrons_in_hold_queue)
示例#16
0
    def test_make_thumbnail_assigns_pool(self):
        """The thumbnail link created by Metadata.apply() is attached to
        the license pool of the Metadata's CirculationData.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        edition = self._edition(identifier_id=gutenberg_id.identifier)

        thumbnail = LinkData(
            rel=Hyperlink.THUMBNAIL_IMAGE,
            href="http://thumbnail.com/",
            media_type=Representation.JPEG_MEDIA_TYPE,
        )
        metadata = Metadata(
            data_source=edition.data_source,
            primary_identifier=gutenberg_id,
            links=[thumbnail],
        )
        circulation = CirculationData(
            data_source=edition.data_source,
            primary_identifier=gutenberg_id,
        )
        metadata.circulation = circulation

        metadata.apply(edition)
        stored_link = edition.primary_identifier.links[0]

        expected_pool, created = circulation.license_pool(self._db)
        eq_(stored_link.license_pool, expected_pool)
示例#17
0
    def test_availability_needs_update(self):
        """Check when CirculationData decides that a LicensePool's
        availability information has to be refreshed.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        now = datetime.datetime.utcnow()
        one_day_ago = now - datetime.timedelta(days=1)

        # With no last_checked given, it defaults to the current time.
        fresh = CirculationData(DataSource.GUTENBERG, gutenberg_id)
        assert (fresh.last_checked - now).total_seconds() < 10

        stale = CirculationData(
            DataSource.GUTENBERG, gutenberg_id, last_checked=one_day_ago
        )

        edition, pool = self._edition(with_license_pool=True)

        # A never-checked pool always needs an update.
        pool.last_checked = None
        eq_(True, fresh._availability_needs_update(pool))
        eq_(True, stale._availability_needs_update(pool))

        # A previously checked pool needs an update only when the new
        # information is at least as recent as what is already stored.
        pool.last_checked = now
        eq_(True, fresh._availability_needs_update(pool))
        eq_(False, stale._availability_needs_update(pool))
示例#18
0
    def test_has_open_access_link(self):
        """has_open_access_link depends on both the link's rel and its
        rights URI.
        """
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        data = CirculationData(DataSource.GUTENBERG, gutenberg_id)

        # Without any links there is nothing open-access.
        eq_(False, data.has_open_access_link)

        download = LinkData(
            rel=Hyperlink.OPEN_ACCESS_DOWNLOAD,
            href=self._url,
        )
        data.links = [download]

        # An open-access link with no explicit rights URI counts.
        eq_(True, data.has_open_access_link)

        # A rights URI that contradicts the rel disqualifies the link.
        download.rights_uri = RightsStatus.IN_COPYRIGHT
        eq_(False, data.has_open_access_link)

        # A rights URI consistent with the rel qualifies it again.
        download.rights_uri = RightsStatus.GENERIC_OPEN_ACCESS
        eq_(True, data.has_open_access_link)
示例#19
0
    def extract_availability(self, circulation_data, element, ns):
        """Copy availability counts from `element` onto a CirculationData.

        :param circulation_data: An existing CirculationData to fill in,
            or a falsy value to have a new one created for the title.
        :param element: Element containing 'axis:titleId' and
            'axis:availability' subtags.
        :param ns: Namespace mapping passed through to the tag helpers.
        :return: The CirculationData that was filled in.
        """
        title_id = self.text_of_subtag(element, 'axis:titleId', ns)
        primary_identifier = IdentifierData(Identifier.THETA_ID, title_id)

        if not circulation_data:
            circulation_data = CirculationData(
                data_source=DataSource.THETA,
                primary_identifier=primary_identifier,
            )

        availability = self._xpath1(element, 'axis:availability', ns)
        owned = self.int_of_subtag(availability, 'axis:totalCopies', ns)
        available = self.int_of_subtag(
            availability, 'axis:availableCopies', ns
        )
        hold_queue_size = self.int_of_subtag(
            availability, 'axis:holdsQueueSize', ns
        )

        updated = self.text_of_optional_subtag(
            availability, 'axis:updateDate', ns
        )
        if updated:
            # If the timestamp matches the format with an implicit UTC
            # zone, append an explicit offset so the full format below
            # can parse it.
            try:
                datetime.datetime.strptime(
                    updated, self.FULL_DATE_FORMAT_IMPLICIT_UTC
                )
            except ValueError:
                pass
            else:
                updated += ' +00:00'
            # The parsed value is not stored anywhere, but a timestamp
            # matching neither format still raises ValueError here.
            datetime.datetime.strptime(updated, self.FULL_DATE_FORMAT)

        circulation_data.licenses_owned = owned
        circulation_data.licenses_available = available
        circulation_data.licenses_reserved = 0
        circulation_data.patrons_in_hold_queue = hold_queue_size

        return circulation_data
示例#20
0
    def test_metadata_can_be_deepcopied(self):
        """A fully populated Metadata survives copy.deepcopy()."""
        # Guards against storing something uncopyable (e.g. self.log)
        # on the Metadata.

        subject = SubjectData(Subject.TAG, "subject")
        contributor = ContributorData()
        gutenberg_id = IdentifierData(Identifier.GUTENBERG_ID, "1")
        download = LinkData(Hyperlink.OPEN_ACCESS_DOWNLOAD, "example.epub")
        rating = MeasurementData(Measurement.RATING, 5)
        circulation = CirculationData(
            data_source=DataSource.GUTENBERG,
            primary_identifier=gutenberg_id,
            licenses_owned=0,
            licenses_available=0,
            licenses_reserved=0,
            patrons_in_hold_queue=0,
        )
        same_as_primary = IdentifierData(
            type=gutenberg_id.type,
            identifier=gutenberg_id.identifier,
        )
        unrelated = IdentifierData(type=u"abc", identifier=u"def")

        original = Metadata(
            DataSource.GUTENBERG,
            subjects=[subject],
            contributors=[contributor],
            primary_identifier=gutenberg_id,
            links=[download],
            measurements=[rating],
            circulation=circulation,
            title="Hello Title",
            subtitle="Subtle Hello",
            sort_title="Sorting Howdy",
            language="US English",
            medium=Edition.BOOK_MEDIUM,
            series="1",
            series_position=1,
            publisher="Hello World Publishing House",
            imprint=u"Follywood",
            issued=datetime.datetime.utcnow(),
            published=datetime.datetime.utcnow(),
            identifiers=[same_as_primary, unrelated],
            data_source_last_updated=datetime.datetime.utcnow(),
        )

        duplicate = deepcopy(original)

        # Reaching this point without an exception is the real check.
        assert duplicate is not None
示例#21
0
    def process_one(self, tag, namespaces):
        """Extract bibliographic fields from an <item> tag.

        The visible body reads the identifiers, subjects, title fields,
        contributors and publication date out of `tag`.

        NOTE(review): the original docstring says this turns the tag into
        a Metadata (with an encompassed CirculationData) and returns it,
        but the body as it stands ends after date parsing and returns
        nothing -- the remainder appears to be missing. Confirm against
        the full source before relying on the return value.

        :param tag: The <item> element to read subtags from.
        :param namespaces: Not used by the visible body.
        """

        def value(threem_key):
            # Text of the named subtag of `tag`, via the optional-subtag
            # helper.
            return self.text_of_optional_subtag(tag, threem_key)

        links = dict()

        primary_identifier = IdentifierData(
            Identifier.THREEM_ID, value("ItemId")
        )

        # Collect whichever ISBNs are present as secondary identifiers.
        identifiers = []
        for key in ('ISBN13', 'PhysicalISBN'):
            v = value(key)
            if v:
                identifiers.append(
                    IdentifierData(Identifier.ISBN, v)
                )

        subjects = self.parse_genre_string(value("Genre"))

        title = value("Title")
        subtitle = value("SubTitle")
        publisher = value("Publisher")
        language = value("Language")

        contributors = list(self.contributors_from_string(value('Authors')))

        # Prefer the full publication date; fall back to the bare year.
        published_date = None
        published = value("PubDate")
        if published:
            formats = [self.DATE_FORMAT, self.YEAR_FORMAT]
        else:
            published = value("PubYear")
            formats = [self.YEAR_FORMAT]

        # Only attempt parsing when there is a value to parse: strptime()
        # raises TypeError (not ValueError) when handed None.
        if published:
            for date_format in formats:
                try:
                    published_date = datetime.strptime(published, date_format)
                except ValueError:
                    # This format did not match; try the next one.
                    pass
示例#22
0
    def test_filter_recommendations(self):
        """filter_recommendations() drops identifiers it does not know
        and turns known IdentifierData into real Identifiers.
        """
        metadata = Metadata(DataSource.OVERDRIVE)
        known = self._identifier()
        unknown = IdentifierData(Identifier.ISBN, "hey there")

        # An unknown identifier does not survive filtering.
        metadata.recommendations += [known, unknown]
        metadata.filter_recommendations(self._db)
        eq_([known], metadata.recommendations)

        # The same holds when the known identifier arrives as
        # IdentifierData.
        known_as_data = IdentifierData(known.type, known.identifier)
        metadata.recommendations = [known_as_data, unknown]
        metadata.filter_recommendations(self._db)
        [survivor] = metadata.recommendations

        # The IdentifierData was swapped for a real Identifier ...
        eq_(True, isinstance(survivor, Identifier))
        # ... namely the one we started with.
        eq_(known, survivor)
示例#23
0
    def extract_bibliographic(self, element, ns):
        """Build a Metadata, with attached CirculationData, from `element`.

        :param element: Mapping providing the keys "isbn", "author", "id",
            "title" and "publisher".
        :param ns: Not used by this method.
        :return: Metadata whose `circulation` is a CirculationData for
            the same primary identifier.
        """
        identifiers = [IdentifierData(Identifier.ISBN, element["isbn"])]

        # An empty author field is recorded as "Unknown".
        author_sort_name = element["author"] or "Unknown"
        contributors = [ContributorData(sort_name=author_sort_name)]

        primary_identifier = IdentifierData(Identifier.ENKI_ID, element["id"])
        metadata = Metadata(
            data_source=DataSource.ENKI,
            title=element["title"],
            language="ENGLISH",
            medium=Edition.BOOK_MEDIUM,
            publisher=element["publisher"],
            primary_identifier=primary_identifier,
            identifiers=identifiers,
            contributors=contributors,
        )

        # TODO: Parse the content type and look it up in the Enki delivery
        # data instead. For now every title is assumed to be an ePub with
        # Adobe DRM, which is a safe assumption only for now.
        epub_with_adobe_drm = FormatData(
            content_type=Representation.EPUB_MEDIA_TYPE,
            drm_scheme=DeliveryMechanism.ADOBE_DRM,
        )

        metadata.circulation = CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=primary_identifier,
            formats=[epub_with_adobe_drm],
        )
        return metadata
示例#24
0
    def extract_availability(self, circulation_data, element, ns):
        """Fill in fixed availability numbers on a CirculationData.

        :param circulation_data: An existing CirculationData to fill in,
            or a falsy value to have a new one created from element["id"].
        :param element: Mapping providing the key "id".
        :param ns: Not used by this method.
        :return: The CirculationData that was filled in.
        """
        primary_identifier = IdentifierData(Identifier.ENKI_ID, element["id"])
        circulation_data = circulation_data or CirculationData(
            data_source=DataSource.ENKI,
            primary_identifier=primary_identifier,
        )

        # For now, assume each item has exactly one license and that it
        # is available.
        circulation_data.licenses_owned = 1
        circulation_data.licenses_available = 1
        circulation_data.licenses_reserved = 0
        circulation_data.patrons_in_hold_queue = 0

        return circulation_data
示例#25
0
    def test_circulationdata_may_require_collection(self):
        """Whether apply() can run without a Collection depends on what
        the CirculationData contains.
        """
        overdrive_id = IdentifierData(Identifier.OVERDRIVE_ID, "1")
        epub_format = FormatData(
            Representation.EPUB_MEDIA_TYPE,
            DeliveryMechanism.NO_DRM,
            rights_uri=RightsStatus.IN_COPYRIGHT,
        )
        circdata = CirculationData(
            DataSource.OVERDRIVE,
            primary_identifier=overdrive_id,
            formats=[epub_format],
        )
        circdata.apply(self._db, collection=None)

        # A LicensePoolDeliveryMechanism now exists for the title even
        # though no LicensePool does.
        identifier_obj, unused = overdrive_id.load(self._db)
        eq_([], identifier_obj.licensed_through)
        [lpdm] = identifier_obj.delivery_mechanisms
        eq_(DataSource.OVERDRIVE, lpdm.data_source.name)
        eq_(RightsStatus.IN_COPYRIGHT, lpdm.rights_status.uri)

        delivery = lpdm.delivery_mechanism
        eq_(Representation.EPUB_MEDIA_TYPE, delivery.content_type)
        eq_(DeliveryMechanism.NO_DRM, delivery.drm_scheme)

        # Information that can only live in a LicensePool makes the
        # Collection mandatory.
        circdata.licenses_owned = 0
        assert_raises_regexp(
            ValueError,
            'Cannot store circulation information because no Collection was provided.',
            circdata.apply,
            self._db,
            collection=None
        )
示例#26
0
    def book_info_to_circulation(cls, book):
        """Build a CirculationData from an Overdrive availability document.

        Note: the JSON passed in here comes from a different file/stream
        than the JSON that goes into book_info_to_metadata().

        :param book: Mapping with an 'id' key and, when the book is owned,
            a 'collections' list whose entries may carry 'copiesOwned',
            'copiesAvailable' and 'numberOfHolds'.
        :return: A CirculationData, or None when `book` has no 'id' key.
        """
        if 'id' not in book:
            return None
        primary_identifier = IdentifierData(
            Identifier.OVERDRIVE_ID, book['id']
        )

        # In Overdrive, 'reserved' books show up as books on hold; there
        # is no separate notion of reserved books.
        licenses_reserved = 0

        # Each count stays None unless at least one collection reports it.
        licenses_owned = None
        licenses_available = None
        patrons_in_hold_queue = None

        # Anything other than an explicit False means we own this book.
        if book.get('isOwnedByCollections') is not False:
            for collection in book['collections']:
                if 'copiesOwned' in collection:
                    licenses_owned = (
                        (licenses_owned or 0)
                        + int(collection['copiesOwned'])
                    )
                if 'copiesAvailable' in collection:
                    licenses_available = (
                        (licenses_available or 0)
                        + int(collection['copiesAvailable'])
                    )
                if 'numberOfHolds' in collection:
                    # Coerced with int() like the other counters, so a
                    # string-valued count does not raise TypeError.
                    patrons_in_hold_queue = (
                        (patrons_in_hold_queue or 0)
                        + int(collection['numberOfHolds'])
                    )

        return CirculationData(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
            licenses_owned=licenses_owned,
            licenses_available=licenses_available,
            licenses_reserved=licenses_reserved,
            patrons_in_hold_queue=patrons_in_hold_queue,
        )
示例#27
0
    def record_info_to_circulation(cls, availability):
        """Build a CirculationData from an Odilo availability document.

        Note: the JSON passed in here comes from a different file/stream
        than the JSON that goes into record_info_to_metadata().

        :param availability: Mapping with 'recordId', 'totalCopies' and
            'availableCopies', optionally 'notifiedHolds' and
            'holdsQueueSize'.
        :return: A CirculationData, or None when 'recordId' is missing.
        """
        if 'recordId' not in availability:
            return None

        primary_identifier = IdentifierData(
            Identifier.ODILO_ID, availability['recordId']
        )

        owned = int(availability['totalCopies'])
        available = int(availability['availableCopies'])

        # Reserved licenses: patrons who placed a hold earlier, have
        # reached the front of the queue and could borrow the book right
        # now if they wanted to.
        reserved = (
            int(availability['notifiedHolds'])
            if 'notifiedHolds' in availability else 0
        )

        # Hold queue: patrons currently waiting for a copy of the book.
        waiting = (
            int(availability['holdsQueueSize'])
            if 'holdsQueueSize' in availability else 0
        )

        return CirculationData(
            data_source=DataSource.ODILO,
            primary_identifier=primary_identifier,
            licenses_owned=owned,
            licenses_available=available,
            licenses_reserved=reserved,
            patrons_in_hold_queue=waiting,
        )
class TestBibliographicCoverageProvider(DatabaseTest):
    """Tests for the bibliographic coverage provider behavior, exercised
    through MockBibliographicCoverageProvider and
    MockFailureBibliographicCoverageProvider.
    """

    # Class-level fixtures shared by every test in this class. Because
    # they are shared, any test that mutates them must put them back the
    # way it found them.
    BIBLIOGRAPHIC_DATA = Metadata(
        DataSource.OVERDRIVE,
        publisher=u'Perfection Learning',
        language='eng',
        title=u'A Girl Named Disaster',
        published=datetime.datetime(1998, 3, 1, 0, 0),
        primary_identifier=IdentifierData(
            type=Identifier.OVERDRIVE_ID,
            identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
        identifiers=[
            IdentifierData(type=Identifier.OVERDRIVE_ID,
                           identifier=u'ba9b3419-b0bd-4ca7-a24f-26c4246b6b44'),
            IdentifierData(type=Identifier.ISBN, identifier=u'9781402550805')
        ],
        contributors=[
            ContributorData(sort_name=u"Nancy Farmer",
                            roles=[Contributor.PRIMARY_AUTHOR_ROLE])
        ],
        subjects=[
            SubjectData(type=Subject.TOPIC, identifier=u'Action & Adventure'),
            SubjectData(type=Subject.FREEFORM_AUDIENCE,
                        identifier=u'Young Adult'),
            SubjectData(type=Subject.PLACE, identifier=u'Africa')
        ],
    )

    CIRCULATION_DATA = CirculationData(
        DataSource.OVERDRIVE,
        primary_identifier=BIBLIOGRAPHIC_DATA.primary_identifier,
    )

    def test_edition(self):
        """edition() returns the existing Edition or creates a new one."""
        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool and none can be created.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns an Edition otherwise, creating it if necessary.
        edition, lp = self._edition(with_license_pool=True)
        identifier = edition.primary_identifier
        eq_(edition, provider.edition(identifier))

        # The Edition will be created if necessary.
        lp.identifier.primarily_identifies = []
        e2 = provider.edition(identifier)
        assert edition != e2
        assert isinstance(e2, Edition)

    def test_work(self):
        """work() returns a Work, or a CoverageFailure explaining why not."""
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        provider.CAN_CREATE_LICENSE_POOLS = False

        # Returns a CoverageFailure if the identifier doesn't have a
        # license pool.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Returns a CoverageFailure if there's no work available.
        edition, lp = self._edition(with_license_pool=True)
        # Remove edition so that the work won't be calculated
        lp.identifier.primarily_identifies = []
        result = provider.work(lp.identifier)
        assert isinstance(result, CoverageFailure)
        eq_("Work could not be calculated", result.exception)

        # Returns the work if it can be created or found.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.work(lp.identifier)
        eq_(result, lp.work)

    def test_set_metadata(self):
        """set_metadata_and_circulation_data() returns the identifier on
        success and a CoverageFailure on each failure mode.
        """
        provider = MockBibliographicCoverageProvider(self._db)
        provider.CAN_CREATE_LICENSE_POOLS = False
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)
        test_metadata = self.BIBLIOGRAPHIC_DATA
        test_circulationdata = self.CIRCULATION_DATA

        # If there is no LicensePool and it can't be autocreated, a
        # CoverageFailure results.
        result = provider.work(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        edition, lp = self._edition(data_source_name=DataSource.OVERDRIVE,
                                    identifier_type=Identifier.OVERDRIVE_ID,
                                    identifier_id=self.BIBLIOGRAPHIC_DATA.
                                    primary_identifier.identifier,
                                    with_license_pool=True)

        # If no metadata is passed in, a CoverageFailure results.
        result = provider.set_metadata_and_circulation_data(
            edition.primary_identifier, None, None)

        assert isinstance(result, CoverageFailure)
        eq_("Received neither metadata nor circulation data from input source",
            result.exception)

        # The steps below mutate the class-level BIBLIOGRAPHIC_DATA
        # fixture. Remember its original state and restore it no matter
        # how the test ends, so that nothing leaks into other tests.
        old_title = test_metadata.title
        old_primary_identifier = test_metadata.primary_identifier
        try:
            # If no work can be created (in this case, because there's no
            # title), a CoverageFailure results.
            edition.title = None
            test_metadata.title = None
            result = provider.set_metadata_and_circulation_data(
                edition.primary_identifier, test_metadata,
                test_circulationdata)
            assert isinstance(result, CoverageFailure)
            eq_("Work could not be calculated", result.exception)
            test_metadata.title = old_title

            # Test success
            result = provider.set_metadata_and_circulation_data(
                edition.primary_identifier, test_metadata,
                test_circulationdata)
            eq_(result, edition.primary_identifier)

            # If there's an exception setting the metadata, a
            # CoverageFailure results. This call raises a ValueError
            # because the primary identifier & the edition's primary
            # identifier don't match.
            test_metadata.primary_identifier = self._identifier(
                identifier_type=Identifier.OVERDRIVE_ID)
            result = provider.set_metadata_and_circulation_data(
                lp.identifier, test_metadata, test_circulationdata)
            assert isinstance(result, CoverageFailure)
            assert "ValueError" in result.exception
        finally:
            test_metadata.title = old_title
            test_metadata.primary_identifier = old_primary_identifier

    def test_autocreate_licensepool(self):
        """CAN_CREATE_LICENSE_POOLS controls whether license_pool() may
        create a LicensePool for an identifier.
        """
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # If this constant is set to False, the coverage provider cannot
        # autocreate LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = False
        eq_(None, provider.license_pool(identifier))

        # If it's set to True, the coverage provider can autocreate
        # LicensePools for identifiers.
        provider.CAN_CREATE_LICENSE_POOLS = True
        pool = provider.license_pool(identifier)
        eq_(pool.data_source, provider.output_source)
        eq_(pool.identifier, identifier)

    def test_set_presentation_ready(self):
        """set_presentation_ready() returns the identifier on success and
        a CoverageFailure when no license pool is available.
        """
        provider = MockBibliographicCoverageProvider(self._db)
        identifier = self._identifier(identifier_type=Identifier.OVERDRIVE_ID)

        # If the work can't be found, it can't be made presentation ready.
        provider.CAN_CREATE_LICENSE_POOLS = False
        result = provider.set_presentation_ready(identifier)
        assert isinstance(result, CoverageFailure)
        eq_("No license pool available", result.exception)

        # Test success.
        ed, lp = self._edition(with_license_pool=True)
        result = provider.set_presentation_ready(ed.primary_identifier)
        eq_(result, ed.primary_identifier)

    def test_process_batch_sets_work_presentation_ready(self):
        """process_batch() and ensure_coverage() both leave the work
        presentation-ready.
        """
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        eq_(result, identifier)
        eq_(True, work.presentation_ready)

        # ensure_coverage does the same thing.
        work.presentation_ready = False
        result = provider.ensure_coverage(identifier)
        assert isinstance(result, CoverageRecord)
        eq_(result.identifier, identifier)
        eq_(True, work.presentation_ready)

    def test_failure_does_not_set_work_presentation_ready(self):
        """A failing provider yields a CoverageFailure and leaves the work
        not presentation-ready.
        """
        work = self._work(with_license_pool=True,
                          with_open_access_download=True)
        identifier = work.license_pools[0].identifier
        work.presentation_ready = False
        provider = MockFailureBibliographicCoverageProvider(self._db)
        [result] = provider.process_batch([identifier])
        assert isinstance(result, CoverageFailure)
        eq_(False, work.presentation_ready)
示例#29
0
    def book_info_to_metadata(cls,
                              book,
                              include_bibliographic=True,
                              include_formats=True):
        """Convert Overdrive's JSON representation of a book into a
        Metadata object.

        Note: the JSON handed to this method comes from a different
        file/stream than the JSON consumed by book_info_to_circulation().

        :param book: mapping holding Overdrive's description of the book.
        :param include_bibliographic: when true, fill in title,
            contributors, subjects, identifiers, measurements and links;
            otherwise the Metadata only carries the data source and the
            primary identifier.
        :param include_formats: when true, attach a CirculationData listing
            the recognized formats as ``metadata.circulation``.
        :return: a Metadata object, or None if `book` has no 'id'.
        """
        if 'id' not in book:
            return None
        overdrive_id = book['id']
        primary_identifier = IdentifierData(Identifier.OVERDRIVE_ID,
                                            overdrive_id)

        # These two arguments are passed whether or not bibliographic
        # information is requested.
        metadata_args = dict(
            data_source=DataSource.OVERDRIVE,
            primary_identifier=primary_identifier,
        )

        if include_bibliographic:
            title = book.get('title', None)
            sort_title = book.get('sortTitle')
            subtitle = book.get('subtitle', None)
            series = book.get('series', None)
            publisher = book.get('publisher', None)
            imprint = book.get('imprint', None)

            published = None
            if 'publishDate' in book:
                # Only the first ten characters of the value are parsed.
                published = datetime.datetime.strptime(
                    book['publishDate'][:10], cls.DATE_FORMAT)

            # Prefer English; otherwise take the alphabetically first code.
            language_codes = [
                entry['code'] for entry in book.get('languages', [])
            ]
            if not language_codes or 'eng' in language_codes:
                language = 'eng'
            else:
                language = sorted(language_codes)[0]

            contributors = [
                ContributorData(
                    sort_name=creator['fileAs'],
                    display_name=creator['name'],
                    roles=(cls.parse_roles(overdrive_id, creator['role'])
                           or [Contributor.UNKNOWN_ROLE]),
                    biography=creator.get('bioText', None),
                )
                for creator in book.get('creators', [])
            ]

            subjects = [
                SubjectData(type=Subject.OVERDRIVE,
                            identifier=entry['value'],
                            weight=100)
                for entry in book.get('subjects', [])
            ]
            subjects.extend(
                SubjectData(type=Subject.TAG,
                            identifier=entry['value'],
                            weight=1)
                for entry in book.get('keywords', []))

            if 'grade_levels' in book:
                # n.b. Grade levels measure reading level rather than age
                # appropriateness. They can stand in for age
                # appropriateness in a pinch, but they are weighted less
                # heavily than other information from Overdrive.
                subjects.extend(
                    SubjectData(type=Subject.GRADE_LEVEL,
                                identifier=entry['value'],
                                weight=10)
                    for entry in book['grade_levels'])

            overdrive_medium = book.get('mediaType', None)
            if (overdrive_medium and overdrive_medium
                    not in cls.overdrive_medium_to_simplified_medium):
                cls.log.error("Could not process medium %s for %s",
                              overdrive_medium, overdrive_id)

            # Anything unrecognized (or absent) is treated as a book.
            medium = cls.overdrive_medium_to_simplified_medium.get(
                overdrive_medium, Edition.BOOK_MEDIUM)

            measurements = []
            if 'awards' in book:
                award_count = len(book.get('awards', []))
                measurements.append(
                    MeasurementData(Measurement.AWARDS, str(award_count)))

            score_subject_types = (
                ('ATOS', Subject.ATOS_SCORE),
                ('lexileScore', Subject.LEXILE_SCORE),
                ('interestLevel', Subject.INTEREST_LEVEL),
            )
            for key, subject_type in score_subject_types:
                if key in book:
                    subjects.append(
                        SubjectData(type=subject_type,
                                    identifier=str(book[key]),
                                    weight=100))

            subjects.extend(
                SubjectData(type=Subject.GRADE_LEVEL,
                            identifier=info.get('value'),
                            weight=100)
                for info in book.get('gradeLevels', []))

            identifiers = []
            links = []
            for fmt in book.get('formats', []):
                for id_info in fmt.get('identifiers', []):
                    id_type = id_info['type']
                    id_value = id_info['value']
                    if id_type == 'ISBN':
                        raw_isbn = id_value
                        if len(id_value) == 10:
                            id_value = isbnlib.to_isbn13(id_value)
                        if id_value is None or not isbnlib.is_isbn13(id_value):
                            # Overdrive sometimes uses invalid values
                            # like "n/a" as placeholders. Skip them so
                            # that hundreds of books don't appear to share
                            # one ISBN. ISBNs that fail the check digit or
                            # are otherwise invalid also occur. Log them
                            # for review.
                            cls.log.info("Bad ISBN value provided: %s",
                                         raw_isbn)
                            continue
                        type_key = Identifier.ISBN
                    elif id_type == 'ASIN':
                        type_key = Identifier.ASIN
                    elif id_type == 'DOI':
                        type_key = Identifier.DOI
                    elif id_type == 'UPC':
                        type_key = Identifier.UPC
                    else:
                        # 'PublisherCatalogNumber' and any other type we
                        # don't recognize contribute no identifier.
                        continue
                    if type_key and id_value:
                        identifiers.append(
                            IdentifierData(type_key, id_value, 1))

                # Samples become links.
                if 'samples' not in fmt:
                    continue
                if fmt['id'] not in cls.format_data_for_overdrive_format:
                    # Useless to us.
                    continue
                content_type, _drm_scheme = (
                    cls.format_data_for_overdrive_format.get(fmt['id']))
                if Representation.is_media_type(content_type):
                    for sample in fmt['samples']:
                        links.append(
                            LinkData(rel=Hyperlink.SAMPLE,
                                     href=sample['url'],
                                     media_type=content_type))

            # A cover and its thumbnail become a single LinkData.
            if 'images' in book:
                images = book['images']
                image_data = cls.image_link_to_linkdata(
                    images.get('cover'), Hyperlink.IMAGE)
                for size_name in ('cover300Wide', 'cover150Wide',
                                  'thumbnail'):
                    # Walk the candidates in preference order, looking
                    # for a thumbnail as close as possible to the size we
                    # use.
                    candidate = images.get(size_name)
                    thumbnail_data = cls.image_link_to_linkdata(
                        candidate, Hyperlink.THUMBNAIL_IMAGE)
                    if not image_data:
                        # No full-size cover yet; let this candidate
                        # stand in for it.
                        image_data = cls.image_link_to_linkdata(
                            candidate, Hyperlink.IMAGE)
                    if thumbnail_data:
                        break

                if image_data:
                    if thumbnail_data:
                        image_data.thumbnail = thumbnail_data
                    links.append(image_data)

            # Descriptions become links.
            short = book.get('shortDescription')
            full = book.get('fullDescription')
            if full:
                links.append(
                    LinkData(
                        rel=Hyperlink.DESCRIPTION,
                        content=full,
                        media_type="text/html",
                    ))

            # The short description is only kept when it isn't simply the
            # beginning of the full description.
            if short and not (full and full.startswith(short)):
                links.append(
                    LinkData(
                        rel=Hyperlink.SHORT_DESCRIPTION,
                        content=short,
                        media_type="text/html",
                    ))

            # Add measurements: rating and popularity
            star_rating = book.get('starRating')
            if star_rating is not None and star_rating > 0:
                measurements.append(
                    MeasurementData(quantity_measured=Measurement.RATING,
                                    value=star_rating))

            popularity = book.get('popularity')
            if popularity:
                measurements.append(
                    MeasurementData(quantity_measured=Measurement.POPULARITY,
                                    value=popularity))

            metadata_args.update(
                title=title,
                subtitle=subtitle,
                sort_title=sort_title,
                language=language,
                medium=medium,
                series=series,
                publisher=publisher,
                imprint=imprint,
                published=published,
                identifiers=identifiers,
                subjects=subjects,
                contributors=contributors,
                measurements=measurements,
                links=links,
            )

        metadata = Metadata(**metadata_args)

        if include_formats:
            formats = []
            for fmt in book.get('formats', []):
                format_id = fmt['id']
                if format_id not in cls.format_data_for_overdrive_format:
                    if format_id not in cls.ignorable_overdrive_formats:
                        cls.log.error(
                            "Could not process Overdrive format %s for %s",
                            format_id, overdrive_id)
                    continue
                content_type, drm_scheme = (
                    cls.format_data_for_overdrive_format.get(format_id))
                formats.append(FormatData(content_type, drm_scheme))

            # Also make a CirculationData so we can write the formats.
            metadata.circulation = CirculationData(
                data_source=DataSource.OVERDRIVE,
                primary_identifier=primary_identifier,
                formats=formats,
            )

        return metadata
示例#30
0
 def test_constructor(self):
     """IdentifierData exposes its three positional constructor arguments
     as the `type`, `identifier` and `weight` attributes.
     """
     data = IdentifierData(Identifier.ISBN, "foo", 0.5)
     eq_(Identifier.ISBN, data.type)
     eq_("foo", data.identifier)
     eq_(0.5, data.weight)